# Import Libraries

In [None]:
import tensorflow as tf
import numpy as np
import pandas as pd
import librosa
import os
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm import tqdm
from datetime import datetime
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Activation, Dropout, Conv2D, MaxPool2D, Flatten
from tensorflow.keras.utils import to_categorical

# Load Data

In [None]:
# Absolute, machine-specific locations of the UrbanSound8K audio folders and
# of the metadata CSV; both paths must be adjusted to run on another machine.
audio_files_path = "/media/alper/B00A1E730A1E36B6/Datasets/urbansound8k/audio/"
df = pd.read_csv("/media/alper/B00A1E730A1E36B6/Datasets/urbansound8k/UrbanSound8K.csv")
# Preview the first metadata rows (later cells read the "fold",
# "slice_file_name" and "class" columns).
df.head()

In [None]:
def _extract_mfcc_features(file_path, n_mfcc=40):
    """Return the time-averaged MFCC vector of one audio file.

    The clip is loaded with librosa (``res_type="kaiser_fast"``), its MFCCs
    are computed, and the result is averaged across frames so that clips of
    any duration produce a vector with one value per MFCC coefficient.

    Args:
        file_path: Path of the audio file to load.
        n_mfcc: Number of MFCC coefficients to compute.

    Returns:
        1-D NumPy array holding the mean of each coefficient over time.
    """
    audio, sample_rate = librosa.load(file_path, res_type="kaiser_fast")
    mfccs = librosa.feature.mfcc(y=audio, sr=sample_rate, n_mfcc=n_mfcc)
    # Transpose so frames run along axis 0, then average them away.
    return np.mean(mfccs.T, axis=0)


# Parallel lists: data[i] is the feature vector of the clip labelled labels[i].
data = []
labels = []

# iterrows() is a generator without a length, so tqdm is given the row count
# explicitly; otherwise the progress bar cannot show a percentage or an ETA.
for _, row in tqdm(df.iterrows(), total=len(df)):
    # Clips are stored as <audio_files_path>/fold<N>/<slice_file_name>.
    file_name = os.path.join(
        os.path.abspath(audio_files_path),
        f"fold{row['fold']}",
        str(row["slice_file_name"]),
    )
    data.append(_extract_mfcc_features(file_name))
    labels.append(row["class"])

In [None]:
# One row per clip: "features" holds the averaged MFCC vector computed for the
# clip and "class" the label read from the metadata CSV.
feature_df = pd.DataFrame({"features": data, "class": labels})
feature_df.head()

In [None]:
def label_encoder(column):
    """Encode the values of *column* as integer class ids.

    A fresh ``LabelEncoder`` is fitted on the column, the column's name and
    the classes learned by the encoder are printed, and the encoded values
    are returned. The fitted encoder itself is not kept.

    Args:
        column: Labelled column to encode; must expose a ``name`` attribute
            (e.g. a pandas Series).

    Returns:
        The encoded labels produced by ``LabelEncoder.transform``.
    """
    encoder = LabelEncoder()
    encoder.fit(column)
    # Show the id -> class mapping: position i in classes_ is encoded as i.
    print(column.name, encoder.classes_)
    encoded = encoder.transform(column)
    return encoded

In [None]:
# Overwrite the class column with its integer encoding; label_encoder prints
# the learned classes so the id -> name mapping can be read off the output.
feature_df["class"] = label_encoder(feature_df["class"])

In [None]:
# Preview the frame again to confirm the class column now holds encoded ids.
feature_df.head()

In [None]:
# Convert the DataFrame columns to NumPy arrays for Keras: X stacks the
# per-clip feature vectors (assumes they all have the same length) and y holds
# the encoded class ids.
X = np.array(feature_df["features"].tolist())
y = np.array(feature_df["class"].tolist())

In [None]:
# Hold out 20% of the samples as a test split; random_state pins the shuffle
# so the split is reproducible.
# NOTE(review): this is a random split across all folds. The UrbanSound8K
# documentation reportedly recommends evaluating on the predefined folds to
# avoid leakage between slices of the same recording -- confirm before
# comparing the accuracy with published results.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [None]:
# One-hot encode the integer class ids for the categorical cross-entropy loss.
# The width is fixed from the full label vector `y`: without num_classes,
# to_categorical infers it from the largest id present in each call, so a
# split that happens to miss the highest class would get fewer columns than
# the other split and than the model's output layer.
num_classes = int(np.max(y)) + 1
y_train = to_categorical(y_train, num_classes=num_classes)
y_test = to_categorical(y_test, num_classes=num_classes)

In [None]:
# Count the distinct class ids; this sizes the model's output layer.
distinct_classes = feature_df["class"].unique()
num_labels = len(distinct_classes)
num_labels

In [None]:
# Every clip is described by a feature vector of the same form, so the shape
# of the first row's vector is used as the model's input shape.
first_feature_vector = feature_df["features"][0]
input_shape = first_feature_vector.shape
input_shape

In [None]:
# Fully connected classifier: three ReLU hidden layers (128 -> 256 -> 128
# units), each followed by 50% dropout, then a softmax layer with one unit
# per class.
model = Sequential([
    Dense(128, input_shape=input_shape),
    Activation(activation="relu"),
    Dropout(0.5),
    Dense(256),
    Activation(activation="relu"),
    Dropout(0.5),
    Dense(128),
    Activation(activation="relu"),
    Dropout(0.5),
    Dense(num_labels),
    Activation(activation="softmax"),
])

In [None]:
# Categorical cross-entropy pairs with the one-hot targets produced by
# to_categorical; accuracy is tracked as the reported metric.
model.compile(loss="categorical_crossentropy", optimizer="adam", metrics=["accuracy"])

In [None]:
# Print the layer-by-layer architecture of the compiled model.
model.summary()

In [None]:
# Train for 100 epochs in mini-batches of 32; `history` keeps the per-epoch
# metrics that the plotting cells read.
# NOTE(review): the test split is also passed as validation data, so the
# validation curves and the later model.evaluate(X_test, y_test) call measure
# the same samples.
history = model.fit(X_train, y_train, validation_data=(X_test, y_test), batch_size=32, epochs=100)

In [None]:
# Persist the trained model to "urban.h5" (relative path, so it lands in the
# current working directory); the .h5 suffix selects Keras' HDF5 format.
model.save("urban.h5")

In [None]:
# Score the trained model on the held-out split. With the loss and the single
# "accuracy" metric configured at compile time, evaluate() yields two values.
evaluation = model.evaluate(X_test, y_test)
test_loss, test_acc = evaluation
for caption, value in (("Test Loss:", test_loss), ("Test Accuracy:", test_acc)):
    print(caption, value)

In [None]:
# Plot training vs. validation accuracy per epoch from the fit history.
plt.figure()
plt.title("Model Accuracy")
for history_key, curve_label in (("accuracy", "train"), ("val_accuracy", "validation")):
    plt.plot(history.history[history_key], label=curve_label)
plt.legend()
plt.show()

In [None]:
# Plot training vs. validation loss per epoch from the fit history.
plt.figure()
plt.title("Model Loss")
for history_key, curve_label in (("loss", "train"), ("val_loss", "validation")):
    plt.plot(history.history[history_key], label=curve_label)
plt.legend()
plt.show()

In [None]:
# Prepare a single clip for inference using the same recipe as the training
# features: load, compute 40 MFCCs, average over frames.
filename = "/media/alper/B00A1E730A1E36B6/Datasets/urbansound8k/dog.wav"
sound_signal, sample_rate = librosa.load(filename, res_type="kaiser_fast")
mfcc_features = librosa.feature.mfcc(y=sound_signal, sr=sample_rate, n_mfcc=40)
# The model expects a batch, so prepend a batch axis of size 1 to the
# averaged feature vector.
mfccs_features_scaled = np.mean(mfcc_features.T, axis=0)[np.newaxis, :]

In [None]:
# Run the classifier on the single prepared clip; because the model ends in a
# softmax layer, each row of result_array holds one score per class.
result_array = model.predict(mfccs_features_scaled)

In [None]:
# Class names indexed by the integer id the model predicts. The order has to
# match the ids assigned by label_encoder; the list is alphabetical.
# NOTE(review): presumably this mirrors LabelEncoder's sorted classes_ --
# verify against the classes printed when label_encoder ran.
result_classes = ['air_conditioner', 'car_horn', 'children_playing', 'dog_bark', 'drilling',
                  'engine_idling', 'gun_shot', 'jackhammer', 'siren', 'street_music']

In [None]:
# Index of the highest-scoring class for the first (and only) clip in the batch.
clip_scores = result_array[0]
result = clip_scores.argmax()

In [None]:
# Display the human-readable name of the predicted class.
result_classes[result]