In [None]:
# Basic Libraries

import pandas as pd
import numpy as np

pd.plotting.register_matplotlib_converters()
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.model_selection import GridSearchCV

from sklearn.preprocessing import MinMaxScaler

In [None]:
# Libraries for Classification and building Models

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, Flatten, Dense, MaxPool2D, Dropout
from tensorflow.keras.utils import to_categorical 

from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC

In [None]:
# Project Specific Libraries

import os
import librosa
import librosa.display
import glob 
import skimage

In [None]:
# Load the UrbanSound8K metadata table. The class names/IDs and the fold and
# file name of every clip are read from this frame by the cells below.
df = pd.read_csv("../input/urbansound8k/UrbanSound8K.csv")

# Preview the first rows as the cell output.
df.head()

In [None]:
# Metadata columns needed to locate and label a clip, as NumPy arrays.
fold, fname, cname = (
    df[column].to_numpy() for column in ('fold', 'slice_file_name', 'class')
)

# Row numbers of the two example clips inspected in the next cells.
i, j = 303, 152

# Each clip is stored as <dataset root>/fold<fold number>/<slice file name>.
path1, path2 = (
    '../input/urbansound8k/fold' + str(fold[row]) + '/' + fname[row]
    for row in (i, j)
)

# librosa.load returns the pair (samples, sampling rate) for each file.
(dat1, sampling_rate1), (dat2, sampling_rate2) = map(librosa.load, (path1, path2))

In [None]:
# Log-frequency spectrogram of the first example clip (row i).
fig, ax = plt.subplots(figsize=(7, 2))

# The STFT is complex-valued; np.abs keeps only the magnitude of each bin.
D = np.abs(librosa.stft(dat1))
# ref=np.max expresses every bin in dB relative to the loudest bin of this clip,
# so the peak sits at 0 dB and everything else is negative.
S_db = librosa.amplitude_to_db(D, ref=np.max)

img = librosa.display.specshow(S_db, x_axis='time', y_axis='log', ax=ax)
fig.colorbar(img, ax=ax, format='%+2.0f dB')
ax.set_title(f'Power spectrogram: {cname[i]}')

In [None]:
# Log-frequency spectrogram of the second example clip (row j).
fig, ax = plt.subplots(figsize=(7, 2))

# Magnitude of the complex STFT, converted to dB relative to this clip's peak.
D = np.abs(librosa.stft(dat2))
S_db = librosa.amplitude_to_db(D, ref=np.max)

img = librosa.display.specshow(S_db, x_axis='time', y_axis='log', ax=ax)
fig.colorbar(img, ax=ax, format='%+2.0f dB')
ax.set_title(f'Power spectrogram: {cname[j]}')

In [None]:
def parser(data, base_dir='../input/urbansound8k', n_mfcc=64):
    """Extract one averaged MFCC vector and the class ID for every clip in ``data``.

    Only the ``data`` argument is read: rows are walked positionally, so the
    frame may be filtered, shuffled or carry a non-default index.

    Parameters
    ----------
    data : pandas.DataFrame
        Metadata rows providing the columns ``fold``, ``slice_file_name``
        and ``classID``.
    base_dir : str, optional
        Directory that contains the ``fold<N>`` sub-directories. Defaults to
        the dataset location used throughout this notebook.
    n_mfcc : int, optional
        Number of MFCC coefficients requested from librosa (default 64).

    Returns
    -------
    tuple[list, list]
        ``(features, label)``. ``features[k]`` is the MFCC matrix of clip ``k``
        averaged along its second axis (one value per coefficient) and
        ``label[k]`` is that clip's ``classID``. Both lists are empty when
        ``data`` has no rows.
    """
    features = []
    label = []
    # zip over the columns iterates by position, independent of the index labels.
    rows = zip(data['fold'], data['slice_file_name'], data['classID'])
    for fold_id, file_name, class_id in rows:
        clip_path = base_dir + '/fold' + str(fold_id) + '/' + file_name
        # 'kaiser_fast' is forwarded to librosa.load as the resampling mode.
        samples, sample_rate = librosa.load(clip_path, res_type='kaiser_fast')
        mfccs = librosa.feature.mfcc(y=samples, sr=sample_rate, n_mfcc=n_mfcc)
        # Collapse the matrix to a single fixed-length vector per clip.
        features.append(np.mean(mfccs.T, axis=0))
        label.append(class_id)
    return features, label

In [None]:
# Run feature extraction over the whole metadata table. parser loads every
# listed audio file, so this is the slow cell of the notebook.
temp = parser(df)
mfcc_vectors, class_ids = temp

# Stack the per-clip vectors into one array and one-hot encode the class IDs.
data = np.array(mfcc_vectors)
label = to_categorical(np.array(class_ids))

# Report both shapes, one per line.
print(data.shape, label.shape, sep='\n')

In [None]:
# Hold out 20% of the clips for testing; the fixed seed makes the split repeatable.
# NOTE(review): this is a random split over all clips and ignores the `fold`
# column of the metadata. The dataset's documentation reportedly recommends
# evaluating on the predefined folds instead - confirm a random split is intended.
X_train, X_test, Y_train, Y_test = train_test_split(
    data,
    label,
    random_state=42,
    test_size=0.2,
)

In [None]:
# Fold every feature vector into a 16 x 4 single-channel grid for the Conv2D
# layers (16 * 4 = 64 values per sample, matching n_mfcc=64 used in parser).
cnn_sample_shape = (16, 4, 1)
X_train = X_train.reshape((len(X_train),) + cnn_sample_shape)
X_test = X_test.reshape((len(X_test),) + cnn_sample_shape)

In [None]:
# Shape of one sample as seen by the first Conv2D layer: rows, columns, channels.
# Mirrors the per-sample shape the feature arrays are reshaped to.
grid_rows, grid_cols, grid_channels = 16, 4, 1
input_dim = (grid_rows, grid_cols, grid_channels)

In [None]:
# Small CNN: two tanh conv/pool stages, light dropout, one wide dense layer and
# a 10-way softmax output (one unit per class).
model = Sequential([
    Conv2D(64, (3, 3), padding = "same", activation = "tanh", input_shape = input_dim),
    MaxPool2D(pool_size=(2, 2)),
    Conv2D(128, (3, 3), padding = "same", activation = "tanh"),
    MaxPool2D(pool_size=(2, 2)),
    Dropout(0.1),
    Flatten(),
    Dense(1024, activation = "tanh"),
    Dense(10, activation = "softmax"),
])

In [None]:
# Categorical cross-entropy pairs with the one-hot labels and softmax output;
# accuracy is tracked as the reported metric.
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [None]:
# Train for 90 epochs in mini-batches of 50 samples.
# NOTE(review): the test split is also passed as validation data, so the score
# computed afterwards is on data that was monitored during training - confirm
# this is intended.
model.fit(
    x=X_train,
    y=Y_train,
    batch_size=50,
    epochs=90,
    validation_data=(X_test, Y_test),
)

In [None]:
# Per-class probabilities for every held-out sample.
predictions = model.predict(x=X_test)

# evaluate returns the loss followed by the compiled metrics (accuracy here).
score = model.evaluate(x=X_test, y=Y_test)
print(score)

In [None]:
# Predicted class per sample = column index of the highest softmax output.
preds = predictions.argmax(axis=1)

In [None]:
# Persist the predicted class indices (one row per test sample) as a CSV file.
result = pd.DataFrame(data=preds)
result.to_csv(path_or_buf="UrbanSound8kResults.csv")