In [1]:

import keras
from keras.layers import Activation, Dense, Dropout, Conv2D, \
                         Flatten, MaxPooling2D
from keras.models import Sequential
import librosa
import pandas as pd
import numpy as np
import librosa.display
import random

import warnings
warnings.filterwarnings('ignore')

In [None]:
# Mount Google Drive into the Colab filesystem; the dataset paths used in the
# following cells all live under /content/drive.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# Read the UrbanSound8K metadata table and preview its first five rows.
metadata_csv = '/content/drive/MyDrive/Colab Notebooks/UrbanSound8K/metadata/UrbanSound8K.csv'
data = pd.read_csv(metadata_csv)
data.head()


In [None]:
# (rows, columns) of the full metadata table, before any filtering.
data.shape

In [None]:
# Keep only clips whose annotated length (end - start) is at least 2.99 s --
# nominally the 3-second slices; the 2.99 threshold is presumably a tolerance
# for rounding in the metadata.
# .loc selects rows and columns in a single step and .copy() yields an
# independent frame, so columns added to valid_data later are not written into
# a slice of `data` (pandas' chained-indexing / SettingWithCopy hazard).
is_long_enough = (data['end'] - data['start']) >= 2.99
valid_data = data.loc[
    is_long_enough,
    ['slice_file_name', 'fold', 'classID', 'class'],
].copy()
valid_data.shape

In [None]:
# Sample clip: decode at most the first 2.95 s and compute its mel spectrogram.
sample_wav = '/content/drive/MyDrive/Colab Notebooks/UrbanSound8K/audio/fold6/101281-3-0-5.wav'
y, sr = librosa.load(sample_wav, duration=2.95)
ps = librosa.feature.melspectrogram(y=y, sr=sr)
# Shown as the cell output so the spectrogram dimensions can be checked.
ps.shape

In [None]:
# Convert mel power to decibels before plotting: on a linear colour scale a few
# loud bins dominate and most of the image renders as a single colour.
# sr is passed so the axes are labelled for the rate the clip was loaded at.
ps_db = librosa.power_to_db(ps, ref=np.max)
librosa.display.specshow(ps_db, sr=sr, y_axis='mel', x_axis='time')

In [None]:
# Second sample clip: decode at most the first 2.95 s and compute its mel spectrogram.
sample_wav = '/content/drive/MyDrive/Colab Notebooks/UrbanSound8K/audio/fold5/100263-2-0-143.wav'
y, sr = librosa.load(sample_wav, duration=2.95)
ps = librosa.feature.melspectrogram(y=y, sr=sr)
# Shown as the cell output so the spectrogram dimensions can be checked.
ps.shape

In [None]:
# Convert mel power to decibels before plotting: on a linear colour scale a few
# loud bins dominate and most of the image renders as a single colour.
# sr is passed so the axes are labelled for the rate the clip was loaded at.
ps_db = librosa.power_to_db(ps, ref=np.max)
librosa.display.specshow(ps_db, sr=sr, y_axis='mel', x_axis='time')

In [None]:
# Third sample clip: decode at most the first 2.95 s and compute its mel spectrogram.
sample_wav = '/content/drive/MyDrive/Colab Notebooks/UrbanSound8K/audio/fold5/104421-2-0-1.wav'
y, sr = librosa.load(sample_wav, duration=2.95)
ps = librosa.feature.melspectrogram(y=y, sr=sr)
# Shown as the cell output so the spectrogram dimensions can be checked.
ps.shape

In [None]:
# Convert mel power to decibels before plotting: on a linear colour scale a few
# loud bins dominate and most of the image renders as a single colour.
# sr is passed so the axes are labelled for the rate the clip was loaded at.
ps_db = librosa.power_to_db(ps, ref=np.max)
librosa.display.specshow(ps_db, sr=sr, y_axis='mel', x_axis='time')

In [None]:
# Fourth sample clip: decode at most the first 2.95 s and compute its mel spectrogram.
sample_wav = '/content/drive/MyDrive/Colab Notebooks/UrbanSound8K/audio/fold10/100795-3-1-0.wav'
y, sr = librosa.load(sample_wav, duration=2.95)
ps = librosa.feature.melspectrogram(y=y, sr=sr)
# Shown as the cell output so the spectrogram dimensions can be checked.
ps.shape

In [None]:
# Convert mel power to decibels before plotting: on a linear colour scale a few
# loud bins dominate and most of the image renders as a single colour.
# sr is passed so the axes are labelled for the rate the clip was loaded at.
ps_db = librosa.power_to_db(ps, ref=np.max)
librosa.display.specshow(ps_db, sr=sr, y_axis='mel', x_axis='time')

In [None]:
# Fifth sample clip: decode at most the first 2.95 s and compute its mel spectrogram.
sample_wav = '/content/drive/MyDrive/Colab Notebooks/UrbanSound8K/audio/fold5/100263-2-0-117.wav'
y, sr = librosa.load(sample_wav, duration=2.95)
ps = librosa.feature.melspectrogram(y=y, sr=sr)
# Shown as the cell output so the spectrogram dimensions can be checked.
ps.shape

In [None]:
# Convert mel power to decibels before plotting: on a linear colour scale a few
# loud bins dominate and most of the image renders as a single colour.
# sr is passed so the axes are labelled for the rate the clip was loaded at.
ps_db = librosa.power_to_db(ps, ref=np.max)
librosa.display.specshow(ps_db, sr=sr, y_axis='mel', x_axis='time')

In [None]:
# Build each clip's path relative to the audio root: 'fold<N>/<slice_file_name>'.
# assign() returns a new frame instead of writing a column into a filtered
# slice of `data`, which avoids pandas' chained-assignment (SettingWithCopy)
# hazard and keeps this cell safe to re-run.
valid_data = valid_data.assign(
    path='fold' + valid_data['fold'].astype('str') + '/' + valid_data['slice_file_name'].astype('str')
)
valid_data.head(5)

In [None]:
# Iterate over all filtered clips and collect one (mel spectrogram, classID)
# pair for every clip whose spectrogram is exactly 128x128.
AUDIO_ROOT = '/content/drive/MyDrive/Colab Notebooks/UrbanSound8K/audio/'
D = []  # Dataset: list of (spectrogram, label) tuples

for clip in valid_data.itertuples():
    # Decode at most the first 2.97 s of the clip.
    y, sr = librosa.load(AUDIO_ROOT + clip.path, duration=2.97)
    ps = librosa.feature.melspectrogram(y=y, sr=sr)
    # Spectrograms of any other shape are dropped without being recorded.
    if ps.shape == (128, 128):
        D.append((ps, clip.classID))


In [None]:
# Report how many (spectrogram, label) pairs were collected in D.
n_samples = len(D)
print("Number of samples: ", n_samples)

In [None]:
# Split the collected samples into train and test sets and shape them for the CNN.
#
# NOTE(review): a random split can put slices cut from the same source
# recording into both sets, which inflates test accuracy; the UrbanSound8K
# authors recommend evaluating on the predefined folds -- consider splitting
# by the `fold` column instead.
SPLIT_SEED = 42   # fixed seed so the split is reproducible across runs
N_TRAIN = 7000    # number of samples used for training; the rest are the test set
N_CLASSES = 10    # width of the one-hot label vectors

# Shuffle a copy with a private RNG: D itself is left untouched and re-running
# this cell always yields the same split.
dataset = list(D)
random.Random(SPLIT_SEED).shuffle(dataset)

if len(dataset) <= N_TRAIN:
    # Without this check an empty test split fails below with an obscure
    # "not enough values to unpack" error from zip(*test).
    raise ValueError(
        "Need more than %d samples to leave a non-empty test set, got %d"
        % (N_TRAIN, len(dataset))
    )

train = dataset[:N_TRAIN]
test = dataset[N_TRAIN:]

X_train, y_train = zip(*train)
X_test, y_test = zip(*test)

# Reshape for CNN input: add a trailing single-channel axis to each spectrogram.
X_train = np.array([x.reshape((128, 128, 1)) for x in X_train])
X_test = np.array([x.reshape((128, 128, 1)) for x in X_test])

# One-hot encoding for classes
y_train = np.array(keras.utils.to_categorical(np.asarray(y_train), N_CLASSES))
y_test = np.array(keras.utils.to_categorical(np.asarray(y_test), N_CLASSES))

In [None]:
# CNN classifier: three 5x5 convolution stages followed by a 64-unit dense
# layer and a 10-way softmax. Layers are listed in the order they are applied.
input_shape = (128, 128, 1)

model = Sequential([
    # Stage 1: 24 filters, then 4x2 max-pooling.
    Conv2D(24, (5, 5), strides=(1, 1), input_shape=input_shape),
    MaxPooling2D((4, 2), strides=(4, 2)),
    Activation('relu'),

    # Stage 2: 48 filters, then 4x2 max-pooling.
    Conv2D(48, (5, 5), padding="valid"),
    MaxPooling2D((4, 2), strides=(4, 2)),
    Activation('relu'),

    # Stage 3: 48 filters, no pooling.
    Conv2D(48, (5, 5), padding="valid"),
    Activation('relu'),

    # Classifier head with dropout before and after the hidden dense layer.
    Flatten(),
    Dropout(rate=0.5),
    Dense(64),
    Activation('relu'),
    Dropout(rate=0.5),

    # Output: one probability per class.
    Dense(10),
    Activation('softmax'),
])

In [None]:

# Configure training: Adam optimizer, categorical cross-entropy loss, accuracy metric.
model.compile(optimizer="Adam", loss="categorical_crossentropy", metrics=['accuracy'])

# Train for 12 epochs in batches of 128; the test split is also passed as
# validation data, so its metrics are reported after every epoch.
model.fit(
    x=X_train,
    y=y_train,
    epochs=12,
    batch_size=128,
    validation_data=(X_test, y_test),
)

# Final evaluation on the test split; score holds the loss followed by the
# compiled metric (accuracy).
score = model.evaluate(x=X_test, y=y_test)
test_loss, test_accuracy = score[0], score[1]

print('Test loss:', test_loss)
print('Test accuracy:', test_accuracy)