In [2]:
import os
import pandas as pd
import numpy as np
from glob import glob
import matplotlib.pyplot as plt
import librosa
import librosa.display

In [2]:
def create_spectrogram(filename, name):
    """Render the mel spectrogram of an audio clip to a training-set JPEG.

    The clip is loaded with ``librosa.load``, converted to a mel spectrogram,
    expressed in dB relative to its maximum and drawn on a frameless,
    axis-free figure that is saved as ``Spectrograms/Train/<name>.jpg``.

    Parameters
    ----------
    filename : str
        Path of the audio file to load.
    name : str
        Base name (without extension) of the image file to write.

    Returns
    -------
    None
    """
    plt.interactive(False)
    clip, sample_rate = librosa.load(filename)

    out_path = 'Spectrograms/Train/{}.jpg'.format(name)
    # savefig does not create missing directories, so make sure the target exists.
    os.makedirs(os.path.dirname(out_path), exist_ok=True)

    fig = plt.figure(figsize=[0.72, 0.72])
    try:
        ax = fig.add_subplot(111)
        ax.axes.get_xaxis().set_visible(False)
        ax.axes.get_yaxis().set_visible(False)
        ax.set_frame_on(False)
        S = librosa.feature.melspectrogram(y=clip, sr=sample_rate)
        # Draw on our own axes explicitly instead of relying on pyplot's current axes.
        librosa.display.specshow(librosa.power_to_db(S, ref=np.max), ax=ax)
        fig.savefig(out_path, dpi=400, bbox_inches='tight', pad_inches=0)
    finally:
        # Close only the figure created here, even when plotting or saving fails;
        # this releases it without touching figures owned by other code.
        plt.close(fig)

In [3]:
def create_spectrogram_test(filename, name):
    """Render the mel spectrogram of an audio clip to a test-set JPEG.

    The clip is loaded with ``librosa.load``, converted to a mel spectrogram,
    expressed in dB relative to its maximum and drawn on a frameless,
    axis-free figure that is saved as ``Spectrograms/Test/<name>.jpg``.

    Parameters
    ----------
    filename : str
        Path of the audio file to load.
    name : str
        Base name (without extension) of the image file to write.

    Returns
    -------
    None
    """
    plt.interactive(False)
    clip, sample_rate = librosa.load(filename)

    out_path = 'Spectrograms/Test/{}.jpg'.format(name)
    # savefig does not create missing directories, so make sure the target exists.
    os.makedirs(os.path.dirname(out_path), exist_ok=True)

    fig = plt.figure(figsize=[0.72, 0.72])
    try:
        ax = fig.add_subplot(111)
        ax.axes.get_xaxis().set_visible(False)
        ax.axes.get_yaxis().set_visible(False)
        ax.set_frame_on(False)
        S = librosa.feature.melspectrogram(y=clip, sr=sample_rate)
        # Draw on our own axes explicitly instead of relying on pyplot's current axes.
        librosa.display.specshow(librosa.power_to_db(S, ref=np.max), ax=ax)
        fig.savefig(out_path, dpi=400, bbox_inches='tight', pad_inches=0)
    finally:
        # Close only the figure created here, even when plotting or saving fails;
        # this releases it without touching figures owned by other code.
        plt.close(fig)

In [4]:
# Smoke test: render the spectrogram of a single training clip.
create_spectrogram(filename='Dataset/Train/0.wav', name='0')

In [11]:
# Training metadata; the cells below read its ID and Class columns.
data = pd.read_csv(filepath_or_buffer='Dataset/train.csv')

In [6]:
# Trial run: generate spectrograms for the first ten rows only.
for _, row in data[:10].iterrows():
    wav_path = 'Dataset/Train/{}.wav'.format(row.ID)
    image_name = row.Class + str(row.ID)
    create_spectrogram(wav_path, image_name)

In [5]:
# Preview the first ten rows labelled street_music.
data.loc[data['Class'] == "street_music"].head(10)

Unnamed: 0,ID,Class
1,1,street_music
6,10,street_music
23,38,street_music
42,65,street_music
47,72,street_music
48,74,street_music
55,82,street_music
57,84,street_music
63,93,street_music
70,105,street_music


In [None]:
# Render spectrograms for the first ten clips of every class.
for class_name in data.Class.unique():
    class_rows = data[data['Class'] == class_name][:10]
    for _, row in class_rows.iterrows():
        wav_path = 'Dataset/Train/{}.wav'.format(row.ID)
        create_spectrogram(wav_path, row.Class + str(row.ID))

In [6]:
# Distinct class labels, in order of first appearance.
data['Class'].unique()

array(['siren', 'street_music', 'drilling', 'dog_bark',
       'children_playing', 'gun_shot', 'engine_idling', 'air_conditioner',
       'jackhammer', 'car_horn'], dtype=object)

In [7]:
# Display the object bound to np.max, which create_spectrogram passes as `ref` to librosa.power_to_db.
np.max

<function numpy.amax(a, axis=None, out=None, keepdims=<no value>, initial=<no value>)>

In [None]:
# Full run: one spectrogram per training row, named <Class><ID>.
for _, row in data.iterrows():
    wav_path = 'Dataset/Train/{}.wav'.format(row.ID)
    image_name = row.Class + str(row.ID)
    create_spectrogram(wav_path, image_name)

In [None]:
# Load the test metadata here: in notebook order this cell runs before any
# other cell defines test_data, so a fresh-kernel Run All would otherwise
# stop with a NameError.
test_data = pd.read_csv('Dataset/test.csv')

# One spectrogram per test row, named after the clip ID.
for _, row in test_data.iterrows():
    create_spectrogram_test('Dataset/Test/{}.wav'.format(row.ID), str(row.ID))

In [7]:
# Test metadata; the cells below read its ID column.
test_data = pd.read_csv(filepath_or_buffer='Dataset/test.csv')

In [5]:
from keras_preprocessing.image import ImageDataGenerator

In [12]:
# Convert IDs to strings so they can be concatenated into image file names.
data['ID'] = data['ID'].map(str)
test_data['ID'] = test_data['ID'].map(str)

In [13]:
# Image file name of each training clip: <Class><ID>.jpg, matching what create_spectrogram wrote.
data['File'] = data['Class'] + data['ID'] + ".jpg"

In [14]:
# Image file name of each test clip: <ID>.jpg, matching what create_spectrogram_test wrote.
test_data['File'] = test_data['ID'] + ".jpg"

In [17]:
# A single generator configuration serves both subsets: pixel values are
# multiplied by 1/255 and a quarter of the rows is reserved for validation.
datagen = ImageDataGenerator(rescale=1./255., validation_split=0.25)

# Arguments shared by the training and validation iterators.
shared_flow_args = dict(
    dataframe=data,
    directory='Spectrograms/Train/',
    x_col="File",
    y_col="Class",
    batch_size=32,
    seed=42,
    shuffle=True,
    class_mode='categorical',
    target_size=(64, 64),
)

train_generator = datagen.flow_from_dataframe(subset='training', **shared_flow_args)
valid_generator = datagen.flow_from_dataframe(subset='validation', **shared_flow_args)

Found 4077 validated image filenames belonging to 10 classes.
Found 1358 validated image filenames belonging to 10 classes.


In [14]:
from keras.layers import Dense, Activation, Flatten, Dropout, BatchNormalization
from keras.models import Sequential, Model
from keras.layers import Conv2D, MaxPooling2D
from keras import regularizers, optimizers

Using TensorFlow backend.


In [15]:
# CNN classifier for 64x64 RGB spectrogram images: three stages of two 3x3
# convolutions + 2x2 max-pooling + dropout, then a dense head ending in a
# 10-way softmax.
model = Sequential([
    # Stage 1
    Conv2D(filters=32, kernel_size=(3,3), padding='same', input_shape=(64, 64, 3), activation='relu'),
    Conv2D(filters=64, kernel_size=(3,3), activation='relu'),
    MaxPooling2D(pool_size=(2,2)),
    Dropout(0.25),

    # Stage 2
    Conv2D(filters=64, kernel_size=(3,3), padding='same', activation='relu'),
    Conv2D(filters=64, kernel_size=(3,3), activation='relu'),
    MaxPooling2D(pool_size=(2,2)),
    Dropout(0.5),

    # Stage 3
    Conv2D(filters=128, kernel_size=(3,3), padding='same', activation='relu'),
    Conv2D(filters=128, kernel_size=(3,3), activation='relu'),
    MaxPooling2D(pool_size=(2,2)),
    Dropout(0.5),

    # Classifier head
    Flatten(),
    Dense(512, activation='relu'),
    Dropout(0.5),
    Dense(10, activation='softmax'),
])

model.compile(optimizer='adam', loss="categorical_crossentropy", metrics=['accuracy'])

model.summary()






Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.


Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_1 (Conv2D)            (None, 64, 64, 32)        896       
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 62, 62, 64)        18496     
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 31, 31, 64)        0         
_________________________________________________________________
dropout_1 (Dropout)          (None, 31, 31, 64)        0         
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 31, 31, 64)        36928     
_________________________________________________________________
conv2d_4 (Conv2D)            (None, 29, 29, 64)        36928    

In [16]:
# Number of whole batches per pass; integer division leaves out any final partial batch.
step_size_train = train_generator.n // train_generator.batch_size
step_size_validation = valid_generator.n // valid_generator.batch_size

# Train for 150 epochs, validating on the held-out subset after each epoch.
fit_args = dict(
    generator=train_generator,
    steps_per_epoch=step_size_train,
    validation_data=valid_generator,
    validation_steps=step_size_validation,
    epochs=150,
)
model.fit_generator(**fit_args)

# Final expression of the cell: the metrics computed on the validation generator.
model.evaluate_generator(generator=valid_generator, steps=step_size_validation)

Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where
Epoch 1/150
Epoch 2/150
Epoch 3/150
Epoch 4/150
Epoch 5/150
Epoch 6/150
Epoch 7/150
Epoch 8/150
Epoch 9/150
Epoch 10/150
Epoch 11/150
Epoch 12/150
Epoch 13/150
Epoch 14/150
Epoch 15/150
Epoch 16/150
Epoch 17/150
Epoch 18/150
Epoch 19/150
Epoch 20/150
Epoch 21/150
Epoch 22/150
Epoch 23/150
Epoch 24/150
Epoch 25/150
Epoch 26/150
Epoch 27/150
Epoch 28/150
Epoch 29/150
Epoch 30/150
Epoch 31/150
Epoch 32/150
Epoch 33/150
Epoch 34/150
Epoch 35/150
Epoch 36/150
Epoch 37/150
Epoch 38/150
Epoch 39/150
Epoch 40/150
Epoch 41/150
Epoch 42/150
Epoch 43/150
Epoch 44/150
Epoch 45/150
Epoch 46/150
Epoch 47/150
Epoch 48/150
Epoch 49/150
Epoch 50/150
Epoch 51/150
Epoch 52/150
Epoch 53/150
Epoch 54/150
Epoch 55/150
Epoch 56/150
Epoch 57/150
Epoch 58/150
Epoch 59/150
Epoch 60/150
Epoch 61/150
Epoch 62/150
Epoch 63/150
Epoch 64/150
Epoch 65/150
Epoch 66/150
Epoch 67/150
Epoch 68/150
Epoch 69/150
Epoch 70/150
Epoch

Epoch 118/150
Epoch 119/150
Epoch 120/150
Epoch 121/150
Epoch 122/150
Epoch 123/150
Epoch 124/150
Epoch 125/150
Epoch 126/150
Epoch 127/150
Epoch 128/150
Epoch 129/150
Epoch 130/150
Epoch 131/150
Epoch 132/150
Epoch 133/150
Epoch 134/150
Epoch 135/150
Epoch 136/150
Epoch 137/150
Epoch 138/150
Epoch 139/150
Epoch 140/150
Epoch 141/150
Epoch 142/150
Epoch 143/150
Epoch 144/150
Epoch 145/150
Epoch 146/150
Epoch 147/150
Epoch 148/150
Epoch 149/150
Epoch 150/150


[0.44749876764780355, 0.921875]

In [3]:
from keras.models import load_model
# Saving is disabled; uncomment the next two lines to write the current model to model.h5.
# model.save('model.h5')
# print("Model Saved")
# Replace `model` with the network stored in model.h5.
model = load_model(filepath='model.h5')






Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.


Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where


In [15]:
# Unlabelled test images: x_col takes in the file name, there is no target
# column, and shuffling is off so predictions keep the row order of test_data.
test_datagen = ImageDataGenerator(rescale=1./255.)
test_generator = test_datagen.flow_from_dataframe(dataframe=test_data,
                                                  directory='Spectrograms/Test/',
                                                  x_col='File',
                                                  y_col=None,
                                                  batch_size=32,
                                                  seed=42,
                                                  shuffle=False,
                                                  class_mode=None,
                                                  target_size=(64,64))

# Ceiling division: the final partial batch must be counted too, otherwise the
# trailing images (n % batch_size of them) would never be predicted.
step_size_test = (test_generator.n + test_generator.batch_size - 1) // test_generator.batch_size

Found 3297 validated image filenames.


In [24]:
# Start from the first batch so predictions line up with the generator's file order.
test_generator.reset()

# len(test_generator) counts every batch including the final partial one, so
# each test image gets a prediction (a floor-divided step count drops the
# last n % batch_size images).
pred = model.predict_generator(test_generator, steps=len(test_generator), verbose=1)

predicted_class_indices = np.argmax(pred, axis=1)

# Invert the training generator's class-name -> index mapping to decode predictions.
labels = {index: class_name for class_name, index in train_generator.class_indices.items()}
predictions = [labels[i] for i in predicted_class_indices]

assert len(predictions) == test_generator.n, "expected exactly one prediction per test image"
print(predictions[:6])

['jackhammer', 'dog_bark', 'drilling', 'dog_bark', 'street_music', 'jackhammer']


In [19]:
# First ten decoded class names.
predictions[0:10]

['jackhammer',
 'dog_bark',
 'drilling',
 'dog_bark',
 'street_music',
 'jackhammer',
 'air_conditioner',
 'children_playing',
 'dog_bark',
 'drilling']

In [20]:
# Show the index -> class-name mapping built from train_generator.class_indices.
labels

{0: 'air_conditioner',
 1: 'car_horn',
 2: 'children_playing',
 3: 'dog_bark',
 4: 'drilling',
 5: 'engine_idling',
 6: 'gun_shot',
 7: 'jackhammer',
 8: 'siren',
 9: 'street_music'}

In [21]:
# First five training rows, including the derived File column.
data.head(n=5)

Unnamed: 0,ID,Class,File
0,0,siren,siren0.jpg
1,1,street_music,street_music1.jpg
2,2,drilling,drilling2.jpg
3,3,siren,siren3.jpg
4,4,dog_bark,dog_bark4.jpg


In [22]:
# Show the per-image argmax class indices computed from the model's predictions.
predicted_class_indices

array([7, 3, 4, ..., 6, 7, 4])