### Audio Event Classifier with Deep Learning

Build a CNN sound classifier using mel-spectrograms from the ESC-50 dataset. Refer to *save_melspectorgrams.ipynb* for feature extraction.

In [None]:
%matplotlib inline
from keras.applications.vgg16 import VGG16, preprocess_input
from keras.preprocessing import image
from keras.models import Sequential, Model
from keras.layers import Dense, Dropout, Flatten, Input
from keras.applications.vgg16 import preprocess_input
from keras.utils import multi_gpu_model
import numpy as np
import json

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


# Set parameter values

In [2]:
# Training schedule.
batch_size = 40
epochs = 200

# Dimensions of the spectrogram images fed to the network.
img_width, img_height = 224, 224

# Input placeholder for the network. Built from the size constants above so it
# stays in sync with the generators' target_size=(img_height, img_width).
input_tensor = Input(shape=(img_height, img_width, 3))

# Number of images in each split; divided by batch_size to get steps per epoch.
nb_training_samples = 1600
nb_validation_samples = 400

# Configure training and validation data generators

Provide paths to the training and testing set directories.

In [3]:
# Keyword arguments shared by both directory iterators.
flow_options = dict(target_size=(img_height, img_width), batch_size=batch_size)

# --- training split ---
training_data_dir = '/path/to/ESC-50-master/melspectrograms/training'
# Pixel values are multiplied by 1/255 as images are read.
training_datagen = image.ImageDataGenerator(rescale=1./255)
training_generator = training_datagen.flow_from_directory(training_data_dir, **flow_options)

# --- validation split ---
validation_data_dir ='/path/to/ESC-50-master/melspectrograms/testing/'
validation_datagen = image.ImageDataGenerator(rescale=1./255)
validation_generator = validation_datagen.flow_from_directory(validation_data_dir, **flow_options)

Found 1600 images belonging to 50 classes.
Found 400 images belonging to 50 classes.


# Load base model

In [4]:
# VGG16 with ImageNet weights and without its fully-connected head, wired to
# the input tensor defined in the parameter cell.
base_model = VGG16(
    include_top=False,
    weights='imagenet',
    input_tensor=input_tensor,
)
print('Model loaded.')
base_model.summary()

Model loaded.
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, 224, 224, 3)       0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 224, 224, 64)      1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 224, 224, 64)      36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 112, 112, 64)      0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 112, 112, 128)     73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 112, 112, 128)     147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, 56, 56, 128)       0      

# Build top model

In [5]:
# build a classifier model to put on top of the convolutional model
top_model = Sequential()
top_model.add(Flatten(input_shape=base_model.output_shape[1:]))
top_model.add(Dense(256, activation='relu'))
top_model.add(Dropout(0.5))
top_model.add(Dense(50, activation='softmax'))
top_model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
flatten_1 (Flatten)          (None, 25088)             0         
_________________________________________________________________
dense_1 (Dense)              (None, 256)               6422784   
_________________________________________________________________
dropout_1 (Dropout)          (None, 256)               0         
_________________________________________________________________
dense_2 (Dense)              (None, 50)                12850     
Total params: 6,435,634
Trainable params: 6,435,634
Non-trainable params: 0
_________________________________________________________________


# Combine base model with top model

In [6]:
# Optional (disabled): load previously saved weights into the head first.
# top_model.load_weights('bootlneck_fc_model.h5')

# Feed the convolutional base's output through the classifier head and expose
# the result as a single end-to-end model.
head_output = top_model(base_model.output)
model = Model(inputs=base_model.input, outputs=head_output)
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, 224, 224, 3)       0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 224, 224, 64)      1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 224, 224, 64)      36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 112, 112, 64)      0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 112, 112, 128)     73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 112, 112, 128)     147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, 56, 56, 128)       0         
__________

# Configure model training

In [7]:
# Number of leading layers of the combined model whose weights are kept fixed
# during fine-tuning (applied as model.layers[:num_layers_to_freeze]); the
# value is also embedded in the saved model JSON and TensorBoard log names.
# NOTE(review): with the standard VGG16 layer order, 15 presumably freezes
# everything up to and including block4_pool -- confirm with model.summary().
num_layers_to_freeze = 15

In [8]:
from keras import metrics, optimizers

def top_5_accuracy(y_true, y_pred):
    """Top-k categorical accuracy with k fixed at 5.

    Thin wrapper around ``metrics.top_k_categorical_accuracy`` so it can be
    passed to ``model.compile`` as a named metric.
    """
    top_k = 5
    return metrics.top_k_categorical_accuracy(y_true, y_pred, k=top_k)

# Freeze the leading layers so only the remaining ones are updated in training.
frozen_layers = model.layers[:num_layers_to_freeze]
for layer in frozen_layers:
    layer.trainable = False

# Plain SGD with momentum and a small learning rate.
# Untested alternative with Nesterov accelerated gradient:
#   optimizers.SGD(lr=1e-4, momentum=0.9, decay=1e-6, nesterov=True)
sgd = optimizers.SGD(lr=1e-4, momentum=0.9)
model.compile(
    optimizer=sgd,
    loss='categorical_crossentropy',
    metrics=['accuracy', top_5_accuracy],
)

# Save the architecture (not the weights) as JSON, named after the freeze depth.
model_filename = "vgg16_model_{}_frozen_layers.json".format(num_layers_to_freeze)
model_json = model.to_json()
with open(model_filename, "w") as json_file:
    json_file.write(model_json)

# Fine-tune the model

In [9]:
from keras.callbacks import ModelCheckpoint, TensorBoard
from time import time

# Write TensorBoard logs to a directory named after the freeze depth.
tensorboard = TensorBoard(log_dir="logs/layers_frozen_{}".format(num_layers_to_freeze))

# Checkpoint: overwrite the file only when validation accuracy improves.
filepath="esc50_vgg16_stft_weights_train_last_2_base_layers.best.hdf5"
best_model_checkpoint = ModelCheckpoint(filepath, monitor='val_acc', verbose=1, save_best_only=True, mode='max')
callbacks_list = [best_model_checkpoint, tensorboard]

# Floor division keeps the step counts integers on both Python 2 and 3
# (plain `/` yields a float under Python 3's true division).
model.fit_generator(
    training_generator,
    steps_per_epoch=nb_training_samples // batch_size,
    epochs=epochs,
    validation_data=validation_generator,
    validation_steps=nb_validation_samples // batch_size,
    callbacks=callbacks_list)

Epoch 1/200

Epoch 00001: val_acc improved from -inf to 0.03750, saving model to esc50_vgg16_stft_weights_train_last_2_base_layers.best.hdf5
Epoch 2/200

Epoch 00002: val_acc improved from 0.03750 to 0.05000, saving model to esc50_vgg16_stft_weights_train_last_2_base_layers.best.hdf5
Epoch 3/200

Epoch 00003: val_acc improved from 0.05000 to 0.06750, saving model to esc50_vgg16_stft_weights_train_last_2_base_layers.best.hdf5
Epoch 4/200

Epoch 00004: val_acc improved from 0.06750 to 0.07250, saving model to esc50_vgg16_stft_weights_train_last_2_base_layers.best.hdf5
Epoch 5/200

Epoch 00005: val_acc did not improve
Epoch 6/200

Epoch 00006: val_acc improved from 0.07250 to 0.08500, saving model to esc50_vgg16_stft_weights_train_last_2_base_layers.best.hdf5
Epoch 7/200

Epoch 00007: val_acc did not improve
Epoch 8/200

Epoch 00008: val_acc improved from 0.08500 to 0.15500, saving model to esc50_vgg16_stft_weights_train_last_2_base_layers.best.hdf5
Epoch 9/200

Epoch 00009: val_acc impro


Epoch 00029: val_acc did not improve
Epoch 30/200

Epoch 00030: val_acc improved from 0.57500 to 0.59500, saving model to esc50_vgg16_stft_weights_train_last_2_base_layers.best.hdf5
Epoch 31/200

Epoch 00031: val_acc improved from 0.59500 to 0.60000, saving model to esc50_vgg16_stft_weights_train_last_2_base_layers.best.hdf5
Epoch 32/200

Epoch 00032: val_acc improved from 0.60000 to 0.60750, saving model to esc50_vgg16_stft_weights_train_last_2_base_layers.best.hdf5
Epoch 33/200

Epoch 00033: val_acc did not improve
Epoch 34/200

Epoch 00034: val_acc did not improve
Epoch 35/200

Epoch 00035: val_acc improved from 0.60750 to 0.62250, saving model to esc50_vgg16_stft_weights_train_last_2_base_layers.best.hdf5
Epoch 36/200

Epoch 00036: val_acc did not improve
Epoch 37/200

Epoch 00037: val_acc did not improve
Epoch 38/200

Epoch 00038: val_acc improved from 0.62250 to 0.66750, saving model to esc50_vgg16_stft_weights_train_last_2_base_layers.best.hdf5
Epoch 39/200

Epoch 00039: val_ac


Epoch 00060: val_acc did not improve
Epoch 61/200

Epoch 00061: val_acc did not improve
Epoch 62/200

Epoch 00062: val_acc did not improve
Epoch 63/200

Epoch 00063: val_acc did not improve
Epoch 64/200

Epoch 00064: val_acc improved from 0.71750 to 0.72250, saving model to esc50_vgg16_stft_weights_train_last_2_base_layers.best.hdf5
Epoch 65/200

Epoch 00065: val_acc did not improve
Epoch 66/200

Epoch 00066: val_acc did not improve
Epoch 67/200

Epoch 00067: val_acc did not improve
Epoch 68/200

Epoch 00068: val_acc did not improve
Epoch 69/200

Epoch 00069: val_acc did not improve
Epoch 70/200

Epoch 00070: val_acc did not improve
Epoch 71/200

Epoch 00071: val_acc did not improve
Epoch 72/200

Epoch 00072: val_acc did not improve
Epoch 73/200

Epoch 00073: val_acc did not improve
Epoch 74/200

Epoch 00074: val_acc did not improve
Epoch 75/200

Epoch 00075: val_acc did not improve
Epoch 76/200

Epoch 00076: val_acc did not improve
Epoch 77/200

Epoch 00077: val_acc improved from 0.7


Epoch 00093: val_acc did not improve
Epoch 94/200

Epoch 00094: val_acc did not improve
Epoch 95/200

Epoch 00095: val_acc did not improve
Epoch 96/200

Epoch 00096: val_acc did not improve
Epoch 97/200

Epoch 00097: val_acc did not improve
Epoch 98/200

Epoch 00098: val_acc did not improve
Epoch 99/200

Epoch 00099: val_acc did not improve
Epoch 100/200

Epoch 00100: val_acc did not improve
Epoch 101/200

Epoch 00101: val_acc did not improve
Epoch 102/200

Epoch 00102: val_acc did not improve
Epoch 103/200

Epoch 00103: val_acc did not improve
Epoch 104/200

Epoch 00104: val_acc did not improve
Epoch 105/200

Epoch 00105: val_acc did not improve
Epoch 106/200

Epoch 00106: val_acc improved from 0.73750 to 0.75000, saving model to esc50_vgg16_stft_weights_train_last_2_base_layers.best.hdf5
Epoch 107/200

Epoch 00107: val_acc did not improve
Epoch 108/200

Epoch 00108: val_acc did not improve
Epoch 109/200

Epoch 00109: val_acc did not improve
Epoch 110/200

Epoch 00110: val_acc did no


Epoch 00128: val_acc did not improve
Epoch 129/200

Epoch 00129: val_acc did not improve
Epoch 130/200

Epoch 00130: val_acc did not improve
Epoch 131/200

Epoch 00131: val_acc improved from 0.75750 to 0.76500, saving model to esc50_vgg16_stft_weights_train_last_2_base_layers.best.hdf5
Epoch 132/200

Epoch 00132: val_acc did not improve
Epoch 133/200

Epoch 00133: val_acc did not improve
Epoch 134/200

Epoch 00134: val_acc did not improve
Epoch 135/200

Epoch 00135: val_acc did not improve
Epoch 136/200

Epoch 00136: val_acc did not improve
Epoch 137/200

Epoch 00137: val_acc did not improve
Epoch 138/200

Epoch 00138: val_acc did not improve
Epoch 139/200

Epoch 00139: val_acc did not improve
Epoch 140/200

Epoch 00140: val_acc did not improve
Epoch 141/200

Epoch 00141: val_acc did not improve
Epoch 142/200

Epoch 00142: val_acc did not improve
Epoch 143/200

Epoch 00143: val_acc did not improve
Epoch 144/200

Epoch 00144: val_acc did not improve
Epoch 145/200

Epoch 00145: val_acc 


Epoch 00200: val_acc did not improve


<keras.callbacks.History at 0x7ff4e4d70e10>

# Get top k predictions for selected test files

In [10]:
def get_top_k_predictions(preds, label_map, k=5, print_flag=False):
    """Return the class names of the ``k`` highest-scoring predictions.

    Args:
        preds: 1-D sequence of per-class scores, indexed by class index.
        label_map: dict mapping class name -> class index.
        k: number of top-scoring classes to return.
        print_flag: when true, also print each returned name with its score.

    Returns:
        List of class names ordered from highest to lowest score.
    """
    # Class indices sorted by descending score, truncated to the top k.
    top_k = np.argsort(preds)[::-1][:k]
    # Invert name -> index into index -> name. ``items()`` (rather than
    # ``iteritems()``) works on both Python 2 and Python 3.
    index_to_label = dict((index, name) for name, index in label_map.items())

    y_pred = []
    for label_index in top_k:
        if print_flag:
            # Single-argument print(...) is valid on both Python 2 and 3.
            print("{} ({})".format(index_to_label[label_index], preds[label_index]))
        y_pred.append(index_to_label[label_index])

    return y_pred

In [11]:
# Class name -> class index mapping taken from the training generator.
label_map = training_generator.class_indices

# Save the label map as JSON. The serialized text is written directly rather
# than bound to a variable named `json`, which would shadow the json module
# and make this cell fail when re-run.
with open("cough_label_map.json", "w") as label_map_file:
    label_map_file.write(json.dumps(label_map))

img_path = '/data/datasets/ESC-50/ESC-50-master/spectrograms/testing/vacuum_cleaner/5-182007-A-36.png'

# Load one image, add a batch axis, and apply the same 1/255 scaling the
# data generators use.
img = image.load_img(img_path, target_size=(224, 224))
x = image.img_to_array(img)
x = np.expand_dims(x, axis=0)* 1./255

preds = model.predict(x)[0]

get_top_k_predictions(preds, label_map, k=3)

['vacuum_cleaner', 'crickets', 'engine']

# Calculate and plot confusion matrix

In [None]:
import itertools
import matplotlib.pyplot as plt
def plot_confusion_matrix(cm, classes,
                          normalize=False,
                          title='Confusion matrix',
                          cmap=plt.cm.Blues):
    """
    Print a short header and plot the confusion matrix as an annotated image.

    Args:
        cm: 2-D array of counts, indexed as ``cm[row, column]``.
        classes: tick labels, one per row/column of ``cm``.
        normalize: when true, divide each row by its sum before plotting.
        title: figure title.
        cmap: matplotlib colormap used for the image.
    """
    if normalize:
        row_totals = cm.sum(axis=1)[:, np.newaxis]
        # Rows that sum to zero are left as all zeros instead of producing
        # NaNs through division by zero.
        cm = np.divide(cm.astype('float'), row_totals,
                       out=np.zeros(cm.shape, dtype='float'),
                       where=row_totals != 0)
        print("Normalized confusion matrix")
    else:
        print('Confusion matrix, without normalization')

    plt.figure(figsize=(24,24))
    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=90)
    plt.yticks(tick_marks, classes)

    # Annotate every cell; use white text on cells darker than the midpoint.
    fmt = '.2f' if normalize else 'd'
    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(j, i, format(cm[i, j], fmt),
                 horizontalalignment="center",
                 color="white" if cm[i, j] > thresh else "black")

    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    # Called last so the layout accounts for the axis labels set above.
    plt.tight_layout()
    

In [None]:
import os
from sklearn.metrics import confusion_matrix

testing_dir = '/data/datasets/ESC-50/ESC-50-master/melspectrograms/testing/'

# One fixed class order, used for the matrix rows/columns and the tick labels,
# so the two cannot get out of step.
class_names = sorted(label_map.keys())

y_true = []
y_pred = []
for label in class_names:
    # Each class has its own sub-directory named after the label.
    label_dir = os.path.join(testing_dir, label)
    file_list = os.listdir(label_dir)
    for file_name in file_list:
        img_path = os.path.join(label_dir, file_name)

        img = image.load_img(img_path, target_size=(224, 224))

        # Add a batch axis and apply the same 1/255 scaling used in training.
        x = image.img_to_array(img)
        x = np.expand_dims(x, axis=0)* 1./255

        preds = model.predict(x)[0]

        y_true.append(label)
        y_pred.append(get_top_k_predictions(preds, label_map, k=1)[0])

# Passing `labels` pins the row/column order to class_names explicitly.
cm = confusion_matrix(y_true, y_pred, labels=class_names)
plot_confusion_matrix(cm, class_names, normalize=True)