In [1]:
# Retrieve previously stored variables
%store -r x_train
%store -r x_test 
%store -r y_train
%store -r y_test
%store -r yy
%store -r le
%store -r max_pad_length
print(x_train.shape)
#print(x_train[1345])

(1346, 40, 173)


In [2]:
import numpy as np
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.layers import Convolution2D, Conv2D, MaxPooling2D, GlobalAveragePooling2D
from keras.optimizers import Adam
from keras.utils import np_utils
from sklearn import metrics 

# Input geometry fed to the network: rows x padded columns x one channel
num_rows = 40
num_columns = max_pad_length
num_channels = 1
input_shape = (num_rows, num_columns, num_channels)

# Append the trailing channel axis so each sample matches the Conv2D input_shape
x_train = x_train.reshape((x_train.shape[0],) + input_shape)
x_test = x_test.reshape((x_test.shape[0],) + input_shape)

# yy is assumed to be the encoded label matrix, so its width is the class count
num_labels = yy.shape[1]

# Construct CNN model: four Conv -> MaxPool -> Dropout stages with the
# filter count doubling each stage (16, 32, 64, 128)
model = Sequential()
for stage_index, stage_filters in enumerate((16, 32, 64, 128)):
    # Only the very first layer has to declare the input shape
    first_layer_kwargs = {'input_shape': input_shape} if stage_index == 0 else {}
    model.add(Conv2D(filters=stage_filters, kernel_size=2, activation='relu', **first_layer_kwargs))
    model.add(MaxPooling2D(pool_size=2))
    model.add(Dropout(0.2))

# Collapse each feature map to a single value before the classifier
model.add(GlobalAveragePooling2D())

# Output layer: one softmax unit per category label
model.add(Dense(num_labels, activation='softmax'))

Using TensorFlow backend.


In [3]:
# Compile the model
# optimizer - Adam, an adaptive-learning-rate variant of stochastic gradient
#             descent; it updates the weights from the gradient of the loss
# loss      - categorical cross-entropy, the quantity that training minimises
# metrics   - accuracy is computed and reported alongside the loss; it is only
#             monitored and is not what the optimizer minimises
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

In [4]:
# Print the layer-by-layer architecture summary
model.summary()

# Baseline before any training.
# evaluate() returns [loss, accuracy] for the compiled loss and metric;
# verbose=0 keeps it silent so only the values printed below are shown
score = model.evaluate(x_test, y_test, verbose=0)
pretrain_loss, pretrain_accuracy = score
accuracy = pretrain_accuracy * 100

# Accuracy metric on the test samples, shown as a percentage
print("Pre-training accuracy: %.4f%%" % accuracy)

display(score)

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_1 (Conv2D)            (None, 39, 172, 16)       80        
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 19, 86, 16)        0         
_________________________________________________________________
dropout_1 (Dropout)          (None, 19, 86, 16)        0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 18, 85, 32)        2080      
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 9, 42, 32)         0         
_________________________________________________________________
dropout_2 (Dropout)          (None, 9, 42, 32)         0         
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 8, 41, 64)        

[5.299432442982991, 0.1133333370089531]

In [5]:
import os
from datetime import datetime

from keras.callbacks import ModelCheckpoint

num_epochs = 100
num_batch_size = 8 # Arbitrarily chose the value 8

# Create the checkpoint directory up front so that writing the first
# checkpoint cannot fail on a fresh checkout where it does not exist yet
checkpoint_path = 'saved_models/weights.best.basic_mlp.hdf5'
os.makedirs(os.path.dirname(checkpoint_path), exist_ok=True)

# save_best_only=True keeps only the weights with the lowest val_loss seen so far
checkpointer = ModelCheckpoint(filepath=checkpoint_path,
                               verbose=1, save_best_only=True)

start = datetime.now()

# Train the model for a fixed number of epochs
# validation_data - data used to evaluate the loss at the end of each epoch
# callbacks - ModelCheckpoint saves the best weights as training progresses
# NOTE(review): the test split doubles as validation data here, so the "best"
# checkpoint is selected on the same data later reported as test accuracy
model.fit(x_train, y_train, batch_size=num_batch_size, epochs=num_epochs, validation_data=(x_test, y_test), callbacks=[checkpointer], verbose=1)


duration = datetime.now() - start
print("Training completed in time: ", duration)

Train on 1346 samples, validate on 150 samples
Epoch 1/100

Epoch 00001: val_loss improved from inf to 0.65823, saving model to saved_models/weights.best.basic_mlp.hdf5
Epoch 2/100

Epoch 00002: val_loss improved from 0.65823 to 0.52123, saving model to saved_models/weights.best.basic_mlp.hdf5
Epoch 3/100

Epoch 00003: val_loss improved from 0.52123 to 0.44242, saving model to saved_models/weights.best.basic_mlp.hdf5
Epoch 4/100

Epoch 00004: val_loss did not improve from 0.44242
Epoch 5/100

Epoch 00005: val_loss improved from 0.44242 to 0.35273, saving model to saved_models/weights.best.basic_mlp.hdf5
Epoch 6/100

Epoch 00006: val_loss did not improve from 0.35273
Epoch 7/100

Epoch 00007: val_loss improved from 0.35273 to 0.30093, saving model to saved_models/weights.best.basic_mlp.hdf5
Epoch 8/100

Epoch 00008: val_loss did not improve from 0.30093
Epoch 9/100

Epoch 00009: val_loss did not improve from 0.30093
Epoch 10/100

Epoch 00010: val_loss did not improve from 0.30093
Epoch 

In [6]:
# Test the Model

# Report accuracy of the trained model on the training split, then the testing split
for split_label, split_features, split_targets in (
    ("Training Accuracy: ", x_train, y_train),
    ("Testing Accuracy: ", x_test, y_test),
):
    # evaluate() returns [loss, accuracy]; index 1 is the accuracy metric
    score = model.evaluate(split_features, split_targets, verbose=0)
    print(split_label, score[1]*100, "%")

Training Accuracy:  100.0 %
Testing Accuracy:  94.66666579246521 %


In [15]:
# Test MFCC values of Longer Vs. Shorter Samples
# Creating a function that extracts the MFCC features of an audio file
def extract_features(file_name, max_pad_len):
    """Extract a fixed-width MFCC matrix from an audio file.

    The clip is loaded with librosa, 40 MFCCs are computed per frame, and the
    frame axis is forced to exactly ``max_pad_len`` columns: shorter clips are
    zero-padded on the right, longer clips are truncated.

    Args:
        file_name: Path of the audio file to load.
        max_pad_len: Number of frames (columns) in the returned matrix.

    Returns:
        Array of shape ``(40, max_pad_len)``, or ``None`` if the file could
        not be loaded or its MFCCs could not be computed.
    """
    # Imported here so the function does not rely on a later cell having run
    import librosa

    try:
        # Librosa extraction of audio array and sampling rate
        # ('kaiser_fast' trades resampling quality for speed)
        audio, sample_rate = librosa.load(file_name, res_type='kaiser_fast')
        # mfccs is the MFCC sequence (array), n_mfcc is the number of MFCCs to return
        mfccs = librosa.feature.mfcc(y=audio, sr=sample_rate, n_mfcc=40)
    except Exception as e:
        # Best-effort: report the failure, including its cause, and let the caller decide
        print("Error encountered while parsing file ", file_name, "-", e)
        return None

    n_frames = mfccs.shape[1]
    if n_frames < max_pad_len:
        # Zero-pad the frame axis on the right up to max_pad_len
        mfccs = np.pad(mfccs, pad_width=((0, 0), (0, max_pad_len - n_frames)), mode='constant')
    else:
        # A negative pad width would make np.pad raise; keep the leading frames instead
        mfccs = mfccs[:, :max_pad_len]

    return mfccs

In [27]:
def print_prediction(file_name, max_pad_length):
    """Print the predicted class and per-class probabilities for one audio file.

    Args:
        file_name: Path of the audio file to classify.
        max_pad_length: Frame count passed to ``extract_features``; the
            reshape below expects it to equal the notebook-level
            ``num_columns``.

    Returns:
        None. Results are printed. If feature extraction fails, a message is
        printed and no prediction is made.
    """
    prediction_feature = extract_features(file_name, max_pad_length)
    if prediction_feature is None:
        # extract_features has already reported why the file could not be parsed
        print("No prediction made for", file_name)
        return
    prediction_feature = prediction_feature.reshape(1, num_rows, num_columns, num_channels)

    # A single forward pass yields the class probabilities. predict_classes /
    # predict_proba are avoided because newer Keras releases removed them.
    predicted_proba = model.predict(prediction_feature)[0]

    # The most probable index, mapped back to its label
    predicted_vector = np.array([np.argmax(predicted_proba)])
    predicted_class = le.inverse_transform(predicted_vector)
    print("The predicted class is:", predicted_class[0], '\n')

    # Decode every class index in one call instead of once per loop iteration
    categories = le.inverse_transform(np.arange(len(predicted_proba)))
    for category, probability in zip(categories, predicted_proba):
        print(category, "\t\t : ", format(probability, '.32f'))

In [28]:
# Validation 
import os
import librosa

# Build the paths with os.path.join rather than hand-written '//' separators
cwd = os.getcwd()
audio_root = os.path.join(cwd, 'UrbanSound8K', 'audio')

sample_files = [
    os.path.join(audio_root, 'fold1', '101415-3-0-2.wav'),  # Random dog bark file
    os.path.join(audio_root, 'fold3', '184623-8-0-1.wav'),  # Siren file
]

# Print the predicted class and class probabilities for each sample clip
for filename in sample_files:
    print_prediction(filename, max_pad_length)

The predicted class is: dog_bark 

car_horn 		 :  0.00000000000000000000000000000000
dog_bark 		 :  1.00000000000000000000000000000000
gun_shot 		 :  0.00000000000000000000000000000000
siren 		 :  0.00000000000000000000000000000000
The predicted class is: siren 

car_horn 		 :  0.00000000000000000000000000000000
dog_bark 		 :  0.00000000000000000314474051121837
gun_shot 		 :  0.00000000000000000000000000000000
siren 		 :  1.00000000000000000000000000000000
