In [1]:
# Model Training and Evaluation
# x_train -  train samples: 2185
# x_test - testing samples: 547
# categories train: 2185
# total samples

# Retrieve previously stored variables
%store -r x_train 
%store -r x_test 
%store -r y_train
%store -r y_test
%store -r yy
%store -r le

In [2]:
# Sanity-check the restored data before building the model
display(x_train[1])    # one feature vector from the training split
print(y_train)         # training labels
display(yy.shape[1])   # number of label columns (classes)
print(len(x_test))     # number of testing samples
print(x_train.shape)   # (training samples, features per sample)

array([-121.22656   ,   65.431366  ,  -32.428158  ,   34.480045  ,
        -20.285666  ,    6.766245  ,  -16.782026  ,   18.733065  ,
        -13.358098  ,   11.913208  ,  -13.08202   ,   14.028469  ,
        -17.593868  ,   13.480655  ,   -7.664562  ,    2.072071  ,
         -2.1378267 ,    3.0446467 ,   -3.9934742 ,   10.67374   ,
        -13.127217  ,   11.37405   ,    2.145628  ,   -1.9967248 ,
         -0.800105  ,    3.9523966 ,   -4.335793  ,    2.2346418 ,
         -2.9265692 ,    1.1134802 ,   -6.3341794 ,    0.1592586 ,
         -4.7829237 ,   -2.4864097 ,   -5.620346  ,    4.3450346 ,
         -9.871351  ,    2.3727033 ,   -0.61368215,    3.178931  ],
      dtype=float32)

[[0. 0. 0. 1.]
 [1. 0. 0. 0.]
 [0. 0. 0. 1.]
 ...
 [0. 0. 1. 0.]
 [0. 0. 0. 1.]
 [0. 0. 0. 1.]]


4

150
(1346, 40)


In [3]:
# Import libraries
import numpy as np
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.layers import Convolution2D, MaxPooling2D
from keras.optimizers import Adam
from keras.utils import np_utils
from sklearn import metrics 

# Number of output classes = number of columns in the label matrix
num_labels = yy.shape[1]

# Layer sizing
# NOTE(review): an earlier comment described reducing the width from 64 to 32,
# but the value in use is 128 - confirm which one is intended.
numNodes = 128  # width of each hidden layer, chosen by trial and error
numMFCC = 40    # length of one input vector (one row of x_train)

# Feed-forward classifier assembled as a Sequential stack:
#   two hidden blocks of Dense -> ReLU -> Dropout, followed by a softmax output.
# Dropout(0.5) randomly drops half of the units on each training update,
# which is used here to limit overfitting.
model = Sequential([
    # Input block: takes a 1-D vector of numMFCC values
    Dense(numNodes, input_shape=(numMFCC,)),
    Activation('relu'),  # relu: Rectified Linear Activation
    Dropout(0.5),

    # Second hidden block
    Dense(numNodes),
    Activation('relu'),
    Dropout(0.5),

    # Output block: one node per category; softmax turns the outputs into
    # a probability distribution over the categories
    Dense(num_labels),
    Activation('softmax'),
])

Using TensorFlow backend.


In [4]:
# Compile the model
#   optimizer - 'adam', a commonly used variant of stochastic gradient descent (SGD);
#               SGD updates the weights using the gradient of the loss with respect to them
#   loss      - categorical cross-entropy, the quantity that training minimises
#   metrics   - values reported next to the loss; accuracy is computed from the
#               predictions and labels and is not part of the loss itself
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [5]:
# Show the architecture: layers, output shapes and parameter counts
model.summary()

# Baseline before any training.
# evaluate() returns the loss followed by the compiled metrics, so with
# metrics=['accuracy'] the result is [loss, accuracy].
# verbose=0 keeps evaluate() silent; the values are printed explicitly below.
score = model.evaluate(x=x_test, y=y_test, verbose=0)
accuracy = score[1] * 100

# Accuracy of the untrained network, as a percentage
print("Pre-training accuracy: %.4f%%" % accuracy)

# Raw [loss, accuracy] pair
display(score)

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_1 (Dense)              (None, 128)               5248      
_________________________________________________________________
activation_1 (Activation)    (None, 128)               0         
_________________________________________________________________
dropout_1 (Dropout)          (None, 128)               0         
_________________________________________________________________
dense_2 (Dense)              (None, 128)               16512     
_________________________________________________________________
activation_2 (Activation)    (None, 128)               0         
_________________________________________________________________
dropout_2 (Dropout)          (None, 128)               0         
_________________________________________________________________
dense_3 (Dense)              (None, 4)                

[18.44668312072754, 0.35333332419395447]

In [6]:
import os
from datetime import datetime

from keras.callbacks import ModelCheckpoint

num_epochs = 100
num_batch_size = 8  # Arbitrarily chose the value 8

# Saving the checkpoint can fail if its target directory is missing,
# so make sure it exists before training starts.
checkpoint_path = 'saved_models/weights.best.basic_mlp.hdf5'
os.makedirs(os.path.dirname(checkpoint_path), exist_ok=True)

# save_best_only=True: the file is only overwritten when val_loss improves
checkpointer = ModelCheckpoint(filepath=checkpoint_path,
                               verbose=1, save_best_only=True)

start = datetime.now()

# Train the model for a fixed number of epochs
# validation_data - data on which the loss is evaluated at the end of each epoch
# callbacks - the ModelCheckpoint above, which saves the best weights seen so far
# NOTE(review): the test split is passed as validation data, so checkpoint
# selection sees the same samples later used to report testing accuracy.
model.fit(x_train, y_train,
          batch_size=num_batch_size,
          epochs=num_epochs,
          validation_data=(x_test, y_test),
          callbacks=[checkpointer],
          verbose=1)

duration = datetime.now() - start
print("Training completed in time: ", duration)

Train on 1346 samples, validate on 150 samples
Epoch 1/100

Epoch 00001: val_loss improved from inf to 1.56267, saving model to saved_models/weights.best.basic_mlp.hdf5
Epoch 2/100

Epoch 00002: val_loss improved from 1.56267 to 0.87693, saving model to saved_models/weights.best.basic_mlp.hdf5
Epoch 3/100

Epoch 00003: val_loss improved from 0.87693 to 0.85727, saving model to saved_models/weights.best.basic_mlp.hdf5
Epoch 4/100

Epoch 00004: val_loss improved from 0.85727 to 0.84789, saving model to saved_models/weights.best.basic_mlp.hdf5
Epoch 5/100

Epoch 00005: val_loss did not improve from 0.84789
Epoch 6/100

Epoch 00006: val_loss improved from 0.84789 to 0.84105, saving model to saved_models/weights.best.basic_mlp.hdf5
Epoch 7/100

Epoch 00007: val_loss improved from 0.84105 to 0.77692, saving model to saved_models/weights.best.basic_mlp.hdf5
Epoch 8/100

Epoch 00008: val_loss did not improve from 0.77692
Epoch 9/100

Epoch 00009: val_loss improved from 0.77692 to 0.75997, savi

In [7]:
# Test the Model
#
# Report accuracy on the training split and then on the testing split.
# evaluate() returns [loss, accuracy]; index 1 is the accuracy fraction.
# NOTE(review): no checkpoint is reloaded in this cell, so the weights evaluated
# are whatever `model` currently holds - confirm that is intended.
for label, features, targets in (
    ("Training Accuracy: ", x_train, y_train),
    ("Testing Accuracy: ", x_test, y_test),
):
    score = model.evaluate(features, targets, verbose=0)
    print(label, score[1]*100, "%")

Training Accuracy:  96.73105478286743 %
Testing Accuracy:  88.66666555404663 %


In [8]:
import librosa
import numpy as np

# Creating a function that extracts the MFCC features of an audio file
def extract_feature(file_name):
    """Extract a fixed-length MFCC feature vector from an audio file.

    The file is loaded with librosa, 40 MFCCs are computed, and the
    coefficients are averaged over time so that every file yields a vector
    of the same length regardless of its duration.

    Parameters
    ----------
    file_name : str
        Path of the audio file to read.

    Returns
    -------
    numpy.ndarray or None
        Array of shape (1, 40) holding the averaged MFCCs (a batch of one
        sample), or None if the file could not be loaded or processed.
    """
    try:
        audio_data, sample_rate = librosa.load(file_name, res_type='kaiser_fast')
        mfccs = librosa.feature.mfcc(y=audio_data, sr=sample_rate, n_mfcc=40)
        # Transpose so that axis 0 runs over time, then average it away
        mfccsscaled = np.mean(mfccs.T, axis=0)
    except Exception as e:
        # Best effort: report the failing file and let the caller decide what to do.
        # (The original referenced an undefined name here, which raised NameError.)
        print("Error encountered while parsing file: ", file_name)
        print("Reason: ", repr(e))
        return None

    # Wrap in an outer array so the result is a batch containing one sample
    return np.array([mfccsscaled])

In [9]:
# Creating a function that prints the model's prediction for one audio file
def print_prediction(file_name):
    """Print the predicted class and the per-class probabilities for a file.

    Relies on the notebook-level `model` (trained classifier), `le` (label
    encoder used to turn class indices back into names) and
    `extract_feature`.

    Parameters
    ----------
    file_name : str
        Path of the audio file to classify.

    Returns
    -------
    None
        Results are printed. Nothing is printed beyond a notice if the
        features could not be extracted.
    """
    # MFCCs of the specific file
    prediction_feature = extract_feature(file_name)
    if not isinstance(prediction_feature, np.ndarray):
        # Feature extraction failed; there is nothing to feed to the model
        print("Could not extract features from:", file_name)
        return

    # Run the model once. The softmax output holds one probability per class;
    # take the first (only) row of the batch.
    # predict() is used instead of predict_classes()/predict_proba(), which
    # are not available in newer Keras/TensorFlow releases.
    predicted_proba = model.predict(prediction_feature)[0]

    # The predicted class is the index with the highest probability
    predicted_index = int(np.argmax(predicted_proba))

    # Inverse transform converts encoded label indices back to strings
    predicted_class = le.inverse_transform(np.array([predicted_index]))
    print("The predicted class is:", predicted_class[0], '\n')

    # Probability (between 0 and 1) of the sample belonging to each class
    categories = le.inverse_transform(np.arange(len(predicted_proba)))
    for category, probability in zip(categories, predicted_proba):
        print(category, "\t\t : ", format(probability, '.32f'))

In [10]:
# Validation 
import os

# Classify two sample recordings, resolved relative to the current working directory
cwd = os.getcwd()
for sample_path in (
    '//UrbanSound8K//audio//fold1//101415-3-0-2.wav',  # Random dog bark file
    '//UrbanSound8K//audio//fold3//184623-8-0-1.wav',  # Siren file
):
    filename = cwd + sample_path
    print_prediction(filename)

The predicted class is: dog_bark 

car_horn 		 :  0.00000000001011129461409021601526
dog_bark 		 :  0.99999952316284179687500000000000
gun_shot 		 :  0.00000045024393102721660397946835
siren 		 :  0.00000000071334588236382501236221
The predicted class is: siren 

car_horn 		 :  0.00000000000008221627046701984742
dog_bark 		 :  0.00000000004464676892079744163766
gun_shot 		 :  0.00000000000000000069769081263304
siren 		 :  1.00000000000000000000000000000000
