# env-sound-classify -2

## Part 2 - Training Our Deep Learning Model

First, upload your x.npy and y.npy files to your Google Drive and place them in a folder of your choice. Alternatively, you can use your own local machine if it has a powerful GPU that can be used for training.



In [None]:
# Run this only if you need to use Google Drive.
# Mounts the Drive at /content/drive.

from google.colab import drive
drive.mount('/content/drive')


In [None]:
# Run the following code as it is

import numpy as np
import scipy.io.wavfile
import pandas as pd 
import os
from scipy.fftpack import dct
import matplotlib.pyplot as plt
from matplotlib import cm
from sklearn.model_selection import KFold
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
import seaborn as sns
from datetime import datetime
import cv2


import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense, Dropout, Activation, Flatten, Input
from tensorflow.keras.layers import Conv2D, MaxPooling2D, AveragePooling2D, GlobalAveragePooling2D, GlobalMaxPooling2D
from tensorflow.keras.layers import Conv1D, MaxPooling1D, GlobalAveragePooling1D
from tensorflow.keras.models import Model
from tensorflow.keras.applications.vgg16 import VGG16
from tensorflow.keras.optimizers import SGD

# Make sure that we are using the GPU version on Colab:
# print the GPU device name reported by TensorFlow.
print (tf.test.gpu_device_name())


Let's set the folder on Google Drive or your local computer to where the *.npy files are.


In [None]:
# Folder that holds the *.npy files. The value below points at a
# Google Drive location; if you are not using Google Drive, edit it
# so that it names the folder containing your *.npy files.
#
npy_folder = "/content/drive/My Drive/Data/A1"     # on google drive


### Load Our Processed Data

Here, write all the necessary code to load the .npy files that we processed in Part 1 of our work into memory. Once done, reshape the data so that it can be fed into our Keras models.

In [None]:
# Number of output classes of the final Dense layer, and the height/width
# of a single input sample fed to the first Conv2D layer.
num_classes = 10
input_size_0 = 44
input_size_1 = 40

# TODO:
# Load the training and test data
# NOTE: If you see the allow_pickle error when using Google Drive, just ignore 
#       it, and wait for a few minutes or re-upload the files again. 
#
# ..................... CODES START HERE ..................... #

# MFCC features for the training and test splits.
xmfcc_tr = np.load(npy_folder + '/x_mfcc_train.npy', encoding='bytes')
xmfcc_te = np.load(npy_folder + '/x_mfcc_test.npy', encoding='bytes')

# Spectrogram features for the training and test splits.
xspec_tr = np.load(npy_folder + '/x_spec_train.npy', encoding='bytes')
xspec_te = np.load(npy_folder + '/x_spec_test.npy', encoding='bytes')

# NOTE(review): y_train and y_test are used by the training and evaluation
# cells of this notebook but are never loaded here. Load the label arrays
# produced in Part 1 at this point (their file names are not shown in this
# notebook -- confirm against Part 1).
# ...................... CODES END HERE ...................... #


# TODO:
# Reshape the x_spec arrays to (n, w, h, 1) and
# reshape the x_mfcc arrays to (n, w, h, 1).
def reshape_x(x):
    """Return x reshaped from (n, w, h) to (n, w, h, 1).

    A trailing axis of size 1 is appended; the element values and their
    order are left untouched.
    """
    n_samples, width, height = x.shape[0], x.shape[1], x.shape[2]
    target_shape = (n_samples, width, height, 1)
    return x.reshape(target_shape)

# ..................... CODES START HERE ..................... #
# Add the trailing channel axis to the spectrogram arrays.
x_spec_train=reshape_x(xspec_tr)
x_spec_test=reshape_x(xspec_te)

# Add the trailing channel axis to the MFCC arrays.
x_mfcc_train=reshape_x(xmfcc_tr)
x_mfcc_test=reshape_x(xmfcc_te)

# ...................... CODES END HERE ...................... #

In [None]:
# Print the shapes of the reshaped arrays as a sanity check;
# each one should now end with an axis of size 1.
print (x_spec_train.shape)
print (x_spec_test.shape)
print (x_mfcc_train.shape)
print (x_mfcc_test.shape)

### Begin Training Our Different Convolutional Networks

Here we are going to create 2 different Convolutional Neural Network models. One model will predict using the spectrogram as the input. The second model will predict using the MFCC as the input.

### CNN for Spectrogram 

Let's design the CNN model. For a start, you can use the following design:
1. 2D Convolutional Layer, n1 3x3 filters
2. 2D Max Pooling 2x2
3. 2D Convolutional Layer, n2 3x3 filters
4. 2D Max Pooling 2x2
5. 2D Convolutional Layer, n3 3x3 filters
6. 2D Max Pooling 2x2
7. Dropout (0.0 - 1.0)
8. 2D Global Max Pooling
9. Dense (10 classes)

NOTE: You are free to add or remove layers as long as the validation accuracy is reasonable. A reasonable model should yield about >75% validation accuracy.

In [None]:
# In this ConvNet model, we will have a few Convolutional layers.
#
# Go ahead and modify the network structure to try to improve performance.
#
def create_spec_model():
    """Build the (uncompiled) CNN that classifies spectrogram inputs.

    The network is three Conv2D + MaxPooling2D stages (64, 128 and 256
    3x3 filters), followed by dropout, global max pooling and a softmax
    Dense layer with num_classes outputs. The expected input shape is
    (input_size_0, input_size_1, 1).

    Returns:
        The Sequential model. The caller is responsible for compiling it.
    """

    # TODO:
    # Define your best sequential model here.
    #
    # ..................... CODES START HERE ..................... #
    model = Sequential()
    model.add(Conv2D(64, kernel_size=(3, 3), activation='relu', input_shape=(input_size_0, input_size_1, 1)))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Conv2D(128, kernel_size=(3, 3), activation='relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Conv2D(256, kernel_size=(3, 3), activation='relu'))
    # One pooling layer per convolution, as in the design described for
    # this model (the original had two statements fused on one line here,
    # which did not parse and duplicated this pooling layer).
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(0.5))
    model.add(GlobalMaxPooling2D())

    model.add(Dense(num_classes, activation='softmax'))

    # ...................... CODES END HERE ...................... #

    # Without this return the caller would receive None and fail on .compile().
    return model
    

# Create the training folder.
# Each run gets its own timestamped sub-folder of npy_folder, so that
# checkpoints from different runs are kept apart.
#
training_session_id = datetime.now().strftime("%Y-%m-%d %H-%M-%S")
training_session_folder = npy_folder + '/a1_spec_train_%s' % (training_session_id)
os.makedirs(training_session_folder, exist_ok=True)
print (training_session_folder)


# Create the model and compile it.
# The metric is requested under the name 'acc' so that the logged keys
# ('acc' / 'val_acc') match the placeholders used in the checkpoint file
# name below. With 'accuracy', newer tf.keras versions log
# 'accuracy' / 'val_accuracy' and formatting the file name fails.
#
spec_model = create_spec_model()
spec_model.compile(loss=keras.losses.categorical_crossentropy,
              optimizer='adam',
              metrics=['acc'])


# Configure the checkpoint and stop point.
# This allows the training to save the best models and also stop the
# training early if it detects that there are no improvements after
# a long time.
#
callbacks_list = [
    # Save a model file only when val_loss improves; the file name records
    # the epoch number and the metrics of that epoch.
    keras.callbacks.ModelCheckpoint(
        filepath=training_session_folder + '/a1_spec_model.{epoch:04d}-acc-{acc:4.2f}-val_acc-{val_acc:4.2f}-loss-{val_loss:4.2f}.h5',
        monitor='val_loss', save_best_only=True),
    # Stop once val_loss has not improved for 100 consecutive epochs.
    keras.callbacks.EarlyStopping(monitor='val_loss', patience=100)
]

# Start training!
# The test split doubles as the validation data.
#
history_spec = spec_model.fit(x_spec_train, y_train, epochs=500, verbose=True, validation_data=(x_spec_test, y_test), callbacks=callbacks_list, batch_size=10)

    

### CNN for MFCC 

Let's design the CNN model. For a start, you can use the following design:
1. 2D Convolutional Layer, n1 3x3 filters
2. 2D Max Pooling 2x2
3. 2D Convolutional Layer, n2 3x3 filters
4. 2D Max Pooling 2x2
5. 2D Convolutional Layer, n3 3x3 filters
6. 2D Max Pooling 2x2
7. Dropout (0.0 - 1.0)
8. 2D Global Max Pooling
9. Dense (10 classes)

NOTE: You are free to add or remove layers as long as the validation accuracy is reasonable. A reasonable model should yield >75% validation accuracy.

In [None]:
def create_mfcc_model():
    """Build the (uncompiled) CNN that classifies MFCC inputs.

    The network is three Conv2D + MaxPooling2D stages (64, 128 and 256
    3x3 filters), followed by dropout, global max pooling and a softmax
    Dense layer with num_classes outputs. The expected input shape is
    (input_size_0, input_size_1, 1).

    Returns:
        The Sequential model. The caller is responsible for compiling it.
    """
    # TODO:
    # Design your best sequential model for MFCC here
    #
    # ..................... CODES START HERE ..................... #
    model = Sequential()
    model.add(Conv2D(64, kernel_size=(3, 3), activation='relu', input_shape=(input_size_0, input_size_1, 1)))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Conv2D(128, kernel_size=(3, 3), activation='relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Conv2D(256, kernel_size=(3, 3), activation='relu'))
    # One pooling layer per convolution, as in the design described for
    # this model (the original had two statements fused on one line here,
    # which did not parse and duplicated this pooling layer).
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(0.5))
    model.add(GlobalMaxPooling2D())

    model.add(Dense(num_classes, activation='softmax'))

    return model

    # ...................... CODES END HERE ...................... #


# Create the training folder.
# Each run gets its own timestamped sub-folder of npy_folder, so that
# checkpoints from different runs are kept apart.
#
training_session_id = datetime.now().strftime("%Y-%m-%d %H-%M-%S")
training_session_folder = npy_folder + '/a1_mfcc_train_%s' % (training_session_id)
os.makedirs(training_session_folder, exist_ok=True)
print (training_session_folder)


# Create the model and compile it.
# The metric is requested under the name 'acc' so that the logged keys
# ('acc' / 'val_acc') match the placeholders used in the checkpoint file
# name below. With 'accuracy', newer tf.keras versions log
# 'accuracy' / 'val_accuracy' and formatting the file name fails.
#
mfcc_model = create_mfcc_model()
mfcc_model.compile(loss=keras.losses.categorical_crossentropy,
              optimizer='adam',
              metrics=['acc'])


# Configure the checkpoint and stop point.
# This allows the training to save the best models and also stop the
# training early if it detects that there are no improvements after
# a long time.
#
callbacks_list = [
    # Save a model file only when val_loss improves; the file name records
    # the epoch number and the metrics of that epoch.
    keras.callbacks.ModelCheckpoint(
        filepath=training_session_folder + '/a1_mfcc_model.{epoch:04d}-acc-{acc:4.2f}-val_acc-{val_acc:4.2f}-loss-{val_loss:4.2f}.h5',
        monitor='val_loss', save_best_only=True),
    # Stop once val_loss has not improved for 100 consecutive epochs.
    keras.callbacks.EarlyStopping(monitor='val_loss', patience=100)
]

# Start training!
# The test split doubles as the validation data.
#
history_mfcc = mfcc_model.fit(x_mfcc_train, y_train, epochs=500, verbose=True, validation_data=(x_mfcc_test, y_test), callbacks=callbacks_list, batch_size=10)

### Evaluate Our Model

Let's take a look at the training process and see how well our models have performed on both the training and test sets.

In [None]:
# Class names used when visualizing the results.
#
labels = [
    "chainsaw",
    "clock_tick",
    "crackling_fire",
    "crying_baby",
    "dog",
    "helicopter",
    "rain",
    "rooster",
    "sea_waves",
    "sneezing",
]


Let's load up our best model to evaluate its performance.

In [None]:
# TODO:
# Change the name of the best performing h5 file to load it up.
# The "..." parts of the paths below are placeholders: replace them with
# the training session folder and the checkpoint file you want to load.
# Note that these assignments replace the mfcc_model / spec_model objects
# created during training.
#
mfcc_model = keras.models.load_model(npy_folder + "/.../a1_mfcc_model....h5")
spec_model = keras.models.load_model(npy_folder + "/.../a1_spec_model....h5")

Then, run the following code, as is, to evaluate the full performance of your training and test data.


In [None]:
# Run this cell as is to plot the training history and the confusion matrices.

#------------------------------------------------------------------------------------------

import matplotlib.pyplot as plt

# This function is used to display the history the train/test accuracy/loss
# of the Keras training.
#
#   history - Pass in the history returned from the model.fit(...) method.
#
def display_training_loss_and_accuracy(history):
    """Plot the train/test accuracy and loss curves of one training run.

    history - the object returned by model.fit(...). Its ``history`` dict
              must contain 'loss' and 'val_loss', plus the accuracy series
              under either 'acc'/'val_acc' or 'accuracy'/'val_accuracy'.

    Raises KeyError if one of the required series is missing.
    """
    logs = history.history

    # The accuracy series is logged under the name the metric was given at
    # compile time ('acc' or 'accuracy'), so accept either spelling.
    acc_key = 'acc' if 'acc' in logs else 'accuracy'

    plt.figure(figsize=(20,4))

    # summarize history for accuracy (left panel)
    plt.subplot(1, 2, 1)
    plt.plot(logs[acc_key])
    plt.plot(logs['val_' + acc_key])
    plt.title('model accuracy')
    plt.ylabel('accuracy')
    plt.xlabel('epoch')
    plt.legend(['train', 'test'], loc='upper left')

    # summarize history for loss (right panel)
    plt.subplot(1, 2, 2)
    plt.plot(logs['loss'])
    plt.plot(logs['val_loss'])
    plt.title('model loss')
    plt.ylabel('loss')
    plt.xlabel('epoch')
    plt.legend(['train', 'test'], loc='upper right')
    plt.show()

# Plot the curves recorded by the two model.fit(...) runs
# (history_spec and history_mfcc must exist in this session).
print ("Training #1 - Spectrogram model")
display_training_loss_and_accuracy(history_spec)
print ("Training #2 - MFCC model")
display_training_loss_and_accuracy(history_mfcc)


#------------------------------------------------------------------------------------------

import numpy as np
import pandas as pd
from sklearn.metrics import confusion_matrix, classification_report
import seaborn as sns
import matplotlib.pyplot as plt
from functools import reduce

def plot_confusion_matrix(actual_y, y, plot_title, plot_nrows, plot_ncols, plot_index, labels, mask=None):
    """Draw one confusion-matrix heatmap into a subplot of the current figure.

    actual_y    - 2-D array of model outputs, one row per sample; the argmax
                  of each row is used as the predicted class index.
    y           - 2-D array of target labels, one row per sample; the argmax
                  of each row is used as the true class index.
    plot_title  - title of the subplot.
    plot_nrows, plot_ncols, plot_index
                - passed to plt.subplot(...) to select the subplot.
    labels      - class names; labels[i] names class index i.
    mask        - optional index used to keep only some classes; it is
                  applied to the labels and to both matrix axes (assumed to
                  be a NumPy boolean or integer index -- confirm with callers).

    The caller creates the figure and calls plt.show().
    """
    y_index = y.argmax(axis=1)
    actual_y_index = actual_y.argmax(axis=1)

    labels = np.array(labels)
    if mask is not None:
        labels_masked = labels[mask]
    else:
        labels_masked = labels

    # Rows are the true classes and columns the predicted classes.
    # Passing every class index explicitly keeps the matrix at
    # len(labels) x len(labels) even when some class does not occur in
    # this data, so that it always lines up with the label names below.
    cm = confusion_matrix(y_index, actual_y_index, labels=np.arange(len(labels)))
    if mask is not None:
        cm = cm[:, mask][mask, :]

    cm_df = pd.DataFrame(cm, labels_masked, labels_masked)
    plt.subplot(plot_nrows, plot_ncols, plot_index)
    plt.title(plot_title)
    # fmt='d' prints the counts as plain integers; the default format
    # switches to scientific notation for counts of 100 or more.
    sns.heatmap(cm_df, annot=True, fmt='d')
    plt.ylabel('Actual')
    plt.xlabel('Predicted')

 
def _show_confusion_and_reports(title, pred_train, y_train, pred_test, y_test, labels, mask):
    """Plot train/test confusion matrices and print both classification reports for one predictor."""
    plt.figure(figsize=(20,5))
    plot_confusion_matrix(pred_train, y_train, "%s (Train)" % (title), 1, 2, 1, labels, mask)
    plot_confusion_matrix(pred_test, y_test, "%s (Test)" % (title), 1, 2, 2, labels, mask)
    plt.show()

    # Passing every class index keeps target_names aligned with the classes
    # even when a class does not occur in the data being reported.
    class_ids = list(range(len(labels)))

    # classification_report expects (y_true, y_pred): ground truth first.
    # The original passed the predictions first, which swaps precision and
    # recall in the printed report.
    print ("Train Data:")
    print(classification_report(y_train.argmax(axis = 1), pred_train.argmax(axis = 1), labels=class_ids, target_names=labels))
    print ("Test Data:")
    print(classification_report(y_test.argmax(axis = 1), pred_test.argmax(axis = 1), labels=class_ids, target_names=labels))


def display_classification_confusion_matrix(keras_model_list, x_train_list, y_train, x_test_list, y_test, labels, mask=None):
    """Show confusion matrices and classification reports for each model and for their ensemble.

    keras_model_list - models to evaluate; each must provide predict(x).
    x_train_list     - training inputs, x_train_list[i] is fed to model i.
    y_train          - training targets, one row per sample (argmax = class).
    x_test_list      - test inputs, x_test_list[i] is fed to model i.
    y_test           - test targets, one row per sample (argmax = class).
    labels           - class names; labels[i] names class index i.
    mask             - optional class selection forwarded to
                       plot_confusion_matrix (it is not applied to the
                       printed classification reports).

    The ensemble prediction is the element-wise mean of the outputs of all
    models, so every model must produce outputs of the same shape.
    """

    # Get predictions for all models
    actual_y_train_list = [model.predict(x_train_list[i]) for i,model in enumerate(keras_model_list)]
    actual_y_test_list = [model.predict(x_test_list[i]) for i,model in enumerate(keras_model_list)]

    # Ensemble by averaging the prediction from all models
    actual_y_train = reduce(lambda a, b: a + b, actual_y_train_list) / len(actual_y_train_list)
    actual_y_test = reduce(lambda a, b: a + b, actual_y_test_list) / len(actual_y_test_list)

    # Per-model results
    for i, (pred_train, pred_test) in enumerate(zip(actual_y_train_list, actual_y_test_list)):
        print ("Model %d" % (i))
        _show_confusion_and_reports("Model %d" % (i), pred_train, y_train, pred_test, y_test, labels, mask)
        print ("--------------------------------------------------------------------------------------------------------------")

    # Ensemble results
    print ("Ensemble")
    _show_confusion_and_reports("Ensemble", actual_y_train, y_train, actual_y_test, y_test, labels, mask)

    

# Display confusion matrix for all models.
# The model list and the two input lists must use the same order:
# spectrogram first, then MFCC.
#
display_classification_confusion_matrix([spec_model, mfcc_model], [x_spec_train, x_mfcc_train], y_train, [x_spec_test, x_mfcc_test], y_test, labels)

Ensure that the ensemble results perform better than the individual MFCC and Spectral models.

Proceed to download both of your best .h5 models and incorporate them into Part 3 of the assignment.

