## CNN Model for Audio Tagging

In [1]:
import numpy
import os
import librosa
import librosa.display
import tensorflow as tf
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
from tensorflow.keras.models import load_model

In [2]:
# Initialize all path variables
dir_path = os.getcwd()
x_data = os.path.join(dir_path, "X")
y_data = os.path.join(dir_path, "Y")
mfcc_data = os.path.join(dir_path, "MFCC")

In [3]:
def getData(dirpath):
    X_train = numpy.load(os.path.join(dir_path, 'X_train.npy'))
    X_test = numpy.load(os.path.join(dir_path, 'X_test.npy'))
    y_train = numpy.load(os.path.join(dir_path, 'y_train.npy'))
    y_test = numpy.load(os.path.join(dir_path, 'y_test.npy'))
    return X_train, y_train, X_test, y_test

In [4]:
def getModel(X_train):

    pool_size = (2, 2)
    kernel_size = (3, 3)
    input_shape = (X_train.shape[1], X_train.shape[2], X_train.shape[3])
    num_classes = 10
#     '''CNN1: '''    
#     model = tf.keras.models.Sequential([
#     #first_convolution
#     tf.keras.layers.Conv2D(32, kernel_size,
#                 padding="same", input_shape=input_shape),
#     tf.keras.layers.BatchNormalization(),
#     tf.keras.layers.Activation('relu'),
#     tf.keras.layers.MaxPooling2D(2, 2),
#     # tf.keras.layers.Dropout(0.25),
#     #second_convolution
#     tf.keras.layers.Conv2D(128, kernel_size,
#                                   padding="same"),
#     tf.keras.layers.BatchNormalization(),
#     tf.keras.layers.Activation('relu'),
#     tf.keras.layers.MaxPooling2D(2,2),
# #     tf.keras.layers.Dropout(0.25),
#     #third_convolution
#     tf.keras.layers.Conv2D(128, kernel_size,
#                                   padding="same"),
#     tf.keras.layers.BatchNormalization(),
#     tf.keras.layers.Activation('relu'),
#     tf.keras.layers.MaxPooling2D(2,2),
#     # tf.keras.layers.Dropout(0.25),
#     #fifth_convolution
#     tf.keras.layers.Conv2D(256, kernel_size,
#                                   padding="same"),
#     tf.keras.layers.BatchNormalization(),
#     tf.keras.layers.Activation('relu'),
#     tf.keras.layers.GlobalMaxPooling2D(),
#     # tf.keras.layers.Dropout(0.25),
#     #Fully connected 1st layer
#     tf.keras.layers.Flatten(),
#     tf.keras.layers.Dense(128, activation="relu"),
#     tf.keras.layers.Dense(10, activation="sigmoid") 
# ]) 

    '''CNN2:'''
    model = tf.keras.models.Sequential()
    model.add(tf.keras.layers.Conv2D(128, kernel_size=(2, 2), activation='relu', input_shape=input_shape))
    model.add(tf.keras.layers.MaxPooling2D(pool_size=(2, 2)))
    model.add(tf.keras.layers.Dropout(0.25))
    model.add(tf.keras.layers.Conv2D(128, kernel_size=(2, 2), activation = "relu"))
    model.add(tf.keras.layers.Conv2D(64, kernel_size=(2, 2), activation = "relu"))
    model.add(tf.keras.layers.Flatten())
    model.add(tf.keras.layers.Dense(128, activation='relu'))
    model.add(tf.keras.layers.Dropout(0.25))
    model.add(tf.keras.layers.Dense(10, activation='sigmoid'))

    model.compile(loss = 'binary_crossentropy', optimizer=tf.keras.optimizers.Adadelta(), metrics=['accuracy'])

    return model

In [5]:
X_train, y_train, X_test, y_test = getData(dir_path)

In [6]:
model = getModel(X_train)

In [7]:
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 19, 999, 128)      640       
                                                                 
 max_pooling2d (MaxPooling2D  (None, 9, 499, 128)      0         
 )                                                               
                                                                 
 dropout (Dropout)           (None, 9, 499, 128)       0         
                                                                 
 conv2d_1 (Conv2D)           (None, 8, 498, 128)       65664     
                                                                 
 conv2d_2 (Conv2D)           (None, 7, 497, 64)        32832     
                                                                 
 flatten (Flatten)           (None, 222656)            0         
                                                        

In [8]:
model.compile(optimizer = tf.keras.optimizers.Adam(1e-1), loss = 'binary_crossentropy', metrics = ['accuracy']) 

In [9]:
# To keep track of the best metrices obtained while training the model
from keras.callbacks import ModelCheckpoint
filepath = dir_path + 'my_best_model_cnn.hdf5'
checkpoint = ModelCheckpoint(filepath=filepath, monitor='accuracy', verbose=1, save_best_only=True, mode='max')

In [10]:
history=model.fit(X_train, y_train, validation_data=(X_test, y_test), batch_size=64, epochs=20, verbose = 1, callbacks=[checkpoint])

Epoch 1/20
Epoch 1: accuracy improved from -inf to 0.11300, saving model to C:\Users\Hp\OneDrive\Desktop\IITK\SEM5\EE603\Audio_Classification-MLSP_Assignment-2my_best_model_cnn.hdf5
Epoch 2/20
Epoch 2: accuracy did not improve from 0.11300
Epoch 3/20

KeyboardInterrupt: 

In [None]:
from matplotlib import pyplot as plt
model.metrics_names
plt.plot(history.history['accuracy'])
plt.plot(history.history['val_accuracy'])
plt.title('model accuracy')
plt.ylabel('accuracy')
plt.xlabel('epoch')
plt.legend(['train', 'val'], loc='upper left')
plt.show()
plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.title('model loss')
plt.ylabel('loss')
plt.xlabel('epoch')
plt.legend(['train', 'val'], loc='upper left')
plt.show()

In [None]:
print(model.predict(X_train[0]))
print(y_train[0])