## RNN Model for Audio Tagging

In [1]:
import numpy
import os
import librosa
import librosa.display
import tensorflow as tf
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
from tensorflow.keras.models import load_model

In [2]:
# Every data location is resolved relative to the directory the notebook
# is launched from (the current working directory).
dir_path = os.getcwd()
x_data, y_data, mfcc_data = (
    os.path.join(dir_path, folder) for folder in ("X", "Y", "MFCC")
)

In [3]:
def getData(dirpath):
    """Load the pre-split train/test arrays stored as ``.npy`` files.

    Parameters
    ----------
    dirpath : str or os.PathLike
        Directory containing ``X_train.npy``, ``X_test.npy``,
        ``y_train.npy`` and ``y_test.npy``.

    Returns
    -------
    tuple of numpy.ndarray
        ``(X_train, y_train, X_test, y_test)`` -- note the order: both
        training arrays first, then both test arrays.

    Raises
    ------
    OSError
        Propagated from ``numpy.load`` when one of the files cannot be
        opened (e.g. it does not exist in ``dirpath``).
    """
    # Read from the *argument*. The previous version ignored ``dirpath`` and
    # silently used the notebook-level ``dir_path`` global instead.
    def _load(filename):
        return numpy.load(os.path.join(dirpath, filename))

    X_train = _load('X_train.npy')
    X_test = _load('X_test.npy')
    y_train = _load('y_train.npy')
    y_test = _load('y_test.npy')
    return X_train, y_train, X_test, y_test

In [4]:
# Import everything from tensorflow.keras, the same namespace the rest of the
# notebook uses (load_model, tf.keras.optimizers). Sequential, Dense and
# Dropout were previously used without being imported at all.
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.models import Sequential

def getModel(X_train, num_classes=None, input_shape=(99, 13)):
    """Build and compile the stacked-LSTM classifier.

    Architecture: LSTM(128, returns sequences) -> Dropout -> LSTM(64) ->
    Dropout -> Dense(64, relu) -> Dropout -> Dense(num_classes, softmax),
    compiled with rmsprop and categorical cross-entropy.

    Parameters
    ----------
    X_train : array-like
        Currently unused; kept so existing ``getModel(X_train)`` calls work.
    num_classes : int, optional
        Width of the softmax output layer. When omitted, it falls back to
        ``y_train.shape[1]`` of the notebook-level ``y_train`` (the original
        behaviour), so that variable must exist before the call.
    input_shape : sequence of int, optional
        Per-sample input shape handed to the first LSTM layer. Defaults to
        the previously hard-coded ``(99, 13)``.

    Returns
    -------
    A compiled Keras ``Sequential`` model.

    Raises
    ------
    NameError
        If ``num_classes`` is omitted and no global ``y_train`` is defined.
    """
    if num_classes is None:
        # Backward-compatible fallback: the original body read the class
        # count from the global label array.
        num_classes = y_train.shape[1]

    model = Sequential()

    # First recurrent layer keeps the full sequence so a second LSTM can
    # be stacked on top of it.
    model.add(LSTM(units=128, recurrent_dropout=0.35, return_sequences=True, input_shape=tuple(input_shape)))
    model.add(Dropout(0.25))
    # Second recurrent layer emits only its final state.
    model.add(LSTM(units=64, recurrent_dropout=0.35, return_sequences=False))
    model.add(Dropout(0.25))
    model.add(Dense(64, activation='relu'))
    model.add(Dropout(0.25))
    model.add(Dense(units=num_classes, activation="softmax"))

    model.compile(optimizer='rmsprop', loss='categorical_crossentropy', metrics=['accuracy'])

    return model

In [5]:
# Load the four pre-split arrays; getData returns them in the order
# (X_train, y_train, X_test, y_test).
X_train, y_train, X_test, y_test = getData(dir_path)

In [6]:
# Inspect the dimensions of the test features.
# NOTE(review): the recorded output below is 4-D, (3000, 20, 1000, 1), while
# getModel's first LSTM layer declares a 2-D per-sample input shape of
# (99, 13) -- confirm which feature set this notebook is meant to train on.
X_test.shape

(3000, 20, 1000, 1)

In [7]:
# Build and compile the LSTM network. getModel also reads the notebook-level
# y_train to size the output layer, so the data-loading cell must run first.
model = getModel(X_train)

In [8]:
# Print the layer-by-layer summary of the model.
# NOTE(review): the recorded output below lists Conv2D / BatchNormalization
# layers rather than the LSTM stack defined in getModel -- it looks stale;
# re-run this cell after a kernel restart to refresh it.
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 20, 1000, 32)      320       
                                                                 
 batch_normalization (BatchN  (None, 20, 1000, 32)     128       
 ormalization)                                                   
                                                                 
 activation (Activation)     (None, 20, 1000, 32)      0         
                                                                 
 max_pooling2d (MaxPooling2D  (None, 10, 500, 32)      0         
 )                                                               
                                                                 
 conv2d_1 (Conv2D)           (None, 10, 500, 128)      36992     
                                                                 
 batch_normalization_1 (Batc  (None, 10, 500, 128)     5

In [12]:
# Re-compile the model with Adam (learning rate 1e-2). This replaces the
# rmsprop / categorical_crossentropy configuration applied inside getModel.
# NOTE(review): the output layer is softmax, yet the loss here is
# binary_crossentropy -- confirm whether the labels are one-hot
# (categorical_crossentropy) or multi-hot (sigmoid outputs).
model.compile(
    loss='binary_crossentropy',
    optimizer=tf.keras.optimizers.Adam(1e-2),
    metrics=['accuracy'],
)

In [13]:
# Keep track of the best metrics obtained while training the model by
# saving a checkpoint whenever the monitored value improves.
from tensorflow.keras.callbacks import ModelCheckpoint

# os.path.join supplies the path separator. Plain concatenation glued the
# file name onto the directory name, so the checkpoint was written next to
# the project folder as "<folder>my_best_model_cnn.hdf5" instead of inside it.
filepath = os.path.join(dir_path, 'my_best_model_cnn.hdf5')

# NOTE(review): monitor='accuracy' ranks checkpoints by *training* accuracy;
# if the intent is to keep the best-generalising model, 'val_accuracy' is the
# usual choice -- confirm before changing.
checkpoint = ModelCheckpoint(filepath=filepath, monitor='accuracy', verbose=1, save_best_only=True, mode='max')

In [None]:
# Train for 20 epochs in mini-batches of 32; the checkpoint callback saves
# the model whenever its monitored metric improves.
# NOTE(review): the test split is also used as validation data here --
# confirm that no separate held-out set is required for final evaluation.
history = model.fit(
    X_train,
    y_train,
    validation_data=(X_test, y_test),
    epochs=20,
    batch_size=32,
    callbacks=[checkpoint],
    verbose=1,
)

Epoch 1/20
Epoch 1: accuracy improved from -inf to 0.26629, saving model to C:\Users\Hp\OneDrive\Desktop\IITK\SEM5\EE603\Audio_Classification-MLSP_Assignment-2my_best_model_cnn.hdf5
Epoch 2/20
Epoch 2: accuracy improved from 0.26629 to 0.29857, saving model to C:\Users\Hp\OneDrive\Desktop\IITK\SEM5\EE603\Audio_Classification-MLSP_Assignment-2my_best_model_cnn.hdf5
Epoch 3/20
Epoch 3: accuracy improved from 0.29857 to 0.30686, saving model to C:\Users\Hp\OneDrive\Desktop\IITK\SEM5\EE603\Audio_Classification-MLSP_Assignment-2my_best_model_cnn.hdf5
Epoch 4/20
Epoch 4: accuracy improved from 0.30686 to 0.32414, saving model to C:\Users\Hp\OneDrive\Desktop\IITK\SEM5\EE603\Audio_Classification-MLSP_Assignment-2my_best_model_cnn.hdf5
Epoch 5/20
Epoch 5: accuracy improved from 0.32414 to 0.33343, saving model to C:\Users\Hp\OneDrive\Desktop\IITK\SEM5\EE603\Audio_Classification-MLSP_Assignment-2my_best_model_cnn.hdf5
Epoch 6/20
Epoch 6: accuracy improved from 0.33343 to 0.34543, saving model to

In [None]:
def plot_history_metric(history, metric):
    """Plot the train and validation curves of one metric across epochs.

    Parameters
    ----------
    history : object returned by ``model.fit``
        Its ``history`` dict is read at the keys ``metric`` and
        ``'val_' + metric``.
    metric : str
        Name of the logged quantity, e.g. ``'accuracy'`` or ``'loss'``.

    Raises
    ------
    KeyError
        If either key is missing from ``history.history``.
    """
    # Explicit figure/axes so each metric gets its own, self-contained plot.
    fig, ax = plt.subplots()
    ax.plot(history.history[metric], label='train')
    ax.plot(history.history['val_' + metric], label='val')
    ax.set_title('model ' + metric)
    ax.set_ylabel(metric)
    ax.set_xlabel('epoch')
    ax.legend(loc='upper left')
    plt.show()


# One figure per tracked quantity. This replaces two copy-pasted plotting
# sequences, a redundant re-import of pyplot, and a bare
# `model.metrics_names` expression that produced no output.
plot_history_metric(history, 'accuracy')
plot_history_metric(history, 'loss')