# Genre Classifier
## This is the code for the genre classification model. It classifies samples of songs in the GTZAN dataset into 10 different genres using a neural network.

In [5]:
import json
import numpy as np
from sklearn.model_selection import train_test_split
import tensorflow.keras as keras
from tensorflow.keras.layers import LSTM, Dropout, Dense

In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
DATA_PATH = "/content/drive/MyDrive/Colab Notebooks/data_10.json"

Created the X and y variables for the dataset, which include the mfcc features and their labels. For example, mfcc features for one song sample would be labelled 'Blues'.

In [6]:
with open(DATA_PATH, "r") as fp:
    data = json.load(fp)
    # convert lists to numpy arrays
    X = np.array(data["mfcc"])
    y = np.array(data["labels"])

Split data into train and test set

In [7]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.35)

Building the neural network using a LSTM input layer. The hidden layer has a default rectified linear unit activation function. Output layer of 10 neurons for each genre in the dataset with a softmax activation function. Dropout layers were placed after the input layer and last 2 hidden layers to reduce overfitting.

In [8]:
# build network topology
input_shape=(X.shape[1],X.shape[2])
model = keras.Sequential()
model.add(LSTM(128,input_shape=input_shape))
model.add(Dropout(0.2))
model.add(Dense(128, activation='relu'))
model.add(Dense(64, activation='relu'))
model.add(Dropout(0.4))
model.add(Dense(48, activation='relu'))
model.add(Dropout(0.4))
model.add(Dense(10, activation='softmax'))
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm (LSTM)                 (None, 128)               72704     
                                                                 
 dropout (Dropout)           (None, 128)               0         
                                                                 
 dense (Dense)               (None, 128)               16512     
                                                                 
 dense_1 (Dense)             (None, 64)                8256      
                                                                 
 dropout_1 (Dropout)         (None, 64)                0         
                                                                 
 dense_2 (Dense)             (None, 48)                3120      
                                                                 
 dropout_2 (Dropout)         (None, 48)                0

Used adam optimization algorithm along with sparse categorical cross entropy loss function.

In [9]:
optimiser = keras.optimizers.Adam(learning_rate=0.0001)
model.compile(optimizer=optimiser, loss='sparse_categorical_crossentropy', metrics=['accuracy'])
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm (LSTM)                 (None, 128)               72704     
                                                                 
 dropout (Dropout)           (None, 128)               0         
                                                                 
 dense (Dense)               (None, 128)               16512     
                                                                 
 dense_1 (Dense)             (None, 64)                8256      
                                                                 
 dropout_1 (Dropout)         (None, 64)                0         
                                                                 
 dense_2 (Dense)             (None, 48)                3120      
                                                                 
 dropout_2 (Dropout)         (None, 48)                0

Fit the model to the training set, and validated on the test set. The overall accuracy after 50 epochs was 72.40% with validation accuracy of 66%. There is slight overfitting however the model seems fine to present at this point.

In [12]:
history = model.fit(X_train, y_train, validation_data=(X_test, y_test), batch_size=32, epochs=50)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


Make predictions using the model and print out the confusion matrix and classification report results

In [13]:
y_pred = model.predict(X_test)

In [26]:
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report

print(confusion_matrix(y_test,np.argmax(y_pred,axis=1)))
print(classification_report(y_test,np.argmax(y_pred,axis=1)))

[[276   0  17   0  16   1   7  21  13   2]
 [  0 312   7  18   4   0  15   9   1   1]
 [ 15  12 190  16  18   4  28  43  19   0]
 [  0  11  15 223  12   1  11  14  35  23]
 [  2   2  27  18 223   0   8  40  18   6]
 [  0   0   0   2   0 335   6   0   5  18]
 [  7  17  59  43  19   1 110   5  71   9]
 [ 17  16  32   6  51   0  13 195   7   3]
 [  6   3  20  32  30   1  25   3 197  27]
 [  0   0   3  29  12  25   3   0  43 236]]
              precision    recall  f1-score   support

           0       0.85      0.78      0.82       353
           1       0.84      0.85      0.84       367
           2       0.51      0.55      0.53       345
           3       0.58      0.65      0.61       345
           4       0.58      0.65      0.61       344
           5       0.91      0.92      0.91       366
           6       0.49      0.32      0.39       341
           7       0.59      0.57      0.58       340
           8       0.48      0.57      0.52       344
           9       0.73     

Manipulating the confusion matrix to print out average sensitivity and specificity scores for the model.

In [51]:
cnf_matrix = confusion_matrix(y_test,np.argmax(y_pred,axis=1))

FP = cnf_matrix.sum(axis=1) - np.diag(cnf_matrix) 
FN = cnf_matrix.sum(axis=0) - np.diag(cnf_matrix)
TP = np.diag(cnf_matrix)
TN = cnf_matrix.sum(axis=1) + cnf_matrix.sum(axis=0) - (FP + FN + TP)

In [52]:
FP = FP.astype(float)
FN = FN.astype(float)
TP = TP.astype(float)
TN = TN.astype(float)
# Sensitivity, hit rate, recall, or true positive rate
TPR = TP/(TP+FN)
# Specificity or true negative rate
TNR = TN/(TN+FP) 
# Precision or positive predictive value
PPV = TP/(TP+FP)
# Negative predictive value
NPV = TN/(TN+FN)
# Fall out or false positive rate
FPR = FP/(FP+TN)
# False negative rate
FNR = FN/(TP+FN)
# False discovery rate
FDR = FP/(TP+FP)
# Overall accuracy for each class
ACC = (TP+TN)/(TP+FP+FN+TN)

In [57]:
print(TPR.mean(),TNR.mean())

0.6555689252430534 0.6533812885739544
