In [None]:
import json

import matplotlib.pyplot as plt
import numpy as np
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
from sklearn.model_selection import train_test_split
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.layers import Dense, Dropout, Conv2D, MaxPooling2D, Flatten, BatchNormalization, Bidirectional, LSTM, Reshape
from tensorflow.keras.models import Sequential, Model, clone_model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.image import ImageDataGenerator

In [None]:
# Load the MFCC features and numeric genre labels from JSON, then carve out stratified
# train / validation / test partitions.
# Data was processed as in https://github.com/crlandsc/Music-Genre-Classification-Using-Convolutional-Neural-Networks/blob/main/code/01_data_processing.ipynb
filepath = '/content/drive/MyDrive/QTM_347_Proj/Data/data.json'
with open(filepath, "r") as fp:
    data = json.load(fp)

X, y = (np.array(data[key]) for key in ("mfcc", "genre_num"))

# Both splits hold out 30% with the same seed: first the test set from everything,
# then the validation set from what remains.
split_options = dict(test_size=0.3, random_state=42)
X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y, **split_options)
X_train, X_val, y_train, y_val = train_test_split(
    X_train, y_train, stratify=y_train, **split_options
)

print(f"X training data shape: {X_train.shape}, y training data shape: {y_train.shape}")
print(f"X validation data shape: {X_val.shape}, y validation data shape: {y_val.shape}")

In [None]:
# Format data for CNN model: append a trailing singleton axis to every split so each
# sample carries an explicit channel dimension.
X_train_cnn, X_val_cnn, X_test_cnn = (
    np.expand_dims(split, axis=-1) for split in (X_train, X_val, X_test)
)

# Per-sample shape (everything after the leading sample axis), handed to the first layer
input_shape = X_train_cnn.shape[1:4]

In [None]:
# Build the CNN: three Conv2D -> BatchNormalization -> MaxPooling2D -> Dropout blocks,
# followed by a dense classifier head with a 10-way softmax output.
#Credit: https://github.com/crlandsc/Music-Genre-Classification-Using-Convolutional-Neural-Networks/blob/main/code/02_modeling.ipynb
model_cnn = Sequential([
    # Block 1: 32 filters, 3x3 kernel
    Conv2D(32, 3, activation='relu', input_shape=input_shape),
    BatchNormalization(),
    MaxPooling2D(3, strides=(2,2), padding='same'),
    Dropout(0.2),

    # Block 2: 64 filters, 3x3 kernel
    Conv2D(64, 3, activation='relu'),
    BatchNormalization(),
    MaxPooling2D(3, strides=(2,2), padding='same'),
    Dropout(0.1),

    # Block 3: 64 filters, 2x2 kernel
    Conv2D(64, 2, activation='relu'),
    BatchNormalization(),
    MaxPooling2D(2, strides=(2,2), padding='same'),
    Dropout(0.1),

    # Classifier head
    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(10, activation='softmax'),
])

model_cnn.summary()

# Integer class labels are used directly, hence the sparse categorical loss
optimizer_cnn = Adam(learning_rate=0.0001)
model_cnn.compile(
    optimizer=optimizer_cnn,
    loss='sparse_categorical_crossentropy',
    metrics=['acc'],
)

In [None]:
#Train CNN model
# Training batches come from an augmenting iterator that applies random vertical flips.
# When `fit` is fed a generator the batch size is taken from the iterator itself, so it
# must be set on `datagen.flow` (which otherwise defaults to 32) and not on `fit`.
datagen = ImageDataGenerator(vertical_flip=True)
# Stop after 20 epochs without a val_loss improvement and roll back to the best epoch,
# instead of keeping the weights from 20 epochs past the optimum.
es_cnn = EarlyStopping(monitor='val_loss', patience=20, min_delta=0, restore_best_weights=True)
hist_cnn = model_cnn.fit(
    datagen.flow(X_train_cnn, y_train, batch_size=64),
    validation_data=(X_val_cnn, y_val),
    epochs=400,
    verbose=1,
    callbacks=[es_cnn]
)

In [None]:
# Score the trained CNN on the held-out test split; evaluate() yields the loss followed
# by the 'acc' metric the model was compiled with.
test_metrics_cnn = model_cnn.evaluate(X_test_cnn, y_test)
loss_cnn, acc_cnn = test_metrics_cnn
print(f"Test Loss: {loss_cnn}")
print(f"Test Accuracy: {acc_cnn}")

In [None]:
def plot_history(hist):
    """
    Plots the accuracy and loss for a model over the course of all epochs

    Draws two stacked panels (accuracy on top, loss below). Each panel shows the
    training curve in navy and, when it was recorded, the validation curve in orange.

    Parameters:
        hist (keras history object): The recorded history of model.fit() to be plotted.
            Its `history` dict must contain "loss" and either "acc" or "accuracy";
            the matching "val_*" entries are plotted only if present.

    Raises:
        KeyError: If neither "acc" nor "accuracy" (or no "loss") was recorded.

    Credit: https://github.com/crlandsc/Music-Genre-Classification-Using-Convolutional-Neural-Networks/blob/main/code/02_modeling.ipynb
    """
    records = hist.history

    # The accuracy key mirrors whatever name was passed to compile(metrics=[...]).
    acc_key = next((key for key in ("acc", "accuracy") if key in records), None)
    if acc_key is None:
        raise KeyError("history has no 'acc' or 'accuracy' metric to plot")

    fig, axs = plt.subplots(2, 1, figsize=(8,7))

    # (axis, history key, display name, legend position) for each panel
    panels = (
        (axs[0], acc_key, "Accuracy", "lower right"),
        (axs[1], "loss", "Loss", "upper right"),
    )
    for ax, key, name, legend_loc in panels:
        ax.plot(records[key], c='navy', label=f"Training {name}")
        val_key = f"val_{key}"
        if val_key in records:
            ax.plot(records[val_key], c='orange', label=f"Validation {name}")
        ax.set_ylabel(name)
        ax.legend(loc=legend_loc)
        ax.set_title(name)
    axs[1].set_xlabel("Epochs")

    # Lay out only after titles and labels exist so their extents are accounted for.
    fig.tight_layout(pad=2)

    plt.show()

In [None]:
# Plot the CNN's per-epoch accuracy and loss curves recorded by model_cnn.fit()
plot_history(hist_cnn)

In [None]:
# Save the trained CNN model to disk as a single .h5 file
# NOTE(review): the path presumably points at a mounted Google Drive — confirm it is mounted before running
model_cnn.save('/content/drive/MyDrive/QTM_347_Proj/Model/model_cnn.h5')

In [None]:
#Prepare CNN model for LSTM addition
# Work on a weight-for-weight copy of the trained CNN. Building the new model directly
# on model_cnn's layers would share their weights, so fine-tuning the CNN-LSTM would
# silently alter model_cnn and invalidate any later predictions made with it.
cnn_base = clone_model(model_cnn)
cnn_base.set_weights(model_cnn.get_weights())

# Cut the network after the third conv block's Dropout (5th layer from the end),
# discarding the Flatten/Dense classifier head.
model_cnn_lstm = Model(inputs=cnn_base.inputs, outputs=cnn_base.layers[-5].output)

# Fine-tune only the last conv block (Conv2D, BatchNormalization, MaxPooling2D, Dropout).
# The layers are picked by position: auto-generated names such as 'conv2d_8' depend on
# how many models were built in the current kernel and do not exist on a fresh run.
layers_to_unfreeze = [layer.name for layer in cnn_base.layers[-8:-4]]

for layer in model_cnn_lstm.layers:
    layer.trainable = layer.name in layers_to_unfreeze

In [None]:
# Build the CNN-LSTM model: the truncated CNN feeds a stack of bidirectional LSTMs and a
# dense classifier head, with batch normalisation after every stage.
# Flatten the CNN feature map into a sequence of 64-wide vectors for the recurrent layers
x = Reshape((-1, 64))(model_cnn_lstm.output)

# Recurrent stack: only the final LSTM collapses the sequence to a single vector
for lstm_units, keep_sequence in ((256, True), (128, True), (64, False)):
    x = Bidirectional(LSTM(lstm_units, return_sequences=keep_sequence))(x)
    x = BatchNormalization()(x)

# Dense head
for dense_units in (256, 128):
    x = Dense(dense_units, activation='relu')(x)
    x = BatchNormalization()(x)
outputs = Dense(10, activation='softmax')(x)

# Rebind the name to the full network, from the CNN's inputs through to the softmax
model_cnn_lstm = Model(inputs=model_cnn_lstm.inputs, outputs=outputs)
optimizer_cnn_lstm = Adam(learning_rate=0.0001)
model_cnn_lstm.compile(
    optimizer=optimizer_cnn_lstm,
    loss='sparse_categorical_crossentropy',
    metrics=['acc'],
)

model_cnn_lstm.summary()

In [None]:
#Train CNN-LSTM Model
# Training batches come from an augmenting iterator that applies random vertical flips.
# When `fit` is fed a generator the batch size is taken from the iterator itself, so it
# must be set on `datagen.flow` (which otherwise defaults to 32) and not on `fit`.
datagen = ImageDataGenerator(vertical_flip=True)
# Stop after 20 epochs without a val_loss improvement and roll back to the best epoch,
# instead of keeping the weights from 20 epochs past the optimum.
es_cnn_lstm = EarlyStopping(monitor='val_loss', patience=20, min_delta=0, restore_best_weights=True)
hist_cnn_lstm = model_cnn_lstm.fit(
    datagen.flow(X_train_cnn, y_train, batch_size=64),
    validation_data=(X_val_cnn, y_val),
    epochs=400,
    verbose=1,
    callbacks=[es_cnn_lstm]
)

In [None]:
# Compute test loss and accuracy for the CNN-LSTM; evaluate() yields the loss followed
# by the 'acc' metric the model was compiled with.
test_metrics_cnn_lstm = model_cnn_lstm.evaluate(X_test_cnn, y_test)
loss_cnn_lstm, acc_cnn_lstm = test_metrics_cnn_lstm
print(f"Test Loss: {loss_cnn_lstm}")
print(f"Test Accuracy: {acc_cnn_lstm}")

In [None]:
# Plot the CNN-LSTM's per-epoch accuracy and loss curves recorded by model_cnn_lstm.fit()
plot_history(hist_cnn_lstm)

In [None]:
# Save the trained CNN-LSTM model to disk as a single .h5 file
# NOTE(review): the path presumably points at a mounted Google Drive — confirm it is mounted before running
model_cnn_lstm.save('/content/drive/MyDrive/QTM_347_Proj/Model/model_cnn_lstm.h5')

In [None]:
#Get Confusion matrices
def make_prediction(model, X):
    """
    Makes predictions given a keras model and a set of data.

    All samples are scored in a single batched `model.predict` call rather than one
    call per sample, which avoids the per-call overhead and progress output.

    Parameters:
        model (keras model): The model to use to make predictions
        X (np.ndarray): Testing data to be fed into the model to make predictions with.
            The first axis indexes samples.

    Returns:
        preds_num (list): List of genre predictions (number format, 0-9)
        preds_name (list): List of genre predictions (str format)

    Credit: https://github.com/crlandsc/Music-Genre-Classification-Using-Convolutional-Neural-Networks/blob/main/code/02_modeling.ipynb
    """
    genres = ['blues', 'classical', 'country', 'disco', 'hiphop', 'jazz', 'metal', 'pop', 'reggae', 'rock']

    # Nothing to score: return empty results without calling the model.
    if len(X) == 0:
        return [], []

    class_scores = model.predict(X)
    # The highest-scoring class per row is the predicted genre index.
    preds_num = np.argmax(class_scores, axis=1).tolist()
    preds_name = [genres[index] for index in preds_num]
    return preds_num, preds_name

# Collect numeric and named test-set predictions from each model, CNN first
cnn_predictions = make_prediction(model_cnn, X_test_cnn)
preds_num_cnn, preds_name_cnn = cnn_predictions

cnn_lstm_predictions = make_prediction(model_cnn_lstm, X_test_cnn)
preds_num_cnn_lstm, preds_name_cnn_lstm = cnn_lstm_predictions

def plot_confusion_matrix(y_test, preds_num, title='Genre Classification (3 second clips)'):
    """
    Creates confusion matrix of predicted and true labels.

    Parameters:
        y_test: True output (numeric genre labels, 0-9)
        preds_num: Predicted output (numeric genre labels, 0-9).
        title (str): Title drawn above the matrix; use it to tell apart the plots of
            different models.

    Returns:
        None. The confusion matrix is drawn on a new 8x8 inch figure.
    """
    genres = ['blues', 'classical', 'country', 'disco', 'hiphop', 'jazz', 'metal', 'pop', 'reggae', 'rock']
    # Pin the label set so the matrix always has one row/column per genre, even if a
    # genre is absent from both inputs; otherwise it would not line up with the tick labels.
    cm = confusion_matrix(y_test, preds_num, labels=list(range(len(genres))))
    fig, ax = plt.subplots(figsize=(8,8))
    disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=genres)
    disp.plot(ax=ax, cmap = 'Purples', xticks_rotation='vertical')
    # Set the title on the matrix axes explicitly rather than on pyplot's "current" axes
    ax.set_title(title)

plot_confusion_matrix(y_test, preds_num_cnn, title='CNN: Genre Classification (3 second clips)')
plot_confusion_matrix(y_test, preds_num_cnn_lstm, title='CNN-LSTM: Genre Classification (3 second clips)')