# Q4 - Evaluation on the GTZAN Dataset

## Import Libraries

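In addition to the library imports, we add the parent directory to the module search path so that the `utils` package can be imported. It contains the source code for the model (SwishNet) and for the learning-rate scheduler that implements Cosine Annealing with Warm Restarts (SGDR).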

In [None]:
import sys
import os
import time
import h5py
import tqdm
import pickle
from pathlib import Path
from IPython.display import clear_output

# in jupyter (lab / notebook), based on notebook path
module_path = str(Path.cwd().parents[0])

if module_path not in sys.path:
    sys.path.append(module_path)

from utils.SwishNet import *
from utils.SGDRScheduler import *

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

import tensorflow as tf
import tensorflow.keras.backend as K

from tensorflow.keras.optimizers import Adam
from tensorflow.keras import Model
from tensorflow.keras.callbacks import Callback, ModelCheckpoint, EarlyStopping

In [None]:
# Display the installed TensorFlow version (useful when reproducing results)
tf.__version__

In [None]:
# Clip length used to select the matching dataset / model files
# (presumably in seconds, given the "s" suffix in the file names)
CLIP_LEN = 2

# File path to dataset

root_path = f'../data/gtzan_dataset_{CLIP_LEN}s.h5'

## Compile Datasets

25% of the GTZAN corpus is used for the training set and the remaining 75% is used for evaluation

In [None]:
%%time

# First produce a list of the file names to create splits.
# NOTE: the list is stored as a pickle (despite the .txt extension); only
# load pickle files that come from a trusted source.

f_list_path = '../data/gtzan_f_list_{}s.txt'.format(CLIP_LEN)

with open(f_list_path, 'rb') as fp:
    f_list = pickle.load(fp)

In [None]:
# The split is made over positions into f_list rather than over the names
idx_list = [*range(len(f_list))]

# 25% of the indices go to the train split, 75% to the test split;
# the fixed random_state keeps the split reproducible
train_idx, test_idx = train_test_split(
    idx_list,
    test_size=0.75,
    shuffle=True,
    random_state=2021,
)

In [None]:
# A simple generator class in order to build the necessary datasets for training

class data_gen:
    """Read features and labels from an HDF5 file in batches.

    Each entry of ``file_list`` is used as a key into the HDF5 file at
    ``data_path``; the object stored under that key is read through its
    ``'mfcc'`` and ``'label'`` members. ``idx_list`` holds the positions of
    ``file_list`` that belong to this split.
    """

    def __init__(self, file_list, idx_list, data_path):
        self.file_list = file_list
        self.idx_list = idx_list
        self.data_path = data_path

    def chunker(self, lst, n, shuffle):
        """Split ``lst`` into consecutive chunks of at most ``n`` items.

        Args:
            lst: Sequence of items to split. It is never modified.
            n: Maximum number of items per chunk.
            shuffle: If true, the items are shuffled (using NumPy's global
                random state) before being split.

        Returns:
            A list of lists; the last chunk may hold fewer than ``n`` items.
        """
        # Work on a copy: shuffling ``lst`` itself would silently reorder the
        # caller's index list (e.g. the train/test index lists).
        items = list(lst)
        if shuffle:
            np.random.shuffle(items)

        return [items[start:start + n] for start in range(0, len(items), n)]

    def gen(self, batch_size=100, shuffle=True):
        """Yield ``[features, labels]`` batches read from the HDF5 file.

        Args:
            batch_size: Maximum number of entries per yielded batch.
            shuffle: Whether to shuffle the indices before batching.

        Yields:
            A two-element list ``[batch_features, batch_labels]`` of NumPy
            arrays, in the same (possibly shuffled) entry order.
        """
        batches = self.chunker(self.idx_list, batch_size, shuffle=shuffle)

        # The file stays open for as long as the generator is being consumed.
        with h5py.File(self.data_path, mode='r') as db:
            for batch_indexes in batches:
                # Look each entry up once and read both members from it.
                entries = [db[self.file_list[i]] for i in batch_indexes]

                batch_features = np.array([entry['mfcc'][()] for entry in entries])
                batch_labels = np.array([entry['label'][()] for entry in entries])

                yield [batch_features, batch_labels]

    def build_dataset(self, chunk_size=100, shuffle=True):
        """Read the whole split into memory.

        Args:
            chunk_size: Number of entries read per batch.
            shuffle: Whether to shuffle the entries before reading.

        Returns:
            ``(X, Y)`` where ``X`` is the row-wise stack (``np.vstack``) of
            all feature batches and ``Y`` the concatenation (``np.hstack``)
            of all label batches, in matching order.

        Raises:
            ValueError: Raised by NumPy if the split is empty, because there
                is nothing to stack.
        """
        st = time.time()

        # Ceiling division so that a trailing partial batch is counted too.
        n_batches = -(-len(self.idx_list) // chunk_size)

        X = []
        Y = []
        batches = self.gen(batch_size=chunk_size, shuffle=shuffle)
        for batch_no, (x, y) in enumerate(batches, start=1):
            clear_output(wait=True)
            print('Batch {} / {} read'.format(batch_no, n_batches))
            X.append(x)
            Y.append(y)

        print("Dataset built, now converting to numpy array")
        X = np.vstack(X)
        Y = np.hstack(Y)
        et = time.time()

        print("Took {}s to build dataset".format(str(et - st)))

        return X, Y

In [None]:
# Read every training entry selected by train_idx into memory
train_gen = data_gen(file_list=f_list, idx_list=train_idx, data_path=root_path)
X_train, y_train = train_gen.build_dataset()

In [None]:
# Show the feature and label array shapes, one per line
print(X_train.shape, y_train.shape, sep='\n')

# Per-example input shape: everything after the leading example axis
input_shape = (X_train.shape[1], X_train.shape[2])

In [None]:
# Read every evaluation entry selected by test_idx into memory
test_gen = data_gen(file_list=f_list, idx_list=test_idx, data_path=root_path)
X_test, y_test = test_gen.build_dataset()

In [None]:
# Show the feature and label array shapes, one per line
print(X_test.shape, y_test.shape, sep='\n')

## Fine-tuning on the Train Set

The pre-trained model is now fine-tuned on the train set with a low learning rate for 50 epochs. Note that we want to prevent the training accuracy from converging to 1 too early in training.

In [None]:
initial_lr = 5e-4
weights_path = "../model_params/model_{}s.h5".format(CLIP_LEN)

# Build the network, then load the saved weights for this clip length,
# matching layers by name
net = SwishNet(input_shape=input_shape, classes=3)
net.load_weights(weights_path, by_name=True)

# Labels are integer class ids, hence the sparse categorical loss
adam_opt = Adam(learning_rate=initial_lr)
net.compile(
    loss='sparse_categorical_crossentropy',
    optimizer=adam_opt,
    metrics=['accuracy'],
)
net.summary()

In [None]:
BATCH_SIZE = 128

# Fine-tune on the in-memory training arrays.
# `max_queue_size`, `workers` and `use_multiprocessing` are intentionally not
# passed: Keras documents them as applying to generator / Sequence input only,
# so they do nothing for NumPy arrays, and newer Keras versions reject them.
history = net.fit(
    X_train,
    y_train,
    epochs=50,
    batch_size=BATCH_SIZE,
    verbose=1,
    shuffle=True,
)

In [None]:
# Plot the per-epoch training loss and save the figure to disk
loss_curve = history.history['loss']
loss_plot_path = "../docs/plots/gtzan_loss_{}s.png".format(CLIP_LEN)

plt.plot(loss_curve, label="Training Loss")
plt.title("Training Loss")
plt.savefig(loss_plot_path, dpi=1000)

In [None]:
# Plot the per-epoch training accuracy and save the figure to disk.
# The metric compiled as 'accuracy' is recorded under the key 'accuracy' by
# TF 2.x Keras, while older releases used 'acc'; accept either so the cell
# does not fail with a KeyError.
train_history = history.history
acc_key = 'accuracy' if 'accuracy' in train_history else 'acc'

plt.plot(train_history[acc_key], label = "Training Acc")
plt.title("Training Accuracy")
plt.savefig("../docs/plots/gtzan_acc_{}s.png".format(CLIP_LEN), dpi = 1000)

## Evaluation

For this task we must report Accuracy, Class-wise Recall and Average F1. This can be conveniently done using the `classification_report` function from the scikit-learn library.

In [None]:
# Evaluate on the held-out split; the two values unpacked here are the loss
# and the accuracy metric the model was compiled with
test_loss, test_acc = net.evaluate(X_test, y_test, verbose = 1)

In [None]:
# Display the accuracy obtained on the evaluation split
test_acc

In [None]:
# Model outputs for every evaluation example; the arg-max of each row is
# taken later to obtain the predicted class index
preds = net.predict(X_test, verbose = 1)

In [None]:
# Class names, ordered so that a name's position is its integer class id
catg = ['noise', 'music', 'speech']

# Lookup tables in both directions: name -> id and id -> name
label2idx = dict(zip(catg, range(len(catg))))
idx2label = dict(enumerate(catg))

In [None]:
# Translate the true class ids into class names
y_labels = [idx2label[true_idx] for true_idx in y_test]

# The predicted class is the index of the largest model output per example
pred_labels = [idx2label[np.argmax(class_scores)] for class_scores in preds]

In [None]:
# Report per-class precision / recall / F1 for 'music' and 'speech' only.
# `labels` is used instead of `target_names`: with `target_names` alone,
# scikit-learn raises a ValueError as soon as the model predicts 'noise' for
# any clip, because three classes are then observed but only two names are
# supplied. The labels are already strings, so no display names are needed.
# NOTE(review): presumably the evaluation set contains no 'noise' clips,
# which is why only catg[1:] is reported - confirm.
print(classification_report(y_labels, pred_labels, labels=catg[1:]))