# Begin model training

## Load libraries

In [1]:
import pandas as pd
import numpy as np
from collections import Counter
import os

## Setup Colab

In [2]:
# True when the active IPython shell identifies itself as Google Colab.
COLAB = 'google.colab' in str(get_ipython())

# ROOT is the base directory the data-loading cells join their paths onto.
if not COLAB:
    # Local run: two directory levels above the current working directory.
    ROOT = os.path.join(os.getcwd(), '..', '..')
else:
    # Colab run: mount Google Drive and point ROOT at the project folder on it.
    from google.colab import drive
    drive.mount('/content/drive')
    ROOT = '/content/drive/MyDrive/fyp/collabs/'

## Load Colab libraries

In [3]:
if COLAB:
    !pip install pickle5
    import pickle5 as pickle

## Load custom modules

In [4]:
from nnssa.constants import *
from nnssa.evaluate import evaluate

## Set random seed for reproducibility

In [5]:
# Seed value used for NumPy's global random number generator.
SEED = 42
# NOTE(review): this cell seeds NumPy's legacy global RNG only; no other library is seeded here.
np.random.seed(SEED)

## Load TensorFlow modules

In [6]:
import tensorflow as tf
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dropout, Dense, BatchNormalization, Input
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

In [7]:
def get_metrics():
    """Create the Keras metrics tracked during training and evaluation.

    Fresh metric objects are constructed on every call.

    Returns:
        list: Metric instances named ``accuracy``, ``precision``, ``recall``
        and ``auc``, in that order. Binary accuracy is created with a decision
        threshold of 0.15; the other metrics use their default arguments.
    """
    binary_accuracy = tf.keras.metrics.BinaryAccuracy(name='accuracy', threshold=0.15)
    precision = tf.keras.metrics.Precision(name='precision')
    recall = tf.keras.metrics.Recall(name='recall')
    auc = tf.keras.metrics.AUC(name='auc')
    return [binary_accuracy, precision, recall, auc]

## Enable TPU

In [8]:
# Connect to the Colab TPU when the runtime exposes its address through the
# COLAB_TPU_ADDR environment variable; without that variable this cell does nothing.
# NOTE(review): `strategy` is not referenced again in the cells visible here.
if 'COLAB_TPU_ADDR' in os.environ:
    resolver = tf.distribute.cluster_resolver.TPUClusterResolver(
        tpu='grpc://' + os.environ['COLAB_TPU_ADDR']
    )
    tf.config.experimental_connect_to_host(resolver.master())
    tf.tpu.experimental.initialize_tpu_system(resolver)
    strategy = tf.distribute.experimental.TPUStrategy(resolver)

## Progress bars!

In [9]:
from tqdm import tqdm
# Register tqdm's progress_* methods on pandas objects; progress_map is used when the spectrograms are loaded.
tqdm.pandas()

  from pandas import Panel


## Load dataset

In [10]:
# Location of the pickled Harmonix table inside the 'bars' sub-directory.
# NOTE: unpickling can execute arbitrary code - only load files from a trusted source.
harmonix_path = os.path.join(ROOT, SUB_DIVS_DIR, 'bars', 'harmonix.p')

if COLAB:
    # Use a context manager so the file handle is closed after loading.
    with open(harmonix_path, 'rb') as harmonix_file:
        harmonix_beats = pickle.load(harmonix_file)
else:
    harmonix_beats = pd.read_pickle(harmonix_path)

# Keep the first 100 rows only. .copy() makes the subset an independent frame,
# so later column assignments are not applied to a slice of the loaded table.
harmonix_beats = harmonix_beats.head(100).copy()
harmonix_beats.head()

Unnamed: 0,File,Sub_Divisions,Binary_Labels,Weighted_Labels,Weights,IDS,Beat_times,Labels,BPM
0,0001_12step,bars/harmonix/0001_12step.npy,"[1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, ...","[1.0, 1.0, 0.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, ...","[3.0, 0.5, 1.0, 0.5, 3.0, 0.5, 1.0, 1.0, 1.0, ...","[0001_12step, 0001_12step, 0001_12step, 0001_1...","[0.0, 0.5309729999999999, 1.0619459999999998, ...","[0.0, 8.495567999999999, 25.486704, 42.4753280...",113
1,0003_6foot7foot,bars/harmonix/0003_6foot7foot.npy,"[1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[3.0, 0.5, 3.0, 0.5, 1.0, 1.0, 1.0, 1.0, 1.0, ...","[0003_6foot7foot, 0003_6foot7foot, 0003_6foot7...","[2.857108, 3.571394, 4.28568, 4.99996600000000...","[2.857108, 8.571396, 31.428548, 37.14283599999...",84
2,0004_abc,bars/harmonix/0004_abc.npy,"[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[3.0, 0.5, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, ...","[0004_abc, 0004_abc, 0004_abc, 0004_abc, 0004_...","[2.666656, 3.238084, 3.952369, 4.597529, 5.242...","[2.666656, 28.300542999999998, 58.263180000000...",94
3,0006_aint2proud2beg,bars/harmonix/0006_aint2proud2beg.npy,"[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[3.0, 0.5, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, ...","[0006_aint2proud2beg, 0006_aint2proud2beg, 000...","[0.0, 0.572203, 1.144406, 1.716609, 2.288812, ...","[0.0, 27.4652, 45.203726, 63.518522999999995, ...",105
4,0008_america,bars/harmonix/0008_america.npy,"[1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, ...","[1.0, 1.0, 0.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, ...","[3.0, 0.5, 1.0, 0.5, 3.0, 0.5, 1.0, 1.0, 1.0, ...","[0008_america, 0008_america, 0008_america, 000...","[3.871208, 4.359011, 4.846814, 5.338616, 5.830...","[3.871208, 10.56504, 33.217138, 56.85190400000...",136


## Load Mel Spectrograms

In [11]:
def load_np(file):
    """Load one NumPy file stored beneath ``ROOT/SUB_DIVS_DIR``.

    Args:
        file: Path of the file relative to ``os.path.join(ROOT, SUB_DIVS_DIR)``.

    Returns:
        Whatever ``np.load`` returns for that file.

    Note:
        ``allow_pickle=True`` lets ``np.load`` unpickle object arrays, which
        can execute arbitrary code; only use it on trusted files.
    """
    full_path = os.path.join(ROOT, SUB_DIVS_DIR, file)
    return np.load(full_path, allow_pickle=True)

In [12]:
# Replace each row's file path in 'Sub_Divisions' with the array load_np reads from it, showing a progress bar.
# The column is overwritten in place: re-running this cell would pass the loaded arrays, not paths, to load_np.
harmonix_beats['Sub_Divisions'] = harmonix_beats['Sub_Divisions'].progress_map(load_np)

100%|██████████| 100/100 [00:01<00:00, 65.43it/s]


In [13]:
# harmonix_beats['Oversamples'] = harmonix_beats['Oversamples'].progress_map(load_np)

In [14]:
# Work with the first 50 rows only.
# X is an independent copy of those rows; y is their 'Binary_Labels' column.
X = harmonix_beats.iloc[:50].copy()
y = harmonix_beats['Binary_Labels'].head(50)

## Splits

In [15]:
# Split whole rows of X: 80 % / 20 % into (train + validation) / test, then the
# first part again 80 % / 20 % into train / validation.
# The label outputs are discarded because labels are re-read from the frames later.
# random_state uses the notebook-wide SEED rather than a second hard-coded 42,
# so changing SEED changes the splits too.
X_train, X_test, _, _ = train_test_split(X, y, test_size=0.2, random_state=SEED)
X_train, X_val, _, _ = train_test_split(X_train, X_train['Binary_Labels'], test_size=0.2, random_state=SEED)

In [16]:
def _flatten_column(frame, column):
    """Concatenate the per-row arrays stored in ``frame[column]`` into one array."""
    return np.concatenate(frame[column].values)


# Labels per partition (oversampled alternative for training: 'Over_Labels').
y_train = _flatten_column(X_train, 'Binary_Labels')
y_test = _flatten_column(X_test, 'Binary_Labels')
y_val = _flatten_column(X_val, 'Binary_Labels')

# Identifiers of the test samples.
ids_test = _flatten_column(X_test, 'IDS')

# Sample weights per partition (oversampled alternative for training: 'Over_Weights').
w_train = _flatten_column(X_train, 'Weights')
w_test = _flatten_column(X_test, 'Weights')
w_val = _flatten_column(X_val, 'Weights')

# Inputs come last: these assignments replace the split DataFrames with arrays,
# so every column above has to be read from the frames first
# (oversampled alternative for training: 'Oversamples').
X_train = _flatten_column(X_train, 'Sub_Divisions')
X_test = _flatten_column(X_test, 'Sub_Divisions')
X_val = _flatten_column(X_val, 'Sub_Divisions')

## Show class imbalance

In [17]:
# Count how many training samples carry each label value.
Counter(y_train)

Counter({1.0: 273, 0.0: 2995})

## Calculate Initial Bias

In [18]:
# Occurrences of each integer label in the training targets:
# index 0 holds the negatives, index 1 the positives.
count = np.bincount(y_train.astype('int64'))
neg = count[0]
pos = count[1]
total = pos + neg

# Natural log of the positive-to-negative ratio, kept as a one-element array.
initial_bias = np.log(np.array([pos / neg]))
print(f'Initial bias: {initial_bias}')

Initial bias: [-2.39522772]


## Load model

In [19]:
def build_model(initial_bias):
    """Build the (uncompiled) convolutional binary classifier.

    The network has two Conv2D -> MaxPooling2D -> BatchNormalization -> Dropout
    stages, followed by Flatten, a 128-unit sigmoid Dense layer, Dropout and a
    single sigmoid output unit.

    Args:
        initial_bias: Value the bias of the output unit is initialised to.

    Returns:
        A ``Sequential`` model taking inputs of shape ``(N_MELS, 4, 33)``.
    """
    bias_initializer = tf.keras.initializers.Constant(initial_bias)
    return Sequential([
        Input(shape=(N_MELS, 4, 33)),
        # Each ReLU convolution gets its own He-normal initializer instance
        # instead of sharing one unseeded instance between layers.
        Conv2D(8, 8, activation='relu', kernel_initializer=tf.keras.initializers.HeNormal(), padding="same"),
        MaxPooling2D(pool_size=(5, 2)),
        BatchNormalization(),
        Dropout(0.5),
        Conv2D(16, 6, activation='relu', padding="same", kernel_initializer=tf.keras.initializers.HeNormal()),
        MaxPooling2D(pool_size=(2, 2)),
        BatchNormalization(),
        Dropout(0.5),
        Flatten(),
        Dense(128, activation='sigmoid'),
        Dropout(0.5),
        # The output bias starts at the supplied constant.
        Dense(1, activation='sigmoid', bias_initializer=bias_initializer)
    ])

## Train model

In [20]:
# Seed TensorFlow's global RNG as well; seeding NumPy alone does not cover
# TensorFlow's own random operations (weight initialisation, dropout).
tf.random.set_seed(SEED)

model = build_model(initial_bias)

# SGD with Nesterov momentum. The learning rate follows 0.05 / (1 + 1e-4 * step),
# i.e. the time-based decay previously requested through the deprecated
# `lr=0.05, decay=1e-4` optimizer arguments, expressed as an explicit schedule.
lr_schedule = tf.keras.optimizers.schedules.InverseTimeDecay(
    initial_learning_rate=0.05,
    decay_steps=1,
    decay_rate=1e-4,
)
sgd = tf.keras.optimizers.SGD(learning_rate=lr_schedule, momentum=0.9, nesterov=True)
model.compile(loss='binary_crossentropy', optimizer=sgd, metrics=get_metrics())

# Loss of the still-untrained model on the test data.
results = model.evaluate(X_test, y_test, batch_size=32, verbose=0)
print("Loss: {:0.4f}".format(results[0]))

# Stop when validation AUC has not improved for 20 epochs and roll back to the best weights.
early_stopping = tf.keras.callbacks.EarlyStopping(monitor='val_auc', patience=20, mode='max', restore_best_weights=True)

history = model.fit(
    X_train,
    y_train,
    batch_size=128,
    epochs=80,
    shuffle=True,
    verbose=1,
    sample_weight=w_train,
    validation_data=(X_val, y_val),
    callbacks=[early_stopping],
)

Loss: 0.3038
Epoch 1/80
Epoch 2/80
Epoch 3/80
Epoch 4/80
Epoch 5/80
Epoch 6/80
Epoch 7/80
Epoch 8/80
Epoch 9/80
Epoch 10/80
Epoch 11/80
Epoch 12/80
Epoch 13/80
Epoch 14/80
Epoch 15/80
Epoch 16/80
Epoch 17/80
Epoch 18/80
Epoch 19/80
Epoch 20/80
Epoch 21/80
Epoch 22/80
Epoch 23/80
Epoch 24/80
Epoch 25/80
Epoch 26/80
Epoch 27/80
Epoch 28/80
Epoch 29/80
Epoch 30/80
Epoch 31/80
Epoch 32/80
Epoch 33/80


## Calculate predictions

In [21]:
# Model outputs for every test sample, followed by the compiled loss and metrics on the test set.
# NOTE(review): batch_size=1 runs one predict step per sample - confirm this is intentional.
preds = model.predict(X_test, batch_size=1, verbose=1)
score = model.evaluate(X_test, y_test)



## Print classification report

In [22]:
# Turn the model outputs into hard 0/1 predictions with a 0.5 cut-off,
# then print the per-class precision / recall / F1 report.
y_pred = (np.asarray(preds) > 0.5).astype(int).ravel()
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

         0.0       0.95      0.96      0.95       885
         1.0       0.50      0.45      0.47        87

    accuracy                           0.91       972
   macro avg       0.72      0.70      0.71       972
weighted avg       0.91      0.91      0.91       972



## Calculate F1-Score

In [28]:
# Score the predictions with the project's evaluate() helper, which returns (f_score, precision, recall).
# NOTE(review): the meaning of the positional `True` flag is defined in nnssa.evaluate and is not visible here.
f_score, precision, recall = evaluate(harmonix_beats, preds, ids_test, True)

print("F-score: ", f_score)
print("Precision: ", precision)
print("Recall: ", recall)

F-score:  0.7079411816253922
Precision:  0.7491938013677144
Recall:  0.7204761904761905


## Best score
 * Every iteration of this notebook has a different result
 * The best score achieved is reported below

```
3 seconds:
F-score:  0.7079411816253922
Precision:  0.7491938013677144
Recall:  0.7204761904761905

0.5 seconds:
F-score:  0.3044451156585869
Precision:  0.2801110180142438
Recall:  0.35031746031746025

2 bars:
F-score:  0.7391024180497865
Precision:  0.7825271347010477
Recall:  0.7499206349206349
```

### Best score architecture

```
model = Sequential([
    Input(shape=(N_MELS, 4, 33)),
    Conv2D(8, 8, activation='relu', kernel_initializer=initializer, padding="same"),
    MaxPooling2D(pool_size=(5, 2)),
    BatchNormalization(),
    Dropout(0.5),
    Conv2D(16, 6, activation='relu', padding="same", kernel_initializer=initializer),
    MaxPooling2D(pool_size=(2, 2)),
    BatchNormalization(),
    Dropout(0.5),
    Flatten(),
    Dense(128, activation='sigmoid'),
    Dropout(0.5),
    Dense(1, activation='sigmoid', bias_initializer=bias_initializer)
])
```

## Save model

## Save results