# Model Analysis

In [1]:
import os
# Switch the working directory to the project's source folder (presumably so
# that the local `analysis_utils` module and the relative "../models" paths
# used later resolve -- confirm against the repository layout).
#
# The target is a relative path, so calling chdir a second time from inside
# that folder would look for a nested copy and raise. Guarding on the current
# directory makes this cell safe to re-run without restarting the kernel.
_src_dir = os.path.join('genre_classification_289a', 'src')
if not os.getcwd().endswith(os.sep + _src_dir):
    os.chdir(_src_dir)

In [2]:
import numpy as np
import matplotlib.pyplot as plt
from analysis_utils import find_available_mlp_models, get_model_f1s, get_model_loss_histories, abbrev_targets

## Available MLP Models

In [3]:
# Candidate attributes handed to `find_available_mlp_models` to discover
# which trained models are available.
dataset_name = "fma_medium"
possible_targets = ["subgenres", "mfcc", "genre"]
possible_layers = [7,6,5,4]

# Directory (relative to the current working directory) that receives the
# figures saved by the plotting cells.
save_plots_path = os.path.join(os.path.curdir, "../models/analysis-plots/")
# Create it up front: `plt.savefig` does not create missing parent folders,
# so a fresh checkout would otherwise fail when the first figure is saved.
# The path is made absolute first because `os.makedirs` is documented to be
# unreliable when the path it walks contains ".." elements.
os.makedirs(os.path.abspath(save_plots_path), exist_ok=True)

mlp_models = find_available_mlp_models(dataset_name, possible_targets, possible_layers)
# Show one model description per line.
for model in mlp_models:
    print(model)

{'dataset': 'fma_medium', 'targets': ['subgenres'], 'layer': 7}
{'dataset': 'fma_medium', 'targets': ['subgenres'], 'layer': 6}
{'dataset': 'fma_medium', 'targets': ['subgenres'], 'layer': 5}
{'dataset': 'fma_medium', 'targets': ['subgenres', 'mfcc', 'genre'], 'layer': 7}
{'dataset': 'fma_medium', 'targets': ['subgenres', 'mfcc', 'genre'], 'layer': 6}
{'dataset': 'fma_medium', 'targets': ['subgenres', 'mfcc', 'genre'], 'layer': 5}
{'dataset': 'fma_medium', 'targets': ['subgenres', 'mfcc', 'genre'], 'layer': 4}


## Table of F1 (micro) Performance

In [4]:
# One F1 score per entry of `mlp_models`; the two lists are paired by index below.
mlp_f1s = get_model_f1s(mlp_models)

# Print one row per model: the score rounded to 4 decimals, followed by the
# layer and targets of the model it belongs to.
for idx, f1 in enumerate(mlp_f1s):
    score = f1.round(4)
    spec = mlp_models[idx]
    print(f"{score} <= layer: {spec['layer']}, targets: {spec['targets']}")

0.8602 <= layer: 7, targets: ['subgenres']
0.8802 <= layer: 6, targets: ['subgenres']
0.7275 <= layer: 5, targets: ['subgenres']
0.9747 <= layer: 7, targets: ['subgenres', 'mfcc', 'genre']
0.9611 <= layer: 6, targets: ['subgenres', 'mfcc', 'genre']
0.7905 <= layer: 5, targets: ['subgenres', 'mfcc', 'genre']
0.6721 <= layer: 4, targets: ['subgenres', 'mfcc', 'genre']


## Plots of Losses

In [5]:
# One loss history per entry of `mlp_models`; paired with the models by index.
mlp_loss_histories = get_model_loss_histories(mlp_models)
# Number of training batches represented by each recorded loss value
# (per the axis labelling below); used to scale the x-axis to batch counts.
batch_set_size = 30

# regular plot: one raw loss curve per model
plt.figure()
plt.title("MLP Training Loss")
for idx, mlp_loss_history in enumerate(mlp_loss_histories):
    label = f"{abbrev_targets(mlp_models[idx]['targets'])}:{mlp_models[idx]['layer']}"
    # The i-th recorded loss (1-based) is drawn at batch i * batch_set_size.
    plt.plot((np.arange(len(mlp_loss_history))+1)*batch_set_size, mlp_loss_history, label=label)
# Derive the label from `batch_set_size` so it cannot drift from the x-axis scaling.
plt.ylabel(f"{batch_set_size}-Batch Average Loss")
plt.xlabel("Batches")
plt.legend()
plt.savefig(os.path.join(save_plots_path, "MLP-training-losses.png"))

# Window length of the moving average: how many consecutive recorded losses
# are averaged into each smoothed point.
n_avg = 20

# smoother plot: a separate figure for the moving-average version of the loss curves
plt.figure()
plt.title("MLP Training Loss (Smooth)")

# Cumulative-sum moving average; the original comment credits "stack @Jaime"
# (presumably a Stack Overflow answer).
def moving_average(a, n):
    """Return the simple moving average of `a` over windows of `n` samples.

    Args:
        a: 1-D sequence or array of numbers.
        n: Window length; must be a positive integer.

    Returns:
        Float array of length ``len(a) - n + 1`` whose i-th entry is the mean
        of ``a[i:i + n]``. The result is empty when ``n`` exceeds ``len(a)``.

    Raises:
        ValueError: If ``n`` is less than 1. Without this check ``n == 0``
            fails with an opaque broadcasting error and a negative ``n``
            silently returns meaningless values.
    """
    if n < 1:
        raise ValueError(f"window length n must be >= 1, got {n}")
    ret = np.cumsum(a, dtype=float)
    # Subtracting the running total from n samples earlier turns each running
    # total at index i >= n into the sum of the last n samples ending at i.
    # The right-hand side is fully evaluated before the in-place assignment.
    ret[n:] = ret[n:] - ret[:-n]
    # Entries before index n - 1 cover fewer than n samples and are dropped.
    return ret[n - 1:] / n

for idx, mlp_loss_history in enumerate(mlp_loss_histories):
    # Smooth each history with an `n_avg`-sample moving average.
    losses = np.array(mlp_loss_history)
    losses_avg = moving_average(losses, n_avg)
    label = f"{abbrev_targets(mlp_models[idx]['targets'])}:{mlp_models[idx]['layer']}"
    # Each smoothed value summarises a window of `n_avg` recorded losses, so
    # draw it at the batch count where that window ends. The first point sits
    # at n_avg * batch_set_size; starting at batch_set_size instead would
    # shift the whole curve left relative to the raw-loss plot.
    batches = (np.arange(len(losses_avg)) + n_avg) * batch_set_size
    plt.plot(batches, losses_avg, label=label)
# Derive the label from `batch_set_size` so it cannot drift from the x-axis scaling.
plt.ylabel(f"{n_avg}-Tap Moving Avg of {batch_set_size}-Batch Average of Loss")
plt.xlabel("Batches")
plt.legend()
plt.savefig(os.path.join(save_plots_path, f"MLP-training-losses-smooth-{n_avg}.png"))

plt.show()

SyntaxError: invalid syntax (<ipython-input-5-5f96b89c73be>, line 16)