In [21]:
import sys
import os
from git_root import git_root

import numpy as np
import tensorflow as tf

from tensorflow.keras.optimizers import Adam

sys.path.append(git_root("training"))
from fetch_data import fetch_data_local
from fetch_data import to_numpy_arrays, prepare_tf_dataset

sys.path.append(git_root("models"))
from MFCC_model import setup_model

sys.path.append(git_root("utils"))
from utils import load_params, load_config

from validation_utils import plot_history

In [22]:
# Show the installed TensorFlow version so the run environment is on record.
tf_version = tf.__version__
print(tf_version)

2.0.0


In [23]:
# Load the held-out (train=False) MFCC split.
df = fetch_data_local(train=False, map_type="mfcc")

# Convert it to arrays; later cells read test[0] as the samples and test[1] as the labels.
test = to_numpy_arrays(df, mfcc=True)

Fetching: data_mfcc_test.json


In [24]:
# Split every sample along its last axis into single-channel (40, 50, 1) slices.
# The number of slices is read from the first sample and applied to all of them.
test_sample_unstacked = [
    test[0][piece_idx][:, :, slice_idx].reshape(40, 50, 1)
    for piece_idx in range(len(test[0]))
    for slice_idx in range(test[0][0].shape[-1])
]

# Repeat each label once per slice so labels stay aligned with the slices above.
test_label_unstacked = [
    test[1][label_idx]
    for label_idx in range(len(test[1]))
    for _ in range(test[0][0].shape[-1])
]

In [25]:
# Wrap the unstacked slices and their labels into the dataset fed to the model.
dataset = prepare_tf_dataset(
    test_sample_unstacked,
    test_label_unstacked,
)

In [26]:
from tensorflow.keras.models import load_model
sys.path.append(git_root("models"))
from affine_scalar_layer import AffineScalar
import pandas as pd

# Fetch the trained model. AffineScalar comes from the project's models
# directory and is registered through custom_objects so load_model can resolve it.
model_path = git_root('models', 'saved_models', 'mfcc.h5')
net = load_model(model_path, custom_objects={'AffineScalar': AffineScalar()})

# Generate predictions: one row of class scores per element of `dataset`.
y = np.asarray(net.predict(dataset))

# `dataset` was built from unstacked slices, so `y` can hold several rows per
# row of `df`. Merging the two on their index would pair piece r with slice r
# and silently attribute scores to the wrong pieces whenever a piece has more
# than one slice. Average the slice scores of each piece instead; with exactly
# one slice per piece this leaves the scores unchanged.
# NOTE(review): assumes prepare_tf_dataset keeps the order of
# test_sample_unstacked (all slices of a piece contiguous) -- confirm.
n_pieces = len(df)
if n_pieces == 0 or len(y) == 0 or len(y) % n_pieces != 0:
    raise ValueError(
        "Cannot align {} predictions with {} rows of df".format(len(y), n_pieces)
    )
slices_per_piece = len(y) // n_pieces
piece_scores = y.reshape(n_pieces, slices_per_piece, -1).mean(axis=1)

# One column per class; the count comes from the model output, not a constant.
preds = pd.DataFrame(
    piece_scores,
    index=df.index,
    columns=["class{}".format(i) for i in range(piece_scores.shape[1])],
)

# Build the results in a new frame instead of overwriting `df`, so this cell
# can be re-run without failing on an already-dropped 'maps' column.
results_df = df.drop(columns=['maps']).join(preds)

save_path = git_root("evaluation", "evaluation_results", "results_mfcc.csv")
print("Saving to: {}".format(save_path))
results_df.to_csv(save_path)

Saving to: /Users/arnaudstiegler/Desktop/deep-music-classification/evaluation/evaluation_results/results_mfcc.csv


# Results

In [27]:
config = load_config()

# Genre names in config order; a genre's position in the list is its class index.
label_names = config["genres"]
label_to_idx = {name: index for index, name in enumerate(label_names)}
idx_to_label = {index: name for name, index in label_to_idx.items()}

# Names of the columns that hold the per-class scores in the results CSV.
class_columns = ["class%d" % class_idx for class_idx in range(10)]

**Per piece** — accuracy and per-class precision computed on each row of the results CSV.

In [28]:
from sklearn.metrics import classification_report

# Reload the class scores from the results CSV.
filepath = git_root("evaluation", "evaluation_results", "results_mfcc.csv")
df = pd.read_csv(filepath, index_col=0)

# Predicted class = column with the highest score; true class = index of the genre name.
df['pred'] = df[class_columns].values.argmax(axis=1)
df['label'] = df['genre'].map(lambda genre: label_to_idx[genre])

report = classification_report(df['label'], df['pred'], output_dict=True)

# Keep the overall accuracy and the precision of every class, keyed by genre name.
piece_level_acc = {
    'mfcc': {
        'accuracy': report['accuracy'],
        'class_precision': {
            idx_to_label[class_idx]: report[str(class_idx)]['precision']
            for class_idx in range(10)
        },
    },
}

  'precision', 'predicted', average, warn_for)


In [29]:
import json

# Write the piece-level metrics as indented JSON into the evaluation results folder.
piece_results_path = git_root('evaluation', 'evaluation_results', 'mfcc_piece_level_results.json')
with open(piece_results_path, 'w') as out_file:
    json.dump(piece_level_acc, out_file, indent=4)

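**Per track** — class scores are summed over all rows sharing a `filename` before taking the highest-scoring class.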

In [30]:
# Lookup table with one row per distinct (filename, genre) pair, i.e. track -> genre.
track_genre_pairs = df[['filename', 'genre']].drop_duplicates()
track_mapping = track_genre_pairs.reset_index(drop=True)

In [31]:
track_level_acc = {}
track_level_acc['mfcc'] = {}

filepath = git_root("evaluation", "evaluation_results", "results_mfcc.csv")
df = pd.read_csv(filepath, index_col=0)

# Group by track to get track-level scores.
# Note that we SUM the scores (rather than AVG them).
# Only the class-score columns are aggregated: summing every column also
# "sums" the string 'genre' column on pandas versions where numeric_only
# defaults to False, and the merge below would then produce genre_x/genre_y
# instead of the 'genre' column needed for the label.
df_per_track = df.groupby('filename')[class_columns].sum()
# Predicted class is the one with the highest summed score
df_per_track['pred'] = np.argmax(df_per_track[class_columns].values, axis=1)
# Merge back to recover each track's genre; 'filename' is moved out of the
# index first so the join key is an ordinary column on both sides.
df_result = df_per_track.reset_index().merge(track_mapping, on='filename')
df_result['label'] = df_result['genre'].apply(lambda x: label_to_idx[x])

report = classification_report(df_result['label'], df_result['pred'], output_dict=True)
track_level_acc['mfcc']['accuracy'] = report['accuracy']

# Per-class precision keyed by genre name
track_level_acc['mfcc']['class_precision'] = {}
for i in range(10):
    track_level_acc['mfcc']['class_precision'][idx_to_label[i]] = report[str(i)]['precision']

  'precision', 'predicted', average, warn_for)


In [32]:
# Write the track-level metrics as indented JSON into the evaluation results folder.
track_results_path = git_root('evaluation', 'evaluation_results', 'mfcc_track_level_results.json')
with open(track_results_path, 'w') as out_file:
    json.dump(track_level_acc, out_file, indent=4)