In [66]:
import matplotlib.pyplot as plt
import numpy as np

import utils
import pandas as pd
import classifiers

In [67]:
def _most_common(labels):
    """
    Return the label that occurs most often in ``labels``.
    np.unique returns sorted labels and np.argmax returns the first maximum,
    so a tie is resolved in favour of the smallest label.
    """
    values, counts = np.unique(labels, return_counts=True)
    return values[np.argmax(counts)]


def compare_plain_vote(dataset_df):
    """
    Compare per-sample accuracy with per-track majority-vote accuracy.

    The dataframe is split with utils.train_val_split, validation predictions
    are obtained from classifiers.tensorflow_fcnn, and the predictions are then
    scored twice: once sample by sample, and once after reducing every track
    (rows sharing the same TrackID) to its most frequent label.
    :param dataset_df: the complete dataset dataframe, split internally into train and validation parts
    :return: a tuple (accuracy by sample, accuracy by track)
    """
    X_train, y_train, X_val, y_val = utils.train_val_split(dataset_df)
    y_val_pred = classifiers.tensorflow_fcnn(X_train, y_train, X_val, y_val.GenreID)

    # One row per validation sample: predicted genre, owning track, true genre.
    df_sample = pd.DataFrame({"pred": y_val_pred, "track": y_val.TrackID, "true": y_val.GenreID})
    sample_accuracy = np.mean(df_sample.pred == df_sample.true)

    # Collect the labels of every track into lists, then vote within each list.
    df_track = df_sample.groupby('track').agg(list)
    for source_col, voted_col in (("pred", "pred_by_track"), ("true", "true_by_track")):
        df_track[voted_col] = df_track[source_col].apply(_most_common)
    track_accuracy = np.mean(df_track.pred_by_track == df_track.true_by_track)

    return sample_accuracy, track_accuracy

In [68]:
# utils.task4_df() yields three dataframes, unpacked here in the order 5s, 10s, 30s.
df5s, df10s, df30s = utils.task4_df()

# Map a short label to each dataframe; "whole" stacks all three with a fresh index.
# NOTE(review): if the three frames share TrackID values, any later grouping by
# track on "whole" would pool their rows together -- confirm this is intended.
dataset = dict(zip(("5s", "10s", "30s"), (df5s, df10s, df30s)))
dataset["whole"] = pd.concat([df5s, df10s, df30s], ignore_index=True)

In [69]:
# Evaluate every dataset 5 times. compare_plain_vote returns a pair
# (accuracy by sample, accuracy by track); the pairs are stored flattened, so
# each list reads [sample, track, sample, track, ...] in run order.
results = {}
for name, df in dataset.items():
    run_scores = []
    for _run in range(5):
        run_scores.extend(compare_plain_vote(df))
    results[name] = run_scores

Epoch 1/300
[1m149/149[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.4980 - loss: 1.4412 - val_accuracy: 0.6448 - val_loss: 1.0421
Epoch 2/300
[1m149/149[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.7721 - loss: 0.6759 - val_accuracy: 0.6987 - val_loss: 0.9663
Epoch 3/300
[1m149/149[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.8385 - loss: 0.4808 - val_accuracy: 0.6978 - val_loss: 0.9925
Epoch 4/300
[1m149/149[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.8896 - loss: 0.3467 - val_accuracy: 0.7079 - val_loss: 0.9876
Epoch 5/300
[1m149/149[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.9104 - loss: 0.2790 - val_accuracy: 0.7012 - val_loss: 1.0460
38/38 - 0s - 940us/step - accuracy: 0.7012 - loss: 1.0460
{'accuracy': 0.7011784315109253, 'loss': 1.0459628105163574}
Epoch 1/300
[1m149/149[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m

In [79]:
exp = results

# `results` stores the scores of each run flattened as
# [plain, vote, plain, vote, ...]; rebuild one (plain, vote) tuple per run.
grouped = {
    name: list(zip(values[0::2], values[1::2]))
    for name, values in results.items()
}


def _accuracy_stats(runs_by_name, position):
    """
    Summarise one element of the per-run score tuples across runs.
    :param runs_by_name: dict mapping a dataset name to its list of score tuples
    :param position: index inside each tuple (0 = plain accuracy, 1 = vote accuracy)
    :return: a tuple (means, stds) of dicts keyed by dataset name, with means
             rounded to 2 decimals and standard deviations rounded to 3 decimals
    """
    means, stds = {}, {}
    for name, runs in runs_by_name.items():
        scores = np.array([run[position] for run in runs])
        means[name] = np.round(np.mean(scores), decimals=2)
        # np.std is the population standard deviation (ddof=0), the same quantity
        # as sqrt(E[x^2] - E[x]^2). That closed form is avoided because rounding
        # can make the difference slightly negative when the scores are nearly
        # identical, and the square root then yields NaN.
        stds[name] = np.round(np.std(scores), decimals=3)
    return means, stds


plain_acc_mean, plain_acc_std = _accuracy_stats(grouped, 0)
vote_acc_mean, vote_acc_std = _accuracy_stats(grouped, 1)

# Average per-run improvement of track-level voting over plain sample accuracy.
mean_gap = {
    name: np.round(np.mean([vote - plain for plain, vote in runs]), decimals=2)
    for name, runs in grouped.items()
}

print(
    f"Mean gap : {mean_gap}\n"
    f"Plain accuracy mean : {plain_acc_mean}\n"
    f"Vote accuracy mean : {vote_acc_mean}\n"
    f"Plain accuracy std : {plain_acc_std}\n"
    f"Vote accuracy std : {vote_acc_std}\n")

Mean gap : {'5s': 0.04, '10s': 0.03, '30s': 0.0, 'whole': 0.04}
Plain accuracy mean : {'5s': 0.7, '10s': 0.71, '30s': 0.76, 'whole': 0.7}
Vote accuracy mean : {'5s': 0.74, '10s': 0.74, '30s': 0.76, 'whole': 0.74}
Plain accuracy std : {'5s': 0.007, '10s': 0.009, '30s': 0.012, 'whole': 0.011}
Vote accuracy std : {'5s': 0.01, '10s': 0.009, '30s': 0.012, 'whole': 0.02}
