In [1]:
import matplotlib.pyplot as plt
import numpy as np

import utils
import pandas as pd
import classifiers

In [2]:
def compare_plain_vote(dataset_df):
    """
    Train the fcnn on a dataset and score it per sample and per track.

    The dataframe is split with ``utils.train_val_split`` (track ids kept), the
    classifier predicts one genre per validation sample, and the per-sample
    predictions are then aggregated by majority vote inside each track.

    :param dataset_df: the complete dataset dataframe; it is split into
        train and validation parts inside this function
    :return: tuple ``(sample_accuracy, track_accuracy)`` measured on the
        validation split
    """
    def most_frequent(labels):
        # np.unique returns sorted values, and argmax picks the first maximum,
        # so ties are resolved in favour of the smallest label.
        values, counts = np.unique(labels, return_counts=True)
        return values[np.argmax(counts)]

    X_train, y_train, X_val, y_val = utils.train_val_split(dataset_df, keep_trackID=True)
    sample_pred = classifiers.tensorflow_fcnn(X_train, y_train, X_val, y_val.GenreID)

    # One row per validation sample: predicted genre, owning track, true genre.
    per_sample = pd.DataFrame({"pred": sample_pred, "track": y_val.TrackID, "true": y_val.GenreID})

    # One row per track, each cell holding the list of that track's labels.
    per_track = per_sample.groupby('track').agg(list)
    per_track['pred_by_track'] = per_track['pred'].apply(most_frequent)
    per_track['true_by_track'] = per_track['true'].apply(most_frequent)

    sample_accuracy = np.mean(per_sample.pred == per_sample.true)
    track_accuracy = np.mean(per_track.pred_by_track == per_track.true_by_track)
    return sample_accuracy, track_accuracy

In [3]:
# utils.task4_df() is unpacked into three dataframes; the names suggest
# 5 s / 10 s / 30 s excerpts (presumably clip lengths; confirm in utils).
df5s, df10s, df30s = utils.task4_df()

# Map a short label to each dataframe, keeping the insertion order
# "5s", "10s", "30s", then "whole" (all three stacked with a fresh index).
dataset = dict(zip(("5s", "10s", "30s"), (df5s, df10s, df30s)))
dataset["whole"] = pd.concat([df5s, df10s, df30s], ignore_index=True)

In [4]:
# Repeat the train/evaluate cycle 5 times per dataset.
# Each call returns (sample_accuracy, track_accuracy); the pairs are stored
# flattened, i.e. results[name] == [plain_0, vote_0, plain_1, vote_1, ...].
results = {}
for name, df in dataset.items():
    flat_scores = []
    results[name] = flat_scores
    for i in range(5):
        flat_scores.extend(compare_plain_vote(df))

Epoch 1/25
[1m149/149[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.4995 - loss: 1.4235 - val_accuracy: 0.6810 - val_loss: 0.9451
Epoch 2/25
[1m149/149[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.7769 - loss: 0.6508 - val_accuracy: 0.7012 - val_loss: 0.9043
Epoch 3/25
[1m149/149[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.8517 - loss: 0.4699 - val_accuracy: 0.6911 - val_loss: 0.9572
Epoch 4/25
[1m149/149[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.8954 - loss: 0.3431 - val_accuracy: 0.7189 - val_loss: 0.9708
Epoch 5/25
[1m149/149[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.9188 - loss: 0.2650 - val_accuracy: 0.7247 - val_loss: 0.9738
Epoch 6/25
[1m149/149[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.9386 - loss: 0.1932 - val_accuracy: 0.7138 - val_loss: 1.0121
Epoch 7/25
[1m149/149[0m 

In [5]:
# Summarise the repeated runs: mean and standard deviation of the per-sample
# ("plain") and per-track ("vote") accuracies, plus the mean gap between them.
exp = results

# results[name] is a flat list [plain_0, vote_0, plain_1, vote_1, ...];
# regroup it into one (plain, vote) tuple per run.
grouped = {name: list(zip(values[0::2], values[1::2])) for name, values in results.items()}

# Standard deviations use np.std (population std, ddof=0). This is the same
# quantity as sqrt(E[x^2] - E[x]^2) but avoids the cancellation that can make
# the difference slightly negative (-> NaN) when the runs are nearly identical.
vote_acc_mean = \
    {name: np.round(np.mean([v for p, v in val]), decimals=2) for name, val in grouped.items()}
vote_acc_std = \
    {name: np.round(np.std([v for p, v in val]), decimals=3) for name, val in grouped.items()}

plain_acc_mean = \
    {name: np.round(np.mean([p for p, v in val]), decimals=2) for name, val in grouped.items()}
plain_acc_std = \
    {name: np.round(np.std([p for p, v in val]), decimals=3) for name, val in grouped.items()}

# Average improvement of track-level voting over per-sample prediction.
mean_gap = {name: np.round(np.mean([v - p for p, v in val]), decimals=2) for name, val in grouped.items()}

print(
    f"Mean gap : {mean_gap}\n"
    f"Plain accuracy mean : {plain_acc_mean}\n"
    f"Vote accuracy mean : {vote_acc_mean}\n"
    f"Plain accuracy std : {plain_acc_std}\n"
    f"Vote accuracy std : {vote_acc_std}\n")

Mean gap : {'5s': 0.05, '10s': 0.02, '30s': 0.0, 'whole': 0.05}
Plain accuracy mean : {'5s': 0.7, '10s': 0.72, '30s': 0.76, 'whole': 0.69}
Vote accuracy mean : {'5s': 0.75, '10s': 0.74, '30s': 0.76, 'whole': 0.74}
Plain accuracy std : {'5s': 0.04, '10s': 0.006, '30s': 0.012, 'whole': 0.024}
Vote accuracy std : {'5s': 0.042, '10s': 0.011, '30s': 0.012, 'whole': 0.021}
