In [1]:
import matplotlib.pyplot as plt
import numpy as np

import utils
import pandas as pd
import classifiers

In [2]:
def compare_plain_vote(dataset_df):
    """
    Score a classifier on a validation split both per sample and per track.

    The dataframe is split with ``utils.train_val_split(..., keep_trackID=True)``
    and the splits are handed to ``classifiers.tensorflow_fcnn``; its return value
    is used as one predicted genre per validation row (presumably the helper trains
    a fully connected network first -- confirm against ``classifiers``).
    The track-level label is the majority vote over all rows sharing a ``TrackID``.

    :param dataset_df: the complete dataset dataframe, split internally into train and validation parts
    :return: tuple ``(sample_accuracy, track_accuracy)``
    """
    def most_frequent(labels):
        # np.unique returns sorted values, and argmax picks the first maximum,
        # so a tie is resolved in favour of the smallest label.
        values, counts = np.unique(labels, return_counts=True)
        return values[np.argmax(counts)]

    X_train, y_train, X_val, y_val = utils.train_val_split(dataset_df, keep_trackID=True)
    y_val_pred = classifiers.tensorflow_fcnn(X_train, y_train, X_val, y_val.GenreID, verbose=False)

    # One row per validation sample: predicted genre, owning track, true genre.
    per_sample = pd.DataFrame({"pred": y_val_pred, "track": y_val.TrackID, "true": y_val.GenreID})

    # Collect every sample's labels into a list per track, then vote.
    per_track = per_sample.groupby('track').agg(list)
    per_track['pred_by_track'] = per_track['pred'].apply(most_frequent)
    per_track['true_by_track'] = per_track['true'].apply(most_frequent)

    sample_accuracy = np.mean(per_sample.pred == per_sample.true)
    track_accuracy = np.mean(per_track.pred_by_track == per_track.true_by_track)
    return sample_accuracy, track_accuracy

In [3]:
# utils.task4_df() yields three dataframes; judging by the names they differ in
# clip length (5 s / 10 s / 30 s) -- confirm against utils.
df5s, df10s, df30s = utils.task4_df()

# Label -> dataframe for every variant that gets evaluated below.
dataset = {}
dataset["5s"] = df5s
dataset["30s"] = df30s
# Currently disabled variants:
# dataset["10s"] = df10s
# dataset["whole"] = pd.concat((df5s, df10s, df30s), ignore_index=True)

In [4]:
# For each dataset variant, repeat the train/evaluate cycle 10 times and store the
# accuracies as one flat list: [plain, vote, plain, vote, ...].
results = {}
for name, df in dataset.items():
    scores = results[name] = []
    for run in range(10):
        plain_acc, vote_acc = compare_plain_vote(df)
        scores.extend((plain_acc, vote_acc))

In [5]:
# Regroup each flat score list into (plain, vote) tuples, one tuple per run.
# Index 0 is the per-sample ("plain") accuracy, index 1 the per-track ("vote") accuracy.
grouped = {name : list(zip(*[iter(values)]*2)) for name, values in results.items()}

# One accuracy array per prediction mode and dataset
plain_scores = {name : np.array([p for p, v in val]) for name, val in grouped.items()}
vote_scores = {name : np.array([v for p, v in val]) for name, val in grouped.items()}

# Take the mean of accuracies of models with voted genre
vote_acc_mean = \
    {name : np.round(np.mean(acc), decimals=2) for name, acc in vote_scores.items()}
# Take the standard deviation of accuracies of models with voted genre.
# np.std is the population std (ddof=0), i.e. the same quantity as
# sqrt(E[x^2] - E[x]^2), but it cannot produce NaN from a slightly negative
# radicand when the accuracies are (almost) identical across runs.
vote_acc_std = \
    {name : np.round(np.std(acc), decimals=3) for name, acc in vote_scores.items()}

# Take the mean of accuracies of models without voted genre
plain_acc_mean = \
    {name : np.round(np.mean(acc), decimals=2) for name, acc in plain_scores.items()}
# Take the std of accuracies of models without voted genre
plain_acc_std = \
    {name : np.round(np.std(acc), decimals=3) for name, acc in plain_scores.items()}

# Average per-run improvement of the voted accuracy over the plain accuracy
mean_gap = \
    {name : np.round(np.mean(vote_scores[name] - plain_scores[name]), decimals=2) for name in grouped}

print(
    f"Mean gap : {mean_gap}\n"
    f"Plain accuracy mean : {plain_acc_mean}\n"
    f"Vote accuracy mean : {vote_acc_mean}\n"
    f"Plain accuracy std : {plain_acc_std}\n"
    f"Vote accuracy std : {vote_acc_std}\n")

Mean gap : {'5s': 0.05, '30s': 0.0}
Plain accuracy mean : {'5s': 0.68, '30s': 0.73}
Vote accuracy mean : {'5s': 0.74, '30s': 0.73}
Plain accuracy std : {'5s': 0.01, '30s': 0.027}
Vote accuracy std : {'5s': 0.021, '30s': 0.027}
