TODO: also run a t-test here on the results of the different models, metric by metric.

In [1]:
import json
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os
import glob
from utils import display_results, display_structure


In [2]:
# Inspect a single results file; any JSON from the results folders can be used here.
file = "./results/targCls_2_music_openl3_env_voice_True_TotCls_5_dropNs_True_rep_5_fold_5.json"

# Keep the handle open only for as long as the JSON is being parsed.
with open(file) as results_file:
    data = json.load(results_file)

# Echo the path first so the rendered table can be traced back to its file.
print(file)
display_results(data)


./results/targCls_2_music_openl3_env_voice_True_TotCls_5_dropNs_True_rep_5_fold_5.json
Key      Subkey                    Sub-subkey         Value
-------  ------------------------  -----------------  ----------
config   modality                                     music
         voice                                        1.00
         classifications           voice_gender       4 cls
                                   voice_exagg        4 cls
                                   voice_type         4 cls
                                   voice_age          4 cls
                                   target             2 cls
         which_embeddings                             openl3_env
         drop_non_significant                         1.00
average  mid_r2                    mean               0.35
                                   std                0.12
         mid_pears                 mean               0.62
                                   std                0.08
         

In [3]:
rep = 5
fold = 5
lvl_1 = ["Binary", "Ternary"]
lvl_2 = ["Voice", "No voice"]
numerical_values = ["Target F1", "Avg. secondary F1", "Avg. R2 emotions", "Avg. r emotions", "Avg. R2 mid-level", "Avg. r mid-level"]

columns = ["Embeddings"] + numerical_values
dict_of_dfs = {k1: {k2: pd.DataFrame(columns=columns) for k2 in lvl_2} for k1 in lvl_1}

for fn in glob.glob(f"./results/*_rep_{rep}_fold_{fold}.json"):
    with open(fn) as f:
        results = json.load(f)

    # aggregate secondary f1 scores
    f1_secondary_means = [v['mean'] for k,v in results["f1"].items() if k != 'target']
    f1_secondary_stds = [v['std'] for k,v in results["f1"].items() if k != 'target']
    results["average"]["secondary f1"] = { "mean": np.mean(f1_secondary_means), "std": np.mean(f1_secondary_stds)}

    first_level = "Binary" if len(results["config"]["classifications"]["target"]) == 2 else "Ternary"
    second_level = "Voice" if results["config"]["voice"] else "No voice"

    # re-calculate stds: not 

    row = {
        "Embeddings": results["config"]["which_embeddings"],
        "Target F1": f'{results["f1"]["target"]["mean"]:.2f} ± {results["f1"]["target"]["std"]:.2f}',
        "Avg. secondary F1": f'{results["average"]["secondary f1"]["mean"]:.2f} ± {results["average"]["secondary f1"]["std"]:.2f}',
        "Avg. R2 emotions": f'{results["average"]["emo_r2"]["mean"]:.2f} ± {results["average"]["emo_r2"]["std"]:.2f}',
        "Avg. r emotions": f'{results["average"]["emo_pears"]["mean"]:.2f} ± {results["average"]["emo_pears"]["std"]:.2f}',
        "Avg. R2 mid-level": f'{results["average"]["mid_r2"]["mean"]:.2f} ± {results["average"]["mid_r2"]["std"]:.2f}',
        "Avg. r mid-level": f'{results["average"]["mid_pears"]["mean"]:.2f} ± {results["average"]["mid_pears"]["std"]:.2f}'
    }

    df = dict_of_dfs[first_level][second_level].copy()
    df = pd.concat([df, pd.DataFrame(row, index=[0])], ignore_index=True)
    dict_of_dfs[first_level][second_level] = df

for l1 in lvl_1:
    for l2 in lvl_2:
        dict_of_dfs[l1][l2] = dict_of_dfs[l1][l2].sort_values(by="Embeddings").set_index("Embeddings")

In [4]:
# Render every summary table under its target-setup and voice-setting headings.
for target_kind in lvl_1:
    print(target_kind)
    for voice_kind in lvl_2:
        print(voice_kind)
        # Work on a copy so the stored table is never touched by display code.
        df = dict_of_dfs[target_kind][voice_kind].copy()
        # round(2) only affects numeric columns; string cells pass through as-is.
        df_rounded = df.round(2)
        display(df_rounded)
        print()
    # Visual separator between the top-level groups.
    print("_____________________________________________________________")

Binary
Voice


Unnamed: 0_level_0,Target F1,Avg. secondary F1,Avg. R2 emotions,Avg. r emotions,Avg. R2 mid-level,Avg. r mid-level
Embeddings,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
mfcc,0.78 ± 0.10,0.65 ± 0.07,0.06 ± 0.16,0.38 ± 0.11,0.13 ± 0.15,0.43 ± 0.10
msd,0.95 ± 0.04,0.79 ± 0.05,0.26 ± 0.15,0.56 ± 0.09,0.30 ± 0.12,0.58 ± 0.09
openl3_env,0.95 ± 0.04,0.77 ± 0.03,0.34 ± 0.13,0.62 ± 0.08,0.35 ± 0.12,0.62 ± 0.08
openl3_music,0.91 ± 0.11,0.76 ± 0.04,0.29 ± 0.17,0.56 ± 0.19,0.31 ± 0.14,0.59 ± 0.13



No voice


Unnamed: 0_level_0,Target F1,Avg. secondary F1,Avg. R2 emotions,Avg. r emotions,Avg. R2 mid-level,Avg. r mid-level
Embeddings,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
mfcc,0.79 ± 0.08,0.66 ± 0.07,0.02 ± 0.17,0.36 ± 0.11,0.14 ± 0.16,0.48 ± 0.09
msd,0.87 ± 0.05,0.66 ± 0.05,0.25 ± 0.11,0.54 ± 0.08,0.35 ± 0.14,0.62 ± 0.09
openl3_env,0.91 ± 0.05,0.72 ± 0.04,0.34 ± 0.10,0.61 ± 0.08,0.41 ± 0.10,0.66 ± 0.07
openl3_music,0.87 ± 0.09,0.71 ± 0.05,0.31 ± 0.16,0.56 ± 0.19,0.39 ± 0.16,0.64 ± 0.15



_____________________________________________________________
Ternary
Voice


Unnamed: 0_level_0,Target F1,Avg. secondary F1,Avg. R2 emotions,Avg. r emotions,Avg. R2 mid-level,Avg. r mid-level
Embeddings,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
mfcc,0.48 ± 0.04,0.67 ± 0.07,0.05 ± 0.15,0.38 ± 0.11,0.15 ± 0.14,0.46 ± 0.09
msd,0.67 ± 0.06,0.80 ± 0.06,0.29 ± 0.12,0.57 ± 0.08,0.33 ± 0.10,0.60 ± 0.07
openl3_env,0.66 ± 0.07,0.77 ± 0.05,0.34 ± 0.11,0.61 ± 0.07,0.35 ± 0.10,0.62 ± 0.07
openl3_music,0.67 ± 0.04,0.78 ± 0.04,0.35 ± 0.12,0.61 ± 0.08,0.37 ± 0.10,0.63 ± 0.07



No voice


Unnamed: 0_level_0,Target F1,Avg. secondary F1,Avg. R2 emotions,Avg. r emotions,Avg. R2 mid-level,Avg. r mid-level
Embeddings,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
mfcc,0.52 ± 0.05,0.67 ± 0.07,0.04 ± 0.16,0.37 ± 0.11,0.14 ± 0.14,0.48 ± 0.09
msd,0.62 ± 0.05,0.67 ± 0.05,0.23 ± 0.15,0.54 ± 0.10,0.36 ± 0.12,0.64 ± 0.07
openl3_env,0.59 ± 0.06,0.72 ± 0.04,0.30 ± 0.12,0.59 ± 0.08,0.42 ± 0.10,0.66 ± 0.07
openl3_music,0.64 ± 0.07,0.73 ± 0.04,0.32 ± 0.12,0.60 ± 0.08,0.43 ± 0.11,0.68 ± 0.07



_____________________________________________________________


    lvl_0 = ["Baseline", "FiLM-ed"]
    lvl_1 = ["Binary", "Ternary"]
    lvl_2 = ["Voice", "No voice"]
    numerical_values = ["Target F1", "Avg. secondary F1", "Avg. R2 emotions", "Avg. r emotions", "Avg. R2 mid-level", "Avg. r mid-level"]

    columns = ["Embeddings"] + [f"{col} mean" for col in numerical_values] + [f"{col} std" for col in numerical_values]
    dict_of_dfs = {k0: {k1: {k2: pd.DataFrame(columns=columns) for k2 in lvl_2} for k1 in lvl_1} for k0 in lvl_0}

    for suffix, l0 in zip(["", "_filmed"], lvl_0):
        temp_dict = dict_of_dfs[l0]
        for fn in glob.glob(f"./results{suffix}/*.json"):
            with open(fn) as f:
                results = json.load(f)

            # aggregate secondary f1 scores
            f1_secondary_means = [v['mean'] for k,v in results["f1"].items() if k != 'target']
            f1_secondary_stds = [v['std'] for k,v in results["f1"].items() if k != 'target']
            results["average"]["secondary f1"] = { "mean": np.mean(f1_secondary_means), "std": np.mean(f1_secondary_stds)}

            first_level = "Binary" if len(results["config"]["classifications"]["target"]) == 2 else "Ternary"
            second_level = "Voice" if results["config"]["voice"] else "No voice"

            row = {
                "Embeddings": results["config"]["which_embeddings"],
                "Target F1 mean": results["f1"]["target"]["mean"],
                "Target F1 std": results["f1"]["target"]["std"],
                "Avg. secondary F1 mean": results["average"]["secondary f1"]["mean"],
                "Avg. secondary F1 std": results["average"]["secondary f1"]["std"],
                "Avg. R2 emotions mean": results["average"]["emo_r2"]["mean"],
                "Avg. R2 emotions std": results["average"]["emo_r2"]["std"],
                "Avg. r emotions mean": results["average"]["emo_pears"]["mean"],
                "Avg. r emotions std": results["average"]["emo_pears"]["std"],
                "Avg. R2 mid-level mean": results["average"]["mid_r2"]["mean"],
                "Avg. R2 mid-level std": results["average"]["mid_r2"]["std"],
                "Avg. r mid-level mean": results["average"]["mid_pears"]["mean"],
                "Avg. r mid-level std": results["average"]["mid_pears"]["std"]
            }

            df = temp_dict[first_level][second_level].copy()
            df = pd.concat([df, pd.DataFrame(row, index=[0])], ignore_index=True)
            
            temp_dict[first_level][second_level] = df
        
        dict_of_dfs[l0] = temp_dict

    for l0 in lvl_0:
        for l1 in lvl_1:
            for l2 in lvl_2:
                dict_of_dfs[l0][l1][l2] = dict_of_dfs[l0][l1][l2].sort_values(by="Embeddings").set_index("Embeddings")
