In [1]:
import numpy as np
import pandas as pd
import yaml, glob
import evaluate
import plotly.express as px
import plotly.graph_objects as go
import plotly.figure_factory as ff
from scipy import stats

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
wer = evaluate.load("wer")

# Bootstraping: sample students

In [3]:
lang = "sv"

In [4]:
with open("../../config.yml") as f:
    train_config = yaml.safe_load(f)

In [5]:
if lang == "fi":
    csv_path = train_config['data_args']['csv_fi']
elif lang == "sv":
    csv_path = train_config['data_args']['csv_sv']
    
usecols=['sample', 'student', 'recording_path', 'cefr_mean', 'split', 'transcript_normalized', 
        'task_id' if lang == "fi" else "task"]

df = pd.read_csv(csv_path, usecols=usecols)
df = df.rename(columns={"transcript_normalized":"text"})

In [48]:
B = 1000
np.random.seed(seed=55102023)
student_ids = df.student.unique()
sampled_students = np.random.choice(student_ids, size=(B, len(student_ids)), replace=True)

In [49]:
sampled_students

array([[ 72,  92,  39, ...,  22,   5, 162],
       [ 64, 126, 139, ..., 113,  36, 131],
       [ 36, 108, 154, ...,  69,  53,  54],
       ...,
       [ 67,  66,  79, ..., 163, 163,  98],
       [ 39, 106,  24, ...,  79,  64,  81],
       [145,  97, 148, ...,  94,  18, 138]])

In [50]:
df = df.set_index('student')
df.loc[sampled_students[0]]

Unnamed: 0_level_0,sample,task,recording_path,cefr_mean,split,text
student,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
72,72,1,/m/teamwork/t40511_asr/c/digitala/2020-2021_re...,2,2,öö vi har potatis och s mm sallad och tomato h...
72,234,3,/m/teamwork/t40511_asr/c/digitala/2020-2021_re...,3,2,tack för god lektion lektionen
72,547,7,/m/teamwork/t40511_asr/c/digitala/2020-2021_re...,2,2,öö tack för din fester
72,812,9,/m/teamwork/t40511_asr/c/digitala/2020-2021_re...,2,2,var liv du och
72,906,10,/m/teamwork/t40511_asr/c/digitala/2020-2021_re...,3,2,jag ringer polis och ööm vi har problem på ka
...,...,...,...,...,...,...
162,1170,13,/m/teamwork/t40511_asr/c/digitala/2020-2021_re...,4,0,jag mår bra och du
162,1646,20,/m/teamwork/t40511_asr/c/digitala/2020-2021_re...,3,0,ja jag har
162,1729,11,/m/teamwork/t40511_asr/c/digitala/2020-2021_re...,2,0,aa jag har jag har varit i skolan
162,1830,21,/m/teamwork/t40511_asr/c/digitala/2020-2021_re...,3,0,jag kan inte komma eftersom jag mår inte så bra


#### This is only for the baseline used in the paper

In [51]:
def true_round(x):
    import decimal
    return int(decimal.Decimal(str(x)).quantize(decimal.Decimal("1"), rounding=decimal.ROUND_HALF_UP))

if lang == "fi":
    original_csv_path = '/scratch/work/lunt1/wav2vec2-finetune/csv/finnish_df.csv'
elif lang == "sv":
    original_csv_path = '/scratch/work/lunt1/wav2vec2-finetune/csv/swedish_df.csv'
    
df_original = pd.read_csv(original_csv_path, usecols=usecols)
df_original["cefr_mean"] = df_original["cefr_mean"].apply(true_round) - 1
df_original = df_original.rename(columns={"transcript_normalized":"text"})
df_original = df_original.set_index("student")
# df_original

# ASR

In [31]:
asr_exp_outputs = {
#     "Before L2 fine-tuning": glob.glob("experiments/before_l2_finetune/finnish_ASR_transcrip_fold0.csv"),
    
    # Baselines
    "BASE": glob.glob("../../experiments/ex0_base/finnish_ASR_transcrip_fold*.csv"), 
    "BASE_same": glob.glob("../../experiments/ex0_duplicate/finnish_ASR_transcrip_fold*.csv"),
    "BASE_OS": glob.glob("../../experiments/ex2_base_os/finnish_ASR_transcrip_fold*.csv"),
    
    # Experiment 1
    "1. Time masking": glob.glob("../../experiments/ex1_time_masking/finnish_ASR_transcrip_fold*.csv"), 
    "2. Frequency masking": glob.glob("../../experiments/ex1_band_reject/finnish_ASR_transcrip_fold*.csv"),
    "3. Additive noise": glob.glob("../../experiments/ex1_additive_noise/finnish_ASR_transcrip_fold*.csv"),
    "4. Reverberation": glob.glob("../../experiments/ex1_reverberation/finnish_ASR_transcrip_fold*.csv"),
    "5. Pitch shift": glob.glob("../../experiments/ex1_pitch_shift/finnish_ASR_transcrip_fold*.csv"),
    "6. Tempo perturbation": glob.glob("../../experiments/ex1_tempo_perturbation/finnish_ASR_transcrip_fold*.csv"), 
    "7. Random transformation": glob.glob("../../experiments/ex1a_random_transform/finnish_ASR_transcrip_fold*.csv"), 
    
    # Experiment 2
    "TTS_augment": glob.glob("../../experiments/ex3_tts/finnish_ASR_transcrip_fold*.csv"),
    
    # Experiment 3
    "OS_augment": glob.glob("../../experiments/ex2_resample_data_cefr/finnish_ASR_transcrip_fold*.csv"),
    
    # Experiment 4
    "BASE_CCL": glob.glob("../../experiments/ex4_base_cl/finnish_ASR_transcrip_fold*.csv"),
    "BASE_CCL_UM": glob.glob("../../experiments/ex4_base_cl_um/finnish_ASR_transcrip_fold*.csv"),
    "BASE_CCL_2": glob.glob("../../experiments/ex4_base_cl_2/finnish_ASR_transcrip_fold?.csv"),
    "BASE_CCL_2_UM":glob.glob("../../experiments/ex4_base_cl_2_um/finnish_ASR_transcrip_fold*.csv"),
    
    "FM_CCL": glob.glob("../../experiments/ex4_frequency_masking/finnish_ASR_transcrip_fold?.csv"),
    "FM_CCL_UM": glob.glob("../../experiments/ex4_frequency_masking_um/finnish_ASR_transcrip_fold?.csv"),
    "FM_CCL_2":glob.glob("../../experiments/ex4_frequency_masking_2/finnish_ASR_transcrip_fold*.csv"),
    "FM_CCL_2_UM":glob.glob("../../experiments/ex4_frequency_masking_2_um/finnish_ASR_transcrip_fold*.csv"),
    
#     "TTS_CL": glob.glob("../../experiments/ex4_tts_cl/finnish_ASR_transcrip_fold?.csv"),
#     "TTS_CL_UM":glob.glob("../../experiments/ex4_tts_cl_um/finnish_ASR_transcrip_fold?.csv"),

}

In [32]:
exp_names = [
    "Before L2 fine-tuning",
    
    "BASE", 
    "BASE_same", 
    "BASE_OS", 
    
    "1. Time masking", 
    "2. Frequency masking", 
    "3. Additive noise",
    "4. Reverberation", 
    "5. Pitch shift", 
    "6. Tempo perturbation", 
    "7. Random transformation", 
    
    "TTS_augment", 
    
    "OS_augment", 
    
    "BASE_CCL",
    "BASE_CCL_UM",
    "BASE_CCL_2",
    "BASE_CCL_2_UM",
    
    "FM_CCL",
    "FM_CCL_UM",
    "FM_CCL_2",
    "FM_CCL_2_UM"
    
#     "TTS_CL",
#     "TTS_CL_UM",
]

In [33]:
asr_exp_outputs_sv = {
    # Baseline
    "BASE": glob.glob("../../experiments/sv_base/swedish_ASR_transcrip_fold*.csv"),
    "Frequency masking": glob.glob("../../experiments/sv_fm/swedish_ASR_transcrip_fold*.csv"),
   
    # Experiment 1
#     "1. Time masking": glob.glob("../../experiments/ex1_time_masking/finnish_ASR_transcrip_fold*.csv"), 

}

In [34]:
def get_aggregated_df(paths):
    dfs = []
    for path in paths:

        df = pd.read_csv(path)
        df = df.drop(["labels", "input_values", 
                      "speech", "sampling_rate"], axis=1)
        dfs.append(df)
        
    return pd.concat(dfs).reset_index(drop=True)

In [35]:
def plot_heatmap(matrix):
    ticks = [i+1 for i in list(range(matrix.shape[0]))]
    fig = ff.create_annotated_heatmap(
        matrix, colorscale="blues_r", x=ticks, y=ticks)
    fig.update_layout(
        yaxis=dict(title="True score", autorange="reversed"), 
        xaxis=dict(title="Predicted score"), 
        width=600, height=600
    )
    fig['data'][0]['showscale'] = True
    return fig

In [36]:
def get_wer(group:pd.DataFrame):
    group = group.reset_index()
    stud_wer = 100*wer.compute(predictions=group.ASR_transcript, references=group.text)
    
    # number of words of all utterance by this student
    n = len(" ".join(group.text.tolist()).split()) 
    
    # edit distance
    e = np.round(n*stud_wer)
    
    return  n, e

In [49]:
def asr_statistical_significace(X):
    """
    Input:
        X: pd.DataFrame, with 1) num of words and 2) num of errors columns
    Output:
        WER
    """
    wer_bs = []
    
    X = X.set_index('student')

    for ids in sampled_students:
        X_b = X.loc[ids]
        wer_b = X_b.e.sum() / X_b.n.sum()

        wer_bs.append(wer_b)

    wer_bs = np.array(wer_bs)
    
    return wer_bs.mean()

In [50]:
def by_cefr_ss(col_by_cefr):
#     X_by_cefr = pd.DataFrame(column.tolist(), columns=["n", "e"])
    
    cefr_wer_bs = []
    
    for ids in sampled_students:
        cefr_b = col_by_cefr[ids].dropna()
        cefr_b = pd.DataFrame(cefr_b.tolist(), columns=["n","e"])
        cefr_wer = cefr_b.e.sum()/cefr_b.n.sum()
        cefr_wer_bs.append(cefr_wer)
    cefr_wer_bs = np.array(cefr_wer_bs)
    return cefr_wer_bs, cefr_wer_bs.mean()

In [51]:
def get_stat_results(exp_results, by_cefr=False):
    wer_boots = []
    wer_boots_by_cefr = []
    bootstrap_wers = np.zeros((len(exp_results), B))
    bootstrap_wer_by_cefr = np.zeros((len(exp_results), B))

    for i,  (name, paths) in enumerate(exp_results.items()):
        print(name)

        df = get_aggregated_df(paths)
        
        if by_cefr:
            X_1 = df.groupby(["student", "cefr_mean"]).apply(get_wer).unstack()
            print(X_1.apply(by_cefr_ss))
#             wer_boots_by_cefr.append()
            
        else:
            # obtain df with 1) number of wods and 2) number of errors by student 
            X = df.groupby('student').apply(get_wer)
            X = X.to_frame(name="stat").reset_index()
            X['n'], X['e'] = zip(*X['stat'])

            # Obtain statistically significan wer
            all_bs, wer_boot_mean = asr_statistical_significace(X)
            wer_boots.append([name, wer_boot_mean])
            bootstrap_wers[i, :] = all_bs
    
    if by_cefr:
        df_wer = pd.DataFrame(wer_boots_by_cefr, index=list(exp_results.keys()))
    else:
        df_wer = pd.DataFrame(wer_boots, columns=["Experiment","WER"])
    
    return {"df": df_wer, "bootstrap_wers": bootstrap_wers}

In [52]:
def plot_exp_results(df_wer, exp_name, include_exp, up=[], down=[], big=False, colours=[]):
    
    
    df_wer = df_wer[df_wer.Experiment.isin(include_exp)]
    df_wer['x'] = exp_name 
    print(exp_name)
    
    annotations = []

    for _, row in df_wer.iterrows():
        annotations.append({
            "x": row.x, 
            "y": row.WER,
            "xref": "x", 
            "yref": "y",
            "xanchor":"left",
            "text": row.Experiment,
            "showarrow": True,
            "ax": 20,
            "ay": 0,
            "font": {"size":20 if big else 12},
            "xshift":5, 
            "yshift":0
        })

    for i, a in enumerate(annotations):
        if a['text'] in up:
            annotations[i]['ay'] = -10
            annotations[i]['yshift'] = 2
            annotations[i]['xshift'] = 5
        elif a['text'] in down:
            annotations[i]['ay'] = 8
            annotations[i]['yshift'] = -2
            annotations[i]['xshift'] = 5

    fig = go.Figure()
    fig.add_trace(go.Scatter(x=df_wer.x, 
                   y=df_wer.WER, 
                   mode='markers', 
                   marker=dict(color=px.colors.qualitative.Plotly if len(colours)==0 else colours, 
                               size=15 if big else 10)))
    
    fig.update_layout(plot_bgcolor='whitesmoke', 
                      width=700 if big else 400, 
                      height=700 if big else 600, 
                      annotations=annotations, 
                      xaxis=dict(range=[-0.2, 2.2], showticklabels=True), 
                      yaxis=dict(title="WER %"), 
                      font=dict(size=20 if big else 12))
    fig.show()
    
    print(df_wer.drop(columns=['x']).to_latex(index=False,
                  formatters={"name": str.upper},
                  float_format="{:.2f}".format,))
    return fig

#### This takes around a minute to run

In [65]:
# asr_result = get_stat_results(asr_exp_outputs)
asr_result_cefr = get_stat_results(asr_exp_outputs, by_cefr=True)

BASE


KeyboardInterrupt: 

In [44]:
p_values_wer = []

for i in range(len(asr_result["df"])):
    p = stats.ttest_ind(asr_result["bootstrap_wers"][0], 
                        asr_result["bootstrap_wers"][i]).pvalue
    p_values_wer.append(p)
    
asr_pvalues = pd.DataFrame(p_values_wer, index=asr_result["df"].Experiment, columns=["p-value"])
print(asr_pvalues.to_latex(index=True,
                  formatters={"name": str.upper},
                  float_format="{:.6f}".format,))

\begin{tabular}{lr}
\toprule
{} &  p-value \\
Experiment               &          \\
\midrule
BASE                     & 1.000000 \\
BASE\_same                & 0.658622 \\
BASE\_OS                  & 0.000000 \\
1. Time masking          & 0.005962 \\
2. Frequency masking     & 0.000000 \\
3. Additive noise        & 0.012952 \\
4. Reverberation         & 0.000000 \\
5. Pitch shift           & 0.000000 \\
6. Tempo perturbation    & 0.000000 \\
7. Random transformation & 0.000018 \\
TTS\_augment              & 0.085505 \\
OS\_augment               & 0.000000 \\
BASE\_CCL                 & 0.059518 \\
BASE\_CCL\_UM              & 0.112773 \\
BASE\_CCL\_2               & 0.000048 \\
BASE\_CCL\_2\_UM            & 0.066650 \\
FM\_CCL                   & 0.032265 \\
FM\_CCL\_UM                & 0.126020 \\
FM\_CCL\_2                 & 0.000000 \\
FM\_CCL\_2\_UM              & 0.000000 \\
\bottomrule
\end{tabular}



  print(asr_pvalues.to_latex(index=True,


In [42]:
asr_result["df"]

Unnamed: 0,Experiment,WER
0,BASE,21.232776
1,BASE_same,21.220012
2,BASE_OS,21.46546
3,1. Time masking,21.153157
4,2. Frequency masking,20.976799
5,3. Additive noise,21.160846
6,4. Reverberation,21.011681
7,5. Pitch shift,21.050934
8,6. Tempo perturbation,21.019089
9,7. Random transformation,21.109257


In [82]:
# asr_result_sv = get_stat_results(asr_exp_outputs_sv)
# asr_result_cefr_sv = get_stat_results(asr_exp_outputs_sv, by_cefr=True)

In [77]:
plot_exp_results(asr_result_sv, 
                  'exp',
                  asr_result_sv.columns, 
                  big=True)

exp


\begin{tabular}{lr}
\toprule
Empty DataFrame
Columns: Index(['Experiment', 'WER'], dtype='object')
Index: Int64Index([], dtype='int64') \\
\bottomrule
\end{tabular}




In future versions `DataFrame.to_latex` is expected to utilise the base implementation of `Styler.to_latex` for formatting and rendering. The arguments signature may therefore change. It is recommended instead to use `DataFrame.style.to_latex` which also contains additional functionality.



In [82]:
asr_result_cefr_sv.T

Unnamed: 0_level_0,BASE,Frequency masking
cefr_mean,Unnamed: 1_level_1,Unnamed: 2_level_1
1,26.405815,30.350446
2,23.493985,22.320813
3,17.663235,17.667651
4,14.681887,14.135346
5,8.06221,8.086909
6,8.636321,9.816692


In [72]:
# for_plotting = asr_result_cefr.T

# fig = go.Figure()

# count = [33, 350, 512, 425, 416, 285, 91]
# bar = go.Bar(x=[1, 2, 3, 4, 5, 6, 7], y=count, name="Number of samples", 
#              marker=dict(color="#DDF2FD"), showlegend=True)

# lines = px.line(for_plotting, y=for_plotting.columns)

# fig.add_trace(bar)
# for data in lines.data:
#     data.yaxis = "y2"
#     fig.add_trace(data)

# fig.update_layout(plot_bgcolor="whitesmoke", 
#                  yaxis=dict(title="Number of Samples for Evaluation", showgrid=False, 
#                             side='right'), 
#                  xaxis=dict(title="Class (CEFR score)", dtick=1), width=1200, height=800, font=dict(size=20), 
#                  yaxis2=dict(title="WER %", overlaying='y'),
#                  legend=dict(x=0.995, xanchor='right', y=0.99)
#                  )
# fig.show()

In [71]:
# print(asr_result_cefr[1].to_latex(index=True,
#                   formatters={"name": str.upper},
#                   float_format="{:.2f}".format,))

In [38]:
exp_names = [
    "BASE", 
#     "BASE_same", 
#     "BASE_OS", 
    
#     "1. Time masking", 
#     "2. Frequency masking", 
#     "3. Additive noise",
#     "4. Reverberation", 
#     "5. Pitch shift", 
#     "6. Tempo perturbation", 
#     "7. Random transformation", 
    
#     "TTS_augment", 
    
#     "OS_augment", 
    
#     "BASE_CCL",
#     "BASE_CCL_UM",
#     "BASE_CCL_2",
#     "BASE_CCL_2_UM",
    
#     "FM_CCL",
#     "FM_CCL_UM",
#     "FM_CCL_2",
#     "FM_CCL_2_UM",
    
#     "TTS_CL",
#     "TTS_CL_UM",
]
# colours = px.colors.qualitative.Plotly + ["#B0A695", "#CC704B"]

# asr_result_by_cefr = pd.DataFrame(
#     asr_result_cefr.mean(axis=1)).reset_index().rename(
#     columns={"index":"Experiment", 0: "WER"})

# exp1_latex = plot_exp_results(asr_result_by_cefr, 
# #                  ["Base"] + ["Frequency masking"] + ["Base"]*4 + ["Frequency masking"]*4,
#                   'exp',
#                   exp_names, 
#                   up=["2. Frequency masking"],
#                   down=["BASE_OS"], 
#                   big=True, 
#                   colours=[colours[0],colours[0]] + colours[1:5] + colours[1:5]
# #                              colours=['#61A3BA','#F4CE14','#DA0C81', '#61A3BA','#F4CE14','#DA0C81','#61A3BA','#F4CE14','#DA0C81',]
#                  )

exp


\begin{tabular}{lr}
\toprule
Experiment &   WER \\
\midrule
      BASE & 22.78 \\
\bottomrule
\end{tabular}




In future versions `DataFrame.to_latex` is expected to utilise the base implementation of `Styler.to_latex` for formatting and rendering. The arguments signature may therefore change. It is recommended instead to use `DataFrame.style.to_latex` which also contains additional functionality.



In [518]:
exp_names = [
    "BASE", 
#     "BASE_same", 
#     "BASE_OS", 
    
#     "1. Time masking", 
    "2. Frequency masking", 
#     "3. Additive noise",
#     "4. Reverberation", 
#     "5. Pitch shift", 
#     "6. Tempo perturbation", 
#     "7. Random transformation", 
    
#     "TTS_augment", 
    
#     "OS_augment", 
    
    "BASE_CCL",
    "BASE_CCL_UM",
    "BASE_CCL_2",
    "BASE_CCL_2_UM",
    
    "FM_CCL",
    "FM_CCL_UM",
    "FM_CCL_2",
    "FM_CCL_2_UM",
    
#     "TTS_CL",
#     "TTS_CL_UM",
]

colours = px.colors.qualitative.Plotly + ["#B0A695", "#CC704B"]

exp1_latex = plot_exp_results(asr_result, 
                              ["Base"] + ["Frequency masking"] + ["Base"]*4 + ["Frequency masking"]*4,
#                               'exp',
                              exp_names, 
#                               up=["3. Additive noise", "6. Tempo perturbation", "BASE"],
                              down=["BASE_CCL"], 
                              big=True, 
                              colours=[colours[0],colours[0]] + colours[1:5] + colours[1:5]
#                              colours=['#61A3BA','#F4CE14','#DA0C81', '#61A3BA','#F4CE14','#DA0C81','#61A3BA','#F4CE14','#DA0C81',]
                             )

['Base', 'Frequency masking', 'Base', 'Base', 'Base', 'Base', 'Frequency masking', 'Frequency masking', 'Frequency masking', 'Frequency masking']


\begin{tabular}{lr}
\toprule
          Experiment &   WER \\
\midrule
                BASE & 21.23 \\
2. Frequency masking & 20.98 \\
            BASE\_CCL & 21.18 \\
         BASE\_CCL\_UM & 21.19 \\
          BASE\_CCL\_2 & 21.35 \\
       BASE\_CCL\_2\_UM & 21.29 \\
              FM\_CCL & 21.17 \\
           FM\_CCL\_UM & 21.28 \\
            FM\_CCL\_2 & 21.01 \\
         FM\_CCL\_2\_UM & 21.08 \\
\bottomrule
\end{tabular}




In future versions `DataFrame.to_latex` is expected to utilise the base implementation of `Styler.to_latex` for formatting and rendering. The arguments signature may therefore change. It is recommended instead to use `DataFrame.style.to_latex` which also contains additional functionality.



# ASA

In [20]:
import yaml
from sklearn import metrics
import plotly.figure_factory as ff
from plotly.subplots import make_subplots


### Experiment 1

In [21]:
asa_exp_outputs = {
    # Result from reference paper
    "BASE + BASE (original)": glob.glob("../../experiments/ex0_base/asa_ex0_drop_1_original_df/asa_output_?.out"), 
    
    # Baselines a -------
    "BASE + BASE": glob.glob("../../experiments/ex0_base/asa_ex0_no_augment_no_drop/asa_output_?.out"),
    "BASE_same + BASE": glob.glob("../../experiments/ex0_duplicate/asa_ex0_base/asa_output_?.out"), 
    "BASE_OS + BASE": glob.glob("../../experiments/ex2_base_os/asa_ex0_base/asa_output_?.out"),
    
    # Experiment 1a
    "1. Time masking + BASE": glob.glob("../../experiments/ex1_time_masking/asa_ex0_base/asa_output_?.out"),    
    "2. Frequency masking + BASE": glob.glob("../../experiments/ex1_band_reject/asa_ex0_base/asa_output_?.out"), 
    "3. Additive noise + BASE": glob.glob("../../experiments/ex1_additive_noise/asa_ex0_base/asa_output_?.out"), 
    "4. Reverberation + BASE": glob.glob("../../experiments/ex1_reverberation/asa_ex0_base/asa_output_?.out"),     
    "5. Pitch shift + BASE": glob.glob("../../experiments/ex1_pitch_shift/asa_ex0_base/asa_output_?.out"), 
    "6. Tempo perturbation + BASE": glob.glob("../../experiments/ex1_tempo_perturbation/asa_ex0_base/asa_output_?.out"), 
    "7. Random transform + BASE": glob.glob("../../experiments/ex1a_random_transform/asa_ex0_base/asa_output_?.out"),
    
    # Experiment 2a
    "OS_augment + BASE": glob.glob("../../experiments/ex2_resample_data_cefr/asa_ex0_base/asa_output_?.out"), 
    
    # Experiment 3
    "TTS + BASE": glob.glob("../../experiments/ex3_tts/asa_ex0_base/asa_output_*.out"), 
    
    # Baselines b -----
    "BASE + BASE": glob.glob("../../experiments/ex0_base/asa_ex0_no_augment_no_drop/asa_output_?.out"),
    "BASE + BASE_same": glob.glob("../../experiments/ex0_base/asa_ex0_duplicate/asa_output_?.out"),
    "BASE + BASE_OS": glob.glob("../../experiments/ex0_base/asa_ex2_base_os/asa_output_?.out"),
    
    # Experiment 1b
    "BASE + 1. Time masking": glob.glob("../../experiments/ex0_base/asa_ex1_time_masking/asa_output_?.out"),
    "BASE + 2. Frequency masking": glob.glob("../../experiments/ex0_base/asa_ex1_band_reject/asa_output_?.out"),
    "BASE + 3. Additive noise": glob.glob("../../experiments/ex0_base/asa_ex1_additive_noise/asa_output_?.out"), 
    "BASE + 4. Reverberation": glob.glob("../../experiments/ex0_base/asa_ex1_reverberation/asa_output_?.out"),    
    "BASE + 5. Pitch shift": glob.glob("../../experiments/ex0_base/asa_ex1_pitch_shift/asa_output_?.out"), 
    "BASE + 6. Tempo perturbation": glob.glob("../../experiments/ex0_base/asa_ex1_tempo_perturbation/asa_output_?.out"),
    "BASE + 7. Random transform": glob.glob("../../experiments/ex0_base/asa_ex1a_random_transforms/asa_output_?.out"),
    
    # Experiment 2b
    "BASE + OS_augment": glob.glob("../../experiments/ex0_base/asa_ex2_resample_cefr/asa_output_?.out"),
    
    # Experiment 4
    "BASE + BASE_CCL": glob.glob("../../experiments/ex0_base/asa_ex4_base+base_ccl/asa_output_?.out"),
    "BASE + BASE_CCL_UM": glob.glob("../../experiments/ex0_base/asa_ex4_base+base_ccl_um/asa_output_?.out"),
    "BASE + BASE_CCL_2": glob.glob("../../experiments/ex0_base/asa_ex4_base+base_ccl_2/asa_output_?.out"),
    "BASE + BASE_CCL_2_UM": glob.glob("../../experiments/ex0_base/asa_ex4_base+base_ccl_2_um/asa_output_?.out"),
    
    "BASE + OS_augment_CCL": glob.glob("../../experiments/ex0_base/asa_ex4_base+os_augment_ccl/asa_output_?.out"),
    "BASE + OS_augment_CCL_UM": glob.glob("../../experiments/ex0_base/asa_ex4_base+os_augment_ccl_um/asa_output_?.out"),
    "BASE + OS_augment_CCL_2": glob.glob("../../experiments/ex0_base/asa_ex4_base+os_augment_ccl_2/asa_output_?.out"),
    "BASE + OS_augment_CCL_2_UM": glob.glob("../../experiments/ex0_base/asa_ex4_base+os_augment_ccl_2_um/asa_output_?.out"),
    
#     "TTS_CL + BASE": glob.glob("../../experiments/ex4_tts_cl/asa_ex0_base/asa_output_?.out"),
    
}

In [22]:
asa_exp_names = [
    "BASE + BASE (original)",
    
    # Baselines a -------
    "BASE + BASE",
    "BASE_same + BASE",
    "BASE_OS + BASE",
    
    # Experiment 1a
    "1. Time masking + BASE",  
    "2. Frequency masking + BASE",
    "3. Additive noise + BASE",
    "4. Reverberation + BASE",    
    "5. Pitch shift + BASE",
    "6. Tempo perturbation + BASE", 
    "7. Random transform + BASE",
    
    # Experiment 2a
    "OS_augment + BASE", 
    
    # Experiment 3
    "TTS + BASE", 
    
    # Baselines b -----
    "BASE + BASE",
    "BASE + BASE_same",
    "BASE + BASE_OS",
    
    # Experiment 1b
    "BASE + 1. Time masking",
    "BASE + 2. Frequency masking",
    "BASE + 3. Additive noise", 
    "BASE + 4. Reverberation",    
    "BASE + 5. Pitch shift", 
    "BASE + 6. Tempo perturbation",
    "BASE + 7. Random transform",
    
    # Experiment 2b
    "BASE + OS_augment",
    
    # Experiment 4
    "BASE + BASE_CCL",
    "BASE + BASE_CCL_UM",
    "BASE + BASE_CCL_2",
    "BASE + BASE_CCL_2_UM",
    
    "BASE + OS_augment_CCL",
    "BASE + OS_augment_CCL_UM",
    "BASE + OS_augment_CCL_2",
    "BASE + OS_augment_CCL_2_UM",
    
    "TTS_CL + BASE"
]

In [43]:
asa_exp_outputs_sv = {
    "BASE + BASE": glob.glob("../../experiments/sv_base/asa_base/asa_output_?.out"), 
    "BASE + OS_augment": glob.glob("../../experiments/sv_base/asa_os_augment/asa_output_?.out"),
    "BASE + OS_augment_CCL_2_UM": glob.glob("../../experiments/sv_base/asa_os_augment_2_um/asa_output_?.out"),
}

In [24]:
def add_asa_result_to_df(df, output_path, drop_class=[]):
    """
    params:
        df: the whole df 
        output_path: path to file that contains the prediction results
    return: 
        a df of this fold only, containing that results
    """
    fold = int(output_path.split("asa_output_")[-1][0])
    fold_df = df[df.split == fold]
    fold_df = fold_df[~fold_df.cefr_mean.isin(drop_class)]
    
    # get results from output path 
    labels = []
    predictions = []

    with open(output_path, 'r') as file:
        while True:
            line = file.readline().strip()
            if not line: 
                break 
            if "label" in line:
                labels.append(int(line.strip()[-1]) + 1)
            elif "pred" in line:
                predictions.append(int(line.strip()[-1]) + 1)

    assert len(fold_df) == len(labels), f"{len(fold_df)}, {len(labels)}" 
    assert all(fold_df.cefr_mean == np.array(labels))
    
    fold_df.insert(4, "Prediction", predictions, True)
    return fold_df

In [25]:
def get_aggregated_asa_df(df, paths, drop_class):
    dfs = []
    for path in paths:
        fold_df = add_asa_result_to_df(df, path, drop_class)
        dfs.append(fold_df)    
    return pd.concat(dfs)

In [26]:
def get_asa_metrics(y_true, y_pred, average="macro"):
    precision = 100*metrics.precision_score(y_true=y_true, y_pred=y_pred, average=average, zero_division=0)
    recall = 100*metrics.recall_score(y_true=y_true, y_pred=y_pred, average=average, zero_division=0)
    f1 = 100*metrics.f1_score(y_true=y_true, y_pred=y_pred, average=average, zero_division=0)
    kappa = 100*metrics.cohen_kappa_score(y1=y_true, y2=y_pred, weights="quadratic")
    
    cm = metrics.confusion_matrix(y_true, y_pred, normalize='true')
    
    return precision,recall, f1, kappa, cm

In [27]:
def dist_plot(data, df, plot_exps, metric="F1 score", bin_size=0.1, big=False):
    """
    input:
        - data (n_exp, B): matix containing the bootstrap metrics
        - df: df of the mean metrics 
        - plot_exps: list of name of experiments to include in the plot
    """
    inds = df.index[df.experiment.isin(plot_exps)]
    data = data[inds]
    df = df[df.experiment.isin(plot_exps)].reset_index()
    names = df.experiment
    
    fig = ff.create_distplot(data, group_labels=names, curve_type='kde', show_hist=True, bin_size=bin_size, colors=px.colors.qualitative.Plotly)
    fig.update_layout(xaxis=dict(title=metric), yaxis=dict(title="Probability"), 
                      legend=dict(x=0.995, xanchor='right', y=0.99),
                      width=1200 if big else 700, 
                      height=800 if big else 500, 
                      font=dict(size=20 if big else 10))
    for trace in fig.data:
        if trace.type == "histogram":
            trace.opacity=0.3
    return fig

In [28]:
def get_asa_stat_results(df, asa_results):
    """
    input:
        - df: df of the whole dataset
        - asa_results: Dict, containing all the paths to the output files 
    """
#     asa_result["df"][asa_result["df"].experiment.isin(asa_exp_names)]
    

    n_exp = len(asa_results)
    n_class = len(df.cefr_mean.unique())

    exps = []
    
    precision_boots = np.zeros((n_exp, B))
    recall_boots = np.zeros((n_exp,B))
    f1_boots = np.zeros((n_exp,B))
    kappa_boots = np.zeros((n_exp,B))
    
    precision_boot_avg = np.zeros((n_exp))
    recall_boot_avg = np.zeros((n_exp))
    f1_boot_avg = np.zeros((n_exp))
    kappa_boot_avg = np.zeros((n_exp))
    cm_avgs = np.zeros((n_exp, n_class, n_class))
    
    p_values = np.zeros((n_exp, 4))
    
    for i, (name, paths) in enumerate(asa_results.items()):
        print(name)
        
        drop_class = [0] if "original" in name else []
        
        if "original" not in name:
            cm_sum = np.zeros((n_class, n_class))
        
        df_with_results = get_aggregated_asa_df(df_original if "original" in name else df, 
                                                paths, drop_class=drop_class)
        n_students = len(df_with_results.index.unique())
        
        precisions = np.zeros((B))
        recalls = np.zeros((B))
        f1s = np.zeros((B))
        kappas = np.zeros((B))
        
        for b, ids in enumerate(sampled_students):
            sampled_df = df_with_results.loc[ids]
            precisions[b], recalls[b], f1s[b], kappas[b], cm = get_asa_metrics(sampled_df.cefr_mean, 
                                                                           sampled_df.Prediction)
            if "original" not in name:
                cm_sum += cm
        
        exps.append(name)
        precision_boots[i, :] = precisions
        recall_boots[i, :] = recalls
        f1_boots[i, :] = f1s
        kappa_boots[i, :] = kappas
        
        precision_boot_avg[i] = np.round(np.mean(precisions), 2)
        recall_boot_avg[i] = np.round(np.mean(recalls), 2)
        f1_boot_avg[i] = np.round(np.mean(f1s), 2)
        kappa_boot_avg[i] = np.round(np.mean(kappas), 2)
        if "original" not in name:
            cm_avgs[i, :] = cm_sum/B
        
    avg_metrics_df = pd.DataFrame(np.array([exps, precision_boot_avg, recall_boot_avg, f1_boot_avg, kappa_boot_avg]).T, 
                       columns=["experiment", "precision", "recall", "f1", "kappa"])
    return {"df": avg_metrics_df, 
            "precision": precision_boots, 
            "recall": recall_boots, 
            "f1": f1_boots, 
            "kappa": kappa_boots, 
             "cm": cm_avgs}

In [29]:
def plot_asa_results(df, plot_exps, exp_name="Experiment 1", metric="f1", metric_name="F1 score",
                     up=[], down=[], downdown=[], downdowndown=[], bottom=[], colours=[], 
                     big=False, width=700, height=900):
    """
    inputs: 
        - df: df containing the average bootstrap metrics
        - exp_name: str 
        - plot_exps: list, list of experiments to include in the plot 
        - metric: str, the metric to plot
        - up: list, list of exp names, move them up in the plot 
        - down: list, list of exp names, move them down in the plot
    """
    
#     df = df[df.experiment.isin(plot_exps)]
    df = df.set_index("experiment").loc[plot_exps].reset_index()
    df['x'] = exp_name    
    annotations = []

    for _, row in df.iterrows():
        annotations.append({
            "x": row.x, 
            "y": row[metric],
            "xref": "x", 
            "yref": "y",
            "xanchor":"left",
            "text": row.experiment,
            "showarrow": True,
            "ax": 20,
            "ay": 0,
            "font": {"size":20 if big else 12},
            "xshift":5, 
            "yshift":0
        })

    for i, a in enumerate(annotations):
        if a['text'] in up:
            annotations[i]['ay'] = -10
            annotations[i]['yshift'] = 5
            annotations[i]['xshift'] = 5
        elif a['text'] in down:
            annotations[i]['ay'] = 10
            annotations[i]['yshift'] = -5
            annotations[i]['xshift'] = 5
        elif a['text'] in downdown:
            annotations[i]['ay'] = 30
            annotations[i]['yshift'] = -5
            annotations[i]['xshift'] = 5
        elif a['text'] in downdowndown:
            annotations[i]['ay'] = 40
            annotations[i]['yshift'] = -5
            annotations[i]['xshift'] = 5
        elif a['text'] in bottom:
            annotations[i]['ay'] = 50
            annotations[i]['yshift'] = -5
            annotations[i]['xshift'] = 5

    fig = go.Figure()
    fig.add_trace(go.Scatter(x=df.x, 
                   y=df[metric].astype(float), 
                   mode='markers', 
                   marker=dict(color=px.colors.qualitative.Plotly if len(colours)==0 else colours, 
                               size=15 if big else 10)))

    fig.update_layout(plot_bgcolor='whitesmoke', 
                      width=width if big else 420, 
                      height=height if big else 600, 
                      annotations=annotations, 
                      xaxis=dict(range=[-.2, 3], showticklabels=True, title="ASA experiments"), 
                      yaxis=dict(title=metric_name), 
                      font=dict(size=20 if big else 12))
    
#     print(df.drop(columns=['x']).to_latex(index=False,
#               formatters={"name": str.upper},
#               float_format="{:.2f}".format,))
    return fig

#### Get metrics for all experiments
#### Take about 5 minutes to run

In [38]:
asa_result = get_asa_stat_results(df, asa_exp_outputs)

BASE + BASE (original)
BASE + BASE
BASE_same + BASE
BASE_OS + BASE
1. Time masking + BASE
2. Frequency masking + BASE
3. Additive noise + BASE
4. Reverberation + BASE
5. Pitch shift + BASE
6. Tempo perturbation + BASE
7. Random transform + BASE
OS_augment + BASE
TTS + BASE
BASE + BASE_same
BASE + BASE_OS
BASE + 1. Time masking
BASE + 2. Frequency masking
BASE + 3. Additive noise
BASE + 4. Reverberation
BASE + 5. Pitch shift
BASE + 6. Tempo perturbation
BASE + 7. Random transform
BASE + OS_augment
BASE + BASE_CCL
BASE + BASE_CCL_UM
BASE + BASE_CCL_2
BASE + BASE_CCL_2_UM
BASE + OS_augment_CCL
BASE + OS_augment_CCL_UM
BASE + OS_augment_CCL_2
BASE + OS_augment_CCL_2_UM


In [52]:
asa_result = get_asa_stat_results(df, asa_exp_outputs_sv)

BASE + BASE
BASE + OS_augment
BASE + OS_augment_CCL_2_UM


In [19]:
asa_result["df"]

Unnamed: 0,experiment,precision,recall,f1,kappa
0,BASE + BASE,38.16,34.6,35.31,65.02
1,BASE + OS_augment,41.29,40.01,40.2,66.38
2,BASE + OS_augment_CCL_2_UM,48.83,41.66,43.8,66.3


In [40]:
asa_exp_names = [
#     "BASE + BASE (original)",
    
    # Baselines a -------
#     "BASE + BASE",
#     "BASE_same + BASE",
#     "BASE_OS + BASE",
    
    # Experiment 1a
#     "1. Time masking + BASE",  
#     "2. Frequency masking + BASE",
#     "3. Additive noise + BASE",
#     "4. Reverberation + BASE",    
#     "5. Pitch shift + BASE",
#     "6. Tempo perturbation + BASE", 
#     "7. Random transform + BASE",
    
    # Experiment 2a
#     "OS_augment + BASE", 
    
#     Experiment 3
#     "TTS + BASE", 
    
#     Baselines b -----
#     "BASE + BASE",
#     "BASE + BASE_same",
#     "BASE + BASE_OS",
    
    # Experiment 1b
#     "BASE + 1. Time masking",
#     "BASE + 2. Frequency masking",
#     "BASE + 3. Additive noise", 
#     "BASE + 4. Reverberation",    
#     "BASE + 5. Pitch shift", 
#     "BASE + 6. Tempo perturbation",
#     "BASE + 7. Random transform",
    
    # Experiment 2b
    "BASE + OS_augment",
    
    # Experiment 4
#     "BASE + BASE_CCL",
#     "BASE + BASE_CCL_UM",
#     "BASE + BASE_CCL_2",
#     "BASE + BASE_CCL_2_UM",
    
#     "BASE + OS_augment_CCL",
#     "BASE + OS_augment_CCL_UM",
#     "BASE + OS_augment_CCL_2",
    "BASE + OS_augment_CCL_2_UM",
    
#     "TTS_CL + BASE"
    ]

In [83]:
# pd.DataFrame(asa_result["f1"].T, columns=asa_result["df"].experiment)

In [84]:
# sd = np.std(asa_result["f1"], axis=1)
# df_sd = pd.DataFrame(np.array([sd, asa_result['df'].f1]).T, index=asa_result['df'].experiment, columns=["SD", "mean"]).loc[asa_exp_names]
# df_sd["SD"] = df_sd["SD"].astype(float)
# df_sd["mean"] = df_sd["mean"].astype(float)

# # print(df_sd.to_latex(index=True,
# #               formatters={"name": str.upper},
# #               float_format="{:.2f}".format,))
# fig = px.scatter(df_sd, x="mean", y="SD")
# fig.update_layout(plot_bgcolor='whitesmoke', xaxis=dict(title="F1 Mean"), 
#                   yaxis=dict(title="F1 Standard Deviation"), 
#                   width=1400, height=700, font=dict(size=20))
# fig.update_traces(marker=dict(size=15))

In [47]:
# colours = ['#636EFA',
#              '#EF553B',
#              '#00CC96',
#              '#AB63FA',
#              '#FFA15A',
#              '#19D3F3',
#              '#FF6692',
#              '#B6E880',
#              '#FF97FF',
#             '#FECB52', 
#            "#B0A695", 
#            "#CC704B",
#           ]

# plot_asa_results(asa_result["df"], 
#                  asa_exp_names, 
# #                  exp_name=["Baselines"]*3+["Case1"]*9+["Baselines"]*2+["Case2"]*8,
#                  exp_name=["Case 1"] * 12 + ["Case 2"] * 11,
#                  metric="f1", 
#                  up=["4. Reverberation + BASE", "5. Pitch shift + BASE", "BASE + 6. Tempo perturbation", "BASE + 4. Reverberation"],
#                  down=["BASE + 3. Additive noise", "BASE + 2. Frequency masking", "6. Tempo perturbation + BASE", "7. Random transform",  "2. Frequency masking + BASE"],
#                  downdown=[ "BASE_OS + BASE", ],
#                  downdowndown=["7. Random transform + BASE",],
#                  bottom=["3. Additive noise + BASE",],
#                  colours=colours*2,
#                  big=True, 
#                  width=1000, 
#                  height=1200).show()

In [84]:
colours = px.colors.qualitative.Plotly + ["#B0A695", "#CC704B"]
plot_asa_results(asa_result["df"], 
                 asa_exp_names, 
                 exp_name="exp1",
#                   exp_name = ["Base + Base"] + ["Base + OS_augment"] + ["Base + Base"]*4 + ["Base + OS_augment"] * 4,
                  colours=[colours[0],colours[0]] + colours[1:5] + colours[1:5],
                 big=True, 
                 width=760, height=700).show()

In [51]:
dist_plot(asa_result["f1"], asa_result["df"], asa_exp_names, big=True)

# Confusion matrix

In [58]:
def plot_cms(cms, df, plot_exps, rows=5, cols=2):
    """
    input:
        - cms (n_exp, n_class, n_class): avg cm of all experiments
        - df: df contains the avg bootstrap results
        - plot_exps: list containing names of the experiments to include in the plot
    """
    df = pd.concat([df[df["experiment"]==exp] for exp in plot_exps])
    cms = cms[df.index, :, :]
    
    ex1a_heatmaps = make_subplots(rows=rows, cols=cols, subplot_titles=df.experiment.tolist(), 
                             horizontal_spacing=0.05, vertical_spacing=0.1, shared_xaxes=True, shared_yaxes=True)

    for i, matrix in enumerate(cms):
        fig = px.imshow(np.round(matrix, 2), text_auto=True)
        ticks = np.arange(matrix.shape[0])+1   
        ex1a_heatmaps.add_trace(fig.data[0], row=1+i//cols, col=1+i%cols)
    ex1a_heatmaps.update_annotations(font_size=25)
    ex1a_heatmaps.update_layout(coloraxis=dict(colorscale="blues_r"), width=500*cols, height=550*rows, font=dict(size=20))
    ex1a_heatmaps.update_yaxes(autorange="reversed", tickvals=ticks-1, ticktext=ticks)
    ex1a_heatmaps.update_yaxes(title="True scores", row=1, col=1)
    ex1a_heatmaps.update_xaxes(tickvals=ticks-1, ticktext=ticks, title="Predicted scores")
    ex1a_heatmaps.show()

In [59]:
asa_exp_names = [ "BASE + OS_augment","BASE + OS_augment_CCL_2_UM"]
plot_cms(asa_result["cm"], asa_result["df"], asa_exp_names, rows=1, cols=2)

In [87]:
asa_exp_names

['BASE_same + BASE', 'BASE + OS_augment']

# Compute the class order for ASA CCL

In [145]:
df_with_prediciton = get_aggregated_asa_df(df, asa_exp_outputs_sv["BASE + BASE"], drop_class=[])
df_with_prediciton

experiments/sv_base/asa_base/asa_output_2.out
experiments/sv_base/asa_base/asa_output_1.out
experiments/sv_base/asa_base/asa_output_3.out
experiments/sv_base/asa_base/asa_output_0.out


Unnamed: 0_level_0,sample,task,recording_path,cefr_mean,Prediction,split,text
student,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
4,4,1,/m/teamwork/t40511_asr/c/digitala/2020-2021_re...,3,3,2,aa jag har här potatis och ketsup och sallad m...
8,8,1,/m/teamwork/t40511_asr/c/digitala/2020-2021_re...,3,3,2,jag hade öö pommes frites och och lite sallad ...
12,12,1,/m/teamwork/t40511_asr/c/digitala/2020-2021_re...,3,2,2,där är så fin och jätte bra potatis
18,18,1,/m/teamwork/t40511_asr/c/digitala/2020-2021_re...,3,2,2,öö det var ööm köt och p potatis och ketsup oc...
23,23,1,/m/teamwork/t40511_asr/c/digitala/2020-2021_re...,3,3,2,öö min mat öö öö är bra jag har potatisk och s...
...,...,...,...,...,...,...,...
53,1990,18,/m/teamwork/t40511_asr/c/digitala/2020-2021_re...,2,2,0,du måste titta på dig öm instagram
162,1998,18,/m/teamwork/t40511_asr/c/digitala/2020-2021_re...,3,2,0,mm du måste öö titta på på mig instagram
70,2008,18,/m/teamwork/t40511_asr/c/digitala/2020-2021_re...,4,3,0,kan du min nyaste instagram bild
87,2018,18,/m/teamwork/t40511_asr/c/digitala/2020-2021_re...,3,2,0,hej min ny instagram foto


In [146]:
f1 = 100*metrics.f1_score(
    y_true=df_with_prediciton.cefr_mean, 
    y_pred=df_with_prediciton.Prediction, average=None)

In [150]:
f1_order = pd.DataFrame(np.array([[1, 2, 3, 4, 5, 6], f1]).T, columns=['CEFR', 'F_1']).sort_values(by=["F_1"], ascending=False)

In [151]:
print(f1_order.to_latex(index=False,
          formatters={"name": str.upper},
          float_format="{:.2f}".format,))

\begin{tabular}{rr}
\toprule
 CEFR &   F\_1 \\
\midrule
 3.00 & 62.32 \\
 2.00 & 57.79 \\
 4.00 & 50.45 \\
 5.00 & 41.67 \\
 1.00 &  0.00 \\
 6.00 &  0.00 \\
\bottomrule
\end{tabular}



  print(f1_order.to_latex(index=False,


In [28]:
np.sum(np.abs(asa_result["f1"][0]) >= 36.51)/1000

0.999

In [29]:
import numpy as np

# Assuming these are mean F1 scores for each model
f1_scores = np.array([31, 31.5, 31.6, 31.7])
groups = np.array(['Model1', 'Model2', 'Model3', 'Model4'])
# Perform one-way ANOVA
anova_result = stats.f_oneway(f1_scores[groups == 'Model1'],
                              f1_scores[groups == 'Model2'],
                              f1_scores[groups == 'Model3'],
                              f1_scores[groups == 'Model4'])
print("ANOVA Result: F-value =", anova_result.statistic, ", p-value =", anova_result.pvalue)


ANOVA Result: F-value = nan , p-value = nan




In [21]:
asa_result["df"]

Unnamed: 0,experiment,precision,recall,f1,kappa
0,BASE + BASE (original),43.09,41.27,40.1,79.76
1,BASE + BASE,33.52,36.3,34.51,79.98
2,BASE_same + BASE,34.07,37.05,35.0,80.74
3,BASE_OS + BASE,35.95,35.26,34.45,79.18
4,1. Time masking + BASE,34.31,37.09,35.32,80.4
5,2. Frequency masking + BASE,33.72,36.01,34.46,80.32
6,3. Additive noise + BASE,33.49,35.67,34.35,79.56
7,4. Reverberation + BASE,33.64,36.49,34.69,80.1
8,5. Pitch shift + BASE,34.09,36.91,35.07,80.52
9,6. Tempo perturbation + BASE,34.22,36.65,34.98,80.3


In [22]:
p_values_precision = []
p_values_recall = []
p_values_f1 = []
p_values_kappa = []

for i in range(len(asa_result["df"])):
    p = stats.ttest_ind(asa_result["precision"][22], asa_result["precision"][i]).pvalue
    p_values_precision.append(p)
    
    p = stats.ttest_ind(asa_result["recall"][22], asa_result["recall"][i]).pvalue
    p_values_recall.append(p)
    
    p = stats.ttest_ind(asa_result["f1"][22], asa_result["f1"][i]).pvalue
    p_values_f1.append(p)
    
    p = stats.ttest_ind(asa_result["kappa"][22], asa_result["kappa"][i]).pvalue
    p_values_kappa.append(p)

In [30]:
p_values = np.array([p_values_precision, p_values_recall, p_values_f1, p_values_kappa])

asa_p_values = pd.DataFrame(p_values.T, index=asa_result["df"].experiment, columns=["precision", "recall", "f1", "kappa"])
asa_p_values = asa_p_values.iloc[23:]

print(asa_p_values.to_latex(index=True,
                  formatters={"name": str.upper},
                  float_format="{:.6f}".format,))

\begin{tabular}{lrrrr}
\toprule
{} &  precision &   recall &       f1 &    kappa \\
experiment                 &            &          &          &          \\
\midrule
BASE + BASE\_CCL            &   0.000000 & 0.000000 & 0.000000 & 0.000000 \\
BASE + BASE\_CCL\_UM         &   0.000000 & 0.000000 & 0.000000 & 0.000000 \\
BASE + BASE\_CCL\_2          &   0.000000 & 0.000000 & 0.000000 & 0.000000 \\
BASE + BASE\_CCL\_2\_UM       &   0.000000 & 0.000000 & 0.000000 & 0.000000 \\
BASE + OS\_augment\_CCL      &   0.000000 & 0.000000 & 0.000000 & 0.000000 \\
BASE + OS\_augment\_CCL\_UM   &   0.000000 & 0.000000 & 0.000000 & 0.000000 \\
BASE + OS\_augment\_CCL\_2    &   0.000000 & 0.000000 & 0.000000 & 0.000000 \\
BASE + OS\_augment\_CCL\_2\_UM &   0.000000 & 0.000000 & 0.096255 & 0.870298 \\
\bottomrule
\end{tabular}



  print(asa_p_values.to_latex(index=True,


In [24]:
asa_p_values

Unnamed: 0_level_0,precision,recall,f1,kappa
experiment,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
BASE + BASE (original),3.617284e-248,1.361864e-310,3.592998e-150,1.303584e-07
BASE + BASE,0.0,0.0,0.0,3.203647e-24
BASE_same + BASE,0.0,9.602393e-225,0.0,2.391138e-138
BASE_OS + BASE,1.3205440000000001e-248,0.0,0.0,1.71675e-12
1. Time masking + BASE,0.0,6.989468999999999e-221,0.0,1.0818719999999999e-78
2. Frequency masking + BASE,0.0,0.0,0.0,9.286548999999999e-65
3. Additive noise + BASE,0.0,0.0,0.0,0.2625475
4. Reverberation + BASE,0.0,0.0,0.0,9.923761e-37
5. Pitch shift + BASE,0.0,1.2003229999999998e-246,0.0,1.337536e-97
6. Tempo perturbation + BASE,0.0,1.149567e-301,0.0,4.79263e-60
