In [1]:
import glob, yaml
import pandas as pd

from helper_functions import get_asa_metrics, get_aggregated_asa_df

### Load the dfs
These DataFrames (dfs) **do not contain** the **model predictions** yet; the predictions are added further below.

In [24]:
# Read the training configuration; it stores the locations of the two CSV files.
with open("../../config.yml") as config_file:
    train_config = yaml.safe_load(config_file)

data_args = train_config["data_args"]
csv_path_fi, csv_path_sv = data_args["csv_fi"], data_args["csv_sv"]

# Columns loaded from both CSVs.
usecols = [
    'sample',
    'student',
    'recording_path',
    'cefr_mean',
    'split',
    'transcript_normalized',
]

# The task column is named differently in the two files:
# "task_id" in the Finnish CSV, "task" in the Finland Swedish CSV.
df_fi = pd.read_csv(csv_path_fi, usecols=[*usecols, "task_id"])
df_sv = pd.read_csv(csv_path_sv, usecols=[*usecols, "task"])

df_fi.head()

Unnamed: 0,sample,student,task_id,recording_path,cefr_mean,split,transcript_normalized
0,782,1,1,/m/teamwork/t40511_asr/c/digitala/DigiTala_201...,5,1,jos saisin valita tärkeimmän paikan minulle se...
1,181,3,1,/m/teamwork/t40511_asr/c/digitala/DigiTala_201...,4,1,minä kertoon minun kesämökistä öö se on paras ...
2,913,4,1,/m/teamwork/t40511_asr/c/digitala/DigiTala_201...,5,3,tärkeä paikka minulle on makuuhuoneeni siellä ...
3,1822,5,1,/m/teamwork/t40511_asr/c/digitala/DigiTala_201...,5,2,minulle tärkeä paikka on mun olohuone koska vo...
4,12,7,1,/m/teamwork/t40511_asr/c/digitala/DigiTala_201...,6,1,tämä paikka on minulle tosi tärkeä koska se an...


### Get results file paths 

These output files **contain the predictions** of the best model (with the highest $F_1$) on the test subsets of the folds (4 folds in total).

In [18]:
# Finnish experiments: experiment name -> directory holding its output files.
# The insertion order matters: it is reused as the row order of the metrics
# table built later in the notebook.
fi_experiments_root = "../../experiments/ex0_base"
fi_experiment_dirs = {
    "BASE": "asa_ex0_no_augment_no_drop",
    "BASE_CCL": "asa_ex4_base+base_ccl",
    "BASE_CCL_2": "asa_ex4_base+base_ccl_2",
    "BASE_CCL_UM": "asa_ex4_base+base_ccl_um",
    "BASE_CCL_2_UM": "asa_ex4_base+base_ccl_2_um",

    "OS": "asa_ex2_resample_cefr",
    "OS_CCL": "asa_ex4_base+os_augment_ccl",
    "OS_CCL_2": "asa_ex4_base+os_augment_ccl_2",
    "OS_CCL_UM": "asa_ex4_base+os_augment_ccl_um",
    "OS_CCL_2_UM": "asa_ex4_base+os_augment_ccl_2_um",
}

# Experiment name -> list of output files matching "asa_output_?.out"
# ("?" matches exactly one character; presumably the fold index -- TODO confirm).
# glob.glob returns matches in arbitrary order, so the list is sorted to make
# the file order reproducible across runs and machines.
result_paths_fi = {
    exp_name: sorted(glob.glob(f"{fi_experiments_root}/{exp_dir}/asa_output_?.out"))
    for exp_name, exp_dir in fi_experiment_dirs.items()
}

In [19]:
# Finland Swedish experiments: experiment name -> directory holding its output
# files. The insertion order matters: it is reused as the row order of the
# metrics table built later in the notebook.
sv_experiments_root = "../../experiments/sv_base"
sv_experiment_dirs = {
    "BASE": "asa_base",
    "BASE_CCL": "asa_base_ccl",
    "BASE_CCL_2": "asa_base_ccl_2",
    "BASE_CCL_UM": "asa_base_ccl_um",
    "BASE_CCL_2_UM": "asa_base_ccl_2_um",

    "OS": "asa_os_augment",
    "OS_CCL": "asa_os_augment_ccl",
    "OS_CCL_2": "asa_os_augment_ccl_2",
    "OS_CCL_UM": "asa_os_augment_ccl_um",
    "OS_CCL_2_UM": "asa_os_augment_ccl_2_um",
}

# Experiment name -> list of output files matching "asa_output_?.out"
# ("?" matches exactly one character; presumably the fold index -- TODO confirm).
# glob.glob returns matches in arbitrary order, so the list is sorted to make
# the file order reproducible across runs and machines.
result_paths_sv = {
    exp_name: sorted(glob.glob(f"{sv_experiments_root}/{exp_dir}/asa_output_?.out"))
    for exp_name, exp_dir in sv_experiment_dirs.items()
}

### Gather the predictions of each experiment into the same df

In [27]:
# For every experiment, the prediction of the best model is added
# to the language's df as a new column named after the experiment.
#
# get_aggregated_asa_df is a helper function that puts the model
# predictions found in the output files into a "Prediction" column
# and returns the resulting df.

# Finnish is processed first, then Finland Swedish.
for frame, paths_by_experiment in ((df_fi, result_paths_fi),
                                   (df_sv, result_paths_sv)):
    for exp_name, result_paths in paths_by_experiment.items():
        df = get_aggregated_asa_df(frame, result_paths, []).sort_index()
        # Column assignment aligns the predictions with `frame` on the index.
        frame[exp_name] = df["Prediction"]

In [28]:
# Inspect the first rows of the final Finnish df; besides the loaded columns
# it now has one prediction column per experiment.
df_fi.head()

Unnamed: 0,sample,student,task_id,recording_path,cefr_mean,split,transcript_normalized,BASE,BASE_CCL,BASE_CCL_2,BASE_CCL_UM,BASE_CCL_2_UM,OS,OS_CCL,OS_CCL_2,OS_CCL_UM,OS_CCL_2_UM
0,782,1,1,/m/teamwork/t40511_asr/c/digitala/DigiTala_201...,5,1,jos saisin valita tärkeimmän paikan minulle se...,5,5,5,5,5,5,5,5,5,6
1,181,3,1,/m/teamwork/t40511_asr/c/digitala/DigiTala_201...,4,1,minä kertoon minun kesämökistä öö se on paras ...,4,5,4,4,5,4,4,4,4,4
2,913,4,1,/m/teamwork/t40511_asr/c/digitala/DigiTala_201...,5,3,tärkeä paikka minulle on makuuhuoneeni siellä ...,5,6,5,5,5,5,6,6,6,6
3,1822,5,1,/m/teamwork/t40511_asr/c/digitala/DigiTala_201...,5,2,minulle tärkeä paikka on mun olohuone koska vo...,5,5,6,6,5,6,5,6,5,5
4,12,7,1,/m/teamwork/t40511_asr/c/digitala/DigiTala_201...,6,1,tämä paikka on minulle tosi tärkeä koska se an...,4,5,5,5,5,4,5,5,5,5


### For each experiment, get metrics of the model predictions
Metrics used:
- $F_1$
- Precision
- Recall
- $\kappa$ (quadratically weighted kappa)

In [32]:
# One metrics entry per experiment, in the key order of the result-path dicts.
# The reference labels are in "cefr_mean"; the predictions of an experiment are
# in the column named after it.
# get_asa_metrics yields precision, recall, f1, kappa (per the author's note).
metrics_fi = [
    get_asa_metrics(df_fi["cefr_mean"], df_fi[name])
    for name in result_paths_fi
]

metrics_sv = [
    get_asa_metrics(df_sv["cefr_mean"], df_sv[name])
    for name in result_paths_sv
]

In [51]:
columns = ["precision", "recall", "f1", "kappa"]
# The first three metrics are reported as percentages; kappa is left as is.
percent_columns = columns[:3]

# Rows are the experiments (same order as the result-path dicts).
df_metrics_fi = pd.DataFrame(metrics_fi, columns=columns,
                             index=list(result_paths_fi))
df_metrics_sv = pd.DataFrame(metrics_sv, columns=columns,
                             index=list(result_paths_sv))

# multiply precision, recall and f1 by 100 -> %
df_metrics_fi[percent_columns] = df_metrics_fi[percent_columns] * 100
df_metrics_sv[percent_columns] = df_metrics_sv[percent_columns] * 100

In [60]:
# df_metrics_fi

In [61]:
# Column-wise maximum of each metric over the Finland Swedish experiments.
# Each maximum is taken independently, so they may come from different experiments.
df_metrics_sv.max()

precision    56.406783
recall       41.878642
f1           45.174396
kappa         0.673999
dtype: float64

### Print tables for LaTeX

In [58]:
# Render the Finnish metrics table as LaTeX source (3 decimals, index kept).
latex_table_fi = df_metrics_fi.to_latex(
    index=True,
    float_format="{:.3f}".format,
)
print(latex_table_fi)

\begin{tabular}{lrrrr}
\toprule
{} &  precision &  recall &     f1 &  kappa \\
\midrule
BASE          &     33.555 &  36.364 & 34.589 &  0.801 \\
BASE\_CCL      &     32.626 &  34.378 & 33.017 &  0.790 \\
BASE\_CCL\_2    &     33.379 &  36.147 & 34.390 &  0.800 \\
BASE\_CCL\_UM   &     32.586 &  35.551 & 33.110 &  0.805 \\
BASE\_CCL\_2\_UM &     33.316 &  35.795 & 33.996 &  0.801 \\
OS            &     39.060 &  38.864 & 38.611 &  0.797 \\
OS\_CCL        &     37.388 &  38.279 & 37.271 &  0.791 \\
OS\_CCL\_2      &     37.799 &  38.062 & 37.792 &  0.780 \\
OS\_CCL\_UM     &     36.143 &  37.662 & 36.334 &  0.803 \\
OS\_CCL\_2\_UM   &     41.620 &  38.261 & 38.803 &  0.797 \\
\bottomrule
\end{tabular}



  print(df_metrics_fi.to_latex(index=True, float_format="{:.3f}".format))


In [56]:
# Render the Finland Swedish metrics table as LaTeX source (2 decimals, index kept).
latex_table_sv = df_metrics_sv.to_latex(
    index=True,
    float_format="{:.2f}".format,
)
print(latex_table_sv)

\begin{tabular}{lrrrr}
\toprule
{} &  precision &  recall &    f1 &  kappa \\
\midrule
BASE          &      38.18 &   34.62 & 35.37 &   0.65 \\
BASE\_CCL      &      37.43 &   29.53 & 28.33 &   0.60 \\
BASE\_CCL\_2    &      37.75 &   32.94 & 33.37 &   0.64 \\
BASE\_CCL\_UM   &      38.17 &   30.92 & 29.32 &   0.62 \\
BASE\_CCL\_2\_UM &      37.96 &   33.88 & 34.60 &   0.63 \\
OS            &      44.22 &   39.65 & 40.84 &   0.67 \\
OS\_CCL        &      47.79 &   41.59 & 43.38 &   0.67 \\
OS\_CCL\_2      &      52.76 &   41.88 & 45.17 &   0.67 \\
OS\_CCL\_UM     &      56.41 &   41.57 & 42.78 &   0.65 \\
OS\_CCL\_2\_UM   &      48.68 &   41.44 & 43.97 &   0.66 \\
\bottomrule
\end{tabular}



  print(df_metrics_sv.to_latex(index=True, float_format="{:.2f}".format))
