In [2]:
from sklearn.metrics import accuracy_score,classification_report, precision_recall_fscore_support
from sklearn.metrics import f1_score
from sklearn.metrics import confusion_matrix
import pandas as pd
import os
import numpy as np
import json
import re

In [3]:
# Sanity check: read the golden test-label file and show how many labels it holds.
with open('./test_golden_labels/GYAFC_test_labels.json') as labels_file:
    t = json.loads(labels_file.read())

len(t)

41600

In [4]:
# Sanity check: read one prediction file and show how many predictions it holds,
# for comparison against the golden-label count above.
with open('./test_results_all_but_en/distilbert-base-multilingual-cased_ep5_wus298_lr1e-05_batchpergpu256_gpu2_langall_but_en.json') as preds_file:
    t = json.loads(preds_file.read())

len(t)

41605

In [5]:
def highlight_max(s):
    """Return one CSS style string per element of ``s``, marking the maximum.

    Parameters
    ----------
    s : pandas.Series
        A single column (or row) of values.

    Returns
    -------
    list of str
        ``'background: lightgreen'`` for every element equal to ``s.max()``
        and ``''`` for all others. Object-dtype input (e.g. text columns) is
        never highlighted, so the result is a list of empty strings.

    Notes
    -----
    Presumably intended for ``DataFrame.style.apply`` -- no call site is
    visible in this notebook.
    """
    # Compare against the builtin ``object``: the ``np.object`` alias was
    # deprecated in NumPy 1.20 and removed in 1.24, where it raises
    # AttributeError.
    if s.dtype == object:
        return ['' for _ in range(s.shape[0])]

    is_max = s == s.max()
    return ['background: lightgreen' if cell else '' for cell in is_max]

In [6]:
def get_metrics(true_labels, predicted_labels):
    """Score predictions against golden labels.

    Parameters
    ----------
    true_labels : sequence
        Golden labels.
    predicted_labels : sequence
        Model predictions, aligned element-wise with ``true_labels``.

    Returns
    -------
    dict
        A single-entry mapping ``{"acc": <accuracy_score result>}``.
    """
    # Only accuracy is reported. Per-class precision/recall/F-score
    # ("form_*", "inform_*") and macro F1 ("fsc_macr") entries were sketched
    # here previously but are disabled.
    return {"acc": accuracy_score(true_labels, predicted_labels)}

In [8]:
# Language codes that have their own dedicated test set.
exclusive_lang = ["en", "it", "pt", "fr"]

# Every evaluation setting, in report order:
#   "<xx>_only", "all_but_<xx>", "all", "all2<xx>".
lang_list = (
    [f"{code}_only" for code in exclusive_lang]
    + [f"all_but_{code}" for code in exclusive_lang]
    + ["all"]
    + [f"all2{code}" for code in exclusive_lang]
)


# Maps each setting to the folder holding its prediction files and to the
# golden-label file those predictions are scored against.
LANG_RES_DICT = {}

for lang in lang_list:

    if lang == "all":
        golden_stem = "all"
    else:
        # Work out the target language code and which golden file English maps to:
        # the "all_but_*" and "all2*" settings use the "no_drop" English labels,
        # the "*_only" settings use the plain English labels.
        if "all_but" in lang:
            pure_lang = lang.split("_")[-1]
            english_stem = "GYAFC_test_labels_no_drop"
        elif "all2" in lang:
            pure_lang = lang[len("all2"):]
            english_stem = "GYAFC_test_labels_no_drop"
        elif "only" in lang:
            pure_lang = lang.split("_")[0]
            english_stem = "GYAFC_test_labels"
        else:
            raise Exception("Unhandled!")

        pure_lang = english_stem if pure_lang == "en" else f"{pure_lang}_only"
        golden_stem = pure_lang

    LANG_RES_DICT[lang] = {"preds_folder": f"test_results_{lang}/",
                           "golden_path": f"./test_golden_labels/{golden_stem}.json"}


# Filled in by the report cells below: setting -> {"model": ..., "acc": ...}.
RESULTS_REPORT_DICT = {}

In [9]:
lang_list

['en_only',
 'it_only',
 'pt_only',
 'fr_only',
 'all_but_en',
 'all_but_it',
 'all_but_pt',
 'all_but_fr',
 'all',
 'all2en',
 'all2it',
 'all2pt',
 'all2fr']

In [10]:
LANG_RES_DICT

{'en_only': {'preds_folder': 'test_results_en_only/',
  'golden_path': './test_golden_labels/GYAFC_test_labels.json'},
 'it_only': {'preds_folder': 'test_results_it_only/',
  'golden_path': './test_golden_labels/it_only.json'},
 'pt_only': {'preds_folder': 'test_results_pt_only/',
  'golden_path': './test_golden_labels/pt_only.json'},
 'fr_only': {'preds_folder': 'test_results_fr_only/',
  'golden_path': './test_golden_labels/fr_only.json'},
 'all_but_en': {'preds_folder': 'test_results_all_but_en/',
  'golden_path': './test_golden_labels/GYAFC_test_labels_no_drop.json'},
 'all_but_it': {'preds_folder': 'test_results_all_but_it/',
  'golden_path': './test_golden_labels/it_only.json'},
 'all_but_pt': {'preds_folder': 'test_results_all_but_pt/',
  'golden_path': './test_golden_labels/pt_only.json'},
 'all_but_fr': {'preds_folder': 'test_results_all_but_fr/',
  'golden_path': './test_golden_labels/fr_only.json'},
 'all': {'preds_folder': 'test_results_all/',
  'golden_path': './test_golde

# distilbert multilingual report

In [58]:
def get_folder_stat(test_res, golden_file_path, select_model = "distilbert",
                    fallback_golden_path = './test_golden_labels/GYAFC_test_labels_no_drop.json'):
    """Score every prediction file of one model family found in a folder.

    Parameters
    ----------
    test_res : str
        Folder containing JSON prediction files.
    golden_file_path : str
        JSON file with the golden labels for this folder.
    select_model : str, default "distilbert"
        Only files whose name contains this substring are scored.
    fallback_golden_path : str
        Golden-label file used for an individual prediction file that holds
        more predictions than ``golden_file_path`` has labels.

    Returns
    -------
    pandas.DataFrame
        One row per scored file (index = shortened file name) with the
        columns produced by ``get_metrics``, sorted by ``"acc"`` descending.
        Empty when no file name contains ``select_model``.

    Raises
    ------
    OSError
        If ``test_res`` or one of the label/prediction files cannot be read.
    """
    ep5_prefix = "disbert_multi_case_ep5_"

    with open(golden_file_path) as f:
        golden_labels = json.load(f)

    # Loaded lazily, at most once, and never written back over golden_labels.
    fallback_labels = None

    models_index = []
    data = []

    # Sorted so the row order (and the order of ties after sorting by accuracy)
    # does not depend on the filesystem's arbitrary listing order.
    for file_name in sorted(os.listdir(test_res)):

        if select_model not in file_name:
            continue

        with open(os.path.join(test_res, file_name)) as f:
            test_predicts_list = json.load(f)

        # Shorten the file name for display; plain substring replacement, no regex needed.
        short_name = file_name.replace("distilbert-base-multilingual-cased", "disbert_multi_case")
        short_name = short_name.replace("batchpergpu", "b")

        # Drop the common prefix only when it is really there, instead of
        # blindly slicing off that many characters.
        if select_model == "distilbert" and short_name.startswith(ep5_prefix):
            short_name = short_name[len(ep5_prefix):]

        # Choose the golden labels per file. Overwriting golden_labels with the
        # fallback (as was done before) made every later, shorter prediction
        # file get compared against the wrong label list.
        labels_for_file = golden_labels
        if len(golden_labels) < len(test_predicts_list):
            if fallback_labels is None:
                with open(fallback_golden_path) as f:
                    fallback_labels = json.load(f)
            labels_for_file = fallback_labels

        models_index.append(short_name)
        data.append(get_metrics(labels_for_file, test_predicts_list))

    df_report = pd.DataFrame(data, index = models_index)
    if len(df_report) > 0:
        # An empty frame has no "acc" column, so it is only sorted when rows exist.
        df_report = df_report.sort_values(by=["acc"], ascending=False, kind="stable")

    return df_report


# Example run: score the distilbert prediction files of the "fr_only" setting.
dfr = get_folder_stat(LANG_RES_DICT["fr_only"]["preds_folder"],LANG_RES_DICT["fr_only"]["golden_path"])

In [12]:
dfr

Unnamed: 0,acc
wus297_lr1e-05_b256_gpu2_langfr_only.json,0.791492
wus297_lr1e-06_b256_gpu2_langfr_only.json,0.779159
wus99_lr1e-06_b256_gpu2_langfr_only.json,0.778448


In [13]:
dfr.iloc[0].name, dfr.iloc[0].acc

('wus297_lr1e-05_b256_gpu2_langfr_only.json', 0.7914919700870418)

In [14]:
# Build the distilbert summary table: one row per evaluation setting with the
# chosen prediction file and its accuracy.
report_data = []
report_data_index = []

for lang in lang_list:

    # The "all" and "all2<xx>" settings always report one fixed checkpoint
    # (looked up by its index label) rather than the best-scoring file.
    uses_fixed_all_model = lang == "all" or "all2" in lang

    if not uses_fixed_all_model and not os.path.isdir(LANG_RES_DICT[lang]["preds_folder"]):
        # No predictions folder for this setting yet: leave a placeholder row.
        RESULTS_REPORT_DICT[lang] = {"model": "ToDo",
                                     "acc": "ToDo"}
    else:
        print(lang)

        dfr = get_folder_stat(LANG_RES_DICT[lang]["preds_folder"],
                              LANG_RES_DICT[lang]["golden_path"])

        if uses_fixed_all_model:
            best_all = dfr.loc["wus397_lr1e-05_b256_gpu2_langall.json"]
            RESULTS_REPORT_DICT[lang] = {"model": best_all.name,
                                         "acc": best_all.acc}
        else:
            # dfr is sorted by accuracy (descending), so row 0 is the best file.
            RESULTS_REPORT_DICT[lang] = {"model": dfr.iloc[0].name,
                                         "acc": dfr.iloc[0].acc}

    report_data_index.append(lang)
    report_data.append(RESULTS_REPORT_DICT[lang])

df_model_report = pd.DataFrame(report_data, index=report_data_index)
df_model_report

en_only
it_only
pt_only
fr_only
all_but_en
all_but_it
all_but_pt
all_but_fr
all
all2en
all2it
all2pt
all2fr


Unnamed: 0,model,acc
en_only,wus399_lr1e-05_batch256.json,0.866178
it_only,wus298_lr1e-05_b256_gpu2_langit_only.json,0.768148
pt_only,wus597_lr1e-05_b128_gpu2_langpt_only.json,0.759404
fr_only,wus297_lr1e-05_b256_gpu2_langfr_only.json,0.791492
all_but_en,wus298_lr1e-05_b256_gpu2_langall_but_en.json,0.836342
all_but_it,wus298_lr1e-05_b256_gpu2_langall_but_it.json,0.750539
all_but_pt,wus298_lr1e-05_b256_gpu2_langall_but_pt.json,0.737654
all_but_fr,wus298_lr1e-05_b256_gpu2_langall_but_fr.json,0.771141
all,wus397_lr1e-05_b256_gpu2_langall.json,0.794434
all2en,wus397_lr1e-05_b256_gpu2_langall.json,0.858551


In [136]:
# Convert accuracy from a fraction to a percentage rounded by rnd().
# NOTE(review): rnd() is defined in a later cell of this notebook, and this cell
# rewrites the "acc" column in place, so re-running it scales the values again.
df_model_report["acc"] = df_model_report["acc"].mul(100)
df_model_report["acc"] = df_model_report["acc"].apply(rnd)

df_model_report

Unnamed: 0,model,acc
en_only,wus399_lr1e-05_batch256.json,86.617788
it_only,wus298_lr1e-05_b256_gpu2_langit_only.json,76.814753
pt_only,wus597_lr1e-05_b128_gpu2_langpt_only.json,75.940437
fr_only,wus297_lr1e-05_b256_gpu2_langfr_only.json,79.149197
all_but_en,wus298_lr1e-05_b256_gpu2_langall_but_en.json,83.634179
all_but_it,wus298_lr1e-05_b256_gpu2_langall_but_it.json,75.053879
all_but_pt,wus298_lr1e-05_b256_gpu2_langall_but_pt.json,73.765409
all_but_fr,wus298_lr1e-05_b256_gpu2_langall_but_fr.json,77.114135
all,wus397_lr1e-05_b256_gpu2_langall.json,79.443368
all2en,wus397_lr1e-05_b256_gpu2_langall.json,85.855065


In [15]:
def rnd(val):
    """Round ``val`` to one decimal place and return it as a plain ``float``.

    Parameters
    ----------
    val : float
        Number to round (e.g. an accuracy expressed in percent).

    Returns
    -------
    float
        ``val`` rounded to one decimal.
    """
    # Return the rounded number directly. Cutting its string form to four
    # characters dropped the decimal for negatives and for values >= 1000
    # (e.g. -12.3 -> "-12." -> -12.0).
    return float(round(val, 1))

In [16]:

df_model_report

Unnamed: 0,model,acc
en_only,wus399_lr1e-05_batch256.json,0.9
it_only,wus298_lr1e-05_b256_gpu2_langit_only.json,0.8
pt_only,wus597_lr1e-05_b128_gpu2_langpt_only.json,0.8
fr_only,wus297_lr1e-05_b256_gpu2_langfr_only.json,0.8
all_but_en,wus298_lr1e-05_b256_gpu2_langall_but_en.json,0.8
all_but_it,wus298_lr1e-05_b256_gpu2_langall_but_it.json,0.8
all_but_pt,wus298_lr1e-05_b256_gpu2_langall_but_pt.json,0.7
all_but_fr,wus298_lr1e-05_b256_gpu2_langall_but_fr.json,0.8
all,wus397_lr1e-05_b256_gpu2_langall.json,0.8
all2en,wus397_lr1e-05_b256_gpu2_langall.json,0.9


In [17]:
df_model_report

Unnamed: 0,model,acc
en_only,wus399_lr1e-05_batch256.json,0.9
it_only,wus298_lr1e-05_b256_gpu2_langit_only.json,0.8
pt_only,wus597_lr1e-05_b128_gpu2_langpt_only.json,0.8
fr_only,wus297_lr1e-05_b256_gpu2_langfr_only.json,0.8
all_but_en,wus298_lr1e-05_b256_gpu2_langall_but_en.json,0.8
all_but_it,wus298_lr1e-05_b256_gpu2_langall_but_it.json,0.8
all_but_pt,wus298_lr1e-05_b256_gpu2_langall_but_pt.json,0.7
all_but_fr,wus298_lr1e-05_b256_gpu2_langall_but_fr.json,0.8
all,wus397_lr1e-05_b256_gpu2_langall.json,0.8
all2en,wus397_lr1e-05_b256_gpu2_langall.json,0.9


In [18]:
from collections import defaultdict
# points_dct = defaultdict(lambda: 0)
# points_dct[0]+=1

In [125]:
# Rank-sum per prediction file over the last five settings ("all", "all2<xx>"):
# each file collects its 0-based position in every accuracy-sorted table, so a
# lower total means a better overall rank.
points_dct = defaultdict(lambda: 0)

for lang in lang_list[-5:]:
    if not os.path.isdir(LANG_RES_DICT[lang]["preds_folder"]):
        continue

    print(lang)
    dfr = get_folder_stat(LANG_RES_DICT[lang]["preds_folder"],
                          LANG_RES_DICT[lang]["golden_path"])

    for model, place in zip(dfr.index, range(len(dfr))):
        points_dct[model] += place

    print(dfr)
    print(100 * "-")

all
                                             acc
wus397_lr1e-05_b256_gpu2_langall.json   0.794434
wus1193_lr1e-05_b256_gpu2_langall.json  0.794239
wus1193_lr1e-06_b256_gpu2_langall.json  0.789786
wus397_lr1e-06_b256_gpu2_langall.json   0.789774
----------------------------------------------------------------------------------------------------
all2en
                                             acc
wus397_lr1e-06_b256_gpu2_langall.json   0.862709
wus1193_lr1e-06_b256_gpu2_langall.json  0.862613
wus397_lr1e-05_b256_gpu2_langall.json   0.858551
wus1193_lr1e-05_b256_gpu2_langall.json  0.858262
----------------------------------------------------------------------------------------------------
all2it
                                             acc
wus1193_lr1e-05_b256_gpu2_langall.json  0.768025
wus397_lr1e-05_b256_gpu2_langall.json   0.767903
wus1193_lr1e-06_b256_gpu2_langall.json  0.760139
wus397_lr1e-06_b256_gpu2_langall.json   0.759943
---------------------------------------------

In [114]:
# NOTE(review): get_folder_stat, as defined in this notebook, strips the
# "disbert_multi_case_ep5_" prefix from index labels when select_model is
# "distilbert", so this prefixed label looks stale -- confirm before re-running.
dfr.loc["disbert_multi_case_ep5_wus397_lr1e-05_b256_gpu2_langall.json"].acc

0.7944336829794233

In [117]:
# NOTE(review): this label still carries the "disbert_multi_case_ep5_" prefix that
# get_folder_stat removes for select_model="distilbert"; it appears to predate
# that change -- confirm before re-running.
dfr.loc["disbert_multi_case_ep5_wus397_lr1e-05_b256_gpu2_langall.json"].name

'disbert_multi_case_ep5_wus397_lr1e-05_b256_gpu2_langall.json'

In [126]:
points_dct

defaultdict(<function __main__.<lambda>()>,
            {'wus397_lr1e-05_b256_gpu2_langall.json': 3,
             'wus1193_lr1e-05_b256_gpu2_langall.json': 6,
             'wus1193_lr1e-06_b256_gpu2_langall.json': 10,
             'wus397_lr1e-06_b256_gpu2_langall.json': 11})

# bilstm table 5

In [43]:
lang_list

['en_only',
 'it_only',
 'pt_only',
 'fr_only',
 'all_but_en',
 'all_but_it',
 'all_but_pt',
 'all_but_fr',
 'all',
 'all2en',
 'all2it',
 'all2pt',
 'all2fr']

In [44]:
# Settings reported for the non-distilbert models: the four single-language
# settings plus the combined "all" setting.
lang_list_short = [f"{code}_only" for code in ("en", "it", "pt", "fr")] + ["all"]

In [45]:
# Best "bilstm" prediction file (highest accuracy) for each short-list setting.
report_data = []
report_data_index = []

for lang in lang_list_short:

    dfr = get_folder_stat(
        LANG_RES_DICT[lang]["preds_folder"],
        LANG_RES_DICT[lang]["golden_path"],
        select_model="bilstm",
    )

    if len(dfr) == 0:
        # No matching prediction files for this setting yet.
        RESULTS_REPORT_DICT[lang] = {"model": "ToDo",
                                     "acc": "ToDo"}
    else:
        # dfr is sorted by accuracy (descending): the first row is the best file.
        RESULTS_REPORT_DICT[lang] = {"model": dfr.index[0],
                                     "acc": dfr["acc"].iloc[0]}

    report_data_index.append(lang)
    report_data.append(RESULTS_REPORT_DICT[lang])

df_model_report = pd.DataFrame(report_data, index=report_data_index)
df_model_report

Unnamed: 0,model,acc
en_only,bilstm_ds_gyafc_lang_en_only_lr1e-05_ed100_hd5...,0.869511
it_only,bilstm_ds_xformal_lang_it_only_lr1e-05_ed50_hd...,0.790556
pt_only,bilstm_ds_xformal_lang_pt_only_lr1e-05_ed100_h...,0.759258
fr_only,bilstm_ds_xformal_lang_fr_only_lr1e-05_ed100_h...,0.813216
all,bilstm_ds_xformal_lang_all_lr1e-05_ed100_hd100...,0.827486


In [46]:
# Express accuracy as a percentage rounded by rnd().
# This rewrites the "acc" column in place, so run the cell only once per table.
df_model_report["acc"] = df_model_report["acc"].mul(100)
df_model_report["acc"] = df_model_report["acc"].apply(rnd)

In [47]:
df_model_report

Unnamed: 0,model,acc
en_only,bilstm_ds_gyafc_lang_en_only_lr1e-05_ed100_hd5...,87.0
it_only,bilstm_ds_xformal_lang_it_only_lr1e-05_ed50_hd...,79.1
pt_only,bilstm_ds_xformal_lang_pt_only_lr1e-05_ed100_h...,75.9
fr_only,bilstm_ds_xformal_lang_fr_only_lr1e-05_ed100_h...,81.3
all,bilstm_ds_xformal_lang_all_lr1e-05_ed100_hd100...,82.7


# mt5-base

In [59]:
# Best "mt5" prediction file (highest accuracy) for each short-list setting,
# with accuracy shown as a rounded percentage.
report_data = []
report_data_index = []

for lang in lang_list_short:

    dfr = get_folder_stat(
        LANG_RES_DICT[lang]["preds_folder"],
        LANG_RES_DICT[lang]["golden_path"],
        select_model="mt5",
    )

    if len(dfr) == 0:
        # No matching prediction files for this setting yet.
        RESULTS_REPORT_DICT[lang] = {"model": "ToDo",
                                     "acc": "ToDo"}
    else:
        # dfr is sorted by accuracy (descending): the first row is the best file.
        RESULTS_REPORT_DICT[lang] = {"model": dfr.index[0],
                                     "acc": dfr["acc"].iloc[0]}

    report_data_index.append(lang)
    report_data.append(RESULTS_REPORT_DICT[lang])

df_model_report = pd.DataFrame(report_data, index=report_data_index)
df_model_report["acc"] = df_model_report["acc"].mul(100)
df_model_report["acc"] = df_model_report["acc"].apply(rnd)
df_model_report

Unnamed: 0,model,acc
en_only,google_mt5-base_ep5_wus6394_lr1e-05_batch16.json,83.4
it_only,google_mt5-base_ep5_wus254_lr1e-05_b200_gpu2_l...,72.9
pt_only,google_mt5-base_ep5_wus254_lr1e-05_b200_gpu2_l...,70.3
fr_only,google_mt5-base_ep5_wus254_lr1e-05_b200_gpu2_l...,72.4
all,google_mt5-base_ep5_wus1018_lr5e-05_b200_gpu2_...,78.2


# mbart-large

In [63]:
# Best "mbart" prediction file (highest accuracy) for each short-list setting.
report_data = []
report_data_index = []

for lang in lang_list_short:

    dfr = get_folder_stat(
        LANG_RES_DICT[lang]["preds_folder"],
        LANG_RES_DICT[lang]["golden_path"],
        select_model="mbart",
    )

    if len(dfr) == 0:
        # No matching prediction files for this setting yet.
        RESULTS_REPORT_DICT[lang] = {"model": "ToDo",
                                     "acc": "ToDo"}
    else:
        # dfr is sorted by accuracy (descending): the first row is the best file.
        RESULTS_REPORT_DICT[lang] = {"model": dfr.index[0],
                                     "acc": dfr["acc"].iloc[0]}

    report_data_index.append(lang)
    report_data.append(RESULTS_REPORT_DICT[lang])

df_model_report = pd.DataFrame(report_data, index=report_data_index)
# Percentage scaling/rounding is intentionally left off here: the "acc" column
# can still hold the "ToDo" placeholder string, which cannot be rounded.
df_model_report

Unnamed: 0,model,acc
en_only,facebook_mbart-large-50_ep5_wus1598_lr1e-06_ba...,0.86875
it_only,facebook_mbart-large-50_ep5_wus254_lr1e-05_b20...,0.769103
pt_only,facebook_mbart-large-50_ep5_wus254_lr1e-05_b20...,0.758794
fr_only,facebook_mbart-large-50_ep5_wus198_lr1e-05_b12...,0.792546
all,ToDo,ToDo
