In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
from tqdm import tqdm

import os
from typing import List

# Typeset all figure text with LaTeX in a serif font.
# NOTE: 'text.usetex' relies on the local LaTeX installation and may cause
# issues depending on how it is set up.
plt.rcParams['text.usetex'] = True
plt.rcParams['font.family'] = 'serif'

In [2]:
# Codes used to address the per-language evaluation columns of the results
# table (columns are named `final_eval_<code>_accuracy`, e.g. `final_eval_sv_accuracy`).
# The last two entries are not language codes: they match the
# `final_eval_avg_accuracy` and `final_eval_same_accuracy` columns
# (presumably aggregate scores -- TODO confirm against the evaluation script).
LANGS = [
    "bg", "cs", "de", "es", "lv",
    "af", "ar", "ca", "da", "el",
    "fa", "fi", "fr", "he", "hi",
    "hu", "it", "ja", "ko", "lt",
    "no", "pl", "pt", "ro", "ru",
    "sk", "sl", "sv", "ta", "th",
    "tr", "uk", "vi", "zh",
    "avg", "same",
]

# Method identifiers as they appear in the `method` column of the results.
# Every name is "<strategy>_<aligner>"; strategies vary slowest, aligners fastest,
# so the list is ordered before_awesome, before_dico, before_fastalign, during_awesome, ...
# "baseline" is currently disabled (it was commented out) and is therefore NOT in METHODS.
METHODS = [
    f"{strategy}_{aligner}"
    for strategy in (
        "before",
        "during",
        "during_partial_freeze_back",
        "during_partial_freeze_front",
        "freeze_realign_unfreeze",
        "freeze_realign_unfreeze_last_6",
    )
    for aligner in ("awesome", "dico", "fastalign")
]

In [55]:
# Read one results CSV per filtering threshold, tag every row with the
# threshold it came from, and stack everything into a single DataFrame.
# Alternative location kept from the original cell:
# directory = 'Re_ CometKiwi Pipeline'
directory = '/data0/felix/align_freeze/raw_results'
thresholds = [0, 25, 37, 50, 62, 75]

results = []
for threshold in thresholds:
    # Threshold 0 is read from the file without the "_filtered_percent_<n>" suffix.
    csv_file = os.path.join(
        directory,
        "distilbert-base-multilingual-cased__opus100.csv"
        if threshold == 0
        else f"distilbert-base-multilingual-cased__opus100_filtered_percent_{threshold}.csv",
    )
    df = pd.read_csv(csv_file).assign(threshold=threshold)
    results.append(df)

# Replace the list of per-threshold frames by their concatenation (fresh 0..n-1 index).
results = pd.concat(results, ignore_index=True)
results

Unnamed: 0,seed,model,task,method,finetuning_steps,realignment_steps,distinct_realignment_samples,repeated_realignment_samples,train_loss,realignment_loss,...,final_eval_sv_accuracy,final_eval_ta_accuracy,final_eval_th_accuracy,final_eval_tr_accuracy,final_eval_uk_accuracy,final_eval_vi_accuracy,final_eval_zh_accuracy,final_eval_avg_accuracy,final_eval_same_accuracy,threshold
0,31,distilbert-base-multilingual-cased,udpos,before_fastalign,1965,1965,31440,31440,0.060920,0.000000,...,0.899921,0.556979,0.532832,0.706698,0.853404,0.564617,0.661089,0.774327,0.960009,0
1,42,distilbert-base-multilingual-cased,udpos,before_fastalign,1965,1965,31440,31440,0.061314,0.000000,...,0.899555,0.553776,0.516987,0.712329,0.848858,0.560602,0.663836,0.769088,0.960599,0
2,66,distilbert-base-multilingual-cased,udpos,before_fastalign,1965,1965,31440,31440,0.030025,0.000000,...,0.903851,0.549199,0.537532,0.714534,0.852063,0.568883,0.668914,0.773126,0.959026,0
3,23,distilbert-base-multilingual-cased,udpos,before_fastalign,1965,1965,31440,31440,0.040779,0.000000,...,0.898507,0.546453,0.523119,0.710413,0.845477,0.558511,0.660839,0.771487,0.959380,0
4,17,distilbert-base-multilingual-cased,udpos,before_fastalign,1965,1965,31440,31440,0.057110,0.000000,...,0.899450,0.525858,0.524327,0.703564,0.847342,0.565705,0.656011,0.772058,0.959656,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
540,31,distilbert-base-multilingual-cased,udpos,during_partial_freeze_back_dico,1965,1965,3496,31400,0.188252,0.117115,...,0.900865,0.545080,0.534578,0.697353,0.842679,0.578503,0.667000,0.769491,0.958672,75
541,42,distilbert-base-multilingual-cased,udpos,during_partial_freeze_back_dico,1965,1965,3496,31400,0.154988,0.095324,...,0.901441,0.536842,0.536995,0.706060,0.836850,0.577666,0.664003,0.769687,0.959262,75
542,66,distilbert-base-multilingual-cased,udpos,during_partial_freeze_back_dico,1965,1965,3496,31400,0.114762,0.080945,...,0.905004,0.554233,0.549707,0.690330,0.841163,0.579172,0.667333,0.772195,0.959262,75
543,23,distilbert-base-multilingual-cased,udpos,during_partial_freeze_back_dico,1965,1965,3496,31400,0.142846,0.103482,...,0.900812,0.541419,0.541874,0.694509,0.845244,0.567461,0.658675,0.771711,0.958555,75


In [60]:
# Print, for each method, the `distinct_realignment_samples` values of the rows
# in `results` with seed 31 and model distilbert-base-multilingual-cased.
# The seed/model condition does not depend on the method, so it is built once.
seed_and_model_mask = (results.seed == 31) & (results.model == "distilbert-base-multilingual-cased")
for method in METHODS:
    method_rows = results[seed_and_model_mask & (results.method == method)]
    print(f'{method}:  {list(method_rows["distinct_realignment_samples"])}')

baseline:  [0]
before_awesome:  [31440, 31440, 31440, 31440, 31440, 31440]
before_dico:  [19782, 15974, 12880, 10058, 7440, 3496]
before_fastalign:  [31440, 31440, 31440, 31440, 31440, 31440]
during_awesome:  [31440, 31440, 31440, 31440, 31440, 31440]
during_dico:  [31440, 31440, 31440, 31440, 31440, 3496]
during_fastalign:  [31440, 31440, 31440, 31440, 31440, 31440]
during_partial_freeze_back_awesome:  [31440, 31440, 31440, 31440, 31440, 31440]
during_partial_freeze_back_dico:  [31440, 31440, 31440, 31440, 31440, 3496]
during_partial_freeze_back_fastalign:  [31440, 31440, 31440, 31440, 31440, 31440]
during_partial_freeze_front_awesome:  [31440, 31440, 31440, 31440, 31440, 31440]
during_partial_freeze_front_dico:  [31440, 31440, 31440, 31440, 31440, 3496]
during_partial_freeze_front_fastalign:  [31440, 31440, 31440, 31440, 31440, 31440]
freeze_realign_unfreeze_awesome:  [31440, 31440, 31440, 31440, 31440, 31440]
freeze_realign_unfreeze_dico:  [19782, 15974, 12880, 10058, 7440, 3496]
fr

In [54]:
# Get the top performing runs-languages

# Get baseline results.
# The baseline rows go into their own frame instead of being assigned back to
# `results`: overwriting `results` would drop every non-baseline row for all
# cells executed afterwards and make this cell give a different result when
# it is run a second time.
key_to_baseline = {}
baseline_results = results[results.method == "baseline"]




Unnamed: 0,seed,model,task,method,finetuning_steps,realignment_steps,distinct_realignment_samples,repeated_realignment_samples,train_loss,realignment_loss,...,final_eval_sv_accuracy,final_eval_ta_accuracy,final_eval_th_accuracy,final_eval_tr_accuracy,final_eval_uk_accuracy,final_eval_vi_accuracy,final_eval_zh_accuracy,final_eval_avg_accuracy,final_eval_same_accuracy,threshold
0,31,distilbert-base-multilingual-cased,udpos,before_fastalign,1965,1965,31440,31440,0.061836,0.000000,...,0.900288,0.547368,0.520970,0.704435,0.847167,0.569552,0.662837,0.774695,0.960206,75
1,42,distilbert-base-multilingual-cased,udpos,before_fastalign,1965,1965,31440,31440,0.061632,0.000000,...,0.901022,0.541876,0.536234,0.706176,0.843845,0.569385,0.667749,0.770799,0.960127,75
2,66,distilbert-base-multilingual-cased,udpos,before_fastalign,1965,1965,31440,31440,0.036379,0.000000,...,0.904061,0.557437,0.528759,0.712967,0.846526,0.575408,0.664585,0.772637,0.959695,75
3,23,distilbert-base-multilingual-cased,udpos,before_fastalign,1965,1965,31440,31440,0.027465,0.000000,...,0.897878,0.553776,0.527640,0.706989,0.847983,0.570473,0.662671,0.772442,0.959813,75
4,17,distilbert-base-multilingual-cased,udpos,before_fastalign,1965,1965,31440,31440,0.046036,0.000000,...,0.899450,0.549199,0.528625,0.711864,0.841163,0.572146,0.664502,0.770632,0.960442,75
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
85,31,distilbert-base-multilingual-cased,udpos,during_partial_freeze_back_dico,1965,1965,3496,31400,0.188252,0.117115,...,0.900865,0.545080,0.534578,0.697353,0.842679,0.578503,0.667000,0.769491,0.958672,75
86,42,distilbert-base-multilingual-cased,udpos,during_partial_freeze_back_dico,1965,1965,3496,31400,0.154988,0.095324,...,0.901441,0.536842,0.536995,0.706060,0.836850,0.577666,0.664003,0.769687,0.959262,75
87,66,distilbert-base-multilingual-cased,udpos,during_partial_freeze_back_dico,1965,1965,3496,31400,0.114762,0.080945,...,0.905004,0.554233,0.549707,0.690330,0.841163,0.579172,0.667333,0.772195,0.959262,75
88,23,distilbert-base-multilingual-cased,udpos,during_partial_freeze_back_dico,1965,1965,3496,31400,0.142846,0.103482,...,0.900812,0.541419,0.541874,0.694509,0.845244,0.567461,0.658675,0.771711,0.958555,75


In [24]:
## Show that there can be competition between languages

82