# Tabulate results


In [1]:
import os
import sys
from tqdm import tqdm
import pandas as pd
import os

sys.path.append('../src')
from read_log_file import read_log_file

In [2]:
# Root directory that holds the log folders, given relative to the current
# working directory. (os.path.join with a single argument returns that
# argument unchanged, so the plain literal is equivalent.)
LOG_HOME_DIR = '../logs/'
# Fail fast, naming the resolved path, if it does not point to a directory.
assert os.path.isdir(LOG_HOME_DIR), (
    f'Log directory not found: {os.path.abspath(LOG_HOME_DIR)}'
)

In [3]:
# Model identifiers. Each one is used as a sub-directory name when locating
# log files and as a column name in the result tables.
MODEL_NAMES = [
    'logistic_regression',
    'transformer_encoder',
    'bert-base-uncased',
    'bert-base-multilingual-cased',
]

In [4]:
# Setup names, in the row order of the result tables. A log file is assigned
# to a setup when its filename starts with '<setup>_'.
SETUPS = [
    'zero',
    'few50',
    'few100',
    'few150',
    'few200',
    'full',
    'trg',
]

In [5]:
def get_best_score_from_dict(di: dict) -> dict:
    """Find the largest value in ``di`` and every key that maps to it.

    Args:
        di: Mapping whose values can be compared with ``>`` and ``==``.

    Returns:
        A dict with two entries: ``'k'``, the list of keys (in the mapping's
        iteration order) whose value equals the maximum, and ``'v'``, the
        maximum itself. For an empty ``di`` the result is
        ``{'k': [], 'v': -inf}``.
    """
    # First pass: running maximum, starting below every comparable number.
    best = -float('inf')
    for score in di.values():
        best = score if score > best else best

    # Second pass: collect every key tied for the maximum.
    tied_keys = [key for key, score in di.items() if score == best]
    return {'k': tied_keys, 'v': best}

In [6]:
def create_best_results_df(langs: str) -> pd.DataFrame:
    """Tabulate the best logged F1 value per setup and model.

    For every name in ``MODEL_NAMES``, each file in
    ``LOG_HOME_DIR/<langs>/<model_name>`` is parsed with ``read_log_file``
    and its ``['best_val_metrics']['f1']`` entry is kept. Files are then
    grouped by setup using the filename prefix ``'<setup>_'``, and the
    largest value in each group is reported.

    Args:
        langs: Name of the sub-directory of ``LOG_HOME_DIR`` to read.

    Returns:
        A DataFrame with a ``'Setup'`` column holding ``SETUPS`` and one
        column per model. A cell is ``-inf`` when no log file of that model
        matches the setup prefix.
    """
    # model name -> {log filename -> f1 read from that log}
    f1_by_model = {}
    for model_name in MODEL_NAMES:
        model_log_dir = os.path.join(LOG_HOME_DIR, langs, model_name)
        f1_by_model[model_name] = {
            log_name: read_log_file(
                log_file_path=os.path.join(model_log_dir, log_name),
                plot=False,
                verbose=False,
            )['best_val_metrics']['f1']
            for log_name in os.listdir(model_log_dir)
        }

    # Column-oriented table: the setup labels first, then one column per model.
    table = {'Setup': SETUPS}
    for model_name in MODEL_NAMES:
        f1_by_file = f1_by_model[model_name]
        table[model_name] = [
            get_best_score_from_dict(
                {name: f1 for name, f1 in f1_by_file.items() if name.startswith(f'{setup}_')}
            )['v']
            for setup in SETUPS
        ]

    return pd.DataFrame(table)

In [7]:
# Build one best-score table per language-pair key. Each key is passed to
# create_best_results_df and also used to store the resulting DataFrame;
# tqdm wraps the list to show a progress bar.
best_results_dfs_dict = {}
for langs in tqdm(['enbg', 'enar', 'bgen', 'bgar', 'aren', 'arbg']):
    best_results_dfs_dict[langs] = create_best_results_df(langs)

100%|██████████| 6/6 [00:01<00:00,  4.03it/s]


## en-bg

In [8]:
# Display the table stored under the 'enbg' key.
best_results_dfs_dict['enbg']

Unnamed: 0,Setup,logistic_regression,transformer_encoder,bert-base-uncased,bert-base-multilingual-cased
0,zero,0.508343,0.791592,0.803166,0.804159
1,few50,0.669023,0.805915,0.811297,0.810629
2,few100,0.789056,0.808465,0.81235,0.815694
3,few150,0.80144,0.812891,0.820823,0.815002
4,few200,0.806709,0.811863,0.813044,0.816025
5,full,0.817149,0.812225,0.819922,0.826783
6,trg,0.813775,0.809617,0.820598,0.825151


In [9]:
# Copy the 'enbg' table to the clipboard, rounded to 4 decimals, without the index.
best_results_dfs_dict['enbg'].round(4).to_clipboard(index=False)

## en-ar

In [10]:
# Display the table stored under the 'enar' key.
best_results_dfs_dict['enar']

Unnamed: 0,Setup,logistic_regression,transformer_encoder,bert-base-uncased,bert-base-multilingual-cased
0,zero,0.459554,0.543634,0.541674,0.626963
1,few50,0.602569,0.62838,0.588368,0.640045
2,few100,0.59967,0.655968,0.631603,0.699871
3,few150,0.62009,0.70856,0.655699,0.727245
4,few200,0.622177,0.714131,0.580542,0.706095
5,full,0.622177,0.714131,0.580542,0.706095
6,trg,0.644527,0.695322,0.581654,0.712644


In [11]:
# Copy the 'enar' table to the clipboard, rounded to 4 decimals, without the index.
best_results_dfs_dict['enar'].round(4).to_clipboard(index=False)

## bg-en

In [12]:
# Display the table stored under the 'bgen' key.
best_results_dfs_dict['bgen']

Unnamed: 0,Setup,logistic_regression,transformer_encoder,bert-base-uncased,bert-base-multilingual-cased
0,zero,0.439126,0.528149,0.476081,0.508174
1,few50,0.587053,0.605046,0.604485,0.595866
2,few100,0.590057,0.600062,0.633917,0.648412
3,few150,0.592898,0.594378,0.639445,0.648841
4,few200,0.592521,0.599739,0.647169,0.659728
5,full,0.589093,0.61602,0.682836,0.670561
6,trg,0.568931,0.624487,0.677584,0.649745


In [13]:
# Copy the 'bgen' table to the clipboard, rounded to 4 decimals, without the index.
best_results_dfs_dict['bgen'].round(4).to_clipboard(index=False)

## bg-ar

In [14]:
# Display the table stored under the 'bgar' key.
best_results_dfs_dict['bgar']

Unnamed: 0,Setup,logistic_regression,transformer_encoder,bert-base-uncased,bert-base-multilingual-cased
0,zero,0.496158,0.58302,0.518447,0.577075
1,few50,0.565675,0.674656,0.557078,0.60641
2,few100,0.645268,0.64606,0.654343,0.666635
3,few150,0.642236,0.688185,0.60246,0.698114
4,few200,0.654307,0.709412,0.608944,0.676088
5,full,0.654307,0.709412,0.608944,0.676088
6,trg,0.480602,0.664462,0.581654,0.712644


In [15]:
# Copy the 'bgar' table to the clipboard, rounded to 4 decimals, without the index.
best_results_dfs_dict['bgar'].round(4).to_clipboard(index=False)

## ar-en

In [16]:
# Display the table stored under the 'aren' key.
best_results_dfs_dict['aren']

Unnamed: 0,Setup,logistic_regression,transformer_encoder,bert-base-uncased,bert-base-multilingual-cased
0,zero,0.200949,0.489936,0.499624,0.573033
1,few50,0.523237,0.587793,0.609163,0.597817
2,few100,0.511002,0.582618,0.652872,0.600951
3,few150,0.537679,0.607381,0.645421,0.621036
4,few200,0.571547,0.603234,0.635513,0.641816
5,full,0.565071,0.589381,0.664358,0.681317
6,trg,0.568931,0.624487,0.677584,0.649745


In [17]:
# Copy the 'aren' table to the clipboard, rounded to 4 decimals, without the index.
best_results_dfs_dict['aren'].round(4).to_clipboard(index=False)

## ar-bg

In [18]:
# Display the table stored under the 'arbg' key.
best_results_dfs_dict['arbg']

Unnamed: 0,Setup,logistic_regression,transformer_encoder,bert-base-uncased,bert-base-multilingual-cased
0,zero,0.458805,0.792784,0.807905,0.80272
1,few50,0.794631,0.791141,0.809904,0.814546
2,few100,0.797243,0.814881,0.813654,0.819369
3,few150,0.804574,0.814388,0.817971,0.822597
4,few200,0.802283,0.809488,0.811947,0.820767
5,full,0.813638,0.815279,0.821831,0.8264
6,trg,0.813775,0.809617,0.820598,0.825151


In [19]:
# Copy the 'arbg' table to the clipboard, rounded to 4 decimals, without the index.
best_results_dfs_dict['arbg'].round(4).to_clipboard(index=False)