# Tabulate results


In [1]:
import os
import sys
from typing import Tuple
import pandas as pd
from tabulate import tabulate
from tqdm import tqdm
sys.path.append('../src')
from read_log_file import read_log_file

In [2]:
# Root directory under which the experiment log files are looked up
# (relative to the directory the notebook is run from).
LOG_HOME_DIR = os.path.join('../logs_v1/')
# Fail fast when the log directory is not present.
assert os.path.isdir(LOG_HOME_DIR)

In [3]:
# Models to compare; each name is used both as a log sub-directory name
# and as a column name in the result tables.
MODEL_NAMES = [
    'logistic_regression',
    'transformer_encoder',
    'bert-base-uncased',
    'bert-base-multilingual-cased',
]

In [4]:
# Training setups, in the row order of the result tables; log file names
# are matched to a setup by the prefix '<setup>_'.
SETUPS = [
    'zero',
    'few50',
    'few100',
    'few150',
    'few200',
    'full',
    'trg',
]

In [5]:
def get_best_score_from_dict(di: dict) -> dict:
    """Find the largest value in a dict and every key that maps to it.

    Args:
        di: Mapping from key to a comparable score.

    Returns:
        A dict with two entries: ``'k'`` is the list of all keys whose value
        equals the maximum (in the dict's iteration order) and ``'v'`` is the
        maximum itself. For an empty ``di`` the result is
        ``{'k': [], 'v': -inf}``.
    """
    # Seeding the candidates with -inf keeps the empty-dict result at -inf
    # and means a value only wins when it is strictly greater than the
    # current best.
    top_score = max([-float('inf'), *di.values()])
    winners = [key for key, score in di.items() if score == top_score]
    return {'k': winners, 'v': top_score}

In [6]:
def create_best_results_df(langs: str) -> Tuple[pd.DataFrame, pd.DataFrame]:
    """Build the best-score and best-log-file tables for one language pair.

    Every file found in ``LOG_HOME_DIR/<langs>/<model_name>`` is read with
    ``read_log_file`` and its ``['best_val_metrics']['f1']`` entry is kept.
    For each model and each setup, only files whose name starts with
    ``'<setup>_'`` are considered and the highest score among them wins.

    Args:
        langs: Name of the sub-directory of ``LOG_HOME_DIR`` to read (e.g. ``'enbg'``).

    Returns:
        A pair ``(best_results_df, best_hparams_df)``. Both have a ``'Setup'``
        column followed by one column per model; the first holds the best
        score, the second the list of log file names that reached it.
    """
    # Score of every log file, grouped by model.
    f1_by_model = {}
    for model in MODEL_NAMES:
        model_log_dir = os.path.join(LOG_HOME_DIR, langs, model)
        f1_by_model[model] = {
            log_name: read_log_file(
                log_file_path=os.path.join(model_log_dir, log_name),
                plot=False,
                verbose=False,
            )['best_val_metrics']['f1']
            for log_name in os.listdir(model_log_dir)
        }

    score_columns = {'Setup': SETUPS}
    hparam_columns = {'Setup': SETUPS}
    for model in MODEL_NAMES:
        # One winner per setup, restricted to the log files of that setup.
        winners = [
            get_best_score_from_dict(
                {name: f1 for name, f1 in f1_by_model[model].items() if name.startswith(f'{setup}_')}
            )
            for setup in SETUPS
        ]
        score_columns[model] = [winner['v'] for winner in winners]
        hparam_columns[model] = [winner['k'] for winner in winners]

    return pd.DataFrame(score_columns), pd.DataFrame(hparam_columns)

In [7]:
def highlight_best_score(df: pd.DataFrame) -> pd.DataFrame:
    """Highlight best score in each row"""
    return df.style.apply(lambda x: ['background: red' if isinstance(v, float) and v == max(x.iloc[1:]) else '' for v in x], axis=1)

In [8]:
def tabulate_markdown(df: pd.DataFrame) -> str:
    """Tabulate in markdown format and bold the best score(s) in each row.

    Scores are rounded to 4 decimals before they are compared, so cells that
    tie after rounding are all shown in bold. The input frame is not modified.

    Args:
        df: Table with one column per entry of ``MODEL_NAMES`` holding
            numeric scores; any other column is printed as is.

    Returns:
        The table as a string in ``tabulate``'s ``'github'`` format, without
        the index column.
    """
    # round() returns a copy, so the caller's frame stays untouched. The index
    # is reset so that the label-based `.at` writes below always address
    # existing rows; with a non-default index they could hit the wrong row or
    # add new rows. The index is not printed, so dropping it loses nothing.
    table = df.round(4).reset_index(drop=True)
    for model_name in MODEL_NAMES:
        # Cast to str so the column can hold the '**...**' markers.
        table[model_name] = table[model_name].astype(str)
    for row_label in table.index:
        row_scores = [float(table.at[row_label, model_name]) for model_name in MODEL_NAMES]
        best = max(row_scores)
        for model_name, score in zip(MODEL_NAMES, row_scores):
            table.at[row_label, model_name] = f'**{score}**' if score == best else f'{score}'

    return tabulate(table, headers='keys', showindex=False, tablefmt='github')


In [9]:
# Build both tables (best scores, best log files) once per language pair and
# keep them keyed by the language-pair directory name.
best_results_dfs_dict, best_hparams_dfs_dict = {}, {}
for langs in tqdm(['enbg', 'enar', 'bgen', 'bgar', 'aren', 'arbg']):
    (
        best_results_dfs_dict[langs],
        best_hparams_dfs_dict[langs],
    ) = create_best_results_df(langs)

100%|██████████| 6/6 [00:19<00:00,  3.30s/it]


## en-bg

In [10]:
# Display the en-bg best scores with the top score of each row highlighted.
highlight_best_score(best_results_dfs_dict['enbg'])

Unnamed: 0,Setup,logistic_regression,transformer_encoder,bert-base-uncased,bert-base-multilingual-cased
0,zero,0.508343,0.791592,0.803166,0.804159
1,few50,0.669023,0.805915,0.811297,0.810629
2,few100,0.789056,0.808465,0.81235,0.815694
3,few150,0.80144,0.812891,0.820823,0.815002
4,few200,0.806709,0.811863,0.813044,0.816025
5,full,0.817149,0.812225,0.819922,0.826783
6,trg,0.813775,0.809617,0.820598,0.825151


In [11]:
# Print the en-bg best scores as a Markdown table, best score per row in bold.
print(tabulate_markdown(best_results_dfs_dict['enbg']))

| Setup   |   logistic_regression |   transformer_encoder | bert-base-uncased   | bert-base-multilingual-cased   |
|---------|-----------------------|-----------------------|---------------------|--------------------------------|
| zero    |                0.5083 |                0.7916 | 0.8032              | **0.8042**                     |
| few50   |                0.669  |                0.8059 | **0.8113**          | 0.8106                         |
| few100  |                0.7891 |                0.8085 | 0.8123              | **0.8157**                     |
| few150  |                0.8014 |                0.8129 | **0.8208**          | 0.815                          |
| few200  |                0.8067 |                0.8119 | 0.813               | **0.816**                      |
| full    |                0.8171 |                0.8122 | 0.8199              | **0.8268**                     |
| trg     |                0.8138 |                0.8096 | 0.8206              | **0.8252**                     |

In [12]:
# Display, per setup and model, the log file name(s) that reached the best en-bg score.
best_hparams_dfs_dict['enbg']

Unnamed: 0,Setup,logistic_regression,transformer_encoder,bert-base-uncased,bert-base-multilingual-cased
0,zero,"[zero_hidden128_vocab16000.txt, zero_hidden128...","[zero_hidden512_vocab16000.txt, zero_hidden512...",[zero_fc512_lr0.05_frozen.txt],[zero_fc512_lr0.005_frozen.txt]
1,few50,"[few50_hidden256_vocab16000.txt, few50_hidden2...","[few50_hidden128_vocab16000.txt, few50_hidden1...",[few50_fc512_lr0.0005_frozen.txt],[few50_fc512_lr0.0005_frozen.txt]
2,few100,"[few100_hidden512_vocab16000.txt, few100_hidde...","[few100_hidden512_vocab16000.txt, few100_hidde...",[few100_fc256_lr0.0005_frozen.txt],[few100_fc512_lr0.0005_frozen.txt]
3,few150,"[few150_hidden512_vocab16000.txt, few150_hidde...","[few150_hidden256_vocab16000.txt, few150_hidde...",[few150_fc512_lr0.005_frozen.txt],[few150_fc512_lr0.005_frozen.txt]
4,few200,"[few200_hidden256_vocab16000.txt, few200_hidde...","[few200_hidden128_vocab16000.txt, few200_hidde...",[few200_fc512_lr0.0005_frozen.txt],[few200_fc256_lr0.005_frozen.txt]
5,full,"[full_hidden256_vocab16000.txt, full_hidden256...","[full_hidden128_vocab16000.txt, full_hidden128...",[full_fc128_lr0.005_frozen.txt],[full_fc128_lr0.0005_frozen.txt]
6,trg,"[trg_hidden256_vocab16000.txt, trg_hidden256_v...","[trg_hidden256_vocab16000.txt, trg_hidden256_v...",[trg_fc512_lr0.005_frozen.txt],[trg_fc256_lr0.005_frozen.txt]


## en-ar

In [13]:
# Display the en-ar best scores with the top score of each row highlighted.
highlight_best_score(best_results_dfs_dict['enar'])

Unnamed: 0,Setup,logistic_regression,transformer_encoder,bert-base-uncased,bert-base-multilingual-cased
0,zero,0.459554,0.543634,0.541674,0.626963
1,few50,0.602569,0.62838,0.588368,0.640045
2,few100,0.59967,0.655968,0.631603,0.699871
3,few150,0.62009,0.70856,0.655699,0.727245
4,few200,0.622177,0.714131,0.580542,0.706095
5,full,0.622177,0.714131,0.580542,0.706095
6,trg,0.644527,0.695322,0.581654,0.712644


In [14]:
# Print the en-ar best scores as a Markdown table, best score per row in bold.
print(tabulate_markdown(best_results_dfs_dict['enar']))

| Setup   |   logistic_regression | transformer_encoder   |   bert-base-uncased | bert-base-multilingual-cased   |
|---------|-----------------------|-----------------------|---------------------|--------------------------------|
| zero    |                0.4596 | 0.5436                |              0.5417 | **0.627**                      |
| few50   |                0.6026 | 0.6284                |              0.5884 | **0.64**                       |
| few100  |                0.5997 | 0.656                 |              0.6316 | **0.6999**                     |
| few150  |                0.6201 | 0.7086                |              0.6557 | **0.7272**                     |
| few200  |                0.6222 | **0.7141**            |              0.5805 | 0.7061                         |
| full    |                0.6222 | **0.7141**            |              0.5805 | 0.7061                         |
| trg     |                0.6445 | 0.6953                |              0.5817 | **0.7126**                     |

In [15]:
# Display, per setup and model, the log file name(s) that reached the best en-ar score.
best_hparams_dfs_dict['enar']

Unnamed: 0,Setup,logistic_regression,transformer_encoder,bert-base-uncased,bert-base-multilingual-cased
0,zero,"[zero_hidden128_vocab16000.txt, zero_hidden128...","[zero_hidden256_vocab16000.txt, zero_hidden256...",[zero_fc256_lr0.0005_frozen.txt],[zero_fc512_lr0.005_frozen.txt]
1,few50,"[few50_hidden256_vocab16000.txt, few50_hidden2...","[few50_hidden512_vocab16000.txt, few50_hidden5...",[few50_fc512_lr0.0005_frozen.txt],[few50_fc128_lr0.005_frozen.txt]
2,few100,"[few100_hidden512_vocab16000.txt, few100_hidde...","[few100_hidden512_vocab16000.txt, few100_hidde...",[few100_fc128_lr0.005_frozen.txt],[few100_fc128_lr0.005_frozen.txt]
3,few150,"[few150_hidden256_vocab16000.txt, few150_hidde...","[few150_hidden128_vocab16000.txt, few150_hidde...",[few150_fc512_lr0.005_frozen.txt],[few150_fc512_lr0.0005_frozen.txt]
4,few200,"[few200_hidden128_vocab16000.txt, few200_hidde...","[few200_hidden512_vocab16000.txt, few200_hidde...",[few200_fc512_lr0.0005_frozen.txt],[few200_fc512_lr0.0005_frozen.txt]
5,full,"[full_hidden128_vocab16000.txt, full_hidden128...","[full_hidden512_vocab16000.txt, full_hidden512...",[full_fc512_lr0.0005_frozen.txt],[full_fc512_lr0.0005_frozen.txt]
6,trg,"[trg_hidden512_vocab16000.txt, trg_hidden512_v...","[trg_hidden128_vocab16000.txt, trg_hidden128_v...",[trg_fc512_lr0.005_frozen.txt],[trg_fc256_lr0.005_frozen.txt]


## bg-en

In [16]:
# Display the bg-en best scores with the top score of each row highlighted.
highlight_best_score(best_results_dfs_dict['bgen'])

Unnamed: 0,Setup,logistic_regression,transformer_encoder,bert-base-uncased,bert-base-multilingual-cased
0,zero,0.439126,0.528149,0.476081,0.508174
1,few50,0.587053,0.605046,0.604485,0.595866
2,few100,0.590057,0.600062,0.633917,0.648412
3,few150,0.592898,0.594378,0.639445,0.648841
4,few200,0.592521,0.599739,0.647169,0.659728
5,full,0.589093,0.61602,0.682836,0.670561
6,trg,0.568931,0.624487,0.677584,0.649745


In [17]:
# Print the bg-en best scores as a Markdown table, best score per row in bold.
print(tabulate_markdown(best_results_dfs_dict['bgen']))

| Setup   |   logistic_regression | transformer_encoder   | bert-base-uncased   | bert-base-multilingual-cased   |
|---------|-----------------------|-----------------------|---------------------|--------------------------------|
| zero    |                0.4391 | **0.5281**            | 0.4761              | 0.5082                         |
| few50   |                0.5871 | **0.605**             | 0.6045              | 0.5959                         |
| few100  |                0.5901 | 0.6001                | 0.6339              | **0.6484**                     |
| few150  |                0.5929 | 0.5944                | 0.6394              | **0.6488**                     |
| few200  |                0.5925 | 0.5997                | 0.6472              | **0.6597**                     |
| full    |                0.5891 | 0.616                 | **0.6828**          | 0.6706                         |
| trg     |                0.5689 | 0.6245                | **0.6776**          | 0.6497                         |

In [18]:
# Display, per setup and model, the log file name(s) that reached the best bg-en score.
best_hparams_dfs_dict['bgen']

Unnamed: 0,Setup,logistic_regression,transformer_encoder,bert-base-uncased,bert-base-multilingual-cased
0,zero,"[zero_hidden128_vocab16000.txt, zero_hidden128...","[zero_hidden256_vocab16000.txt, zero_hidden256...",[zero_fc256_lr0.05_frozen.txt],[zero_fc512_lr0.05_frozen.txt]
1,few50,"[few50_hidden128_vocab16000.txt, few50_hidden1...","[few50_hidden512_vocab16000.txt, few50_hidden5...",[few50_fc128_lr0.0005_frozen.txt],[few50_fc512_lr0.0005_frozen.txt]
2,few100,"[few100_hidden512_vocab16000.txt, few100_hidde...","[few100_hidden512_vocab16000.txt, few100_hidde...",[few100_fc512_lr0.005_frozen.txt],[few100_fc128_lr0.005_frozen.txt]
3,few150,"[few150_hidden512_vocab16000.txt, few150_hidde...","[few150_hidden512_vocab16000.txt, few150_hidde...",[few150_fc512_lr0.0005_frozen.txt],[few150_fc256_lr0.005_frozen.txt]
4,few200,"[few200_hidden256_vocab16000.txt, few200_hidde...","[few200_hidden256_vocab16000.txt, few200_hidde...",[few200_fc512_lr0.0005_frozen.txt],[few200_fc256_lr0.005_frozen.txt]
5,full,"[full_hidden128_vocab16000.txt, full_hidden128...","[full_hidden256_vocab16000.txt, full_hidden256...",[full_fc128_lr0.005_frozen.txt],[full_fc512_lr0.005_frozen.txt]
6,trg,"[trg_hidden512_vocab16000.txt, trg_hidden512_v...","[trg_hidden512_vocab16000.txt, trg_hidden512_v...",[trg_fc512_lr0.005_frozen.txt],[trg_fc512_lr0.005_frozen.txt]


## bg-ar

In [19]:
# Display the bg-ar best scores with the top score of each row highlighted.
highlight_best_score(best_results_dfs_dict['bgar'])

Unnamed: 0,Setup,logistic_regression,transformer_encoder,bert-base-uncased,bert-base-multilingual-cased
0,zero,0.496158,0.58302,0.518447,0.577075
1,few50,0.565675,0.674656,0.557078,0.60641
2,few100,0.645268,0.64606,0.654343,0.666635
3,few150,0.642236,0.688185,0.60246,0.698114
4,few200,0.654307,0.709412,0.608944,0.676088
5,full,0.654307,0.709412,0.608944,0.676088
6,trg,0.480602,0.664462,0.581654,0.712644


In [20]:
# Print the bg-ar best scores as a Markdown table, best score per row in bold.
print(tabulate_markdown(best_results_dfs_dict['bgar']))

| Setup   |   logistic_regression | transformer_encoder   |   bert-base-uncased | bert-base-multilingual-cased   |
|---------|-----------------------|-----------------------|---------------------|--------------------------------|
| zero    |                0.4962 | **0.583**             |              0.5184 | 0.5771                         |
| few50   |                0.5657 | **0.6747**            |              0.5571 | 0.6064                         |
| few100  |                0.6453 | 0.6461                |              0.6543 | **0.6666**                     |
| few150  |                0.6422 | 0.6882                |              0.6025 | **0.6981**                     |
| few200  |                0.6543 | **0.7094**            |              0.6089 | 0.6761                         |
| full    |                0.6543 | **0.7094**            |              0.6089 | 0.6761                         |
| trg     |                0.4806 | 0.6645                |              0.5817 | **0.7126**                     |

In [21]:
# Display, per setup and model, the log file name(s) that reached the best bg-ar score.
best_hparams_dfs_dict['bgar']

Unnamed: 0,Setup,logistic_regression,transformer_encoder,bert-base-uncased,bert-base-multilingual-cased
0,zero,"[zero_hidden128_vocab16000.txt, zero_hidden128...","[zero_hidden256_vocab16000.txt, zero_hidden256...","[zero_fc128_lr0.0005_trainable.txt, zero_fc128...",[zero_fc128_lr0.005_frozen.txt]
1,few50,"[few50_hidden512_vocab16000.txt, few50_hidden5...","[few50_hidden256_vocab16000.txt, few50_hidden2...",[few50_fc512_lr0.005_frozen.txt],[few50_fc256_lr0.0005_frozen.txt]
2,few100,"[few100_hidden512_vocab16000.txt, few100_hidde...","[few100_hidden512_vocab16000.txt, few100_hidde...",[few100_fc512_lr0.0005_frozen.txt],[few100_fc512_lr0.0005_frozen.txt]
3,few150,"[few150_hidden256_vocab16000.txt, few150_hidde...","[few150_hidden128_vocab16000.txt, few150_hidde...",[few150_fc512_lr0.0005_frozen.txt],[few150_fc512_lr0.0005_frozen.txt]
4,few200,"[few200_hidden256_vocab16000.txt, few200_hidde...","[few200_hidden256_vocab16000.txt, few200_hidde...",[few200_fc256_lr0.0005_frozen.txt],[few200_fc512_lr0.0005_frozen.txt]
5,full,"[full_hidden256_vocab16000.txt, full_hidden256...","[full_hidden256_vocab16000.txt, full_hidden256...",[full_fc256_lr0.0005_frozen.txt],[full_fc512_lr0.0005_frozen.txt]
6,trg,"[trg_hidden512_vocab16000.txt, trg_hidden512_v...","[trg_hidden128_vocab16000.txt, trg_hidden128_v...",[trg_fc512_lr0.005_frozen.txt],[trg_fc256_lr0.005_frozen.txt]


## ar-en

In [22]:
# Display the ar-en best scores with the top score of each row highlighted.
highlight_best_score(best_results_dfs_dict['aren'])

Unnamed: 0,Setup,logistic_regression,transformer_encoder,bert-base-uncased,bert-base-multilingual-cased
0,zero,0.200949,0.489936,0.499624,0.573033
1,few50,0.523237,0.587793,0.609163,0.597817
2,few100,0.511002,0.582618,0.652872,0.600951
3,few150,0.537679,0.607381,0.645421,0.621036
4,few200,0.571547,0.603234,0.635513,0.641816
5,full,0.565071,0.589381,0.664358,0.681317
6,trg,0.568931,0.624487,0.677584,0.649745


In [23]:
# Print the ar-en best scores as a Markdown table, best score per row in bold.
print(tabulate_markdown(best_results_dfs_dict['aren']))

| Setup   |   logistic_regression |   transformer_encoder | bert-base-uncased   | bert-base-multilingual-cased   |
|---------|-----------------------|-----------------------|---------------------|--------------------------------|
| zero    |                0.2009 |                0.4899 | 0.4996              | **0.573**                      |
| few50   |                0.5232 |                0.5878 | **0.6092**          | 0.5978                         |
| few100  |                0.511  |                0.5826 | **0.6529**          | 0.601                          |
| few150  |                0.5377 |                0.6074 | **0.6454**          | 0.621                          |
| few200  |                0.5715 |                0.6032 | 0.6355              | **0.6418**                     |
| full    |                0.5651 |                0.5894 | 0.6644              | **0.6813**                     |
| trg     |                0.5689 |                0.6245 | **0.6776**          | 0.6497                         |

In [24]:
# Display, per setup and model, the log file name(s) that reached the best ar-en score.
best_hparams_dfs_dict['aren']

Unnamed: 0,Setup,logistic_regression,transformer_encoder,bert-base-uncased,bert-base-multilingual-cased
0,zero,"[zero_hidden128_vocab16000.txt, zero_hidden128...","[zero_hidden512_vocab16000.txt, zero_hidden512...",[zero_fc256_lr0.005_frozen.txt],[zero_fc512_lr0.0005_frozen.txt]
1,few50,"[few50_hidden256_vocab16000.txt, few50_hidden2...","[few50_hidden256_vocab16000.txt, few50_hidden2...",[few50_fc256_lr0.005_frozen.txt],[few50_fc256_lr0.005_frozen.txt]
2,few100,"[few100_hidden256_vocab16000.txt, few100_hidde...","[few100_hidden128_vocab16000.txt, few100_hidde...",[few100_fc256_lr0.005_frozen.txt],[few100_fc512_lr0.0005_frozen.txt]
3,few150,"[few150_hidden512_vocab16000.txt, few150_hidde...","[few150_hidden256_vocab16000.txt, few150_hidde...",[few150_fc512_lr0.005_frozen.txt],[few150_fc128_lr0.005_frozen.txt]
4,few200,"[few200_hidden512_vocab16000.txt, few200_hidde...","[few200_hidden256_vocab16000.txt, few200_hidde...",[few200_fc512_lr0.0005_frozen.txt],[few200_fc256_lr0.005_frozen.txt]
5,full,"[full_hidden256_vocab16000.txt, full_hidden256...","[full_hidden256_vocab16000.txt, full_hidden256...",[full_fc512_lr0.0005_frozen.txt],[full_fc512_lr0.005_frozen.txt]
6,trg,"[trg_hidden512_vocab16000.txt, trg_hidden512_v...","[trg_hidden512_vocab16000.txt, trg_hidden512_v...",[trg_fc512_lr0.005_frozen.txt],[trg_fc512_lr0.005_frozen.txt]


## ar-bg

In [25]:
# Display the ar-bg best scores with the top score of each row highlighted.
highlight_best_score(best_results_dfs_dict['arbg'])

Unnamed: 0,Setup,logistic_regression,transformer_encoder,bert-base-uncased,bert-base-multilingual-cased
0,zero,0.458805,0.792784,0.807905,0.80272
1,few50,0.794631,0.791141,0.809904,0.814546
2,few100,0.797243,0.814881,0.813654,0.819369
3,few150,0.804574,0.814388,0.817971,0.822597
4,few200,0.802283,0.809488,0.811947,0.820767
5,full,0.813638,0.815279,0.821831,0.8264
6,trg,0.813775,0.809617,0.820598,0.825151


In [26]:
# Print the ar-bg best scores as a Markdown table, best score per row in bold.
print(tabulate_markdown(best_results_dfs_dict['arbg']))

| Setup   |   logistic_regression |   transformer_encoder | bert-base-uncased   | bert-base-multilingual-cased   |
|---------|-----------------------|-----------------------|---------------------|--------------------------------|
| zero    |                0.4588 |                0.7928 | **0.8079**          | 0.8027                         |
| few50   |                0.7946 |                0.7911 | 0.8099              | **0.8145**                     |
| few100  |                0.7972 |                0.8149 | 0.8137              | **0.8194**                     |
| few150  |                0.8046 |                0.8144 | 0.818               | **0.8226**                     |
| few200  |                0.8023 |                0.8095 | 0.8119              | **0.8208**                     |
| full    |                0.8136 |                0.8153 | 0.8218              | **0.8264**                     |
| trg     |                0.8138 |                0.8096 | 0.8206              | **0.8252**                     |

In [27]:
# Display, per setup and model, the log file name(s) that reached the best ar-bg score.
best_hparams_dfs_dict['arbg']

Unnamed: 0,Setup,logistic_regression,transformer_encoder,bert-base-uncased,bert-base-multilingual-cased
0,zero,"[zero_hidden128_vocab16000.txt, zero_hidden128...","[zero_hidden512_vocab16000.txt, zero_hidden512...",[zero_fc512_lr0.005_frozen.txt],"[zero_fc128_lr0.05_trainable.txt, zero_fc256_l..."
1,few50,"[few50_hidden256_vocab16000.txt, few50_hidden2...","[few50_hidden128_vocab16000.txt, few50_hidden1...",[few50_fc512_lr0.005_frozen.txt],[few50_fc512_lr0.0005_frozen.txt]
2,few100,"[few100_hidden512_vocab16000.txt, few100_hidde...","[few100_hidden256_vocab16000.txt, few100_hidde...",[few100_fc128_lr0.005_frozen.txt],[few100_fc512_lr0.0005_frozen.txt]
3,few150,"[few150_hidden512_vocab16000.txt, few150_hidde...","[few150_hidden512_vocab16000.txt, few150_hidde...",[few150_fc512_lr0.005_frozen.txt],[few150_fc128_lr0.0005_frozen.txt]
4,few200,"[few200_hidden512_vocab16000.txt, few200_hidde...","[few200_hidden512_vocab16000.txt, few200_hidde...",[few200_fc128_lr0.005_frozen.txt],[few200_fc128_lr0.0005_frozen.txt]
5,full,"[full_hidden256_vocab16000.txt, full_hidden256...","[full_hidden128_vocab16000.txt, full_hidden128...",[full_fc512_lr0.005_frozen.txt],[full_fc256_lr0.005_frozen.txt]
6,trg,"[trg_hidden256_vocab16000.txt, trg_hidden256_v...","[trg_hidden256_vocab16000.txt, trg_hidden256_v...",[trg_fc512_lr0.005_frozen.txt],[trg_fc256_lr0.005_frozen.txt]
