# Tabulate results


In [1]:
import os
import sys
from typing import Tuple

import pandas as pd
from tabulate import tabulate
from tqdm import tqdm
sys.path.append('../src')
from read_log_file import read_log_file

In [2]:
# Root directory of the training logs, relative to the working directory.
LOG_HOME_DIR = '../logs'
# Fail fast with the resolved path in the message: a bare assert is stripped
# under ``python -O`` and does not say which directory was looked up.
if not os.path.isdir(LOG_HOME_DIR):
    raise NotADirectoryError(
        f'Log directory not found: {os.path.abspath(LOG_HOME_DIR)}'
    )

In [3]:
# Model identifiers: used both as log sub-directory names and as the score
# column names of the result tables.
MODEL_NAMES = [
    'bert-base-uncased',
    'bert-base-multilingual-cased',
]

In [4]:
# Setup names; the log files of a setup are matched by the '<setup>_'
# file-name prefix.
SETUPS = [
    'full',
    'trg',
]

In [5]:
def get_best_score_from_dict(di: dict) -> dict:
    """Find the largest value in ``di`` and every key that maps to it.

    Returns a dict with two entries: ``'k'`` is the list of keys (in the
    iteration order of ``di``) whose value equals the maximum, and ``'v'`` is
    that maximum. For an empty ``di`` the result is ``'k': []`` with
    ``'v': -inf``.
    """
    # Seeding the scan with -inf keeps the empty-dict case well defined.
    best = max([-float('inf'), *di.values()])
    winners = [key for key, score in di.items() if score == best]
    return {'k': winners, 'v': best}

In [6]:
def create_best_results_df(langs: str) -> Tuple[pd.DataFrame, pd.DataFrame]:
    """Collect the best validation F1 per model and setup for one log folder.

    Every file in ``LOG_HOME_DIR/<langs>/<model_name>`` is parsed with
    ``read_log_file`` and its ``['best_val_metrics']['f1']`` entry is kept.
    For each model in ``MODEL_NAMES`` and each setup in ``SETUPS``, the files
    whose name starts with ``'<setup>_'`` are compared and the highest score
    is selected.

    Args:
        langs: Name of the sub-directory of ``LOG_HOME_DIR`` to read
            (e.g. ``'enbg'``).

    Returns:
        A pair ``(best_results_df, best_hparams_df)``. Both frames have a
        ``'Setup'`` column followed by one column per model. The first holds
        the best score, the second the list of log file names that reached
        it. A setup with no matching file yields ``-inf`` and an empty list.
    """
    results_dict = {}
    for model_name in MODEL_NAMES:
        log_dir = os.path.join(LOG_HOME_DIR, langs, model_name)
        # os.listdir gives no ordering guarantee; sorting keeps the order of
        # tied file names in the hyper-parameter table stable across runs.
        results_dict[model_name] = {
            fname: read_log_file(
                log_file_path=os.path.join(log_dir, fname),
                plot=False,
                verbose=False,
            )['best_val_metrics']['f1']
            for fname in sorted(os.listdir(log_dir))
        }

    best_results_dict = {'Setup': SETUPS}
    best_hparams_dict = {'Setup': SETUPS}
    for model_name in MODEL_NAMES:
        # One entry per setup, in SETUPS order, so rows line up with 'Setup'.
        best_scores = [
            get_best_score_from_dict({
                fname: f1
                for fname, f1 in results_dict[model_name].items()
                if fname.startswith(f'{setup}_')
            })
            for setup in SETUPS
        ]
        best_results_dict[model_name] = [best['v'] for best in best_scores]
        best_hparams_dict[model_name] = [best['k'] for best in best_scores]

    best_results_df = pd.DataFrame(best_results_dict)
    best_hparams_df = pd.DataFrame(best_hparams_dict)
    return best_results_df, best_hparams_df

In [7]:
def highlight_best_score(df: pd.DataFrame) -> pd.DataFrame:
    """Highlight best score in each row"""
    return df.style.apply(lambda x: ['background: red' if isinstance(v, float) and v == max(x.iloc[1:]) else '' for v in x], axis=1)

In [8]:
def tabulate_markdown(df: pd.DataFrame) -> str:
    """Render ``df`` as a GitHub-markdown table with the row-best score in bold.

    Scores in the ``MODEL_NAMES`` columns are rounded to 4 decimals. In each
    row, every cell equal to the row maximum is wrapped in ``**...**``. The
    input frame is not modified, because ``round`` returns a new frame.

    Args:
        df: Frame containing one numeric column per entry of ``MODEL_NAMES``.

    Returns:
        The table as a string in tabulate's ``github`` format, without the
        index column.
    """
    df = df.round(4)
    for model_name in MODEL_NAMES:
        df[model_name] = df[model_name].astype(str)

    # Address cells purely by position so that frames with a non-default
    # index are handled correctly (label-based writes could hit another row
    # or silently append new rows).
    score_cols = [df.columns.get_loc(model_name) for model_name in MODEL_NAMES]
    for row_pos in range(len(df)):
        row_scores = [float(df.iat[row_pos, col]) for col in score_cols]
        best = max(row_scores)
        for col, score in zip(score_cols, row_scores):
            df.iat[row_pos, col] = f'**{score}**' if score == best else f'{score}'

    return tabulate(df, headers='keys', showindex=False, tablefmt='github')


In [9]:
def tabulate_latex(df: pd.DataFrame) -> str:
    """Render ``df`` as a LaTeX ``tabular`` with scores rounded to 4 decimals.

    No cell is emphasised: every score in the ``MODEL_NAMES`` columns is
    written as a plain number. The input frame is not modified, because
    ``round`` returns a new frame.

    NOTE(review): bolding the per-row best score would need LaTeX markup in
    the cells, which tabulate's ``latex`` format presumably escapes; the
    ``latex_raw`` format would be required. Confirm before adding it.

    Args:
        df: Frame containing one numeric column per entry of ``MODEL_NAMES``.

    Returns:
        The table as a string in tabulate's ``latex`` format, without the
        index column.
    """
    df = df.round(4)
    for model_name in MODEL_NAMES:
        # Normalise each score through str -> float -> str. Assigning a list
        # is positional, so frames with a non-default index are handled too.
        df[model_name] = [
            f'{float(cell)}' for cell in df[model_name].astype(str)
        ]

    return tabulate(df, headers='keys', showindex=False, tablefmt='latex')

In [10]:
# Per language pair: best-score table and the table of log files reaching it.
best_results_dfs_dict, best_hparams_dfs_dict = {}, {}
for langs in tqdm(['enbg', 'enar', 'bgen', 'bgar', 'aren', 'arbg']):
    (
        best_results_dfs_dict[langs],
        best_hparams_dfs_dict[langs],
    ) = create_best_results_df(langs)

100%|██████████| 6/6 [00:00<00:00,  9.19it/s]


## en-bg

In [11]:
highlight_best_score(best_results_dfs_dict['enbg'])

Unnamed: 0,Setup,bert-base-uncased,bert-base-multilingual-cased
0,full,0.823907,0.831701
1,trg,0.821763,0.843117


In [12]:
print(tabulate_markdown(best_results_dfs_dict['enbg']))

| Setup   |   bert-base-uncased | bert-base-multilingual-cased   |
|---------|---------------------|--------------------------------|
| full    |              0.8239 | **0.8317**                     |
| trg     |              0.8218 | **0.8431**                     |


In [13]:
print(tabulate_latex(best_results_dfs_dict['enbg']))

\begin{tabular}{lrr}
\hline
 Setup   &   bert-base-uncased &   bert-base-multilingual-cased \\
\hline
 full    &              0.8239 &                         0.8317 \\
 trg     &              0.8218 &                         0.8431 \\
\hline
\end{tabular}


In [14]:
best_hparams_dfs_dict['enbg']

Unnamed: 0,Setup,bert-base-uncased,bert-base-multilingual-cased
0,full,[full_fc128_lr0.0005_frozen.txt],[full_fc256_lr0.005_frozen.txt]
1,trg,[trg_fc128_lr0.0005_frozen.txt],[trg_fc512_lr0.0005_frozen.txt]


## en-ar

In [15]:
highlight_best_score(best_results_dfs_dict['enar'])

Unnamed: 0,Setup,bert-base-uncased,bert-base-multilingual-cased
0,full,0.594156,0.693677
1,trg,0.606164,0.687872


In [16]:
print(tabulate_markdown(best_results_dfs_dict['enar']))

| Setup   |   bert-base-uncased | bert-base-multilingual-cased   |
|---------|---------------------|--------------------------------|
| full    |              0.5942 | **0.6937**                     |
| trg     |              0.6062 | **0.6879**                     |


In [17]:
print(tabulate_latex(best_results_dfs_dict['enar']))

\begin{tabular}{lrr}
\hline
 Setup   &   bert-base-uncased &   bert-base-multilingual-cased \\
\hline
 full    &              0.5942 &                         0.6937 \\
 trg     &              0.6062 &                         0.6879 \\
\hline
\end{tabular}


In [18]:
best_hparams_dfs_dict['enar']

Unnamed: 0,Setup,bert-base-uncased,bert-base-multilingual-cased
0,full,[full_fc256_lr0.005_frozen.txt],[full_fc256_lr0.0005_frozen.txt]
1,trg,[trg_fc256_lr0.0005_frozen.txt],[trg_fc256_lr0.005_frozen.txt]


## bg-en

In [19]:
highlight_best_score(best_results_dfs_dict['bgen'])

Unnamed: 0,Setup,bert-base-uncased,bert-base-multilingual-cased
0,full,0.708275,0.709109
1,trg,0.72908,0.716178


In [20]:
print(tabulate_markdown(best_results_dfs_dict['bgen']))

| Setup   | bert-base-uncased   | bert-base-multilingual-cased   |
|---------|---------------------|--------------------------------|
| full    | 0.7083              | **0.7091**                     |
| trg     | **0.7291**          | 0.7162                         |


In [21]:
print(tabulate_latex(best_results_dfs_dict['bgen']))

\begin{tabular}{lrr}
\hline
 Setup   &   bert-base-uncased &   bert-base-multilingual-cased \\
\hline
 full    &              0.7083 &                         0.7091 \\
 trg     &              0.7291 &                         0.7162 \\
\hline
\end{tabular}


In [22]:
best_hparams_dfs_dict['bgen']

Unnamed: 0,Setup,bert-base-uncased,bert-base-multilingual-cased
0,full,[full_fc512_lr0.0005_frozen.txt],[full_fc128_lr0.005_frozen.txt]
1,trg,[trg_fc128_lr0.005_frozen.txt],[trg_fc512_lr0.0005_frozen.txt]


## bg-ar

In [23]:
highlight_best_score(best_results_dfs_dict['bgar'])

Unnamed: 0,Setup,bert-base-uncased,bert-base-multilingual-cased
0,full,0.597743,0.678938
1,trg,0.606164,0.687872


In [24]:
print(tabulate_markdown(best_results_dfs_dict['bgar']))

| Setup   |   bert-base-uncased | bert-base-multilingual-cased   |
|---------|---------------------|--------------------------------|
| full    |              0.5977 | **0.6789**                     |
| trg     |              0.6062 | **0.6879**                     |


In [25]:
print(tabulate_latex(best_results_dfs_dict['bgar']))

\begin{tabular}{lrr}
\hline
 Setup   &   bert-base-uncased &   bert-base-multilingual-cased \\
\hline
 full    &              0.5977 &                         0.6789 \\
 trg     &              0.6062 &                         0.6879 \\
\hline
\end{tabular}


In [26]:
best_hparams_dfs_dict['bgar']

Unnamed: 0,Setup,bert-base-uncased,bert-base-multilingual-cased
0,full,[full_fc512_lr0.0005_frozen.txt],[full_fc512_lr0.0005_frozen.txt]
1,trg,[trg_fc256_lr0.0005_frozen.txt],[trg_fc256_lr0.005_frozen.txt]


## ar-en

In [27]:
highlight_best_score(best_results_dfs_dict['aren'])

Unnamed: 0,Setup,bert-base-uncased,bert-base-multilingual-cased
0,full,0.701622,0.696182
1,trg,0.72908,0.716178


In [28]:
print(tabulate_markdown(best_results_dfs_dict['aren']))

| Setup   | bert-base-uncased   |   bert-base-multilingual-cased |
|---------|---------------------|--------------------------------|
| full    | **0.7016**          |                         0.6962 |
| trg     | **0.7291**          |                         0.7162 |


In [29]:
print(tabulate_latex(best_results_dfs_dict['aren']))

\begin{tabular}{lrr}
\hline
 Setup   &   bert-base-uncased &   bert-base-multilingual-cased \\
\hline
 full    &              0.7016 &                         0.6962 \\
 trg     &              0.7291 &                         0.7162 \\
\hline
\end{tabular}


In [30]:
best_hparams_dfs_dict['aren']

Unnamed: 0,Setup,bert-base-uncased,bert-base-multilingual-cased
0,full,[full_fc512_lr0.005_frozen.txt],[full_fc256_lr0.005_frozen.txt]
1,trg,[trg_fc128_lr0.005_frozen.txt],[trg_fc512_lr0.0005_frozen.txt]


## ar-bg

In [31]:
highlight_best_score(best_results_dfs_dict['arbg'])

Unnamed: 0,Setup,bert-base-uncased,bert-base-multilingual-cased
0,full,0.820622,0.841008
1,trg,0.821763,0.843117


In [32]:
print(tabulate_markdown(best_results_dfs_dict['arbg']))

| Setup   |   bert-base-uncased | bert-base-multilingual-cased   |
|---------|---------------------|--------------------------------|
| full    |              0.8206 | **0.841**                      |
| trg     |              0.8218 | **0.8431**                     |


In [33]:
print(tabulate_latex(best_results_dfs_dict['arbg']))



\begin{tabular}{lrr}
\hline
 Setup   &   bert-base-uncased &   bert-base-multilingual-cased \\
\hline
 full    &              0.8206 &                         0.841  \\
 trg     &              0.8218 &                         0.8431 \\
\hline
\end{tabular}


In [34]:
best_hparams_dfs_dict['arbg']

Unnamed: 0,Setup,bert-base-uncased,bert-base-multilingual-cased
0,full,[full_fc512_lr0.005_frozen.txt],[full_fc256_lr0.0005_frozen.txt]
1,trg,[trg_fc128_lr0.0005_frozen.txt],[trg_fc512_lr0.0005_frozen.txt]
