In [1]:
import os
import pandas as pd
import numpy as np 

results = {}

for root, dirs, files in os.walk("../reports"):
    for name in files:
        if name.endswith('result.txt'):
            fname = os.path.join(root, name)
            data = open(fname, 'r').read()
            results[root] = data

In [9]:
def get_data(key, entry):
    lines = entry.split('\n')
    
    stacking_rte = lines[5].split()
    stacking_rte_acc = float(stacking_rte[0].strip('%'))/100
    stacking_rte_f1 = float(stacking_rte[1])
    
    stacking_sts = lines[10].split()
    stacking_sts_pcc = float(stacking_sts[0])
    stacking_sts_mse = float(stacking_sts[1])
    
    average_rte = lines[16].split()
    average_rte_acc = float(average_rte[0].strip('%'))/100
    average_rte_f1 = float(average_rte[1])
    
    average_sts = lines[21].split()
    average_sts_pcc= float(average_sts[0])
    average_sts_mse = float(average_sts[1])
    
    return [
        {
            "fname": key,
            "ensemble": 'stacking',
            "pcc": stacking_sts_pcc,
            "mse": stacking_sts_mse,
            "acc": stacking_rte_acc,
            "f1": stacking_rte_f1
        },
        {
            "fname": key,
            "ensemble": 'average',
            "pcc": average_sts_pcc,
            "mse": average_sts_mse,
            "acc": stacking_rte_acc,
            "f1": stacking_rte_f1
        }
    ]

df = []
for item in results.items():
    df.extend(get_data(item[0], results[item[0]]))
    
for idx, item in enumerate(df):
    dataset = item['fname'].split('/')[-2]
    name = item['fname'].split('/')[-1]
    model1 = name.split('_')[0]
    model2 = name.split('_')[1]
    
    if model2 == 'bert-base-multilingual':
        model2 = 'bert-multilingual'
        
    if model2 == 'neuralmind-portuguese-bert':
        model2 = 'portuguese-bert'
    
    folds = name.split('_')[2]
    df[idx]['model1'] = model1
    df[idx]['model2'] = model2
    df[idx]['folds'] = int(folds.strip('folds'))
    df[idx]['dataset'] = dataset
    
df = pd.DataFrame(df)
df = df[['dataset', 'model1', 'model2', 'ensemble', 'folds', 'pcc', 'mse', 'acc', 'f1']]
#    ensemble 	pcc 	mse 	acc 	f1 	model1 	model2 	folds 	
df

Unnamed: 0,dataset,model1,model2,ensemble,folds,pcc,mse,acc,f1
0,assin-ptpt,roberta-large,portuguese-bert,stacking,10,0.88,0.33,0.8775,0.58
1,assin-ptpt,roberta-large,portuguese-bert,average,10,0.88,0.39,0.8775,0.58
2,assin-ptpt,roberta-large,bert-multilingual,stacking,10,0.88,0.36,0.8755,0.58
3,assin-ptpt,roberta-large,bert-multilingual,average,10,0.88,0.42,0.8755,0.58
4,assin-ptpt,roberta-large,bert-multilingual,stacking,5,0.88,0.4,0.8755,0.58
5,assin-ptpt,roberta-large,bert-multilingual,average,5,0.88,0.42,0.8755,0.58
6,assin-ptpt,roberta-large,portuguese-bert,stacking,20,0.88,0.37,0.879,0.58
7,assin-ptpt,roberta-large,portuguese-bert,average,20,0.88,0.39,0.879,0.58
8,assin-ptpt,roberta-large,bert-multilingual,stacking,10,0.88,0.37,0.877,0.58
9,assin-ptpt,roberta-large,bert-multilingual,average,10,0.88,0.42,0.877,0.58


In [23]:
def get_structured_df(df, dataset):
    tmp_df = df[df['dataset']==dataset]
    structured_df = tmp_df.groupby(['model1', 'model2', 'ensemble', 'folds']).apply(np.mean)[['pcc', 'mse', 'acc', 'f1']]
    return structured_df.round(2)

In [25]:
print(get_structured_df(df, 'assin2').to_latex())

\begin{tabular}{llllrrrr}
\toprule
              &                 &          &    &   pcc &   mse &   acc &    f1 \\
model1 & model2 & ensemble & folds &       &       &       &       \\
\midrule
roberta-large & bert-multilingual & average & 10 &  0.86 &  0.91 &  0.89 &  0.89 \\
              &                 &          & 20 &  0.86 &  0.91 &  0.89 &  0.89 \\
              &                 & stacking & 10 &  0.85 &  0.50 &  0.89 &  0.89 \\
              &                 &          & 20 &  0.85 &  0.50 &  0.89 &  0.89 \\
              & portuguese-bert & average & 5  &  0.86 &  0.91 &  0.89 &  0.89 \\
              &                 &          & 10 &  0.86 &  0.71 &  0.89 &  0.89 \\
              &                 &          & 20 &  0.86 &  0.71 &  0.89 &  0.89 \\
              &                 & stacking & 5  &  0.80 &  0.68 &  0.89 &  0.89 \\
              &                 &          & 10 &  0.86 &  0.51 &  0.89 &  0.89 \\
              &                 &          & 20 &  0.86 

In [26]:
print(get_structured_df(df, 'assin-ptbr').to_latex())

\begin{tabular}{llllrrrr}
\toprule
              &                 &          &    &   pcc &   mse &   acc &    f1 \\
model1 & model2 & ensemble & folds &       &       &       &       \\
\midrule
roberta-large & bert-multilingual & average & 5  &  0.86 &  0.22 &  0.78 &  0.30 \\
              &                 &          & 10 &  0.86 &  0.22 &  0.89 &  0.55 \\
              &                 &          & 20 &  0.86 &  0.22 &  0.89 &  0.56 \\
              &                 & stacking & 5  &  0.86 &  0.22 &  0.78 &  0.30 \\
              &                 &          & 10 &  0.86 &  0.21 &  0.89 &  0.55 \\
              &                 &          & 20 &  0.86 &  0.22 &  0.89 &  0.56 \\
              & portuguese-bert & average & 5  &  0.87 &  0.20 &  0.78 &  0.32 \\
              &                 &          & 10 &  0.87 &  0.20 &  0.88 &  0.56 \\
              &                 &          & 20 &  0.87 &  0.20 &  0.89 &  0.55 \\
              &                 & stacking & 5  &  0.87 

In [27]:
print(get_structured_df(df, 'assin-ptpt').to_latex())

\begin{tabular}{llllrrrr}
\toprule
              &                 &          &    &   pcc &   mse &   acc &    f1 \\
model1 & model2 & ensemble & folds &       &       &       &       \\
\midrule
roberta-large & bert-multilingual & average & 5  &  0.88 &  0.42 &  0.88 &  0.58 \\
              &                 &          & 10 &  0.88 &  0.42 &  0.88 &  0.58 \\
              &                 &          & 20 &  0.88 &  0.42 &  0.88 &  0.58 \\
              &                 & stacking & 5  &  0.88 &  0.40 &  0.88 &  0.58 \\
              &                 &          & 10 &  0.88 &  0.36 &  0.88 &  0.58 \\
              &                 &          & 20 &  0.88 &  0.38 &  0.88 &  0.58 \\
              & portuguese-bert & average & 5  &  0.88 &  0.39 &  0.88 &  0.58 \\
              &                 &          & 10 &  0.88 &  0.39 &  0.88 &  0.58 \\
              &                 &          & 20 &  0.88 &  0.39 &  0.88 &  0.58 \\
              &                 & stacking & 5  &  0.88 

In [3]:
overview = df.drop('folds', axis='columns').groupby('dataset').agg([np.mean, np.std]).round(2)

In [4]:
print(overview.to_latex())

\begin{tabular}{lrrrrrrrr}
\toprule
{} & \multicolumn{2}{l}{pcc} & \multicolumn{2}{l}{mse} & \multicolumn{2}{l}{acc} & \multicolumn{2}{l}{f1} \\
{} &  mean &   std &  mean &   std &  mean &   std &  mean &   std \\
dataset    &       &       &       &       &       &       &       &       \\
\midrule
assin-ptbr &  0.86 &  0.01 &  0.21 &  0.02 &  0.85 &  0.05 &  0.47 &  0.12 \\
assin-ptpt &  0.88 &  0.00 &  0.38 &  0.03 &  0.88 &  0.00 &  0.58 &  0.00 \\
assin2     &  0.85 &  0.02 &  0.68 &  0.18 &  0.89 &  0.00 &  0.89 &  0.00 \\
\bottomrule
\end{tabular}

