In [1]:
import os
import json
import pandas as pd

# Directory that is scanned (recursively) for JSON evaluation reports.
path = '../reports/'

# Gather the contents of every report file under `path` into one flat list.
# Files whose name ends with 'stats.json' are skipped. Each remaining file's
# parsed content is passed to list.extend(), so it is presumably a JSON array
# of record objects -- verify against whatever writes the reports.
records = []
for root, _dirs, files in os.walk(path, topdown=False):
    for name in files:
        if not name.endswith('.json') or name.endswith('stats.json'):
            continue
        # JSON is UTF-8 by specification; do not depend on the platform default.
        with open(os.path.join(root, name), 'r', encoding='utf-8') as f:
            loaded = json.load(f)
        records.extend(loaded)

In [2]:
import re
# Add the display columns used by the later tables to every record, in place.
# All of them are derived from the record's 'lang' and 'test' fields; a single
# space (' ') is used as the "not applicable" placeholder.
for item in records:

    # Human-readable dataset label. A record with any other 'lang' value is
    # left without a 'dataset' key.
    if item['lang'] == 'ptbr':
        item['dataset'] = 'ASSIN ( pt-BR )'
    elif item['lang'] == 'pteu':
        item['dataset'] = 'ASSIN ( pt-PT )'
    elif item['lang'] == 'assin2':
        item['dataset'] = 'ASSIN 2'

    test_name = item['test']

    # Training architecture, detected by substring.
    if 'cbow' in test_name:
        item['architecture'] = 'CBOW'
    elif 'skip' in test_name:
        item['architecture'] = 'skip-gram'
    else:
        item['architecture'] = ' '

    # When 'test' mentions 'embeddings' it is split on '/': the second-to-last
    # component is taken as the embedding name and the first run of digits in
    # the last component as the dimensionality.
    if 'embeddings' in test_name:
        parts = test_name.split('/')
        item['embedding'] = parts[-2]
        # Raw string: '\d' in a plain literal is an invalid escape sequence.
        item['dimensions'] = int(re.findall(r'\d+', parts[-1])[0])
    else:
        item['embedding'] = ' '
        item['dimensions'] = ' '

    # ELMo variant; 'custom1' is checked before 'custom2', plain 'ELMo' last.
    if 'ELMo' in test_name:
        if 'custom1' in test_name:
            item['ELMo'] = 'wiki (reduced)'
        elif 'custom2' in test_name:
            item['ELMo'] = 'BRWAC'
        else:
            item['ELMo'] = 'wiki'
    else:
        item['ELMo'] = ' '

    # Flag records whose test name contains 'unk'.
    item['unk'] = 'unk' in test_name

In [3]:
import numpy as np
# Summary table: one row per configuration (the six key columns below) holding
# the mean PCC and MSE of all records that share that configuration.
group_keys = ['dataset', 'ELMo', 'embedding', 'unk', 'architecture', 'dimensions']

# NOTE(review): the metrics are rounded to 2 decimals *before* they are
# averaged. That order is kept so previously reported numbers do not change,
# but rounding after the aggregation would be more accurate -- confirm intent.
df = pd.DataFrame(records).round(2)
df = df[group_keys + ['pearson', 'MSE']].rename(columns={'pearson': 'PCC'})

# Average only the metric columns with the built-in group mean. Applying
# np.mean to whole group frames also feeds it the string key columns and, on
# pandas >= 2.0, reduces over both axes instead of per column.
df = df.groupby(group_keys)[['PCC', 'MSE']].mean()
df.to_csv('../reports/evaluation.csv')

In [4]:
# fastText / skip-gram rows without ELMo, for every dataset except 'ASSIN 2'.
level = df.index.get_level_values
keep = (
    (level('embedding') == 'fasttext')
    & (level('architecture') == 'skip-gram')
    & (level('ELMo') == ' ')
    & (level('dataset') != 'ASSIN 2')
)
view = df[keep]
view

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,PCC,MSE
dataset,ELMo,embedding,unk,architecture,dimensions,Unnamed: 6_level_1,Unnamed: 7_level_1


In [5]:
# word2vec / skip-gram rows without ELMo, for every dataset except 'ASSIN 2'.
# The equality filters are applied one index level at a time.
view = df
for level_name, wanted in (('embedding', 'word2vec'),
                           ('architecture', 'skip-gram'),
                           ('ELMo', ' ')):
    view = view[view.index.get_level_values(level_name) == wanted]
view = view[view.index.get_level_values('dataset') != 'ASSIN 2']
view

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,PCC,MSE
dataset,ELMo,embedding,unk,architecture,dimensions,Unnamed: 6_level_1,Unnamed: 7_level_1


In [6]:
# Render the current `view` as a LaTeX tabular and write it to stdout.
latex_source = view.to_latex()
print(latex_source)

\begin{tabular}{llllllrr}
\toprule
\bottomrule
\end{tabular}



In [7]:
# ELMo comparison table for every dataset except 'ASSIN 2'.
# Kept rows use the 'wiki' or 'wiki (reduced)' ELMo model and are either
#   * ELMo alone (embedding placeholder ' '), or
#   * ELMo plus a 1000-dimensional static embedding in one of these
#     configurations: word2vec/CBOW, fasttext/skip-gram, glove (architecture ' ').
level = df.index.get_level_values

has_elmo = level('ELMo').isin(['wiki', 'wiki (reduced)'])
elmo_only = level('embedding') == ' '
static_1000 = (level('dimensions') == 1000) & (
    ((level('embedding') == 'word2vec') & (level('architecture') == 'CBOW'))
    | ((level('embedding') == 'fasttext') & (level('architecture') == 'skip-gram'))
    | ((level('embedding') == 'glove') & (level('architecture') == ' '))
)

# NOTE(review): 'unk' is deliberately not part of the mask. A separate
# fasttext/skip-gram/1000/unk==False alternative would be a strict subset of
# `static_1000` and therefore change nothing; if unk=True rows must be
# excluded, add `& (level('unk') == False)` to the mask -- confirm intent.
view = df[has_elmo & (elmo_only | static_1000)]
view = view[view.index.get_level_values('dataset') != 'ASSIN 2']
view

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,PCC,MSE
dataset,ELMo,embedding,unk,architecture,dimensions,Unnamed: 6_level_1,Unnamed: 7_level_1
ASSIN ( pt-BR ),wiki (reduced),,False,,,0.62,0.47
ASSIN ( pt-BR ),wiki (reduced),fasttext,False,skip-gram,1000.0,0.6,0.48
ASSIN ( pt-BR ),wiki (reduced),glove,False,,1000.0,0.48,0.59
ASSIN ( pt-PT ),wiki (reduced),,False,,,0.63,0.73
ASSIN ( pt-PT ),wiki (reduced),fasttext,False,skip-gram,1000.0,0.58,0.78
ASSIN ( pt-PT ),wiki (reduced),glove,False,,1000.0,0.37,1.01


In [8]:
# Print the LaTeX table with every literal 'False' blanked out.
print(view.to_latex().replace('False', ''))

\begin{tabular}{llllllrr}
\toprule
                &                &       &       &   &      &   PCC &   MSE \\
dataset & ELMo & embedding & unk & architecture & dimensions &       &       \\
\midrule
ASSIN ( pt-BR ) & wiki (reduced) &   &  &   &   &  0.62 &  0.47 \\
                &                & fasttext &  & skip-gram & 1000 &  0.60 &  0.48 \\
                &                & glove &  &   & 1000 &  0.48 &  0.59 \\
ASSIN ( pt-PT ) & wiki (reduced) &   &  &   &   &  0.63 &  0.73 \\
                &                & fasttext &  & skip-gram & 1000 &  0.58 &  0.78 \\
                &                & glove &  &   & 1000 &  0.37 &  1.01 \\
\bottomrule
\end{tabular}



In [9]:
# ELMo comparison table restricted to the 'ASSIN 2' dataset.
# Kept rows use the 'wiki' or 'wiki (reduced)' ELMo model and are either
#   * ELMo alone (embedding placeholder ' '), or
#   * ELMo plus a 1000-dimensional static embedding in one of these
#     configurations: word2vec/CBOW, fasttext/skip-gram, glove (architecture ' ').
level = df.index.get_level_values

has_elmo = level('ELMo').isin(['wiki', 'wiki (reduced)'])
elmo_only = level('embedding') == ' '
static_1000 = (level('dimensions') == 1000) & (
    ((level('embedding') == 'word2vec') & (level('architecture') == 'CBOW'))
    | ((level('embedding') == 'fasttext') & (level('architecture') == 'skip-gram'))
    | ((level('embedding') == 'glove') & (level('architecture') == ' '))
)

# NOTE(review): 'unk' is deliberately not part of the mask. A separate
# fasttext/skip-gram/1000/unk==False alternative would be a strict subset of
# `static_1000` and therefore change nothing; if unk=True rows must be
# excluded, add `& (level('unk') == False)` to the mask -- confirm intent.
view = df[has_elmo & (elmo_only | static_1000)]
view = view[view.index.get_level_values('dataset') == 'ASSIN 2']
view

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,PCC,MSE
dataset,ELMo,embedding,unk,architecture,dimensions,Unnamed: 6_level_1,Unnamed: 7_level_1
ASSIN 2,wiki (reduced),,False,,,0.57,1.94
ASSIN 2,wiki (reduced),fasttext,False,skip-gram,1000.0,0.58,1.9
ASSIN 2,wiki (reduced),glove,False,,1000.0,0.49,1.99


In [10]:
# Print the LaTeX table with every literal 'False' blanked out.
latex_source = view.to_latex()
print(latex_source.replace('False', ''))

\begin{tabular}{llllllrr}
\toprule
        &                &       &       &   &      &   PCC &   MSE \\
dataset & ELMo & embedding & unk & architecture & dimensions &       &       \\
\midrule
ASSIN 2 & wiki (reduced) &   &  &   &   &  0.57 &  1.94 \\
        &                & fasttext &  & skip-gram & 1000 &  0.58 &  1.90 \\
        &                & glove &  &   & 1000 &  0.49 &  1.99 \\
\bottomrule
\end{tabular}

