In [5]:
import os
import yaml
import pandas as pd
%matplotlib inline

In [7]:
exp_dirs = ["../exps/task2/track1/final", "../exps/task2/track1/default"]
task2_track1 = []

for exp_dir in exp_dirs:
    for subdir in os.scandir(exp_dir):
        config_fn = os.path.join(subdir.path, "config.yaml")
        with open(config_fn) as f:
            config = yaml.load(f)
        exp_d = config
        exp_d['language'] = config['train_file'].split('/')[-1].split('-')[0]
        exp_d['train_size'] = config['train_file'].split('/')[-1].split('-')[2]
        res_fn = os.path.join(subdir.path, "result.yaml")
        with open(res_fn) as f:
            exp_d.update(yaml.load(f))
        dev_acc_fn = os.path.join(subdir.path, "dev.word_accuracy")
        if not os.path.exists(dev_acc_fn):
            print("Dev accuracy file does not exist in {}".format(subdir.path))
        else:
            with open(dev_acc_fn) as f:
                exp_d['dev_acc'] = float(f.read())
        test_out_fn = os.path.join(subdir.path, "test.out")
        if not os.path.exists(test_out_fn):
            print("Test output file does not exist in {}".format(subdir.path))
        else:
            exp_d['test_output_path'] = test_out_fn
            
        task2_track1.append(exp_d)
        
task2_track1 = pd.DataFrame(task2_track1)

Dev accuracy file does not exist in ../exps/task2/track1/default/0162
Test output file does not exist in ../exps/task2/track1/default/0162
Dev accuracy file does not exist in ../exps/task2/track1/default/0163
Test output file does not exist in ../exps/task2/track1/default/0163
Dev accuracy file does not exist in ../exps/task2/track1/default/0164
Test output file does not exist in ../exps/task2/track1/default/0164
Dev accuracy file does not exist in ../exps/task2/track1/default/0165
Test output file does not exist in ../exps/task2/track1/default/0165
Dev accuracy file does not exist in ../exps/task2/track1/default/0166
Test output file does not exist in ../exps/task2/track1/default/0166


In [10]:
task2_track1.groupby('train_size').running_time.describe()

Unnamed: 0_level_0,count,mean,std,min,25%,50%,75%,max
train_size,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
high,55.0,11066.364565,8275.546778,3005.681581,6726.093461,9120.710958,12838.612622,56185.149199
low,56.0,1314.329507,1136.39535,88.384087,444.336687,883.76276,1944.078226,4925.786018
medium,92.0,3921.970107,3777.291153,40.460072,1387.424903,2875.919599,5114.663899,24662.202256


In [16]:
params = [
    'batch_size',
    'early_stopping_window',
    'context_hidden_size',
    'context_num_layers',
    'char_embedding_size',
    'tag_embedding_size',
    'tag_num_layers',
    'word_hidden_size',
    'word_num_layers',
    'decoder_num_layers',
    'dropout',
]
params = sorted(params)

In [27]:
fr = task2_track1[(task2_track1.language=='fr') & (task2_track1.train_size=='medium')]
# filter random experiments
fr = fr[fr.dropout != 0.4]
fr = fr[fr.decoder_num_layers==1]
fr = fr[fr.early_stopping_strategy=='dev_loss_increase']
fr[params].apply(pd.Series.value_counts).stack().swaplevel().to_frame().unstack().stack()

Unnamed: 0,Unnamed: 1,0
batch_size,8.0,5.0
batch_size,16.0,10.0
batch_size,32.0,14.0
batch_size,64.0,9.0
char_embedding_size,30.0,18.0
char_embedding_size,40.0,8.0
char_embedding_size,50.0,12.0
context_hidden_size,64.0,8.0
context_hidden_size,128.0,6.0
context_hidden_size,256.0,24.0


In [30]:
for p in params:
    if p == 'dropout':
        print("{} & {}\\\\".format(p, ", ".join(map(lambda s: str(float(s)), sorted(set(fr[p].values))))))
    else:
        print("{} & {}\\\\".format(p, ", ".join(map(lambda s: str(int(s)), sorted(set(fr[p].values))))))

batch_size & 8, 16, 32, 64\\
char_embedding_size & 30, 40, 50\\
context_hidden_size & 64, 128, 256\\
context_num_layers & 1, 2\\
decoder_num_layers & 1\\
dropout & 0.0, 0.2\\
early_stopping_window & 5, 10\\
tag_embedding_size & 10, 20, 30\\
tag_num_layers & 1, 2\\
word_hidden_size & 64, 128, 256\\
word_num_layers & 1, 2\\


In [37]:
fr.running_time.describe()

count       38.000000
mean      6237.929404
std       4175.640499
min         40.460072
25%       3968.661016
50%       5779.766970
75%       7949.714321
max      24662.202256
Name: running_time, dtype: float64

In [53]:
t = task2_track1
fr_best_used = t[
    (t.batch_size==32) &
    (t.char_embedding_size==30) &
    (t.context_hidden_size==256) & 
    (t.context_num_layers==1) &
    (t.decoder_hidden_size==64) &
    (t.decoder_num_layers==1) &
    (t.dropout==0.2) &
    (t.early_stopping_window==10) &
    (t.lemma_hidden_size==256) &
    (t.lemma_num_layers==1) &
    (t.tag_embedding_size==30) &
    (t.tag_hidden_size==128) &
    (t.word_hidden_size==64) &
    (t.word_num_layers==2)
]
print(len(fr_best_used))

37


In [55]:
fr_best_used.groupby(['language', 'train_size']).dev_acc.agg(['mean', 'max', 'size'])

Unnamed: 0_level_0,Unnamed: 1_level_0,mean,max,size
language,train_size,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
de,high,0.273273,0.306306,2
de,low,0.003003,0.006006,2
de,medium,0.168168,0.258258,2
en,high,0.282879,0.285084,2
en,low,0.009222,0.009623,2
en,medium,0.209503,0.2498,2
es,high,0.396605,0.410053,2
es,low,0.085097,0.087743,2
es,medium,0.284392,0.286155,2
fi,high,0.315258,0.322169,2


In [56]:
results.groupby(['language', 'train_size']).dev_acc.agg(['mean', 'max', 'size'])

Unnamed: 0_level_0,Unnamed: 1_level_0,mean,max,size
language,train_size,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
de,high,0.593093,0.741742,8
de,low,0.13647,0.27027,9
de,medium,0.420087,0.624625,9
en,high,0.617794,0.732558,9
en,low,0.332353,0.545309,9
en,medium,0.478437,0.610666,9
es,high,0.497256,0.551146,9
es,low,0.152361,0.229718,9
es,medium,0.308299,0.394621,9
fi,high,0.394072,0.46252,8


In [58]:
results[(results.language=='sv') & (results.train_size=='high')][params]

Unnamed: 0,batch_size,char_embedding_size,context_hidden_size,context_num_layers,decoder_num_layers,dropout,early_stopping_window,tag_embedding_size,tag_num_layers,word_hidden_size,word_num_layers
18,32,30,256,1,1,0.2,10.0,30,2,64,2
52,16,40,128,1,1,0.0,,20,1,128,1
74,16,40,128,1,1,0.0,,20,1,128,1
94,16,40,256,1,1,0.4,,20,1,256,1
118,32,40,256,1,1,0.0,,20,1,256,1
138,32,40,256,1,1,0.0,5.0,20,1,256,1


In [60]:
results.early_stopping_strategy.value_counts()

dev_loss_increase    98
ratio                21
Name: early_stopping_strategy, dtype: int64

In [63]:
exp_dirs = ["../exps/task2/track2/default"]
task2_track2 = []

for exp_dir in exp_dirs:
    for subdir in os.scandir(exp_dir):
        config_fn = os.path.join(subdir.path, "config.yaml")
        with open(config_fn) as f:
            config = yaml.load(f)
        exp_d = config
        exp_d['language'] = config['train_file'].split('/')[-1].split('-')[0]
        exp_d['train_size'] = config['train_file'].split('/')[-1].split('-')[2]
        res_fn = os.path.join(subdir.path, "result.yaml")
        with open(res_fn) as f:
            exp_d.update(yaml.load(f))
        dev_acc_fn = os.path.join(subdir.path, "dev.word_accuracy")
        if not os.path.exists(dev_acc_fn):
            print("Dev accuracy file does not exist in {}".format(subdir.path))
        else:
            with open(dev_acc_fn) as f:
                exp_d['dev_acc'] = float(f.read())
        test_out_fn = os.path.join(subdir.path, "test.out")
        if not os.path.exists(test_out_fn):
            print("Test output file does not exist in {}".format(subdir.path))
        else:
            exp_d['test_output_path'] = test_out_fn
            
        task2_track2.append(exp_d)
        
task2_track2 = pd.DataFrame(task2_track2)

In [65]:
t = task2_track2
t.groupby(['language', 'train_size']).size()

language  train_size
de        high          3
          low           3
          medium        3
en        high          3
          low           3
          medium        3
es        high          3
          low           3
          medium        3
fi        high          3
          low           3
          medium        3
fr        high          3
          low           3
          medium        3
ru        high          3
          low           3
          medium        3
sv        high          3
          low           3
          medium        3
dtype: int64

In [70]:
params = [
    'batch_size',
    'char_embedding_size',
    'context_hidden_size',
    'context_num_layers',
    'decoder_num_layers',
    'dropout',
    #'early_stopping_strategy',
    'early_stopping_window',
    'lemma_hidden_size',
    'lemma_num_layers',
    'word_hidden_size',
    'word_num_layers',
]
t[params].apply(pd.Series.value_counts).stack().swaplevel().to_frame().unstack().stack()

Unnamed: 0,Unnamed: 1,0
batch_size,32.0,63.0
char_embedding_size,50.0,63.0
context_hidden_size,256.0,63.0
context_num_layers,1.0,21.0
context_num_layers,2.0,42.0
decoder_num_layers,1.0,21.0
decoder_num_layers,2.0,42.0
dropout,0.2,63.0
early_stopping_window,5.0,17.0
early_stopping_window,10.0,46.0


In [73]:
t[(t.language=='de') & (t.train_size=='high')][params]

Unnamed: 0,batch_size,char_embedding_size,context_hidden_size,context_num_layers,decoder_num_layers,dropout,early_stopping_window,lemma_hidden_size,lemma_num_layers,word_hidden_size,word_num_layers
0,32,50,256,1,1,0.2,10,256,1,256,1
21,32,50,256,2,2,0.2,10,256,2,256,2
42,32,50,256,2,2,0.2,10,256,2,256,2


In [62]:
fr_best_used = t[
    (t.batch_size==32) &
    (t.char_embedding_size==30) &
    (t.context_hidden_size==256) & 
    (t.context_num_layers==1) &
    (t.decoder_hidden_size==64) &
    (t.decoder_num_layers==1) &
    (t.dropout==0.2) &
    (t.early_stopping_window==10) &
    (t.lemma_hidden_size==256) &
    (t.lemma_num_layers==1) &
    (t.tag_embedding_size==30) &
    (t.tag_hidden_size==128) &
    (t.word_hidden_size==64) &
    (t.word_num_layers==2)
]
print(len(fr_best_used))

AttributeError: 'DataFrame' object has no attribute 'decoder_hidden_size'

In [79]:
task2 = pd.read_table("detailed_results_task_2.tsv", names=["track", "eval_type", "train_size", "language", "acc"])
task2.head()

Unnamed: 0,track,eval_type,train_size,language,acc
0,track1,original,high,de,73.21
1,track1,original,high,en,76.23
2,track1,original,high,es,56.1
3,track1,original,high,fi,53.75
4,track1,original,high,fr,67.21


In [111]:
t = task2[task2.eval_type=='original'][['track', 'train_size', 'language', 'acc']]
t.set_index(['language', 'train_size', 'track']).unstack(level=[2, 1])

Unnamed: 0_level_0,acc,acc,acc,acc,acc,acc
track,track1,track1,track1,track2,track2,track2
train_size,high,medium,low,high,medium,low
language,Unnamed: 1_level_3,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3
de,73.21,56.83,30.64,64.61,52.17,27.81
en,76.23,66.77,61.33,69.89,64.05,56.9
es,56.1,42.5,29.17,41.65,32.12,27.77
fi,53.75,22.11,10.29,30.24,17.15,8.89
fr,67.21,51.12,26.27,45.42,23.63,9.57
ru,67.67,38.76,21.59,56.73,33.73,19.68
sv,65.64,41.91,26.06,54.26,34.89,22.34


In [112]:
print(t.set_index(['language', 'train_size', 'track']).unstack(level=[2, 1]).to_latex())

\begin{tabular}{lrrrrrr}
\toprule
{} & \multicolumn{6}{l}{acc} \\
track & \multicolumn{3}{l}{track1} & \multicolumn{3}{l}{track2} \\
train\_size &   high & medium &    low &   high & medium &    low \\
language &        &        &        &        &        &        \\
\midrule
de       &  73.21 &  56.83 &  30.64 &  64.61 &  52.17 &  27.81 \\
en       &  76.23 &  66.77 &  61.33 &  69.89 &  64.05 &  56.90 \\
es       &  56.10 &  42.50 &  29.17 &  41.65 &  32.12 &  27.77 \\
fi       &  53.75 &  22.11 &  10.29 &  30.24 &  17.15 &   8.89 \\
fr       &  67.21 &  51.12 &  26.27 &  45.42 &  23.63 &   9.57 \\
ru       &  67.67 &  38.76 &  21.59 &  56.73 &  33.73 &  19.68 \\
sv       &  65.64 &  41.91 &  26.06 &  54.26 &  34.89 &  22.34 \\
\bottomrule
\end{tabular}

