In [1]:
# Inline matplotlib rendering is left disabled; uncomment the magic below to force it.
# %matplotlib inline
# Enable the autoreload extension in mode 2, so that edits to imported project
# modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [23]:
import glob
import itertools
import sys
import os

import matplotlib
import matplotlib.pyplot as plt
import numpy as np

sys.path.append('..')

from run import Run
from run_db import db as run_db
from run_utils import init_runs, find_runs

# Tasks and languages that span the evaluation grid used by the helpers below.
tasks = ['dep', 'lmo', 'ner', 'pos']
langs = ['cs', 'de', 'en', 'es']

# Directory with the run logs. The historical location stays the default, but it
# can be redirected with the RUN_LOG_PATH environment variable so the notebook
# also runs on machines that do not have this home directory.
log_path = os.environ.get('RUN_LOG_PATH', '/home/mpikuliak/logs/')

# Run objects shared by every cell in this notebook.
runs = init_runs(log_path, run_db)

# Print arrays with two decimals.
np.set_printoptions(precision=2)

In [3]:
def draw_graphs(runs, tasks, langs, role, metric=None, focused=False, label=None):
    """Plot run histories on a grid with one subplot per (task, language) pair.

    Rows follow ``tasks`` and columns follow ``langs``. Column titles are the
    languages, row labels are the tasks, and the figure is displayed with
    ``plt.show()``.

    Args:
        runs: re-iterable collection of run objects (it is traversed once per
            subplot); each must expose ``history(...)``, ``name``, ``type`` and,
            when ``focused`` is true, ``config['focus_on']``.
        tasks: task identifiers, one grid row each.
        langs: language identifiers, one grid column each.
        role: forwarded to ``run.history`` as ``role``.
        metric: forwarded to ``run.history`` as ``metric``.
        focused: when true, a run is drawn only in the subplot whose
            ``'<task>-<lang>'`` key equals ``run.config['focus_on']``.
        label: callable mapping a run to its legend label; defaults to
            ``'<name>-<type>'``.
    """
    if label is None:
        def label(run):
            return f'{run.name}-{run.type}'

    fig, axes = plt.subplots(
        len(tasks),
        len(langs),
        figsize=(5*len(langs), 4*len(tasks)),
        squeeze=False)  # squeeze=False keeps `axes` 2-D even for a single row/column

    for ax_row, task in zip(axes, tasks):
        for ax, lang in zip(ax_row, langs):
            for run in runs:
                if not focused or run.config['focus_on'] == f'{task}-{lang}':
                    history = run.history(
                        metric=metric,
                        task=task,
                        language=lang,
                        role=role)
                    ax.plot(list(history), label=label(run))

    for ax, lang in zip(axes[0], langs):
        ax.set_title(lang)

    for ax, task in zip(axes[:, 0], tasks):
        ax.set_ylabel(task, rotation=0, size='large')

    for ax in axes.flat:
        # Subplots without any plotted run (typical with focused=True) would make
        # ax.legend() emit a "no artists with labels" warning, so skip them.
        handles, _ = ax.get_legend_handles_labels()
        if handles:
            ax.legend()

    plt.show()

    
def results(runs, tasks, langs, focused=True, values_only=False):
    """Collect the evaluated metric of each run for its (task, language) pairs.

    A pair is evaluated for a run when ``run.config['focus_on']`` equals
    ``'<task>-<lang>'`` or, if ``focused`` is false, when
    ``run.contains(task, lang)`` is true.

    Args:
        runs: iterable of hashable run objects exposing ``load()``,
            ``contains(task, lang)``, ``config`` and
            ``metric_eval(task=..., language=...)``.
        tasks: task identifiers to consider.
        langs: language identifiers to consider.
        focused: restrict every run to the pair named in its ``focus_on`` config.
        values_only: return only the metric of the first collected pair of
            every run, as a NumPy array.

    Returns:
        ``{run: {(task, lang): (metric, epoch)}}``, or a 1-D array with one
        metric per run when ``values_only`` is true.

    Raises:
        IndexError: with ``values_only`` when a run produced no result at all.
    """

    def get_results(run):
        # Loading does not depend on the (task, lang) pair, so do it once per run
        # instead of once per pair.
        run.load()
        out = {}
        for task, lang in itertools.product(tasks, langs):
            if (not focused and run.contains(task, lang)) or run.config['focus_on'] == f'{task}-{lang}':
                res, epoch = run.metric_eval(task=task, language=lang)
                # Values up to 1.01 are scaled by 100 (presumably fractions being
                # converted to percentages -- confirm against metric_eval).
                if res <= 1.01:
                    res *= 100
                out[(task, lang)] = res, epoch
        return out

    out = {
        run: get_results(run)
        for run in runs}

    if values_only:
        return np.array([
            list(run_results.values())[0][0] for run_results in out.values()
        ])
    return out
    
def np_results(name, type='all', focused=True):
    """Return the metric values of the runs matching ``name`` and ``type``.

    The runs are looked up in the notebook-level ``runs`` with ``find_runs`` and
    evaluated over the notebook-level ``tasks`` and ``langs`` through
    ``results(..., values_only=True)``; ``focused`` is passed through unchanged.
    """
    matching_runs = find_runs(runs, type=type, name=name)
    return results(
        runs=matching_runs,
        tasks=tasks,
        langs=langs,
        values_only=True,
        focused=focused,
    )

def gen_table(runs, upper, baseline=None):
    """Build a summary table with one row per experiment.

    Args:
        runs: iterable of ``(ignored, type, name)`` triples; ``name`` and
            ``type`` are handed to ``np_results``.
        upper: reference values used in the denominator of the relative score.
        baseline: values subtracted from every row; defaults to the metrics of
            the first experiment.

    Returns:
        Array with four columns per experiment: the mean of metric columns
        0-3, 4-7 and 8 onwards, followed by the mean of
        ``(metrics - baseline) / (upper - baseline)`` times 100.
    """
    rows = []
    for _, run_type, run_name in runs:
        rows.append(np.array(np_results(run_name, run_type)))
    metrics = np.vstack(rows)

    reference = metrics[0] if baseline is None else baseline
    aer = (metrics - reference) / (upper - reference)

    columns = [
        metrics[:, :4].mean(axis=1),
        metrics[:, 4:8].mean(axis=1),
        metrics[:, 8:].mean(axis=1),
        aer.mean(axis=1) * 100,
    ]
    return np.vstack(columns).T


def tempo(name, type='all', mean=True):
    """Print the metric values of the matching runs on a single line.

    Every value is printed with two decimals. When ``mean`` is true the line is
    followed by the rounded means of values 0-3, 4-7 and 8 onwards.
    """
    values = np_results(name, type)
    print(''.join(f'{value:.2f} ' for value in values), end='')
    if mean:
        group_means = [
            np.mean(values[start:stop])
            for start, stop in ((0, 4), (4, 8), (8, None))
        ]
        print(np.round(group_means, decimals=2), end='')
    print()
    
# Results of the 'normal-training' runs of type 'stsl', evaluated on every pair
# each run contains (focused=False); the table cells pass this to gen_table as
# the `upper` reference.
simple_baseline = np_results(name='normal-training', type='stsl', focused=False)

# Positions of simple_baseline, with repetition, selected as the reference for
# the two-by-two experiments (20 entries; the cells using it reshape their
# results to 20 rows).
two_two_indices = [
    11, 9, 11, 0, 6,
    3, 0, 6, 7, 8,
    7, 8, 8, 3, 2,
    2, 6, 10, 9, 0,
]
two_two_baseline = simple_baseline[two_two_indices]
print(simple_baseline)

[86.51712732 76.41776356 82.4424392  85.47905546 69.05721193 78.51971038
 81.48148148 79.70171514 97.60281313 92.61286751 94.36329058 96.22726861]


In [None]:
# Metric values of the two-by-two zero-shot runs and of the adversarial ones.
two_by_two = np_results('zero-shot-two-by-two', 'var')
adv = np_results('zero-shot-two-by-two-adversarial', 'var')

# Print six two-by-two values per line and finish each line with the
# adversarial value that has the same line index.
for i, val in enumerate(two_by_two):
    print(f'{val:.2f}', end=' ')
    if (i + 1) % 6 == 0:
        print(f'{adv[i // 6]:.2f}')

In [None]:
# Print the 'rel' results of both embedding experiments, one line each.
for run_name in ('zero-shot-embs', 'zero-shot-embs-again'):
    tempo(run_name, 'rel')



In [None]:
# Show the first data record of the first 'zero-shot' run, then plot the test
# loss of every 'zero-shot' run in the subplot it focuses on.
tmp = find_runs(runs, name='zero-shot')
first_run = tmp[0]
print(first_run.data[0])
draw_graphs(runs=tmp, tasks=tasks, langs=langs, role='test', metric='loss', focused=True)


In [None]:
# Metric values of the three low-resource experiment families.
tmp = np_results('low-resource', 'var')
tmp2 = np_results('low-resource-2', 'var')
tmpa = np_results('low-resource-advanced', 'var')

adv = iter(tmpa)
# The first value of every group of five serves as that group's baseline.
# These are NumPy arrays, so they have to be concatenated explicitly: `+` would
# add them element-wise instead of chaining the baselines of both experiments.
baselines = iter(np.concatenate([tmp[0::5], tmp2[0::5]]))
bsl = next(baselines)

i = 0
for series in (tmp, tmp2):
    for r in series:
        print(f'{r/bsl:.2f}', end=' ')
        i += 1
        if i % 5 == 0:
            # Finish the row with three 'advanced' results relative to the same baseline.
            for _ in range(3):
                print(f'{next(adv)/bsl:.2f}', end=' ')
            print()
            # The default keeps the last baseline instead of raising
            # StopIteration once the final group has been printed.
            bsl = next(baselines, bsl)



In [None]:
# Metric values of the two-by-two zero-shot runs and of the adversarial ones.
tmp = np_results('zero-shot-two-by-two', 'var')
tmpa = np_results('zero-shot-two-by-two-adversarial', 'var')

# The first value of every group of six serves as that group's baseline.
baselines = iter(tmp[0::6])
bsl = next(baselines)
adv = iter(tmpa)

i = 0
for r in tmp:
    print(f'{r/bsl:.2f}', end=' ')
    i += 1
    if i % 6 == 0:
        # Finish the row with the adversarial result relative to the same baseline.
        print(f'{next(adv)/bsl:.2f}')
        # The default keeps the last baseline instead of raising StopIteration
        # once the final group has been printed.
        bsl = next(baselines, bsl)




In [4]:
# Comparison of CL, CL+CT, CL+Co and CL+CT+Co training for the 4x4 and 2x2 settings.
four_by_four_runs = [
    (12, 'ml-3', 'zero-shot'),
    (12, 'rel', 'zero-shot'),
    (12, 'ml-unrel-12', 'zero-shot'),
    (12, 'all', 'zero-shot'),
    (12, 'all', 'zero-shot-400'),
]
print(gen_table(four_by_four_runs, simple_baseline))

# 2x2 setting: keep the first four of the six values of each of the 20 groups,
# one row per kept position, and score every row relative to the first one.
mat = np.reshape(np_results('zero-shot-two-by-two', 'var'), (20, 6))[:, :4].T
mat = np.mean((mat - mat[0]) / (two_two_baseline - mat[0]), axis=1)
print(mat*100)

[[38.75 49.3  69.05  0.  ]
 [37.38 53.13 77.34 12.62]
 [36.57 47.84 68.95 -3.47]
 [46.25 56.62 84.09 32.4 ]
 [49.5  57.54 84.36 36.86]]
[ 0.    7.01  0.28 18.11]


In [10]:
# 4x4 setting: table for the task / language variants of zero-shot training.
task_lang_runs = [
    (12, 'ml-3', 'zero-shot'),
    (12, 'all', 'zero-shot-task'),
    (12, 'all', 'zero-shot-lang'),
    (12, 'all', 'zero-shot-task-lang-no-global'),
    (12, 'all', 'zero-shot-task-lang'),
]
tabl = gen_table(task_lang_runs, simple_baseline)

print(tabl)

# 2x2 setting: compare the first and the last value of each of the 20 groups,
# scored relative to the first one.
mat = np.reshape(np_results('zero-shot-two-by-two', 'var'), (20, 6))[:, [0, -1]].T
mat = np.mean((mat - mat[0]) / (two_two_baseline - mat[0]), axis=1)
print(mat*100)
                      

[[38.75 49.3  69.05  0.  ]
 [53.19 55.35 86.4  38.7 ]
 [50.7  56.32 78.09 28.17]
 [57.1  56.38 85.14 42.14]
 [57.87 57.55 85.95 44.89]]
[ 0.   13.82]


In [12]:
# 4x4 setting: table for the embedding variants of zero-shot training.
emb_runs = [
    (12, 'ml-3', 'zero-shot'),
    (12, 'all', 'zero-shot-task-emb'),
    (12, 'all', 'zero-shot-lang-emb'),
    (12, 'all', 'zero-shot-embs'),
]
tabl = gen_table(emb_runs, simple_baseline)

print(tabl)

# 2x2 setting: compare the first and the second-to-last value of each of the 20
# groups, scored relative to the first one.
mat = np.reshape(np_results('zero-shot-two-by-two', 'var'), (20, 6))[:, [0, -2]].T
mat = np.mean((mat - mat[0]) / (two_two_baseline - mat[0]), axis=1)
print(mat*100)


[[38.75 49.3  69.05  0.  ]
 [57.79 56.58 84.84 42.51]
 [50.56 56.6  85.32 37.21]
 [58.45 57.54 84.88 44.08]]
[ 0.   25.33]


In [14]:
# 4x4 setting: table for adversarial zero-shot training.
adversarial_runs = [
    (12, 'ml-3', 'zero-shot'),
    (12, 'all', 'zero-shot-adversarial'),
]
tabl = gen_table(adversarial_runs, simple_baseline)

print(tabl)

# 2x2 setting: first value of each of the 20 groups stacked on top of the
# adversarial results, scored relative to the first row.
two_by_two_first = np.reshape(np_results('zero-shot-two-by-two', 'var'), (20, 6))[:, 0]
two_by_two_adv = np_results('zero-shot-two-by-two-adversarial', 'var')
mat = np.vstack([two_by_two_first, two_by_two_adv])
mat = np.mean((mat - mat[0]) / (two_two_baseline - mat[0]), axis=1)
print(mat*100)


[[38.75 49.3  69.05  0.  ]
 [59.03 57.26 84.79 44.13]]
[ 0.  35.5]


In [16]:
# Every entry must be an (ignored, type, name) triple because gen_table unpacks
# each one; a bare `...` placeholder in this list raises a TypeError there.
tabl = gen_table([
    (12, 'ml-3', 'zero-shot'),
    (12, 'all', 'zero-shot-task-lang'),
    (12, 'all', 'zero-shot-embs-400'),
], simple_baseline)

print(tabl)

[[38.75 49.3  69.05  0.  ]
 [57.87 57.55 85.95 44.89]
 [60.12 57.26 85.42 45.61]]


In [28]:
# Rotated and character-level variants, compared against plain zero-shot training.
rotated_char_runs = [
    (12, 'ml-3', 'zero-shot'),
    (12, 'all', 'zero-shot'),
    (12, 'ml-3', 'zero-shot-rotated'),
    (12, 'all', 'zero-shot-rotated'),
    (12, 'all', 'zero-shot-task-lang-rotated'),
    (12, 'all', 'zero-shot-embs-rotated'),
    (12, 'all', 'zero-shot-rotated-adversarial'),
    (12, 'ml-3', 'zero-shot-char-level'),
    (12, 'all', 'zero-shot-char-level'),
    (12, 'all', 'zero-shot-task-lang-char-level'),
    (12, 'all', 'zero-shot-embs-char-level'),
    (12, 'all', 'zero-shot-char-level-adversarial'),
]
tabl = gen_table(rotated_char_runs, simple_baseline)

# Emit every table row as a LaTeX row: '&'-prefixed cells ending in '\\'.
for row in tabl:
    cells = ''.join(f' &  {num:.2f}' for num in row)
    print(cells + ' \\\\')

 &  38.75 &  49.30 &  69.05 &  0.00 \\
 &  46.25 &  56.62 &  84.09 &  32.40 \\
 &  2.55 &  0.77 &  13.78 &  -174.87 \\
 &  29.59 &  21.11 &  63.90 &  -57.02 \\
 &  48.63 &  36.95 &  77.15 &  -2.42 \\
 &  44.05 &  20.76 &  63.20 &  -45.16 \\
 &  47.97 &  27.05 &  66.73 &  -27.54 \\
 &  15.07 &  32.00 &  41.62 &  -89.29 \\
 &  40.44 &  38.74 &  70.75 &  -21.02 \\
 &  53.04 &  37.48 &  80.27 &  5.43 \\
 &  48.50 &  37.04 &  73.66 &  -11.23 \\
 &  53.61 &  44.95 &  75.86 &  8.75 \\


In [29]:
# Limited-task and limited-language variants, compared against plain zero-shot training.
limited_runs = [
    (12, 'ml-3', 'zero-shot'),
    (12, 'all', 'zero-shot'),
    (12, 'all', 'zero-shot-limited-task-200'),
    (12, 'all', 'zero-shot-task-lang-limited-task-200'),
    (12, 'all', 'zero-shot-embs-limited-task-200'),
    (12, 'all', 'zero-shot-adversarial-limited-task-200'),
    (12, 'all', 'zero-shot-limited-lang-200'),
    (12, 'all', 'zero-shot-task-lang-limited-lang-200'),
    (12, 'all', 'zero-shot-embs-limited-lang-200'),
    (12, 'all', 'zero-shot-adversarial-limited-lang-200'),
]
tabl = gen_table(limited_runs, simple_baseline)

# Emit every table row as a LaTeX row: '&'-prefixed cells ending in '\\'.
for row in tabl:
    cells = ''.join(f' &  {num:.2f}' for num in row)
    print(cells + ' \\\\')

[('deepnet5', 501), ('deepnet2070', 608), ('deepnet6-1', 36), ('deepnet6-2', 24)]
 &  38.75 &  49.30 &  69.05 &  0.00 \\
 &  46.25 &  56.62 &  84.09 &  32.40 \\
 &  43.20 &  51.10 &  81.59 &  19.60 \\
 &  47.37 &  50.73 &  82.17 &  21.77 \\
 &  50.76 &  51.22 &  81.46 &  25.27 \\
 &  52.93 &  50.86 &  80.87 &  25.23 \\
 &  45.89 &  48.37 &  81.61 &  17.56 \\
 &  48.60 &  46.92 &  81.55 &  16.70 \\
 &  51.95 &  47.54 &  80.71 &  20.27 \\
 &  55.46 &  54.32 &  84.12 &  36.74 \\


In [None]:
# Pairs of experiments listed next to their '-again' counterparts.
test = [
    (12, 'all', 'zero-shot-task-lang-ortho-50-again'),
    (12, 'all', 'zero-shot-task-lang-ortho-50'),
    (12, 'rel', 'zero-shot-rel-again'),
    (12, 'rel', 'zero-shot'),
    (12, 'all', 'zero-shot-adv-again'),
    (12, 'all', 'zero-shot-adversarial'),
]

# One row of metric values per experiment.
mat = np.vstack([
    np.array(np_results(run_name, run_type)) for _, run_type, run_name in test
])

# Score relative to the first row with 100 as the upper reference. Unlike
# gen_table, the result is left as a fraction (not multiplied by 100).
aer = np.mean((mat - mat[0]) / (100 - mat[0]), axis=1)

# Columns: mean of metric columns 0-3, 4-7, 8 onwards, then the relative score.
agr = np.vstack([
    mat[:, :4].mean(axis=1),
    mat[:, 4:8].mean(axis=1),
    mat[:, 8:].mean(axis=1),
    aer,
]).T
print(agr)