In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib
import numpy as np
import os
import yaml
import shutil
from collections import defaultdict

matplotlib.style.use('seaborn-notebook')
matplotlib.pyplot.rcParams['figure.figsize'] = (16, 10)
matplotlib.pyplot.rcParams['font.family'] = 'sans-serif'

%matplotlib inline
%load_ext autoreload
%autoreload 2

# Task1

In [2]:
%time
exp_dir = "../exps/task1/two_headed_best/"
dev_outputs = defaultdict(list)

def load_exps(exp_dir):
    experiments = []
    for subdir in os.scandir(exp_dir):
        dev_fn = os.path.join(subdir.path, "dev.out")
        if not os.path.exists(dev_fn):
            print("No dev.out: {}".format(subdir.path))
            continue
        config_fn = os.path.join(subdir.path, "config.yaml")
        with open(config_fn) as f:
            config = yaml.load(f)
        config['config_language'] = config['language']
        config['language'] = '-'.join(config['dev_file'].split('/')[-1].split('-')[:-1])
        config['train_size'] = config['train_file'].split('/')[-1].split('-')[-1]
        exp_d = config
        dev_output = pd.read_table(dev_fn, names=["lemma", "inflected", "tags"])
        if len(dev_output['inflected']) == 0:
            print(subdir.path)
        
        dev_outputs[(config['language'], config['train_size'])].append(dev_output)
        
        dev_acc_fn = os.path.join(subdir.path, "dev.word_accuracy")
        if not os.path.exists(dev_acc_fn):
            print("Dev accuracy file does not exist in dir: {}".format(subdir.path))
        else:
            with open(dev_acc_fn) as f:
                exp_d['dev_acc'] = float(f.read())
        experiments.append(exp_d)
    experiments = pd.DataFrame(experiments)
    return experiments
        
experiments = load_exps(exp_dir)
sum(len(v) for v in dev_outputs.values())

CPU times: user 0 ns, sys: 0 ns, total: 0 ns
Wall time: 2.86 µs
No dev.out: ../exps/task1/two_headed_best/3630


4364

In [3]:
gold = {}

for fn in os.scandir("../data/conll2018/task1/all"):
    if not fn.path.endswith('-dev'):
        continue
    language = '-'.join(fn.name.split('-')[:-1])
    with open(fn.path) as f:
        inflected = [l.strip().split('\t')[1] for l in f]
        gold[language] = pd.Series(inflected)

for fn in os.scandir("../data/conll2018/task1/surprise"):
    if not fn.path.endswith('-dev'):
        continue
    language = '-'.join(fn.name.split('-')[:-1])
    with open(fn.path) as f:
        inflected = [l.strip().split('\t')[1] for l in f]
        gold[language] = pd.Series(inflected)

In [4]:
merged = {}
for (lang, size), outputs in dev_outputs.items():
    output = pd.concat(outputs, axis=1)
    merged[(lang, size)] = pd.concat(outputs, axis=1)['inflected']
    
merged[('latin', 'high')].head()

Unnamed: 0,inflected,inflected.1,inflected.2,inflected.3,inflected.4,inflected.5,inflected.6,inflected.7,inflected.8,inflected.9,inflected.10,inflected.11,inflected.12,inflected.13,inflected.14
0,ovillī,ovillī,ovillī,ovillī,ovillī,ovillī,ovillī,ovillī,ovillī,obellī,ovillī,ovillī,ovillī,ovillī,ovillī
1,trȳgōnibus,trȳgōnibus,trygōnibus,trygōnibus,trȳgōnibus,trȳgōnibus,trȳgōnibus,trȳginibus,trȳgōnibus,torgīs,trȳgōnibus,trygonibus,trȳgīs,trȳgōnibus,trygōnibus
2,largīvissētis,largīvissētis,largīvissētis,largīvissētis,largīvissētis,largissētis,largīvissētis,largīvissētis,largīvissētis,largīvissētis,largīvissētis,largīvissētis,largīvissētis,largīvissētis,largīvissētis
3,compensāverit,compensāverit,compenserit,compensāverit,compensāverit,compenserit,compenserit,compensāverit,compensāverit,compensāverit,compensāverit,compensāverit,compensāverit,compensāverit,compenserit
4,comperīrer,comperiārer,comperīrer,comperiārer,comperīrer,comperīrer,comperierer,comperīrer,comperiēbar,comperīrer,comperīrer,comperiārer,comperīrer,comperīrer,comperīrer


In [5]:
results = []
lang = 'french'

for (lang, size), outputs in merged.items():
    d = {'language': lang, 'train_size': size}
    for i in range(outputs.shape[1]):
        #print(lang, size, outputs.iloc[:, i].shape, gold[lang].shape)
        acc = (outputs.iloc[:, i] == gold[lang]).mean()
        d[i] = acc
    d['majority'] = (outputs.mode(axis=1).iloc[:, 0] == gold[lang]).mean()
    results.append(d)

In [6]:
dev_results = pd.DataFrame(results)

In [7]:
dev_results['majority_wins'] = dev_results[dev_results.columns[2:-1]].max(axis=1) < dev_results.majority

In [8]:
dev_results[dev_results['train_size']=='low'].majority_wins.value_counts()
dev_results.majority_wins.value_counts()

False    226
True      65
Name: majority_wins, dtype: int64

# Submission 1

Model with the highest dev acc, no majority voting

In [9]:
output_dir = "../submissions/task1/01/task1"
for (lang, train_size), idx in experiments.groupby(['language', 'train_size']).dev_acc.idxmax().iteritems():
    exp = experiments.loc[idx]
    test_output = os.path.join(exp.experiment_dir, "test.out")
    submission_path = os.path.join(output_dir, "{}-{}-out".format(lang, train_size))
    shutil.copy2(test_output, submission_path)

# Submission 2

Simple majority vote

In [10]:
output_dir = "../submissions/task1/02/task1"
for (lang, train_size), group in experiments.groupby(['language', 'train_size']).groups.items():
    all_outputs = None
    for i, idx in enumerate(group):
        exp = experiments.loc[idx]
        test_fn = os.path.join(exp.experiment_dir, "test.out")
        test_output = pd.read_table(test_fn, names=['lemma', i, 'tags'])
        if all_outputs is None:
            all_outputs = test_output[[i]]
        else:
            all_outputs = pd.concat((all_outputs, test_output[[i]]), axis=1)
    majority = pd.concat((test_output[['lemma']], all_outputs.mode(axis=1)[0], test_output[['tags']]), axis=1)
    output_fn = os.path.join(output_dir, "{}-{}-out".format(lang, train_size))
    majority.to_csv(output_fn, index=False, header=False, sep="\t", na_rep='nan')

# Submission 3

batch size = 16

In [11]:
%time
exp_dir = "../exps/task1/two_headed_best/"

def load_exps(exp_dir):
    experiments = []
    for subdir in os.scandir(exp_dir):
        dev_fn = os.path.join(subdir.path, "dev.batch16.out")
        if not os.path.exists(dev_fn):
            print("No dev.out: {}".format(subdir.path))
            continue
        config_fn = os.path.join(subdir.path, "config.yaml")
        with open(config_fn) as f:
            config = yaml.load(f)
        config['config_language'] = config['language']
        config['language'] = '-'.join(config['dev_file'].split('/')[-1].split('-')[:-1])
        config['train_size'] = config['train_file'].split('/')[-1].split('-')[-1]
        exp_d = config
        dev_acc_fn = os.path.join(subdir.path, "dev.batch16.word_accuracy")
        if not os.path.exists(dev_acc_fn):
            print("Dev accuracy file does not exist in dir: {}".format(subdir.path))
        else:
            with open(dev_acc_fn) as f:
                exp_d['dev_acc'] = float(f.read())
        experiments.append(exp_d)
    experiments = pd.DataFrame(experiments)
    return experiments
        
experiments = load_exps(exp_dir)
sum(len(v) for v in dev_outputs.values())

CPU times: user 0 ns, sys: 0 ns, total: 0 ns
Wall time: 2.86 µs


4364

In [12]:
output_dir = "../submissions/task1/03/task1"
for (lang, train_size), idx in experiments.groupby(['language', 'train_size']).dev_acc.idxmax().iteritems():
    exp = experiments.loc[idx]
    test_output = os.path.join(exp.experiment_dir, "test.out")
    submission_path = os.path.join(output_dir, "{}-{}-out".format(lang, train_size))
    shutil.copy2(test_output, submission_path)

# Task2 - Track 1

In [13]:
exp_dirs = ["../exps/task2/track1/final", "../exps/task2/track1/default"]
task2_track1 = []

for exp_dir in exp_dirs:
    for subdir in os.scandir(exp_dir):
        config_fn = os.path.join(subdir.path, "config.yaml")
        with open(config_fn) as f:
            config = yaml.load(f)
        exp_d = config
        exp_d['language'] = config['train_file'].split('/')[-1].split('-')[0]
        exp_d['train_size'] = config['train_file'].split('/')[-1].split('-')[2]
        dev_acc_fn = os.path.join(subdir.path, "dev.word_accuracy")
        if not os.path.exists(dev_acc_fn):
            print("Dev accuracy file does not exist in {}".format(subdir.path))
        else:
            with open(dev_acc_fn) as f:
                exp_d['dev_acc'] = float(f.read())
        test_out_fn = os.path.join(subdir.path, "test.out")
        if not os.path.exists(test_out_fn):
            print("Test output file does not exist in {}".format(subdir.path))
        else:
            exp_d['test_output_path'] = test_out_fn
            
        task2_track1.append(exp_d)
        
task2_track1 = pd.DataFrame(task2_track1)

In [14]:
task2_track1.groupby(['language', 'train_size']).dev_acc.max().to_frame()

Unnamed: 0_level_0,Unnamed: 1_level_0,dev_acc
language,train_size,Unnamed: 2_level_1
de,high,0.741742
de,low,0.27027
de,medium,0.624625
en,high,0.732558
en,low,0.545309
en,medium,0.610666
es,high,0.551146
es,low,0.229718
es,medium,0.394621
fi,high,0.46252


In [15]:
# for row in task2_track1[task2_track1.language != 'en'].iterrows():
#     expdir = row[1].experiment_dir
#     os.remove(os.path.join(expdir, 'dev.word_accuracy'))
#     os.remove(os.path.join(expdir, 'dev.out'))
#     os.remove(os.path.join(expdir, 'test.out'))

In [16]:
# for row in task2_track2[task2_track2.language != 'en'].iterrows():
#     expdir = row[1].experiment_dir
#     os.remove(os.path.join(expdir, 'dev.word_accuracy'))
#     os.remove(os.path.join(expdir, 'dev.out'))
#     os.remove(os.path.join(expdir, 'test.out'))

## Submission - there is only one

choose the one with the highest dev acc

In [17]:
submission_dir = "../submissions/task2/01/task2"
for (language, train_size), idx in task2_track1.groupby(['language', 'train_size']).dev_acc.idxmax().iteritems():
    experiment = task2_track1.loc[idx]
    target_name = "{}-1-{}-out".format(language, train_size)
    target_fn = os.path.join(submission_dir, target_name)
    shutil.copy2(experiment.test_output_path, target_fn)

In [18]:
exp_dirs = ["../exps/task2/track2/default/"]
task2_track2 = []

for exp_dir in exp_dirs:
    for subdir in os.scandir(exp_dir):
        config_fn = os.path.join(subdir.path, "config.yaml")
        with open(config_fn) as f:
            config = yaml.load(f)
        exp_d = config
        exp_d['language'] = config['train_file'].split('/')[-1].split('-')[0]
        exp_d['train_size'] = config['train_file'].split('/')[-1].split('-')[2]
        dev_acc_fn = os.path.join(subdir.path, "dev.word_accuracy")
        if not os.path.exists(dev_acc_fn):
            print("Dev accuracy file does not exist in {}".format(subdir.path))
        else:
            with open(dev_acc_fn) as f:
                exp_d['dev_acc'] = float(f.read())
        test_out_fn = os.path.join(subdir.path, "test.out")
        if not os.path.exists(test_out_fn):
            print("Test output file does not exist in {}".format(subdir.path))
        else:
            exp_d['test_output_path'] = test_out_fn
            
        task2_track2.append(exp_d)
        
task2_track2 = pd.DataFrame(task2_track2)

In [19]:
task2_track2.groupby(['language', 'train_size']).dev_acc.max().to_frame()

Unnamed: 0_level_0,Unnamed: 1_level_0,dev_acc
language,train_size,Unnamed: 2_level_1
de,high,0.594595
de,low,0.264264
de,medium,0.531532
en,high,0.652366
en,low,0.511628
en,medium,0.563352
es,high,0.380511
es,low,0.24515
es,medium,0.284832
fi,high,0.287613


In [20]:
submission_dir = "../submissions/task2/01/task2"
for (language, train_size), idx in task2_track2.groupby(['language', 'train_size']).dev_acc.idxmax().iteritems():
    experiment = task2_track2.loc[idx]
    target_name = "{}-2-{}-out".format(language, train_size)
    target_fn = os.path.join(submission_dir, target_name)
    shutil.copy2(experiment.test_output_path, target_fn)

# Sanity checks

## Task1

1. Is every submission file the same length as the input?
2. Does every line have 3 fields?
3. Do the inputs match?

In [21]:
test_dir = "../data/conll2018/task1/all"
submission_dir = "../submissions/task1"

for submission_no in os.listdir(submission_dir):
    for subm_file in os.scandir(os.path.join(submission_dir, submission_no, 'task1')):
        language = '-'.join(subm_file.name.split('-')[:-2])
        test_fn = os.path.join(test_dir, '{}-covered-test'.format(language))
        if not os.path.exists(test_fn):
            test_fn = os.path.join(test_dir, "..", "surprise", '{}-covered-test'.format(language))
        with open(test_fn) as input_f, open(subm_file) as output_f:
            for inp_line in input_f:
                outp_line = next(output_f)
                infd = inp_line.strip().split('\t')
                outfd = outp_line.strip().split('\t')
                try:
                    assert len(outfd) == 3
                    assert infd[0] == outfd[0]
                    assert infd[-1] == outfd[-1]
                except AssertionError:
                    print(submission_no, subm_file.path, infd, outfd)

## Task2

In [22]:
test_dir = "../data/conll2018/task2/testsets/"
submission_dir = "../submissions/task2"

for submission_no in os.listdir(submission_dir):
    for subm_file in os.scandir(os.path.join(submission_dir, submission_no, 'task2')):
        language = subm_file.name.split('-')[0]
        track = subm_file.name.split('-')[1]
        test_fn = os.path.join(test_dir, '{}-track{}-covered'.format(language, track))
        with open(test_fn) as input_f, open(subm_file) as output_f:
            for inp_line in input_f:
                outp_line = next(output_f)
                if not inp_line.strip():
                    assert not outp_line.strip()
                    continue
                infd = inp_line.strip().split('\t')
                outfd = outp_line.strip().split('\t')
                if track == '1':
                    try:
                        assert len(outfd) == 3
                        assert infd[2] == outfd[2]
                    except AssertionError:
                        print(inp_line)
                        print(outp_line)
                        print(submission_no, subm_file.path, infd, outfd)
                elif track == '2':
                    pass
                else:
                    raise ValueError("unknown track: {}".format(track))

# Create submission archives

## Task1

In [23]:
%%bash

for sub in $( ls ../submissions/task1); do
    tar_name="BME-HAS-$sub-1.tgz"
    cd ../submissions/task1/$sub
    tar czf $tar_name task1
    mv $tar_name ../../tgz
    cd -
done

/mnt/permanent/home/judit/projects/sigmorphon2018/notebooks
/mnt/permanent/home/judit/projects/sigmorphon2018/notebooks
/mnt/permanent/home/judit/projects/sigmorphon2018/notebooks


In [24]:
%%bash

for sub in $( ls ../submissions/task2); do
    tar_name="BME-HAS-$sub-2.tgz"
    cd ../submissions/task2/$sub
    tar czf $tar_name task2
    mv $tar_name ../../tgz
    cd -
done

/mnt/permanent/home/judit/projects/sigmorphon2018/notebooks
