In [1]:
import wandb
import matplotlib.pyplot as plt
import scipy
import numpy as np
import json
from IPython.display import clear_output

plt.rcParams["axes.grid"] = False

import sys
sys.path.append('..')

In [2]:
# Language passed to the project through the simulated command line below.
language = 'french'

# Simulate command-line arguments for project code that reads sys.argv
# (presumably the IocContainer created in the next cell -- confirm).
# NOTE(review): "--device cuda" is a single element in position 0. Parsers that
# follow the usual convention treat sys.argv[0] as the program name and skip
# it, in which case this entry has no effect -- confirm whether that is intended.
# NOTE(review): "..\\data" uses a Windows path separator -- confirm this
# notebook is only meant to run on Windows.
sys.argv = [
"--device cuda",
"--data-folder", "..\\data",
"--seed", "13",
"--configuration", "char-to-char-encoder-decoder",
"--language", language,
"--challenge", "post-ocr-correction"]

In [3]:
# Configure container:
# The import resolves through the parent directory added to sys.path in the
# first cell.
# NOTE(review): IocContainer is project code not shown here; it presumably
# picks up the sys.argv values assigned in the previous cell -- confirm.
from dependency_injection.ioc_container import IocContainer

container = IocContainer()

In [4]:
# W&B entity and project; combined as '<entity>/<project>' when querying runs.
entity = 'eval-historical-texts'
project = 'post-ocr-correction'

In [5]:
def get_wandb_runs():
    """Query W&B for the finished runs of `entity`/`project`.

    Only runs whose `createdAt` is greater than '20200622000000' and whose
    state is 'finished' are requested.

    Returns:
        The runs collection returned by `wandb.Api().runs(...)`.
    """
    # To request every run that is not currently running instead, replace the
    # 'state' filter with {'$ne': 'running'}.
    run_filters = {
        'createdAt': {'$gt': '20200622000000'},
        'state': 'finished',
    }

    return wandb.Api().runs(path=f'{entity}/{project}', filters=run_filters)

In [6]:
# Fetch the runs once up front; the count is printed as a quick sanity check.
runs = get_wandb_runs()
print(f'Loaded {len(runs)} runs')

Loaded 85 runs


In [7]:
def get_summary_value(run, key: str):
    """Look up `key` in a run's summary.

    Args:
        run: Object exposing a mapping-like `summary` attribute.
        key: Name of the summary entry to read.

    Returns:
        The stored value, or None when the summary has no such key.
    """
    summary = run.summary
    return summary[key] if key in summary.keys() else None

pretrained_model_type_key = 'pretrained_model_type'
fine_tune_key = 'fine_tune'

# One-off migration: derive the summarised `pretrained_model_type`,
# `fine_tune` and `fine_tune_type` config entries from the raw boolean flags
# stored on each run, then push the change back to W&B.
for i, run in enumerate(runs):
    # Runs that already carry the derived key need no update.
    if pretrained_model_type_key in run.config.keys():
        continue

    clear_output(wait=True)
    print(f'Updating... ({i}/{len(runs)})')

    # A run without any configuration has no flags to derive values from.
    if len(run.config.keys()) == 0:
        continue

    # Direct indexing is deliberate: a run with a non-empty config that lacks
    # one of these flags raises KeyError instead of being labelled silently.
    include_pretrained_model = run.config['include_pretrained_model']
    include_fasttext_model = run.config['include_fasttext_model']
    fine_tune_pretrained = run.config['fine_tune_pretrained']
    fine_tune_after_convergence = run.config['fine_tune_after_convergence']

    pretr_type = 'none'
    if include_pretrained_model and include_fasttext_model:
        pretr_type = 'both'
    elif include_pretrained_model:
        pretr_type = 'bert'
    elif include_fasttext_model:
        pretr_type = 'fast-text'

    fine_tune = (fine_tune_pretrained or fine_tune_after_convergence)
    # 'from-start' takes precedence when both fine-tuning flags are set.
    fine_tune_type = 'none'
    if fine_tune_pretrained:
        fine_tune_type = 'from-start'
    elif fine_tune_after_convergence:
        fine_tune_type = 'after-convergence'

    print('Updating configuration...')
    run.config.update({
        pretrained_model_type_key: pretr_type,
        fine_tune_key: fine_tune,
        'fine_tune_type': fine_tune_type
    })

    # Persist the modified configuration to W&B.
    run.update()

Updating... (16/85)
Updating configuration...


In [8]:
# Query W&B again after the configuration update loop in the previous cell,
# presumably so the run objects used below reflect the newly written config
# entries -- confirm.
updated_runs = get_wandb_runs()
print(f'Loaded {len(updated_runs)} runs')


Loaded 85 runs


In [9]:
# Runtime (in minutes) of every run, grouped first by model variant
# ("<pretrained type> [<fine-tune type>]") and then by the run's language.
convergence_speed_per_lang_and_pretr_type = {}

for run in updated_runs:
    # Direct indexing: a run missing any of these entries raises KeyError.
    pretr_type = run.config[pretrained_model_type_key]
    fine_tune_type = run.config['fine_tune_type']
    model_key = f'{pretr_type} [{fine_tune_type}]'

    # Create the per-model bucket before reading the remaining fields.
    speeds_by_language = convergence_speed_per_lang_and_pretr_type.setdefault(model_key, {})

    # Named `run_language` so the loop does not overwrite the notebook-level
    # `language` setting that was used to build sys.argv.
    run_language = run.config['language']
    runtimes = speeds_by_language.setdefault(run_language, [])

    # '_runtime' is divided by 60 to obtain minutes (assumes the summary value
    # is in seconds -- confirm).
    runtime_minutes = float(run.summary['_runtime']) / 60

    runtimes.append(runtime_minutes)

In [10]:
# Print one row per model variant: its label, then the mean runtime for each
# language (rounded to 3 decimals), with every value followed by ' & '.
languages = ['english', 'french', 'german']

for model_key, speeds_by_language in convergence_speed_per_lang_and_pretr_type.items():
    row_cells = [
        f'{round(np.mean(speeds_by_language[lang]), 3)} & '
        for lang in languages
    ]
    print(f'{model_key} | ' + ''.join(row_cells))

bert [after-convergence] | 990.149 & 1277.19 & 2639.135 & 
both [after-convergence] | 1087.442 & 1473.291 & 2393.629 & 
bert [from-start] | 1218.266 & 1292.461 & 3153.515 & 
both [none] | 659.277 & 1037.03 & 1873.405 & 
bert [none] | 710.549 & 1070.933 & 1593.157 & 
both [from-start] | 1300.52 & 1529.498 & 3064.282 & 
none [none] | 475.882 & 662.735 & 1560.595 & 
fast-text [none] | 475.494 & 735.93 & 1481.484 & 
