In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os
import yaml
import json

In [2]:
# `IPython.display` is the public import location; importing `display` from
# `IPython.core.display` is deprecated.
from IPython.display import display, HTML

# Inject a CSS rule that forces elements with class `container` to 100% width,
# so wide tables are not squeezed into a narrow column.
# NOTE(review): this selector presumably targets the classic Notebook layout —
# confirm it has an effect in the frontend you use.
display(HTML("<style>.container { width:100% !important; }</style>"))

In [3]:
def load_yaml(path):
    """Read the YAML file at ``path`` and return the parsed content.

    The file is opened explicitly as UTF-8 so that parsing does not depend on
    the platform's default locale encoding.

    Args:
        path: Path of the YAML file to read.

    Returns:
        Whatever ``yaml.safe_load`` produces for the file's content.
    """
    with open(path, 'r', encoding='utf-8') as f:
        return yaml.safe_load(f)
def load_json(path):
    """Read the JSON file at ``path`` and return the decoded content.

    The file is opened explicitly as UTF-8 so that decoding does not depend on
    the platform's default locale encoding.

    Args:
        path: Path of the JSON file to read.

    Returns:
        The Python object produced by ``json.load`` for the file's content.
    """
    with open(path, 'r', encoding='utf-8') as f:
        return json.load(f)

In [4]:
# Directory that is scanned for runs: each entry inside it is treated as a
# sub-directory expected to hold a `config.yaml` and a `metrics.json`.
# The trailing slash matters because paths are built by string concatenation.
model_path = '../models/'

In [5]:
# Parallel lists: config_list[i] and metric_list[i] come from the same run directory.
config_list = []
metric_list = []

# os.listdir returns entries in arbitrary order; sorting keeps the row order of
# the resulting tables reproducible across machines and re-runs.
for run_name in sorted(os.listdir(model_path)):
    config_path = os.path.join(model_path, run_name, 'config.yaml')
    metrics_path = os.path.join(model_path, run_name, 'metrics.json')
    # Skip entries that do not contain both files.
    if not (os.path.exists(config_path) and os.path.exists(metrics_path)):
        continue
    config_list.append(load_yaml(config_path))
    metric_list.append(load_json(metrics_path))

In [6]:
def expand_config_item(config_item):
    """Flatten one nested run configuration into a single-level dict.

    The result contains, in this order: the input padding, the training batch
    size, the number of beams, every entry of ``model_args``, the optimizer
    type, every entry of the optimizer's ``opt_args``, and the scheduler's
    warm-up step and epoch counts. If two sources use the same key, the later
    one overwrites the earlier value.

    Args:
        config_item: Nested configuration mapping for a single run.

    Returns:
        A new flat dict; ``config_item`` itself is not modified.
    """
    flat = {
        'input_padding': config_item['data']['input_padding'],
        'train_batch_size': config_item['loader']['train']['batch_size'],
        'n_beams': config_item['metrics']['n_beams'],
    }

    # Model arguments are copied over under their own names.
    flat.update(config_item['model_args'].items())

    flat['optimizer'] = config_item['optimizer']['type']
    # Optimizer arguments are copied over under their own names as well.
    flat.update(config_item['optimizer']['opt_args'].items())

    scheduler_cfg = config_item['scheduler']
    flat['n_warmup_steps'] = scheduler_cfg['n_warmup_steps']
    flat['nb_epochs'] = scheduler_cfg['nb_epochs']
    return flat

In [7]:
# Flatten every loaded config into a new list. `config_list` is deliberately
# left untouched so this cell can be re-run without failing on already
# flattened items.
expanded_configs = [expand_config_item(c) for c in config_list]
config_full_df = pd.DataFrame(expanded_configs)

# Columns holding a single value across all runs carry no information for the
# comparison. NaN is counted as a value (dropna=False) so that a setting
# present in only some runs is not mistaken for a constant and dropped.
drop_cols = [
    col for col in config_full_df.columns
    if config_full_df[col].nunique(dropna=False) == 1
]
config_df = config_full_df.drop(columns=drop_cols)

In [8]:
def get_nice_metrics(metric_list):
    """Turn a list of per-run metric dicts into two DataFrames.

    Args:
        metric_list: List of metric dicts, one per run. Each must provide a
            ``'correct'`` mapping and a ``'beam_accuracy'`` mapping whose
            ``'correct_product'`` and ``'correct_factorization'`` entries are
            keyed by the strings ``'0'`` and ``'1'``.

    Returns:
        A two-element list ``[correct, beam]``: ``correct`` has one row per
        run built from the ``'correct'`` mapping; ``beam`` has one row per run
        with the columns ``beam_0_product``, ``beam_0_factorization``,
        ``beam_1_product`` and ``beam_1_factorization``.
    """
    def _beam_row(entry):
        # Pick out the product/factorization values for beams '0' and '1'.
        accuracy = entry['beam_accuracy']
        return {
            'beam_0_product': accuracy['correct_product']['0'],
            'beam_0_factorization': accuracy['correct_factorization']['0'],
            'beam_1_product': accuracy['correct_product']['1'],
            'beam_1_factorization': accuracy['correct_factorization']['1'],
        }

    correct_rows = [entry['correct'] for entry in metric_list]
    correct = pd.DataFrame(correct_rows)

    beam_rows = [_beam_row(entry) for entry in metric_list]
    beam = pd.DataFrame(beam_rows)

    return [correct, beam]

In [9]:
# metric_list[0]

In [10]:
# Place the config columns and the metric tables side by side; with axis=1
# the frames are aligned on their row index.
merged = pd.concat([config_df, *get_nice_metrics(metric_list)], axis=1)

In [11]:
# Display the combined table of run settings and metrics.
merged

Unnamed: 0,input_padding,shared_embeddings,correct_product,correct_factorization,beam_0_product,beam_0_factorization,beam_1_product,beam_1_factorization
0,pad,True,0.999923,0.283706,0.999846,0.215718,0.272797,0.067988
1,pad,False,0.999923,0.280633,0.99977,0.215487,0.262887,0.065146
2,zeros,True,0.99977,0.279097,0.999616,0.215564,0.25774,0.063532
3,zeros,False,0.999846,0.277406,0.999616,0.215487,0.25459,0.061919
