In [121]:
import os
from os import path
import glob
import pandas as pd
import json
from collections import defaultdict

In [122]:
# Directory holding the perf files, and the id prefix used to select them.
perf_dir = "/share/data/speech/shtoshni/research/events/models/perf/"
slurm_id = "5691508"

# Collect every file whose name starts with the id, then order the matches
# numerically by the second "_"-separated field of the extension-less name.
matching_paths = glob.glob(path.join(perf_dir, slurm_id + "*"))
files = sorted(
    matching_paths,
    key=lambda fname: int(path.splitext(path.basename(fname))[0].split('_')[1]),
)
print(len(files))

16


In [123]:
# Parse each perf file as JSON, keeping the results in the same order as `files`.
model_dict_list = []
for file in files:
    # The context manager closes the handle as soon as the file is parsed,
    # instead of leaving it open until garbage collection.
    with open(file) as perf_file:
        model_dict_list.append(json.load(perf_file))

In [124]:
def determine_varying_attributes(model_dict_list, ignore_attribs=['model_dir', 'train', 'test', 'dev',
                                                                  'slurm_id', 'best_model_dir']):
    """Return the attribute names whose values differ across the given dicts.

    Args:
        model_dict_list: iterable of dicts mapping attribute name -> value.
        ignore_attribs: attribute names that are never reported. The default
            list is only read, never mutated.

    Returns:
        A list of attribute names that were seen with more than one distinct
        value, in the order each attribute was first encountered. An attribute
        that is absent from some dicts contributes no value for those dicts,
        so absence alone does not make it "varying".
    """
    attrib_to_vals = defaultdict(set)
    for model_dict in model_dict_list:
        for attrib, val in model_dict.items():
            if attrib in ignore_attribs:
                continue
            try:
                attrib_to_vals[attrib].add(val)
            except TypeError:
                # Unhashable values (e.g. lists or dicts parsed from JSON) cannot
                # go into a set directly; store a canonical JSON rendering instead.
                # The tuple tag keeps the stand-in from colliding with a real
                # string value that happens to look the same.
                canonical = json.dumps(val, sort_keys=True, default=repr)
                attrib_to_vals[attrib].add(('__unhashable__', canonical))

    return [attrib for attrib, vals in attrib_to_vals.items() if len(vals) > 1]

In [125]:
# Score groups to report; their lower-cased names are the keys looked up
# under each loaded dict's 'dev' entry when the table is built.
perf_attribs = ['Entity', 'Event', 'Joint']
# Attributes that take more than one value across the loaded dicts
# (the function's default ignore list applies).
varying_attribs = determine_varying_attributes(model_dict_list=model_dict_list)

In [130]:
# Build one record per loaded dict: the varying attributes followed by the
# dev F-scores (rounded to one decimal), then create the frame in one go.
# Rows are collected in a list because DataFrame.append was removed in
# pandas 2.0 and re-copied the whole frame on every iteration.
perf_records = []
for model_dict in model_dict_list:
    perf_dict = {attrib: model_dict[attrib] for attrib in varying_attribs}

    dev_scores = model_dict['dev']
    for perf_attrib in perf_attribs:
        score_key = perf_attrib.lower()
        if score_key in dev_scores:
            perf_dict[perf_attrib] = round(dev_scores[score_key]['fscore'], 1)
        else:
            # No score of this kind in the 'dev' entry: show a placeholder.
            perf_dict[perf_attrib] = '-'

    perf_records.append(perf_dict)

# dtype=object because the score columns can mix floats with the '-' placeholder;
# passing `columns` fixes the column order and keeps the header for an empty list.
perf_df = pd.DataFrame(perf_records, columns=(varying_attribs + perf_attribs), dtype=object)

In [131]:
# Emit the performance table as Markdown text.
print(perf_df.to_markdown()) 


|    | model_size   | focus_group   | use_srl   | label_smoothing_other   | Entity   | Event   | Joint   |
|---:|:-------------|:--------------|:----------|:------------------------|:---------|:--------|:--------|
|  0 | base         | joint         |           | True                    | 71.8     | 57.2    | 64.8    |
|  1 | base         | joint         | joint     | True                    | 71.2     | 57.4    | 64.5    |
|  2 | base         | joint         |           | False                   | 74.9     | 52.6    | 65.4    |
|  3 | base         | joint         | joint     | False                   | 72.7     | 55.0    | 64.9    |
|  4 | large        | joint         |           | True                    | 72.8     | 56.4    | 65.1    |
|  5 | large        | joint         | joint     | True                    | 73.2     | 56.9    | 65.2    |
|  6 | large        | joint         |           | False                   | 74.3     | 60.2    | 66.7    |
|  7 | large        | joint         |

In [128]:
# Emit the performance table as CSV text (the row index is the first, unnamed column).
# NOTE(review): the output recorded for this cell shows two-decimal scores while the
# table-building cell rounds to one decimal, and this cell's execution count (128)
# precedes that cell's (130) — the recorded output looks stale; re-run to refresh.
print(perf_df.to_csv()) 


,model_size,focus_group,use_srl,label_smoothing_other,Entity,Event,Joint
0,base,joint,,True,71.78,57.24,64.81
1,base,joint,joint,True,71.19,57.4,64.5
2,base,joint,,False,74.87,52.59,65.42
3,base,joint,joint,False,72.72,55.03,64.9
4,large,joint,,True,72.77,56.39,65.06
5,large,joint,joint,True,73.15,56.92,65.21
6,large,joint,,False,74.33,60.22,66.71
7,large,joint,joint,False,75.33,55.36,66.88
8,base,entity,,True,74.63,-,-
9,base,entity,,False,72.83,-,-
10,large,entity,,True,73.18,-,-
11,large,entity,,False,72.77,-,-
12,base,event,,True,-,51.29,-
13,base,event,,False,-,51.05,-
14,large,event,,True,-,52.4,-
15,large,event,,False,-,53.31,-

