In [17]:
import os
from os import path
import glob
import pandas as pd
import json
from collections import defaultdict

In [18]:
perf_dir = "/share/data/speech/shtoshni/research/events/models/perf/"
slurm_id = "5708281"

files = sorted(glob.glob(path.join(perf_dir, slurm_id + "*")), key=lambda x: int(path.splitext(path.basename(x))[0].split('_')[1]))
print(len(files))

24


In [19]:
# Parse every performance file into a dict, preserving the order of `files`.
model_dict_list = []
for perf_file in files:
    # `with` closes the handle deterministically instead of leaking one
    # open file per result until garbage collection.
    with open(perf_file) as perf_fh:
        model_dict_list.append(json.load(perf_fh))

In [20]:
def determine_varying_attributes(model_dict_list, ignore_attribs=['model_dir', 'train', 'test', 'dev',
                                                                  'slurm_id', 'best_model_dir', 'data_dir']):
    """Return the attributes that take more than one distinct value across runs.

    Args:
        model_dict_list: iterable of dicts, one per run, mapping attribute
            name to value.
        ignore_attribs: attribute names that are never reported, even if
            their values differ between runs.

    Returns:
        List of attribute names with at least two distinct values, in the
        order the attributes were first encountered.
    """
    attrib_to_vals = defaultdict(set)
    for model_dict in model_dict_list:
        for attrib, val in model_dict.items():
            if attrib in ignore_attribs:
                continue
            try:
                attrib_to_vals[attrib].add(val)
            except TypeError:
                # Lists/dicts are unhashable and cannot be stored in a set;
                # compare them through a canonical JSON rendering instead.
                # The tuple tag keeps the key distinct from a plain string
                # value that happens to look like JSON.
                canonical = json.dumps(val, sort_keys=True, default=repr)
                attrib_to_vals[attrib].add(('__unhashable__', canonical))

    return [attrib for attrib, vals in attrib_to_vals.items() if len(vals) > 1]

In [21]:
# Metric names used as table columns; their lower-cased forms are the keys
# looked up in each run's 'dev' results.
perf_attribs = ['Entity', 'Event', 'Joint']
# Attributes whose values differ between the loaded runs.
varying_attribs = determine_varying_attributes(model_dict_list)

In [22]:
# Collect one record per run and build the frame in a single call.
# `DataFrame.append` was removed in pandas 2.0, and appending row by row
# re-copies the whole frame on every iteration.
perf_records = []
for model_dict in model_dict_list:
    # Varying attributes identify the run in the table.
    perf_dict = {attrib: model_dict[attrib] for attrib in varying_attribs}

    dev_perf = model_dict['dev']
    for perf_attrib in perf_attribs:
        metric_key = perf_attrib.lower()
        if metric_key in dev_perf:
            perf_dict[perf_attrib] = round(dev_perf[metric_key]['fscore'], 1)
        else:
            # Metric not reported for this run; show a placeholder.
            perf_dict[perf_attrib] = '-'

    perf_records.append(perf_dict)

# Passing `columns` fixes the column order and keeps the columns present
# even when there are no records.
perf_df = pd.DataFrame(perf_records, columns=(varying_attribs + perf_attribs))

In [23]:
# Print the performance table rendered as a Markdown table.
print(perf_df.to_markdown()) 


|    | model_size   | proc_strategy   | focus_group   |   ft_lr |   Entity | Event   | Joint   |
|---:|:-------------|:----------------|:--------------|--------:|---------:|:--------|:--------|
|  0 | base         | duplicate       | joint         |   2e-05 |     73.3 | 53.0    | 64.6    |
|  1 | base         | duplicate       | joint         |   3e-05 |     73.7 | 56.4    | 66.1    |
|  2 | base         | duplicate       | joint         |   5e-05 |     73.6 | 56.7    | 65.9    |
|  3 | base         | default         | joint         |   2e-05 |     72.7 | 55.2    | 64.7    |
|  4 | base         | default         | joint         |   3e-05 |     73.3 | 54.7    | 64.8    |
|  5 | base         | default         | joint         |   5e-05 |     74   | 55.3    | 65.5    |
|  6 | large        | duplicate       | joint         |   2e-05 |     74.5 | 59.4    | 67.1    |
|  7 | large        | duplicate       | joint         |   3e-05 |     72.1 | 61.2    | 65.8    |
|  8 | large        | duplicat

In [24]:
# Print the performance table as CSV text (index included as the first column).
print(perf_df.to_csv()) 


,model_size,proc_strategy,focus_group,ft_lr,Entity,Event,Joint
0,base,duplicate,joint,2e-05,73.3,53.0,64.6
1,base,duplicate,joint,3e-05,73.7,56.4,66.1
2,base,duplicate,joint,5e-05,73.6,56.7,65.9
3,base,default,joint,2e-05,72.7,55.2,64.7
4,base,default,joint,3e-05,73.3,54.7,64.8
5,base,default,joint,5e-05,74.0,55.3,65.5
6,large,duplicate,joint,2e-05,74.5,59.4,67.1
7,large,duplicate,joint,3e-05,72.1,61.2,65.8
8,large,duplicate,joint,5e-05,72.6,53.5,64.1
9,large,default,joint,2e-05,73.3,61.5,67.0
10,large,default,joint,3e-05,74.5,58.9,66.9
11,large,default,joint,5e-05,74.1,55.8,65.8
12,base,duplicate,entity,2e-05,72.7,-,-
13,base,duplicate,entity,3e-05,74.1,-,-
14,base,duplicate,entity,5e-05,72.9,-,-
15,base,default,entity,2e-05,73.3,-,-
16,base,default,entity,3e-05,73.2,-,-
17,base,default,entity,5e-05,71.3,-,-
18,large,duplicate,entity,2e-05,73.9,-,-
19,large,duplicate,entity,3e-05,73.3,-,-
20,large,duplicate,entity,5e-05,73.4,-,-
21,large,default,entity,2e-05,73.6,-,-
22,large,default,