In [25]:
import os
from os import path
import glob
import pandas as pd
import json
from collections import defaultdict

In [26]:
perf_dir = "/share/data/speech/shtoshni/research/events/models/perf/"
slurm_id = "5708785"


def _second_field_as_int(perf_file):
    """Return the second '_'-separated field of the file stem as an integer."""
    stem, _ext = path.splitext(path.basename(perf_file))
    return int(stem.split('_')[1])


# Gather every file under perf_dir whose name starts with slurm_id, ordered
# numerically (not lexicographically) by the second '_'-separated field.
files = glob.glob(path.join(perf_dir, slurm_id + "*"))
files.sort(key=_second_field_as_int)
print(len(files))

16


In [27]:
# Parse each perf file as JSON into one dict per file, in the order of `files`.
model_dict_list = []
for file in files:
    # The context manager closes each handle right away instead of leaving
    # one open descriptor per file for the garbage collector.
    with open(file) as perf_file:
        model_dict_list.append(json.load(perf_file))

In [28]:
def _hashable_view(val):
    """Return ``val`` itself when hashable, else an equivalent hashable stand-in.

    Lists/tuples become tuples, dicts become frozensets of (key, value) pairs
    and sets become frozensets, applied recursively, so that equal containers
    map to equal stand-ins.
    """
    try:
        hash(val)
        return val
    except TypeError:
        pass

    if isinstance(val, dict):
        return frozenset((key, _hashable_view(item)) for key, item in val.items())
    if isinstance(val, (list, tuple)):
        return tuple(_hashable_view(item) for item in val)
    if isinstance(val, (set, frozenset)):
        return frozenset(_hashable_view(item) for item in val)
    # Unknown unhashable type: fall back to its repr so it can still be compared.
    return repr(val)


def determine_varying_attributes(model_dict_list, ignore_attribs=('model_dir', 'train', 'test', 'dev',
                                                                  'slurm_id', 'best_model_dir', 'data_dir')):
    """Return the attributes that take more than one distinct value across the dicts.

    Args:
        model_dict_list: iterable of dicts mapping attribute name to value.
        ignore_attribs: attribute names to skip entirely.

    Returns:
        List of attribute names with at least two distinct values, in the
        order in which each attribute is first encountered.
    """
    attrib_to_vals = defaultdict(set)
    for model_dict in model_dict_list:
        for attrib, val in model_dict.items():
            if attrib in ignore_attribs:
                continue
            # Values such as lists or dicts are unhashable and would make
            # set.add raise TypeError; store a hashable stand-in instead.
            attrib_to_vals[attrib].add(_hashable_view(val))

    return [attrib for attrib, vals in attrib_to_vals.items() if len(vals) > 1]

In [29]:
# Metric names used as table columns; their lower-cased forms are looked up
# in each run's 'dev' results when the table is built.
perf_attribs = ['Entity', 'Event', 'Joint']
# Attributes whose values differ between the loaded runs.
varying_attribs = determine_varying_attributes(model_dict_list)

In [30]:
# Build one record per run and construct the frame once at the end.
# DataFrame.append was removed in pandas 2.0 and growing a frame row by row
# copies the whole frame on every iteration.
perf_records = []
for model_dict in model_dict_list:
    perf_dict = {attrib: model_dict[attrib] for attrib in varying_attribs}

    dev_perf = model_dict['dev']
    for perf_attrib in perf_attribs:
        perf_key = perf_attrib.lower()
        if perf_key in dev_perf:
            # Dev F-score rounded to one decimal place.
            perf_dict[perf_attrib] = round(dev_perf[perf_key]['fscore'], 1)
        else:
            # Metric not present in this run's dev results.
            perf_dict[perf_attrib] = '-'

    perf_records.append(perf_dict)

# Passing `columns` fixes the column order and keeps the columns present
# even when there are no records.
perf_df = pd.DataFrame(perf_records, columns=(varying_attribs + perf_attribs))

In [31]:
# Render the performance table as a Markdown table and print it.
perf_markdown = perf_df.to_markdown()
print(perf_markdown)


|    | model_size   | proc_strategy   | focus_group   |   ft_lr |   Entity | Event   | Joint   |
|---:|:-------------|:----------------|:--------------|--------:|---------:|:--------|:--------|
|  0 | base         | duplicate       | joint         |   2e-05 |     73.4 | 55.5    | 65.2    |
|  1 | base         | duplicate       | joint         |   3e-05 |     72.6 | 56.1    | 65.1    |
|  2 | base         | default         | joint         |   2e-05 |     75.1 | 54.6    | 65.9    |
|  3 | base         | default         | joint         |   3e-05 |     73.6 | 55.7    | 65.7    |
|  4 | large        | duplicate       | joint         |   2e-05 |     74.2 | 58.7    | 66.6    |
|  5 | large        | duplicate       | joint         |   3e-05 |     73.7 | 60.3    | 66.6    |
|  6 | large        | default         | joint         |   2e-05 |     73.5 | 58.7    | 66.2    |
|  7 | large        | default         | joint         |   3e-05 |     75.3 | 56.0    | 66.6    |
|  8 | base         | duplicat

In [32]:
# Render the performance table as CSV text (index column included) and print it.
perf_csv = perf_df.to_csv()
print(perf_csv)


,model_size,proc_strategy,focus_group,ft_lr,Entity,Event,Joint
0,base,duplicate,joint,2e-05,73.4,55.5,65.2
1,base,duplicate,joint,3e-05,72.6,56.1,65.1
2,base,default,joint,2e-05,75.1,54.6,65.9
3,base,default,joint,3e-05,73.6,55.7,65.7
4,large,duplicate,joint,2e-05,74.2,58.7,66.6
5,large,duplicate,joint,3e-05,73.7,60.3,66.6
6,large,default,joint,2e-05,73.5,58.7,66.2
7,large,default,joint,3e-05,75.3,56.0,66.6
8,base,duplicate,entity,2e-05,72.4,-,-
9,base,duplicate,entity,3e-05,73.1,-,-
10,base,default,entity,2e-05,72.7,-,-
11,base,default,entity,3e-05,74.7,-,-
12,large,duplicate,entity,2e-05,73.7,-,-
13,large,duplicate,entity,3e-05,75.3,-,-
14,large,default,entity,2e-05,73.9,-,-
15,large,default,entity,3e-05,75.6,-,-

