In [17]:
import os
from os import path
import glob
import pandas as pd
import json
from collections import defaultdict

In [18]:
# Directory holding the per-run performance files, and the SLURM job to summarise.
perf_dir = "/share/data/speech/shtoshni/research/events/models/perf/"
slurm_id = "5704576"


def _run_index(file_path):
    """Return, as an int, the second underscore-separated field of the file's stem."""
    stem, _ext = path.splitext(path.basename(file_path))
    return int(stem.split('_')[1])


# Every file whose name starts with the job id, ordered numerically (not
# lexicographically) by the index embedded in its name.
files = glob.glob(path.join(perf_dir, slurm_id + "*"))
files.sort(key=_run_index)
print(len(files))

20


In [19]:
# Parse each performance file into a dict, keeping the order of `files`.
model_dict_list = []
for perf_path in files:
    # The context manager closes the handle as soon as the file is parsed,
    # instead of leaving one open descriptor per file for the garbage collector.
    with open(perf_path) as perf_file:
        model_dict_list.append(json.load(perf_file))

In [20]:
def determine_varying_attributes(model_dict_list, ignore_attribs=['model_dir', 'train', 'test', 'dev',
                                                                  'slurm_id', 'best_model_dir']):
    """Return the attributes that take more than one distinct value across the dicts.

    Args:
        model_dict_list: iterable of dicts mapping attribute name to value.
        ignore_attribs: attribute names to skip entirely. The default list is
            only read here, never mutated.

    Returns:
        List of attribute names, in the order each was first encountered, for
        which at least two unequal values were seen. An attribute that is absent
        from some dicts but constant wherever it is present is not reported.
    """
    # Distinct values are tracked by equality in a small list rather than in a
    # set, so unhashable values (lists, nested dicts) do not raise TypeError.
    distinct_vals = {}
    for model_dict in model_dict_list:
        for attrib, val in model_dict.items():
            if attrib in ignore_attribs:
                continue
            seen = distinct_vals.setdefault(attrib, [])
            # Two unequal values already prove the attribute varies, so there is
            # no need to remember more than that.
            if len(seen) < 2 and val not in seen:
                seen.append(val)

    return [attrib for attrib, seen in distinct_vals.items() if len(seen) > 1]

In [21]:
# Score categories that become the trailing columns of the summary table.
perf_attribs = ['Entity', 'Event', 'Joint']
# Attributes whose value differs between runs; these become the leading columns.
varying_attribs = determine_varying_attributes(model_dict_list)

In [22]:
# Collect one record per run and build the frame in a single call.
# DataFrame.append was deprecated in pandas 1.4 and removed in 2.0, and calling it
# per row re-copied the whole frame on every iteration.
perf_records = []
for model_dict in model_dict_list:
    # Attributes that distinguish this run. A varying attribute can be absent
    # from an individual run, in which case the cell is left empty (None).
    perf_dict = {attrib: model_dict.get(attrib) for attrib in varying_attribs}

    dev_perf = model_dict['dev']
    for perf_attrib in perf_attribs:
        # Column headers are capitalised; the keys under 'dev' are lower-case.
        perf_key = perf_attrib.lower()
        if perf_key in dev_perf:
            perf_dict[perf_attrib] = round(dev_perf[perf_key]['fscore'], 1)
        else:
            # No score for this category in this run: show a placeholder.
            perf_dict[perf_attrib] = '-'

    perf_records.append(perf_dict)

# Passing `columns` fixes the column order and keeps the header even when there
# are no records.
perf_df = pd.DataFrame(perf_records, columns=(varying_attribs + perf_attribs))

In [23]:
# Emit the summary as a Markdown table.
markdown_table = perf_df.to_markdown()
print(markdown_table)


|    | model_size   | focus_group   |   mlp_size | use_srl   | Entity   | Event   | Joint   |
|---:|:-------------|:--------------|-----------:|:----------|:---------|:--------|:--------|
|  0 | base         | joint         |       1024 |           | 73.4     | 54.7    | 65.5    |
|  1 | base         | joint         |       1024 | event     | 73.9     | 53.4    | 64.9    |
|  2 | base         | joint         |       1024 | joint     | 72.3     | 54.4    | 64.0    |
|  3 | base         | joint         |       2048 |           | 74.0     | 54.0    | 65.0    |
|  4 | base         | joint         |       2048 | event     | 73.2     | 56.6    | 65.4    |
|  5 | base         | joint         |       2048 | joint     | 71.7     | 56.4    | 64.6    |
|  6 | large        | joint         |       1024 |           | 74.4     | 52.5    | 65.3    |
|  7 | large        | joint         |       1024 | event     | 74.5     | 56.0    | 66.2    |
|  8 | large        | joint         |       1024 | joint    

In [24]:
# Emit the same summary as CSV text (index included as the first column).
csv_text = perf_df.to_csv()
print(csv_text)


,model_size,focus_group,mlp_size,use_srl,Entity,Event,Joint
0,base,joint,1024,,73.4,54.7,65.5
1,base,joint,1024,event,73.9,53.4,64.9
2,base,joint,1024,joint,72.3,54.4,64.0
3,base,joint,2048,,74.0,54.0,65.0
4,base,joint,2048,event,73.2,56.6,65.4
5,base,joint,2048,joint,71.7,56.4,64.6
6,large,joint,1024,,74.4,52.5,65.3
7,large,joint,1024,event,74.5,56.0,66.2
8,large,joint,1024,joint,72.4,54.6,64.2
9,large,joint,2048,,74.3,55.5,66.0
10,large,joint,2048,event,72.2,56.6,64.9
11,large,joint,2048,joint,72.9,56.9,65.9
12,base,entity,1024,,73.6,-,-
13,base,entity,2048,,74.2,-,-
14,large,entity,1024,,72.3,-,-
15,large,entity,2048,,72.7,-,-
16,base,event,1024,,-,50.6,-
17,base,event,2048,,-,51.8,-
18,large,event,1024,,-,53.7,-
19,large,event,2048,,-,55.3,-

