In [111]:
import os
from os import path
import glob
import pandas as pd
import json
from collections import defaultdict

In [112]:
perf_dir = "/share/data/speech/shtoshni/research/litbank_coref/models/perf/"
slurm_ids = ["6108893", "6108890"]


def _perf_file_index(file_path):
    """Return, as an int, the second '_'-separated token of the file's extension-less base name."""
    stem, _ext = path.splitext(path.basename(file_path))
    return int(stem.split('_')[1])


# For each id in slurm_ids, collect the files in perf_dir whose name starts with that id,
# ordered numerically by the index embedded in the file name.
files = []
for slurm_id in slurm_ids:
    id_pattern = path.join(perf_dir, slurm_id + "*")
    files += sorted(glob.glob(id_pattern), key=_perf_file_index)

print(len(files))

12


In [113]:
# Parse every collected perf file as JSON, keeping the same order as `files`.
# The `with` block closes each handle as soon as it has been read instead of
# leaving open file objects around until the garbage collector reclaims them.
model_dict_list = []
for file in files:
    with open(file) as perf_file:
        model_dict_list.append(json.load(perf_file))

In [123]:
def _hashable_key(val):
    """Return `val` itself when hashable, else a hashable stand-in built from its JSON form."""
    try:
        hash(val)
        return val
    except TypeError:
        # Lists/dicts (legal JSON values) cannot be stored in a set. Serialize them with
        # sorted keys so equal containers map to the same key, and tag the result so it
        # cannot collide with a genuine string value.
        return ('__unhashable__', json.dumps(val, sort_keys=True, default=repr))


def determine_varying_attributes(model_dict_list, ignore_attribs=('train', 'test', 'dev',
                                                                  'slurm_id', 'best_model_dir', 'data_dir')):
    """Return the attribute names that take more than one distinct value across the dicts.

    Args:
        model_dict_list: iterable of dicts mapping attribute name -> value.
        ignore_attribs: container of attribute names to skip entirely.

    Returns:
        List of attribute names with at least two distinct values, in the order the
        attributes were first encountered. An attribute that is missing from some dicts
        is judged only on the values that are present.
    """
    attrib_to_vals = defaultdict(set)
    for model_dict in model_dict_list:
        for attrib, val in model_dict.items():
            if attrib in ignore_attribs:
                continue
            attrib_to_vals[attrib].add(_hashable_key(val))

    return [attrib for attrib, vals in attrib_to_vals.items() if len(vals) > 1]

In [124]:
# Metric names looked up under each run's 'test' results when building the table.
perf_attribs = ["MUC", "Bcub", "CEAFE"]

# Attributes whose value differs between the loaded runs.
varying_attribs = determine_varying_attributes(model_dict_list)
print(varying_attribs)

['model_dir', 'sample_invalid', 'label_smoothing_wt']


In [125]:
# Build one record per run and construct the frame in a single call.
# DataFrame.append was removed in pandas 2.0, and calling it inside a loop
# copied the whole frame on every iteration.
perf_records = []
for model_dict in model_dict_list:
    # Start from the attributes that distinguish this run from the others.
    perf_dict = {attrib: model_dict[attrib] for attrib in varying_attribs}

    test_perf = model_dict['test']
    for perf_attrib in perf_attribs:
        if perf_attrib in test_perf:
            attrib_dict = test_perf[perf_attrib]
            # Stored as a (recall, precision, fscore) tuple in a single cell.
            perf_dict[perf_attrib] = (attrib_dict['recall'], attrib_dict['precision'], attrib_dict['fscore'])
        else:
            # Placeholder when the metric is absent from this run's test results.
            perf_dict[perf_attrib] = '-'

    perf_dict['devf'] = model_dict['dev']['fscore']
    perf_dict['testf'] = test_perf['fscore']
    perf_records.append(perf_dict)

# `columns=` fixes the column order and still yields the expected (empty) frame
# when no runs were loaded.
perf_df = pd.DataFrame(perf_records, columns=(varying_attribs + ['devf', 'testf'] + perf_attribs))

In [126]:
# Shorten the two long column names so the table is narrower when displayed.
short_column_names = {
    "label_smoothing_wt": "ls_wt",
    "sample_invalid": "samp",
}
perf_df = perf_df.rename(columns=short_column_names)
perf_df

Unnamed: 0,model_dir,samp,ls_wt,devf,testf,MUC,Bcub,CEAFE
0,/share/data/speech/shtoshni/research/litbank_c...,0.25,0.1,78.7,78.2,"(84.8, 84.1, 84.5)","(77.2, 76.3, 76.8)","(73.1, 73.8, 73.4)"
1,/share/data/speech/shtoshni/research/litbank_c...,0.25,0.01,79.0,79.0,"(86.4, 83.1, 84.7)","(80.0, 75.3, 77.5)","(76.6, 72.7, 74.6)"
2,/share/data/speech/shtoshni/research/litbank_c...,0.25,0.0,78.2,77.9,"(86.9, 81.5, 84.1)","(80.1, 73.3, 76.5)","(77.4, 69.5, 73.2)"
3,/share/data/speech/shtoshni/research/litbank_c...,0.5,0.1,78.7,78.3,"(84.7, 84.5, 84.6)","(77.4, 76.3, 76.8)","(71.9, 75.2, 73.5)"
4,/share/data/speech/shtoshni/research/litbank_c...,0.5,0.01,79.5,79.3,"(85.8, 84.5, 85.1)","(78.7, 76.9, 77.8)","(75.3, 74.8, 75.1)"
5,/share/data/speech/shtoshni/research/litbank_c...,0.5,0.0,79.5,79.3,"(86.5, 83.7, 85.1)","(79.9, 75.8, 77.8)","(76.5, 73.5, 75.0)"
6,/share/data/speech/shtoshni/research/litbank_c...,0.75,0.1,78.7,78.1,"(83.9, 85.3, 84.6)","(75.9, 77.3, 76.6)","(70.9, 75.7, 73.2)"
7,/share/data/speech/shtoshni/research/litbank_c...,0.75,0.01,79.6,79.6,"(85.5, 85.1, 85.3)","(78.7, 77.3, 78.0)","(74.2, 76.5, 75.3)"
8,/share/data/speech/shtoshni/research/litbank_c...,0.75,0.0,79.5,79.4,"(86.2, 84.0, 85.1)","(79.4, 76.0, 77.7)","(75.6, 75.0, 75.3)"
9,/share/data/speech/shtoshni/research/litbank_c...,1.0,0.1,78.0,78.0,"(82.8, 86.4, 84.5)","(74.5, 79.0, 76.7)","(69.7, 76.6, 73.0)"


In [127]:
# Select the run with the highest dev F-score.
# idxmax() returns an index *label*, so the row is looked up with label-based .loc;
# positional .iloc only matches while the index happens to be 0..n-1.
idx = perf_df['devf'].idxmax()
perf_df.loc[idx]

model_dir    /share/data/speech/shtoshni/research/litbank_c...
samp                                                      0.75
ls_wt                                                     0.01
devf                                                      79.6
testf                                                     79.6
MUC                                         (85.5, 85.1, 85.3)
Bcub                                        (78.7, 77.3, 78.0)
CEAFE                                       (74.2, 76.5, 75.3)
Name: 7, dtype: object

In [128]:
# `idx` comes from idxmax(), i.e. it is an index label: use a single label-based
# .loc lookup (row, column) instead of positional .iloc followed by chained indexing.
perf_df.loc[idx, 'model_dir']

'/share/data/speech/shtoshni/research/litbank_coref/models/coref_ee8cfc9648968abf06ff64cd4c7fa4a8'