In [1]:
# Work from the results directory: the glob patterns used further down are relative paths.
%cd ../results

/home/tk/repos/erc/results


In [2]:
from glob import glob
import os
import json
import numpy as np
import pprint

def parse_path(path):
    """Extract the run settings encoded in the relative path of a results file.

    The path is read positionally: component 0 is the dataset, component 1 is
    the RoBERTa variant and component 3 is the run directory.  The run
    directory name is split on ``-``; its first six tokens form the timestamp
    and tokens 7, 9, 11, 13 and 15 hold speaker mode, number of past
    utterances, number of future utterances, batch size and seed.
    (Presumably the tokens in between are the field names -- they are not
    checked here.)

    Args:
        path: Relative path of a results file; the file name must contain
            ``'val'`` or ``'test'``.

    Returns:
        dict with the keys ``DATASET``, ``SPLIT``, ``roberta``,
        ``speaker_mode``, ``num_past_utterances``, ``num_future_utterances``,
        ``bsz``, ``seed`` and ``timestamp``.

    Raises:
        ValueError: if the file name names neither split, or a numeric token
            is not an integer.
        AssertionError: if the speaker mode is not 'upper', 'None' or 'title'.
        IndexError: if the path or run directory has too few components.
    """
    filename = os.path.basename(path)
    # 'val' is tested first, exactly as before, so a name containing both
    # substrings is still classified as validation.
    if 'val' in filename:
        SPLIT = 'val'
    elif 'test' in filename:
        SPLIT = 'test'
    else:
        raise ValueError(
            f"cannot tell the split ('val' or 'test') from file name "
            f"{filename!r} (path: {path!r})")

    # glob() returns paths with the platform's separator; normalise it so the
    # positional lookup below does not depend on the operating system.
    splits = path.replace(os.sep, '/').split('/')
    DATASET = splits[0]
    roberta = splits[1]
    seed_details = splits[3].split('-')

    timestamp = '-'.join(seed_details[:6])

    speaker_mode = seed_details[7]
    assert speaker_mode in ['upper', 'None', 'title'], \
        f"unexpected speaker_mode {speaker_mode!r} in {path!r}"
    num_past_utterances = int(seed_details[9])
    num_future_utterances = int(seed_details[11])
    bsz = int(seed_details[13])
    seed = int(seed_details[15])

    return {'DATASET': DATASET,
            'SPLIT': SPLIT,
            'roberta': roberta,
            'speaker_mode': speaker_mode,
            'num_past_utterances': num_past_utterances,
            'num_future_utterances': num_future_utterances,
            'bsz': bsz,
            'seed': seed,
            'timestamp': timestamp}

def read_json(path):
    """Load the JSON document stored at ``path`` and return the parsed object.

    Args:
        path: Location of the JSON file.

    Returns:
        Whatever ``json.load`` produces for the file's content.

    Raises:
        OSError: if the file cannot be opened.
        json.JSONDecodeError: if the content is not valid JSON.
    """
    # Decode as UTF-8 explicitly: open() otherwise falls back to the locale's
    # preferred encoding, which differs between machines.
    with open(path, 'r', encoding='utf-8') as stream:
        return json.load(stream)

def merge_two_dicts(a, b):
    """Return a new dict holding the entries of ``a`` overlaid with those of ``b``.

    Neither argument is modified.  When a key occurs in both, the value from
    ``b`` is kept.
    """
    merged = dict(a.items())
    merged.update(b.items())
    return merged

def return_by_timestamp(seed_results):
    """Group result records by their ``'timestamp'`` entry.

    Args:
        seed_results: Iterable of dicts, each with a ``'timestamp'`` key.

    Returns:
        dict mapping every timestamp to the list of records carrying it.
        Records keep their input order inside a group and groups appear in
        order of first occurrence.

    Raises:
        KeyError: if a record has no ``'timestamp'`` key.
    """
    # One pass over the input: no rescan per timestamp, and a one-shot
    # iterable is grouped correctly as well.
    grouped = {}
    for record in seed_results:
        grouped.setdefault(record['timestamp'], []).append(record)

    return grouped

def _mean_std_count(values):
    """Return ``(mean, standard deviation, count)`` of a list of numbers."""
    return (np.mean(values), np.std(values), len(values))


def return_metrics(seed_results):
    """Summarise validation and test metrics for every group of runs.

    Args:
        seed_results: dict mapping a timestamp to the list of result records
            of that group.  Each record needs a ``'SPLIT'`` entry ('val' or
            'test'), the five configuration fields listed below and the
            metrics of its split.

    Returns:
        dict sorted by key.  Each key is the group's configuration followed
        by its timestamp; each value is a pair ``(val_stats, test_stats)``
        whose entries map a metric name to ``(mean, std, count)`` over the
        records of that split.

    Raises:
        AssertionError: if a group is empty or its records disagree on a
            configuration field.
        KeyError: if a record lacks an expected entry.
    """
    config_fields = ['DATASET', 'roberta', 'speaker_mode',
                     'num_past_utterances', 'num_future_utterances']
    test_metrics = ['test_loss', 'test_f1_weighted', 'test_f1_micro', 'test_f1_macro']
    val_metrics = ['eval_loss', 'eval_f1_weighted', 'eval_f1_micro', 'eval_f1_macro']

    to_return = {}
    for timestamp, runs in seed_results.items():
        stats_val = [run for run in runs if run['SPLIT'] == 'val']
        stats_test = [run for run in runs if run['SPLIT'] == 'test']

        # The key below is built from the first record only, so every record
        # of the group has to share the same configuration.
        for field in config_fields:
            distinct = set(run[field] for run in runs)
            assert len(distinct) == 1, \
                f"runs with timestamp {timestamp} disagree on {field}: {distinct}"

        keyname = ', '.join(f"{field}: {runs[0][field]}" for field in config_fields)
        keyname += f', {str(timestamp)}'

        stats_test_ = {metric: _mean_std_count([run[metric] for run in stats_test])
                       for metric in test_metrics}
        stats_val_ = {metric: _mean_std_count([run[metric] for run in stats_val])
                      for metric in val_metrics}

        to_return[keyname] = (stats_val_, stats_test_)

    # Rebuild the dict in sorted key order so that printing it is stable.
    return {keyname: to_return[keyname] for keyname in sorted(to_return)}

def get_average(seed_results):
    """Average every collection of values stored under each timestamp.

    Args:
        seed_results: dict mapping a timestamp to a dict that maps a name to
            an iterable of numbers.

    Returns:
        dict with the same two levels of keys; each innermost value is the
        pair ``(mean, count)`` of the corresponding numbers.
    """
    to_return = {}
    for timestamp, metrics in seed_results.items():
        averaged = {}
        for key, val in metrics.items():
            # Materialise once: a second list(val) on a one-shot iterable
            # would be empty and report a count of 0.
            values = list(val)
            averaged[key] = (np.mean(values), len(values))
        to_return[timestamp] = averaged

    return to_return

In [3]:
# Find every final results file and keep only the MELD and IEMOCAP ones.
result_paths = [path for path in glob('*/*/final/*/*-results.json')
                if 'MELD' in path or 'IEMOCAP' in path]
# Combine the settings parsed from each path with the metrics stored in the file.
run_records = [merge_two_dicts(parse_path(path), read_json(path))
               for path in result_paths]
# Group the runs by timestamp, then summarise the metrics of every group.
seed_results = return_metrics(return_by_timestamp(run_records))
for key, val in seed_results.items():
    print(key)
    pprint.pprint(val)
    print()

DATASET: IEMOCAP, roberta: roberta-large, speaker_mode: None, num_past_utterances: 0, num_future_utterances: 0, 2021-05-09-11-21-35
({'eval_f1_macro': (0.5522587758713577, 0.005700503695160694, 5),
  'eval_f1_micro': (0.5687755102040817, 0.006505052020616037, 5),
  'eval_f1_weighted': (0.575727996274017, 0.005460606873912623, 5),
  'eval_loss': (1.2397764921188354, 0.06936472873560774, 5)},
 {'test_f1_macro': (0.5378345315121387, 0.006522106517741987, 5),
  'test_f1_micro': (0.5617755856966707, 0.0073301425142795786, 5),
  'test_f1_weighted': (0.5587721595281857, 0.006840271384498524, 5),
  'test_loss': (1.2600319862365723, 0.03722152681838191, 5)})

DATASET: IEMOCAP, roberta: roberta-large, speaker_mode: None, num_past_utterances: 1000, num_future_utterances: 1000, 2021-05-17-22-02-20
({'eval_f1_macro': (0.5918548396452185, 0.013224703460383593, 5),
  'eval_f1_micro': (0.6218367346938776, 0.006848147647168361, 5),
  'eval_f1_weighted': (0.6177202952197651, 0.008984988114688916, 5),
  

In [4]:
# Reload the individual run records (one dict per results file); the cells
# below query this flat list.
result_paths = [path for path in glob('*/*/final/*/*-results.json')
                if 'MELD' in path or 'IEMOCAP' in path]
seed_results = [merge_two_dicts(parse_path(path), read_json(path))
                for path in result_paths]

In [5]:
def get_by_filter(seed_results, DATASET, SPLIT, num_past_utterances, num_future_utterances, speaker_mode):
    """Select the records matching all given settings, ordered by weighted F1.

    Args:
        seed_results: Iterable of result dicts.
        DATASET, SPLIT, num_past_utterances, num_future_utterances,
        speaker_mode: Values a record must hold under the key of the same
            name to be selected.

    Returns:
        A new list of the matching records in ascending order of
        ``'test_f1_weighted'`` when ``SPLIT`` is ``'test'`` and of
        ``'eval_f1_weighted'`` otherwise, so the best record comes last.
    """
    wanted = (
        ('DATASET', DATASET),
        ('SPLIT', SPLIT),
        ('num_past_utterances', num_past_utterances),
        ('num_future_utterances', num_future_utterances),
        ('speaker_mode', speaker_mode),
    )
    matching = [run for run in seed_results
                if all(run[field] == value for field, value in wanted)]

    score_key = 'test_f1_weighted' if SPLIT == 'test' else 'eval_f1_weighted'
    matching.sort(key=lambda run: run[score_key])
    return matching

In [6]:
# MELD test run with the highest test_f1_weighted for num_past_utterances=0,
# num_future_utterances=0, speaker_mode='upper' (the list is sorted ascending, so [-1] is the best).
get_by_filter(seed_results, 'MELD', 'test', 0, 0, 'upper')[-1]

{'DATASET': 'MELD',
 'SPLIT': 'test',
 'roberta': 'roberta-large',
 'speaker_mode': 'upper',
 'num_past_utterances': 0,
 'num_future_utterances': 0,
 'bsz': 16,
 'seed': 3,
 'timestamp': '2021-05-09-09-46-30',
 'test_loss': 1.0628687143325806,
 'test_f1_weighted': 0.6427050027040432,
 'test_f1_micro': 0.6540229885057471,
 'test_f1_macro': 0.48005284601944886,
 'test_runtime': 5.8908,
 'test_samples_per_second': 443.067}

In [7]:
# IEMOCAP test run with the highest test_f1_weighted for num_past_utterances=0,
# num_future_utterances=0, speaker_mode='upper'.
get_by_filter(seed_results, 'IEMOCAP', 'test', 0, 0, 'upper')[-1]

{'DATASET': 'IEMOCAP',
 'SPLIT': 'test',
 'roberta': 'roberta-large',
 'speaker_mode': 'upper',
 'num_past_utterances': 0,
 'num_future_utterances': 0,
 'bsz': 16,
 'seed': 0,
 'timestamp': '2021-05-09-11-49-51',
 'test_loss': 1.3592785596847534,
 'test_f1_weighted': 0.5717553437280344,
 'test_f1_micro': 0.5752157829839704,
 'test_f1_macro': 0.5521338637986823,
 'test_runtime': 5.8949,
 'test_samples_per_second': 275.151}

In [8]:
# MELD test run with the highest test_f1_weighted for num_past_utterances=1000,
# num_future_utterances=0, speaker_mode='upper'.
get_by_filter(seed_results, 'MELD', 'test', 1000, 0, 'upper')[-1]

{'DATASET': 'MELD',
 'SPLIT': 'test',
 'roberta': 'roberta-large',
 'speaker_mode': 'upper',
 'num_past_utterances': 1000,
 'num_future_utterances': 0,
 'bsz': 4,
 'seed': 4,
 'timestamp': '2021-05-09-18-15-01',
 'test_loss': 1.3221737146377563,
 'test_f1_weighted': 0.6557133000420554,
 'test_f1_micro': 0.6647509578544061,
 'test_f1_macro': 0.5012291200656293,
 'test_runtime': 28.7962,
 'test_samples_per_second': 90.637}

In [9]:
# IEMOCAP test run with the highest test_f1_weighted for num_past_utterances=1000,
# num_future_utterances=0, speaker_mode='upper'.
get_by_filter(seed_results, 'IEMOCAP', 'test', 1000, 0, 'upper')[-1]

{'DATASET': 'IEMOCAP',
 'SPLIT': 'test',
 'roberta': 'roberta-large',
 'speaker_mode': 'upper',
 'num_past_utterances': 1000,
 'num_future_utterances': 0,
 'bsz': 4,
 'seed': 4,
 'timestamp': '2021-05-09-12-19-54',
 'test_loss': 1.578665018081665,
 'test_f1_weighted': 0.7119324785334752,
 'test_f1_micro': 0.7120838471023428,
 'test_f1_macro': 0.6983733181480981,
 'test_runtime': 44.1226,
 'test_samples_per_second': 36.761}

In [10]:
# MELD test run with the highest test_f1_weighted for num_past_utterances=0,
# num_future_utterances=1000, speaker_mode='upper'.
get_by_filter(seed_results, 'MELD', 'test', 0, 1000, 'upper')[-1]

{'DATASET': 'MELD',
 'SPLIT': 'test',
 'roberta': 'roberta-large',
 'speaker_mode': 'upper',
 'num_past_utterances': 0,
 'num_future_utterances': 1000,
 'bsz': 4,
 'seed': 2,
 'timestamp': '2021-05-10-09-49-06',
 'test_loss': 1.064085841178894,
 'test_f1_weighted': 0.6466501132599567,
 'test_f1_micro': 0.6628352490421456,
 'test_f1_macro': 0.46634103620119893,
 'test_runtime': 28.7181,
 'test_samples_per_second': 90.883}

In [11]:
# IEMOCAP test run with the highest test_f1_weighted for num_past_utterances=0,
# num_future_utterances=1000, speaker_mode='upper'.
get_by_filter(seed_results, 'IEMOCAP', 'test', 0, 1000, 'upper')[-1]

{'DATASET': 'IEMOCAP',
 'SPLIT': 'test',
 'roberta': 'roberta-large',
 'speaker_mode': 'upper',
 'num_past_utterances': 0,
 'num_future_utterances': 1000,
 'bsz': 4,
 'seed': 1,
 'timestamp': '2021-05-09-15-16-25',
 'test_loss': 1.7993016242980957,
 'test_f1_weighted': 0.673464603187326,
 'test_f1_micro': 0.6738594327990136,
 'test_f1_macro': 0.6561482679271721,
 'test_runtime': 43.9186,
 'test_samples_per_second': 36.932}

In [12]:
# MELD test run with the highest test_f1_weighted for num_past_utterances=1000,
# num_future_utterances=1000, speaker_mode='upper'.
get_by_filter(seed_results, 'MELD', 'test', 1000, 1000, 'upper')[-1]

{'DATASET': 'MELD',
 'SPLIT': 'test',
 'roberta': 'roberta-large',
 'speaker_mode': 'upper',
 'num_past_utterances': 1000,
 'num_future_utterances': 1000,
 'bsz': 4,
 'seed': 4,
 'timestamp': '2021-05-08-19-57-31',
 'test_loss': 1.4145874977111816,
 'test_f1_weighted': 0.6622864224519749,
 'test_f1_micro': 0.6674329501915709,
 'test_f1_macro': 0.49737579178046865,
 'test_runtime': 37.6047,
 'test_samples_per_second': 69.406}

In [13]:
# IEMOCAP test run with the highest test_f1_weighted for num_past_utterances=1000,
# num_future_utterances=1000, speaker_mode='upper'.
get_by_filter(seed_results, 'IEMOCAP', 'test', 1000, 1000, 'upper')[-1]

{'DATASET': 'IEMOCAP',
 'SPLIT': 'test',
 'roberta': 'roberta-large',
 'speaker_mode': 'upper',
 'num_past_utterances': 1000,
 'num_future_utterances': 1000,
 'bsz': 4,
 'seed': 2,
 'timestamp': '2021-05-08-16-15-19',
 'test_loss': 0.9692837595939636,
 'test_f1_weighted': 0.6920397733913108,
 'test_f1_micro': 0.6911220715166461,
 'test_f1_macro': 0.6808140109741574,
 'test_runtime': 47.1837,
 'test_samples_per_second': 34.376}

In [14]:
# MELD test run with the highest test_f1_weighted for num_past_utterances=1000,
# num_future_utterances=1000, speaker_mode='None' (the string, as parsed from the path).
get_by_filter(seed_results, 'MELD', 'test', 1000, 1000, 'None')[-1]

{'DATASET': 'MELD',
 'SPLIT': 'test',
 'roberta': 'roberta-large',
 'speaker_mode': 'None',
 'num_past_utterances': 1000,
 'num_future_utterances': 1000,
 'bsz': 4,
 'seed': 4,
 'timestamp': '2021-05-17-18-24-48',
 'test_loss': 1.2838237285614014,
 'test_f1_weighted': 0.6642043485642859,
 'test_f1_micro': 0.6739463601532567,
 'test_f1_macro': 0.501331044903872,
 'test_runtime': 29.0112,
 'test_samples_per_second': 89.965}

In [15]:
# IEMOCAP test run with the highest test_f1_weighted for num_past_utterances=1000,
# num_future_utterances=1000, speaker_mode='None' (the string, as parsed from the path).
get_by_filter(seed_results, 'IEMOCAP', 'test', 1000, 1000, 'None')[-1]

{'DATASET': 'IEMOCAP',
 'SPLIT': 'test',
 'roberta': 'roberta-large',
 'speaker_mode': 'None',
 'num_past_utterances': 1000,
 'num_future_utterances': 1000,
 'bsz': 4,
 'seed': 4,
 'timestamp': '2021-05-17-22-02-20',
 'test_loss': 0.951850950717926,
 'test_f1_weighted': 0.6565862745691114,
 'test_f1_micro': 0.655980271270037,
 'test_f1_macro': 0.6398377711685964,
 'test_runtime': 46.2573,
 'test_samples_per_second': 35.065}