In [7]:
from glob import glob
import os
import json
import numpy as np
import pprint

def parse_path(path):
    """Extract the run configuration encoded in a results-file path.

    The path is read positionally: component 0 is the dataset, component 1 is
    the RoBERTa variant and component 3 is a ``-``-separated run directory
    name. In that name the first six fields are joined back into the
    timestamp, and fields 7, 9, 11, 13 and 15 are read as the speaker mode,
    number of past utterances, number of future utterances, batch size and
    seed respectively.

    Args:
        path: Relative path to a results file whose base name contains
            ``'val'`` or ``'test'``.

    Returns:
        dict with keys ``DATASET``, ``SPLIT``, ``roberta``, ``speaker_mode``,
        ``num_past_utterances``, ``num_future_utterances``, ``bsz``, ``seed``
        and ``timestamp``.

    Raises:
        ValueError: If the base name contains neither ``'val'`` nor ``'test'``,
            or a numeric field cannot be converted to ``int``.
        IndexError: If the path or run directory name has too few fields.
        AssertionError: If the speaker mode is not a recognised value.
    """
    filename = os.path.basename(path)
    # 'val' is checked first, so a name containing both is treated as 'val'.
    if 'val' in filename:
        SPLIT = 'val'
    elif 'test' in filename:
        SPLIT = 'test'
    else:
        raise ValueError(
            f"cannot infer split ('val' or 'test') from file name: {path!r}")

    # glob returns paths using the platform separator; normalise it so the
    # positional lookup below also works where os.sep is not '/'.
    splits = path.replace(os.sep, '/').split('/')
    DATASET = splits[0]
    roberta = splits[1]
    seed_details = splits[3].split('-')

    timestamp = '-'.join(seed_details[:6])

    speaker_mode = seed_details[7]
    assert speaker_mode in ['upper', 'None', 'title'], (
        f"unexpected speaker_mode {speaker_mode!r} in {path!r}")
    num_past_utterances = int(seed_details[9])
    num_future_utterances = int(seed_details[11])
    bsz = int(seed_details[13])
    seed = int(seed_details[15])

    return {
        'DATASET': DATASET,
        'SPLIT': SPLIT,
        'roberta': roberta,
        'speaker_mode': speaker_mode,
        'num_past_utterances': num_past_utterances,
        'num_future_utterances': num_future_utterances,
        'bsz': bsz,
        'seed': seed,
        'timestamp': timestamp,
    }

def read_json(path):
    """Load and return the JSON document stored at ``path``.

    The file is opened explicitly as UTF-8 so that decoding does not depend
    on the platform's locale default encoding.

    Args:
        path: Path of the JSON file to read.

    Returns:
        The decoded JSON value (whatever ``json.load`` produces for the file).
    """
    with open(path, 'r', encoding='utf-8') as stream:
        return json.load(stream)

def merge_two_dicts(a, b):
    """Return a new dict holding the entries of ``a`` overlaid with those of ``b``.

    Neither input is modified. When a key occurs in both mappings, the value
    from ``b`` is the one kept.
    """
    merged = dict(a.items())
    merged.update(b.items())
    return merged

def return_by_timestamp(seed_results):
    """Group result records by their ``'timestamp'`` value.

    The input is traversed once, so it may be any iterable (including a
    one-shot iterator). Groups appear in the order their timestamp is first
    seen, and records keep their original relative order within a group.

    Args:
        seed_results: Iterable of dicts, each with a ``'timestamp'`` key.

    Returns:
        dict mapping each timestamp to the list of records carrying it.

    Raises:
        KeyError: If a record has no ``'timestamp'`` key.
    """
    grouped = {}
    for record in seed_results:
        grouped.setdefault(record['timestamp'], []).append(record)
    return grouped

def return_metrics(seed_results):
    """Average validation and test metrics over the runs of each timestamp.

    Args:
        seed_results: dict mapping a timestamp to a list of result records.
            Each record carries ``'SPLIT'`` (``'val'`` or ``'test'``), the
            configuration fields listed in ``config_fields`` below, and the
            ``eval_*`` (val records) or ``test_*`` (test records) metrics.

    Returns:
        dict, ordered by key, mapping a description string (configuration
        fields followed by the timestamp) to a ``(val_stats, test_stats)``
        pair. Each stats dict maps a metric name to ``(mean, count)``, where
        ``count`` is the number of records that were averaged.

    Raises:
        AssertionError: If the records of one timestamp do not all share the
            same value for every configuration field (this also covers an
            empty group).
        KeyError: If a record lacks a required field or metric.
    """
    config_fields = ['DATASET', 'roberta', 'speaker_mode',
                     'num_past_utterances', 'num_future_utterances']
    val_metrics = ['eval_loss', 'eval_f1_weighted', 'eval_f1_micro', 'eval_f1_macro']
    test_metrics = ['test_loss', 'test_f1_weighted', 'test_f1_micro', 'test_f1_macro']

    summaries = {}
    for timestamp, runs in seed_results.items():
        val_runs = [run for run in runs if run['SPLIT'] == 'val']
        test_runs = [run for run in runs if run['SPLIT'] == 'test']

        # The description below is built from the first record only, so every
        # record in the group must agree on the configuration. The check has
        # to be "exactly one distinct value"; a bare len(...) is truthy for
        # any non-empty group and would never catch a mismatch.
        for field in config_fields:
            distinct = set(run[field] for run in runs)
            assert len(distinct) == 1, (
                f"runs for timestamp {timestamp!r} disagree on {field!r}: {distinct!r}")

        keyname = ', '.join(f"{field}: {runs[0][field]}" for field in config_fields)
        keyname += f', {str(timestamp)}'

        stats_test_ = {}
        for metric in test_metrics:
            values = [run[metric] for run in test_runs]
            stats_test_[metric] = (np.mean(values), len(values))

        stats_val_ = {}
        for metric in val_metrics:
            values = [run[metric] for run in val_runs]
            stats_val_[metric] = (np.mean(values), len(values))

        summaries[keyname] = (stats_val_, stats_test_)

    # Emit entries in sorted key order so the report is stable.
    return {name: summaries[name] for name in sorted(summaries)}

def get_average(seed_results):
    """Reduce each collection of values to a ``(mean, count)`` pair.

    Args:
        seed_results: dict mapping a timestamp to a dict whose values are
            iterables of numbers.

    Returns:
        dict with the same outer and inner keys, in the same order as the
        input, where each iterable is replaced by ``(mean, count)``.
    """
    averages = {}
    for timestamp, per_key in seed_results.items():
        summary = {}
        for key, val in per_key.items():
            # Materialise once: converting twice would report a count of 0
            # for a one-shot iterator, since the first pass exhausts it.
            values = list(val)
            summary[key] = (np.mean(values), len(values))
        averages[timestamp] = summary
    return averages

In [13]:
# Collect every per-seed results file, keeping only paths that mention
# MELD or IEMOCAP.
seed_results = [
    path
    for path in glob('*/*/SEEDS/*/*-results.json')
    if 'MELD' in path or 'IEMOCAP' in path
]

# Combine the configuration parsed from each path with the metrics stored
# in the file itself (file contents win on a key clash).
seed_results = [
    merge_two_dicts(parse_path(path), read_json(path))
    for path in seed_results
]

# Group the records by timestamp, then average the metrics of each group.
seed_results = return_metrics(return_by_timestamp(seed_results))

# Print one block per configuration, separated by a blank line.
for key, val in seed_results.items():
    print(key)
    pprint.pprint(val)
    print()

DATASET: IEMOCAP, roberta: roberta-large, speaker_mode: None, num_past_utterances: 0, num_future_utterances: 0, 2021-05-09-11-21-35
({'eval_f1_macro': (0.5522587758713577, 5),
  'eval_f1_micro': (0.5687755102040817, 5),
  'eval_f1_weighted': (0.575727996274017, 5),
  'eval_loss': (1.2397764921188354, 5)},
 {'test_f1_macro': (0.5378345315121387, 5),
  'test_f1_micro': (0.5617755856966707, 5),
  'test_f1_weighted': (0.5587721595281857, 5),
  'test_loss': (1.2600319862365723, 5)})

DATASET: IEMOCAP, roberta: roberta-large, speaker_mode: upper, num_past_utterances: 0, num_future_utterances: 0, 2021-05-09-11-49-51
({'eval_f1_macro': (0.5269594714657198, 5),
  'eval_f1_micro': (0.5724489795918368, 5),
  'eval_f1_weighted': (0.564236788122536, 5),
  'eval_loss': (1.2878952741622924, 5)},
 {'test_f1_macro': (0.5431934834368952, 5),
  'test_f1_micro': (0.5637484586929716, 5),
  'test_f1_weighted': (0.560872794884953, 5),
  'test_loss': (1.3399529695510863, 5)})

DATASET: IEMOCAP, roberta: rober