# Full Experiment Results

To decide which parameters to use for our models, we evaluate the hyper-parameter optimization (HPO) results on the GoEmotions and Vent datasets.

In [1]:
import glob
import json
import os

import pandas as pd

# Load every experiment result file matching ../output/<dir>/<dir>/<name>.json.
# The pattern contains no '**', so glob's recursive flag would have no effect and
# is omitted. Sorting makes the row order (and therefore the DataFrame index)
# reproducible, because glob returns matches in filesystem-dependent order.
results = []
paths = sorted(glob.glob('../output/*/*/*.json'))
for path in paths:
    # Read explicitly as UTF-8 instead of relying on the platform default encoding.
    with open(path, encoding='utf-8') as fp:
        result_dict = json.load(fp)
    results.append((path, result_dict))

# One row per result file: its path and the parsed JSON payload.
results_df = pd.DataFrame(results, columns=['Path', 'Dict'])

In [2]:
# Aggregate metric keys that extract_macro_stats copies from results[split] by default.
EXPERIMENT_METRICS = ['macro_f1', 'micro_f1']
# Column names for the last three path components, in the order parse_path returns them.
EXPERIMENT_COLUMNS = ['Dataset', 'ModelType', 'Experiment']
# Per-label metric keys that extract_category_stats averages across labels by default.
INTERNAL_METRICS = ['precision', 'recall']
# Per-label key whose value extract_category_stats uses as the averaging weight by default.
WEIGHT_FIELD = 'support'


def parse_path(path):
    """Split a result-file path into dataset, model type and experiment name.

    The last three path components are read as
    ``<dataset>/<model_type>/<experiment>.<extension>``.

    Args:
        path: Path to a result file. Forward slashes and backslashes are both
            accepted as separators, so paths returned by ``glob`` on Windows
            are parsed as well.

    Returns:
        A tuple ``(dataset, model_type, experiment)`` where ``experiment`` is
        the file name cut at its first ``.``.

    Raises:
        ValueError: If the path has fewer than three components.
    """
    # Normalise separators first: splitting on '/' alone leaves a
    # backslash-separated path in one piece and the unpacking below fails.
    parts = path.replace('\\', '/').split('/')[-3:]
    if len(parts) != 3:
        raise ValueError(
            'expected <dataset>/<model_type>/<experiment> at the end of {!r}'.format(path))
    dataset, model_type, experiment = parts
    return dataset, model_type, experiment.split('.')[0]


def build_experiment_columns(dataframe):
    """Add one column per entry of EXPERIMENT_COLUMNS, derived from ``Path``.

    Every value of the ``Path`` column is parsed with ``parse_path``; the i-th
    element of each parsed tuple fills the i-th name in EXPERIMENT_COLUMNS.
    The frame is modified in place and also returned.
    """
    parsed = list(map(parse_path, dataframe.Path))
    for position in range(len(EXPERIMENT_COLUMNS)):
        column_name = EXPERIMENT_COLUMNS[position]
        dataframe[column_name] = [triple[position] for triple in parsed]
    return dataframe


def extract_macro_stats(dataframe, metrics=EXPERIMENT_METRICS):
    """Add one ``<split>_<metric>`` column per split and aggregate metric.

    Split names are taken from the configuration of the first row
    (``config.data_config.split_names``); for an empty frame the default
    ``['train', 'valid', 'test']`` is used.

    Args:
        dataframe: Frame with a ``Dict`` column of parsed result dictionaries.
            It is modified in place.
        metrics: Keys read from ``results[split]`` of every result dictionary.

    Returns:
        The same ``dataframe`` with the new columns added.
    """
    try:
        # Positional access. The label lookup ``['Dict'][0]`` raises KeyError
        # (which the handler below does not catch) whenever the index has no
        # label 0, e.g. on a filtered frame, so the fallback was unreliable.
        split_names = dataframe['Dict'].iloc[0]['config']['data_config']['split_names']
    except IndexError:
        # Empty frame: there is no first row to read the split names from.
        split_names = ['train', 'valid', 'test']
    for split in split_names:
        for metric in metrics:
            dataframe['{}_{}'.format(split, metric)] = [
                exp_dict['results'][split][metric] for exp_dict in dataframe['Dict']
            ]
    return dataframe


def extract_category_stats(dataframe, metrics=INTERNAL_METRICS, weight_field=WEIGHT_FIELD):
    """Add one weighted-average ``<split>_<metric>`` column per split and per-label metric.

    For every experiment, the per-label values under
    ``results[split]['labels']`` are averaged, each label weighted by its
    ``weight_field`` value divided by the total of that field over all labels.
    Split names are taken from the configuration of the first row; for an
    empty frame the default ``['train', 'valid', 'test']`` is used.

    Args:
        dataframe: Frame with a ``Dict`` column of parsed result dictionaries.
            It is modified in place.
        metrics: Per-label metric keys to average.
        weight_field: Per-label key providing the weight.

    Returns:
        The same ``dataframe`` with the new columns added.
    """
    try:
        # Positional access. The label lookup ``['Dict'][0]`` raises KeyError
        # (which the handler below does not catch) whenever the index has no
        # label 0, e.g. on a filtered frame, so the fallback was unreliable.
        split_names = dataframe['Dict'].iloc[0]['config']['data_config']['split_names']
    except IndexError:
        # Empty frame: there is no first row to read the split names from.
        split_names = ['train', 'valid', 'test']
    for split in split_names:
        for metric in metrics:
            weighted_scores = []
            for exp_dict in dataframe['Dict']:
                label_values = list(exp_dict['results'][split]['labels'].values())
                # Total weight over all labels; each term below is normalised by it.
                global_weight = sum(label_results[weight_field] for label_results in label_values)
                weighted_scores.append(sum(
                    label_results[weight_field] * label_results[metric] / global_weight
                    for label_results in label_values))
            dataframe['{}_{}'.format(split, metric)] = weighted_scores
    return dataframe


def extract_model_details(dataframe):
    """Add ``Extractor`` and ``Model`` columns read from each experiment's config.

    ``Extractor`` comes from ``config.extractor_config.ex_type`` and ``Model``
    from ``config.model_config.model_name`` of every dictionary in the
    ``Dict`` column. The frame is modified in place and also returned.
    """
    configs = [entry['config'] for entry in dataframe['Dict']]
    dataframe['Extractor'] = [cfg['extractor_config']['ex_type'] for cfg in configs]
    dataframe['Model'] = [cfg['model_config']['model_name'] for cfg in configs]
    return dataframe


def parse_df(dataframe):
    """Run all column-extraction stages over ``dataframe`` and return the result.

    The stages run in this order: path-derived columns, model details,
    aggregate metrics, then weighted per-label metrics.
    """
    stages = (
        build_experiment_columns,
        extract_model_details,
        extract_macro_stats,
        extract_category_stats,
    )
    for stage in stages:
        dataframe = stage(dataframe)
    return dataframe


In [3]:
# Derive the experiment, model and metric columns from the raw results.
parsed_df = parse_df(results_df)

# Show the experiments ordered from lowest to highest validation macro-F1.
parsed_df.sort_values(by='valid_macro_f1', ascending=True)

Unnamed: 0,Path,Dict,Dataset,ModelType,Experiment,Extractor,Model,train_macro_f1,train_micro_f1,valid_macro_f1,valid_micro_f1,test_macro_f1,test_micro_f1,train_precision,train_recall,valid_precision,valid_recall,test_precision,test_recall
536,../output/Vent/neural/52ce66a7a5c985ed9d0d9368...,{'config': {'data_config': {'raw_path': 'prepr...,Vent,neural,52ce66a7a5c985ed9d0d9368b83743dd,bert,dnnpool,0.017744,0.029671,0.015231,0.027161,0.015436,0.028582,0.017805,0.511462,0.021659,0.435901,0.016890,0.412615
482,../output/Vent/neural/3d433974ce46ca09abbf59ef...,{'config': {'data_config': {'raw_path': 'prepr...,Vent,neural,3d433974ce46ca09abbf59ef50b23c04,fasttext,dnnpool,0.020536,0.031562,0.017813,0.027809,0.016795,0.027072,0.020763,0.592551,0.022148,0.380454,0.021086,0.247270
489,../output/Vent/neural/cd0b01d095b96f8a7b3f8935...,{'config': {'data_config': {'raw_path': 'prepr...,Vent,neural,cd0b01d095b96f8a7b3f8935c3b125ec,fasttext,dnnpool,0.020063,0.028314,0.017923,0.026142,0.016516,0.026958,0.020251,0.564646,0.019656,0.373224,0.021301,0.231763
502,../output/Vent/neural/1f1c44af1b857830da5e3e72...,{'config': {'data_config': {'raw_path': 'prepr...,Vent,neural,1f1c44af1b857830da5e3e7283e5b698,fasttext,dnnpool,0.020919,0.031359,0.018866,0.028082,0.017998,0.029292,0.021351,0.621047,0.020987,0.348442,0.021385,0.249713
422,../output/Vent/neural/0036bc2fddce4ef1dc6bfc9c...,{'config': {'data_config': {'raw_path': 'prepr...,Vent,neural,0036bc2fddce4ef1dc6bfc9cf84beeb1,fasttext,dnnpool,0.021072,0.030897,0.018890,0.029476,0.017877,0.029394,0.020822,0.630147,0.022004,0.345849,0.022280,0.237447
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1766,../output/GoEmotions/neural/0ae5faa3b9dd41390d...,{'config': {'data_config': {'raw_path': 'prepr...,GoEmotions,neural,0ae5faa3b9dd41390d57d7e006254968,bert,dnnpool,0.593322,0.634571,0.515087,0.559888,0.499480,0.556102,0.609850,0.668141,0.510231,0.642633,0.519813,0.621899
4532,../output/GoEmotions/neural/dd9f3603b2210ed1f5...,{'config': {'data_config': {'raw_path': 'prepr...,GoEmotions,neural,dd9f3603b2210ed1f58e963a99102c59,bert,dnnpool,0.556554,0.597283,0.515675,0.558934,0.510514,0.555689,0.553376,0.662760,0.514209,0.646395,0.515199,0.634065
3525,../output/GoEmotions/neural/7e372907c3f585085f...,{'config': {'data_config': {'raw_path': 'prepr...,GoEmotions,neural,7e372907c3f585085f6d037c3849ca63,bert,dnnpool,0.556554,0.597283,0.515675,0.558934,0.510514,0.555689,0.553376,0.662760,0.514209,0.646395,0.515199,0.634065
2308,../output/GoEmotions/neural/f7cacae6393f216913...,{'config': {'data_config': {'raw_path': 'prepr...,GoEmotions,neural,f7cacae6393f2169131c6f7c394e5403,bert,dnnpool,0.564260,0.612822,0.516185,0.562844,0.498440,0.559146,0.580436,0.661801,0.513887,0.652508,0.522387,0.622689


In [4]:
# Runs with a seed below this bound (seeds 0..4) are kept for the comparison.
NUM_SEEDS = 5

results_df = parsed_df
# The data source is the second-to-last component of the configured cache path.
results_df['DataSource'] = [val['config']['data_config']['cache_path'].split('/')[-2] for val in results_df.Dict]
results_df['Seed'] = [val['config']['seed'] for val in results_df.Dict]
results_df = results_df[results_df.Seed < NUM_SEEDS]

# Keep full precision in final_df: rounding here would feed already-rounded scores
# into any mean/std computed from it afterwards. Rounding is applied for display only.
final_df = results_df[['Dict', 'Seed', 'DataSource', 'Dataset', 'ModelType', 'Extractor', 'Model', 'test_macro_f1', 'test_micro_f1', 'test_precision', 'test_recall']]
final_df = final_df.sort_values(['DataSource', 'Extractor', 'Model', 'Seed'])
final_df.round(2)

Unnamed: 0,Dict,Seed,DataSource,Dataset,ModelType,Extractor,Model,test_macro_f1,test_micro_f1,test_precision,test_recall
1206,{'config': {'data_config': {'raw_path': 'prepr...,0,GoEmotions-split-cache,GoEmotions,replica,bert,dnnpool,0.48,0.55,0.52,0.61
1187,{'config': {'data_config': {'raw_path': 'prepr...,1,GoEmotions-split-cache,GoEmotions,replica,bert,dnnpool,0.49,0.56,0.51,0.63
1205,{'config': {'data_config': {'raw_path': 'prepr...,2,GoEmotions-split-cache,GoEmotions,replica,bert,dnnpool,0.49,0.55,0.52,0.60
1221,{'config': {'data_config': {'raw_path': 'prepr...,3,GoEmotions-split-cache,GoEmotions,replica,bert,dnnpool,0.48,0.55,0.52,0.61
1173,{'config': {'data_config': {'raw_path': 'prepr...,4,GoEmotions-split-cache,GoEmotions,replica,bert,dnnpool,0.48,0.55,0.52,0.61
...,...,...,...,...,...,...,...,...,...,...,...
332,{'config': {'data_config': {'raw_path': 'prepr...,0,vent-split-robust-random-cache,Vent,replica-full-random,tfidf,sgd,0.14,0.16,0.14,0.20
328,{'config': {'data_config': {'raw_path': 'prepr...,1,vent-split-robust-random-cache,Vent,replica-full-random,tfidf,sgd,0.14,0.16,0.14,0.20
299,{'config': {'data_config': {'raw_path': 'prepr...,2,vent-split-robust-random-cache,Vent,replica-full-random,tfidf,sgd,0.14,0.16,0.14,0.20
296,{'config': {'data_config': {'raw_path': 'prepr...,3,vent-split-robust-random-cache,Vent,replica-full-random,tfidf,sgd,0.14,0.16,0.14,0.20


In [5]:
# Mean and standard deviation of each test metric over seeds, per
# (data source, extractor, model) group.
# The metric columns are selected explicitly: final_df also holds non-numeric
# columns ('Dict', 'Dataset', 'ModelType'), and pandas >= 2.0 raises a TypeError
# instead of silently dropping them when asked for their mean/std.
group_columns = ['DataSource', 'Extractor', 'Model']
metric_columns = ['test_macro_f1', 'test_micro_f1', 'test_precision', 'test_recall']
(
    final_df
    .groupby(group_columns)[metric_columns]
    .agg(['mean', 'std'])
    .round(2)
    .reset_index()
)

Unnamed: 0_level_0,DataSource,Extractor,Model,test_macro_f1,test_macro_f1,test_micro_f1,test_micro_f1,test_precision,test_precision,test_recall,test_recall
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,mean,std,mean,std,mean,std,mean,std
0,GoEmotions-split-cache,bert,dnnpool,0.48,0.01,0.55,0.0,0.52,0.0,0.61,0.01
1,GoEmotions-split-cache,bert,lstm,0.47,0.01,0.57,0.01,0.53,0.01,0.62,0.01
2,GoEmotions-split-cache,bow,naivebayes,0.34,0.0,0.46,0.0,0.43,0.0,0.52,0.0
3,GoEmotions-split-cache,bow,rf,0.45,0.01,0.52,0.0,0.5,0.0,0.59,0.0
4,GoEmotions-split-cache,bow,sgd,0.45,0.0,0.53,0.0,0.48,0.0,0.61,0.0
5,GoEmotions-split-cache,fasttext,dnnpool,0.42,0.0,0.49,0.0,0.45,0.01,0.61,0.01
6,GoEmotions-split-cache,fasttext,lstm,0.44,0.01,0.54,0.0,0.51,0.0,0.58,0.01
7,GoEmotions-split-cache,tfidf,naivebayes,0.32,0.0,0.43,0.0,0.42,0.0,0.48,0.0
8,GoEmotions-split-cache,tfidf,rf,0.45,0.01,0.52,0.0,0.48,0.0,0.6,0.0
9,GoEmotions-split-cache,tfidf,sgd,0.45,0.01,0.53,0.0,0.49,0.0,0.59,0.0


In [6]:
import sys
import copy
import json
sys.path.append('../src')
from config import ExperimentConfig

# Select the seed-0 runs of the two chosen extractor/model pairs (bert + lstm and
# tfidf + sgd) from the 'GoEmotions-split-cache' and 'vent-split-robust-cache' sources.
is_target_source = final_df.DataSource.isin(['GoEmotions-split-cache', 'vent-split-robust-cache'])
is_target_model = (((final_df.Extractor == 'bert') & (final_df.Model == 'lstm')) |
                   ((final_df.Extractor == 'tfidf') & (final_df.Model == 'sgd')))
filter_condition = is_target_source & is_target_model & (final_df.Seed == 0)

for _, row in final_df[filter_condition].iterrows():
    # Work on a private copy: setting 'model_path' directly on row.Dict['config']
    # would also modify the dictionary stored in the results frame.
    exp_config = copy.deepcopy(row.Dict['config'])

    # Extract the relevant fields
    dataset = row.Dataset
    extractor = row.Extractor

    # Create the config with a logical output path
    exp_config['model_path'] = f'models/{dataset}/'

    # Build the experiment object
    exp_cfg = ExperimentConfig.from_dict(exp_config)
    as_json = json.dumps(exp_cfg._as_flat_dict(), indent=2)
    exp_hash = exp_cfg.hash()

    # Save as json, creating the target directory on first use so that a fresh
    # checkout does not fail with FileNotFoundError.
    model_type = 'classic' if extractor == 'tfidf' else 'neural'
    save_dir = f'../configs/{dataset}/models/{model_type}'
    os.makedirs(save_dir, exist_ok=True)
    save_path = f'{save_dir}/{exp_hash}.json'
    with open(save_path, 'w', encoding='utf-8') as f:
        f.write(as_json)
