# Hyper-parameter Optimization Results

To decide which hyper-parameters to use for each model, we evaluate the hyper-parameter optimization (HPO) results on the GoEmotions dataset.

In [1]:
import glob
import json
import pandas as pd

# Load every experiment result file found under ../output (three directory
# levels deep) as a (path, parsed JSON) pair.
# glob returns its matches in arbitrary order, so the paths are sorted to make
# the row order -- and therefore any later "keep the first of several tied
# rows" step -- reproducible across runs and machines.
paths = sorted(glob.glob('../output/*/*/*.json'))

results = []
for path in paths:
    with open(path) as fp:
        results.append((path, json.load(fp)))

# One row per result file: its path and the decoded JSON document.
results_df = pd.DataFrame(results, columns=['Path', 'Dict'])

In [2]:
# Metrics read directly from results[<split>][<metric>] (default for extract_macro_stats).
EXPERIMENT_METRICS = ['macro_f1', 'micro_f1']
# Columns filled from the last three components of each result path (see parse_path).
EXPERIMENT_COLUMNS = ['Dataset', 'ModelType', 'Experiment']
# Per-label metrics that extract_category_stats averages across labels.
INTERNAL_METRICS = ['precision', 'recall']
# Per-label field used as the weight in that average.
WEIGHT_FIELD = 'support'


def parse_path(path):
    """Split a result-file path into its experiment identifiers.

    The last three path components are read as
    ``<dataset>/<model_type>/<experiment>.<extension>``.

    Args:
        path: Path string; forward slashes and backslashes are both accepted
            as separators.

    Returns:
        Tuple ``(dataset, model_type, experiment)`` where ``experiment`` is
        the file name truncated at its first dot.

    Raises:
        ValueError: If the path has fewer than three components.
    """
    # Normalise separators first: splitting on '/' alone leaves a
    # backslash-separated path (e.g. one listed on Windows) as a single
    # component, which breaks the three-way unpacking below.
    components = path.replace('\\', '/').split('/')
    dataset, model_type, experiment = components[-3:]
    experiment = experiment.split('.')[0]
    return dataset, model_type, experiment


def build_experiment_columns(dataframe):
    """Add the EXPERIMENT_COLUMNS columns, derived from the ``Path`` column.

    Each path is parsed with ``parse_path``; the i-th parsed element becomes
    the value of the i-th column named in ``EXPERIMENT_COLUMNS``. The frame
    is modified in place and also returned.
    """
    parsed = list(map(parse_path, dataframe.Path))
    for position, column_name in enumerate(EXPERIMENT_COLUMNS):
        column_values = []
        for identifiers in parsed:
            column_values.append(identifiers[position])
        dataframe[column_name] = column_values
    return dataframe


def extract_macro_stats(dataframe, metrics=EXPERIMENT_METRICS):
    """Add one ``<split>_<metric>`` column per split and aggregate metric.

    The split names are taken from the first row's
    ``config.data_config.split_names``; every row is expected to provide
    ``results[<split>][<metric>]`` for each of them.

    Args:
        dataframe: Frame with a ``Dict`` column of decoded result documents.
        metrics: Names of the metrics to extract for every split.

    Returns:
        The same frame (modified in place) with the new columns added.
    """
    # Positional access: with an integer index, ``dataframe['Dict'][0]`` looks
    # up the *label* 0 and fails on a frame whose index no longer contains it
    # (for instance after filtering rows).
    first_result = dataframe['Dict'].iloc[0]
    split_names = first_result['config']['data_config']['split_names']

    experiment_dicts = list(dataframe['Dict'])
    for split in split_names:
        for metric in metrics:
            new_col_name = '{}_{}'.format(split, metric)
            dataframe[new_col_name] = [exp_dict['results'][split][metric]
                                       for exp_dict in experiment_dicts]
    return dataframe


def extract_category_stats(dataframe, metrics=INTERNAL_METRICS, weight_field=WEIGHT_FIELD):
    """Add one ``<split>_<metric>`` column holding a weighted per-label average.

    For every row, split and metric, the per-label values found under
    ``results[<split>]['labels']`` are averaged using each label's
    ``weight_field`` value as its weight.

    Args:
        dataframe: Frame with a ``Dict`` column of decoded result documents.
        metrics: Names of the per-label metrics to average.
        weight_field: Name of the per-label field used as the weight.

    Returns:
        The same frame (modified in place) with the new columns added. A score
        is NaN when labels exist but their weights sum to zero.
    """
    # Positional access: with an integer index, ``dataframe['Dict'][0]`` looks
    # up the *label* 0 and fails on a frame whose index no longer contains it.
    first_result = dataframe['Dict'].iloc[0]
    split_names = first_result['config']['data_config']['split_names']

    for split in split_names:
        for metric in metrics:
            new_col_name = '{}_{}'.format(split, metric)
            metric_scores = []
            for exp_dict in dataframe['Dict']:
                label_values = list(exp_dict['results'][split]['labels'].values())
                global_weight = sum(label_results[weight_field] for label_results in label_values)
                if label_values and global_weight == 0:
                    # The weighted average is undefined here; report NaN
                    # instead of failing with ZeroDivisionError.
                    metric_score = float('nan')
                else:
                    # Each term is divided separately (rather than dividing the
                    # total once) to keep the floating-point results unchanged.
                    metric_score = sum(label_results[weight_field] * label_results[metric] / global_weight
                                       for label_results in label_values)
                metric_scores.append(metric_score)
            dataframe[new_col_name] = metric_scores
    return dataframe


def extract_model_details(dataframe):
    """Add ``Extractor`` and ``Model`` columns read from each result's config.

    ``Extractor`` comes from ``config.extractor_config.ex_type`` and ``Model``
    from ``config.model_config.model_name``. The frame is modified in place
    and also returned.
    """
    configs = [entry['config'] for entry in dataframe['Dict']]

    extractor_types = []
    for config in configs:
        extractor_types.append(config['extractor_config']['ex_type'])
    dataframe['Extractor'] = extractor_types

    model_names = []
    for config in configs:
        model_names.append(config['model_config']['model_name'])
    dataframe['Model'] = model_names

    return dataframe


def parse_df(dataframe):
    """Expand raw results into identifier, model and metric columns.

    Runs, in order: ``build_experiment_columns``, ``extract_model_details``,
    ``extract_macro_stats`` and ``extract_category_stats``.

    Args:
        dataframe: Frame with ``Path`` and ``Dict`` columns.

    Returns:
        A new frame containing the input columns plus the derived ones. The
        input frame does not gain any columns.
    """
    # The helpers add columns in place. Working on a copy keeps the caller's
    # frame as loaded, so the raw and parsed frames are distinct objects.
    # Only the frame is copied: the dictionaries stored in ``Dict`` are still
    # shared with the input.
    parsed = dataframe.copy()
    parsed = build_experiment_columns(parsed)
    parsed = extract_model_details(parsed)
    parsed = extract_macro_stats(parsed)
    parsed = extract_category_stats(parsed)
    return parsed


In [3]:
# Expand every loaded result into identifier, model and metric columns.
parsed_df = parse_df(results_df)

# Ascending sort: the highest validation macro-F1 ends up in the last rows.
parsed_df.sort_values('valid_macro_f1')

Unnamed: 0,Path,Dict,Dataset,ModelType,Experiment,Extractor,Model,train_macro_f1,train_micro_f1,valid_macro_f1,valid_micro_f1,test_macro_f1,test_micro_f1,train_precision,train_recall,valid_precision,valid_recall,test_precision,test_recall
238,../output/GoEmotions/classic/98f6bca9a4ab65148...,{'config': {'data_config': {'raw_path': 'prepr...,GoEmotions,classic,98f6bca9a4ab651482d7e1b22f78f86b,tfidf,sgd,0.086467,0.237511,0.086287,0.237473,0.087133,0.243009,0.203933,0.911316,0.207818,0.905956,0.234923,0.879444
91,../output/GoEmotions/classic/a93108cca090c2dfb...,{'config': {'data_config': {'raw_path': 'prepr...,GoEmotions,classic,a93108cca090c2dfbbbc383d76859d23,tfidf,sgd,0.087561,0.239064,0.087443,0.238925,0.088382,0.244441,0.239599,0.878285,0.225533,0.888558,0.221263,0.887818
203,../output/GoEmotions/classic/bc505ed7a45b78401...,{'config': {'data_config': {'raw_path': 'prepr...,GoEmotions,classic,bc505ed7a45b78401005670115de7804,tfidf,sgd,0.087547,0.219247,0.087490,0.218868,0.089887,0.224112,0.190787,0.911316,0.196505,0.904545,0.192659,0.917207
155,../output/GoEmotions/classic/6fe54fe68812f68c6...,{'config': {'data_config': {'raw_path': 'prepr...,GoEmotions,classic,6fe54fe68812f68c66347614314b3d77,tfidf,sgd,0.087060,0.217988,0.087628,0.218700,0.088405,0.223826,0.177105,0.928478,0.180299,0.924765,0.181679,0.924001
179,../output/GoEmotions/classic/4c756098a05e6be85...,{'config': {'data_config': {'raw_path': 'prepr...,GoEmotions,classic,4c756098a05e6be8553407e62531326e,tfidf,sgd,0.087445,0.218576,0.088262,0.219786,0.088820,0.224594,0.176947,0.928791,0.180338,0.926019,0.181139,0.924633
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1995,../output/GoEmotions/neural/812213d5b056aac532...,{'config': {'data_config': {'raw_path': 'prepr...,GoEmotions,neural,812213d5b056aac532335d8800486c5d,bert,dnnpool,0.593322,0.634571,0.515087,0.559888,0.499480,0.556102,0.609850,0.668141,0.510231,0.642633,0.519813,0.621899
668,../output/GoEmotions/neural/0ae5faa3b9dd41390d...,{'config': {'data_config': {'raw_path': 'prepr...,GoEmotions,neural,0ae5faa3b9dd41390d57d7e006254968,bert,dnnpool,0.593322,0.634571,0.515087,0.559888,0.499480,0.556102,0.609850,0.668141,0.510231,0.642633,0.519813,0.621899
1854,../output/GoEmotions/neural/7e372907c3f585085f...,{'config': {'data_config': {'raw_path': 'prepr...,GoEmotions,neural,7e372907c3f585085f6d037c3849ca63,bert,dnnpool,0.556554,0.597283,0.515675,0.558934,0.510514,0.555689,0.553376,0.662760,0.514209,0.646395,0.515199,0.634065
1028,../output/GoEmotions/neural/f7cacae6393f216913...,{'config': {'data_config': {'raw_path': 'prepr...,GoEmotions,neural,f7cacae6393f2169131c6f7c394e5403,bert,dnnpool,0.564260,0.612822,0.516185,0.562844,0.498440,0.559146,0.580436,0.661801,0.513887,0.652508,0.522387,0.622689


In [7]:
# Best validation macro-F1 reached within each
# (dataset, model type, extractor, model) group.
keyed_groups = parsed_df.groupby(['Dataset', 'ModelType', 'Extractor', 'Model'])
best_valid_df = keyed_groups['valid_macro_f1'].max().to_frame().reset_index()

# Joining back on the group keys *and* the score keeps only the rows that
# achieved their group's best validation score (ties are all kept here).
results_df = parsed_df.merge(
    best_valid_df,
    how='inner',
    on=['Dataset', 'ModelType', 'Extractor', 'Model', 'valid_macro_f1'],
)

# Report the test metrics of the selected rows, rounded to two decimals,
# keeping the first row for each (dataset, extractor, model) combination.
final_df = (
    results_df[['Dict', 'Dataset', 'Extractor', 'Model',
                'test_macro_f1', 'test_micro_f1', 'test_precision', 'test_recall']]
    .round(2)
    .drop_duplicates(['Dataset', 'Extractor', 'Model'])
    .sort_values(['Dataset', 'Extractor', 'Model'])
)
final_df

Unnamed: 0,Dict,Dataset,Extractor,Model,test_macro_f1,test_micro_f1,test_precision,test_recall
6,{'config': {'data_config': {'raw_path': 'prepr...,GoEmotions,bert,dnnpool,0.5,0.56,0.52,0.63
7,{'config': {'data_config': {'raw_path': 'prepr...,GoEmotions,bert,lstm,0.49,0.57,0.53,0.63
3,{'config': {'data_config': {'raw_path': 'prepr...,GoEmotions,bow,naivebayes,0.35,0.46,0.43,0.53
5,{'config': {'data_config': {'raw_path': 'prepr...,GoEmotions,bow,sgd,0.46,0.53,0.49,0.61
9,{'config': {'data_config': {'raw_path': 'prepr...,GoEmotions,fasttext,dnnpool,0.44,0.5,0.44,0.63
8,{'config': {'data_config': {'raw_path': 'prepr...,GoEmotions,fasttext,lstm,0.47,0.54,0.52,0.59
1,{'config': {'data_config': {'raw_path': 'prepr...,GoEmotions,tfidf,naivebayes,0.33,0.44,0.43,0.49
0,{'config': {'data_config': {'raw_path': 'prepr...,GoEmotions,tfidf,sgd,0.47,0.53,0.49,0.6


# Generate the final experiment configuration

Create the JSON configuration files for the final experiment: one file per selected model and seed, so that each selected configuration is run multiple times.

In [18]:
import sys
import copy
import json
import os
# Make the project sources importable; the guard keeps sys.path from growing
# each time this cell is re-run.
if '../src' not in sys.path:
    sys.path.append('../src')
from config import ExperimentConfig

# Number of seeds (0 .. NUM_EXPERIMENTS - 1) generated for every selected configuration.
NUM_EXPERIMENTS = 5

for experiment in range(NUM_EXPERIMENTS):
    for cfg in final_df[final_df.Dataset == 'GoEmotions'].Dict:
        # Clone the config: the edits below would otherwise modify the
        # dictionaries stored in final_df, and the overwritten output_path
        # would change model_family on the next seed.
        cfg = copy.deepcopy(cfg)
        cfg = cfg['config']
        cfg['seed'] = experiment
        # Second-to-last '/'-separated component of the original output path.
        # NOTE(review): presumably the model-family directory; this relies on
        # output_path ending with a trailing slash -- confirm.
        model_family = cfg['output_path'].split('/')[-2]
        cfg['output_path'] = 'output/GoEmotions/replica/'

        # Build the experiment object
        exp_cfg = ExperimentConfig.from_dict(cfg)
        as_json = json.dumps(exp_cfg._as_flat_dict(), indent=2)
        exp_hash = exp_cfg.hash()

        # Save as json, creating the target directory on first use so the
        # cell also works when the directory does not exist yet.
        save_path = f'../configs/GoEmotions/replica/{model_family}/{exp_hash}.json'
        os.makedirs(os.path.dirname(save_path), exist_ok=True)
        with open(save_path, 'w') as f:
            f.write(as_json)