# Hyper-parameter Optimization Results

To effectively decide which parameters to use on our models, we have to evaluate the HPO results on the GoEmotions dataset.

In [13]:
import glob
import json
import pandas as pd

results = []
paths = glob.glob('../output/*/*/*.json', recursive=True)
for path in paths:
    with open(path) as fp:
        result_dict = json.load(fp)
        result_tuple = (path, result_dict)
        results.append(result_tuple)

results_df = pd.DataFrame(results, columns=['Path', 'Dict'])

In [14]:
EXPERIMENT_METRICS = ['macro_f1', 'micro_f1']
EXPERIMENT_COLUMNS = ['Dataset', 'ModelType', 'Experiment']
INTERNAL_METRICS = ['precision', 'recall']
WEIGHT_FIELD = 'support'


def parse_path(path):
    dataset, model_type, experiment = path.split('/')[-3:]
    experiment = experiment.split('.')[0]
    return dataset, model_type, experiment


def build_experiment_columns(dataframe):
    path_parts = [parse_path(path) for path in dataframe.Path]
    for i, col in enumerate(EXPERIMENT_COLUMNS):
        dataframe[col] = [parts[i] for parts in path_parts]
    return dataframe


def extract_macro_stats(dataframe, metrics=EXPERIMENT_METRICS):
    split_names = dataframe['Dict'][0]['config']['data_config']['split_names']
    for split in split_names:
        for metric in metrics:
            new_col_name = '{}_{}'.format(split, metric)
            dataframe[new_col_name] = [exp_dict['results'][split][metric] for exp_dict in dataframe['Dict']]
    return dataframe


def extract_category_stats(dataframe, metrics=INTERNAL_METRICS, weight_field=WEIGHT_FIELD):
    split_names = dataframe['Dict'][0]['config']['data_config']['split_names']
    for split in split_names:
        for metric in metrics:
            new_col_name = '{}_{}'.format(split, metric)
            metric_dict = []
            for exp_dict in dataframe['Dict']:
                label_values = list(exp_dict['results'][split]['labels'].values())
                global_weight = sum(label_results[weight_field] for label_results in label_values)
                metric_score = sum(label_results[weight_field] * label_results[metric] / global_weight
                                   for label_results in label_values)
                metric_dict.append(metric_score)
            dataframe[new_col_name] = metric_dict
    return dataframe


def extract_model_details(dataframe):
    dataframe['Extractor'] = [exp_dict['config']['extractor_config']['ex_type'] for exp_dict in dataframe['Dict']]
    dataframe['Model'] = [exp_dict['config']['model_config']['model_name'] for exp_dict in dataframe['Dict']]
    return dataframe


def parse_df(dataframe):
    dataframe = build_experiment_columns(dataframe)
    dataframe = extract_model_details(dataframe)
    dataframe = extract_macro_stats(dataframe)
    dataframe = extract_category_stats(dataframe)
    return dataframe


In [15]:
parsed_df = parse_df(results_df)

parsed_df.sort_values('valid_macro_f1')

Unnamed: 0,Path,Dict,Dataset,ModelType,Experiment,Extractor,Model,train_macro_f1,train_micro_f1,valid_macro_f1,valid_micro_f1,test_macro_f1,test_micro_f1,train_precision,train_recall,valid_precision,valid_recall,test_precision,test_recall
343,../output/Vent/neural/52ce66a7a5c985ed9d0d9368...,{'config': {'data_config': {'raw_path': 'prepr...,Vent,neural,52ce66a7a5c985ed9d0d9368b83743dd,bert,dnnpool,0.017744,0.029671,0.015231,0.027161,0.015436,0.028582,0.017805,0.511462,0.021659,0.435901,0.016890,0.412615
294,../output/Vent/neural/3d433974ce46ca09abbf59ef...,{'config': {'data_config': {'raw_path': 'prepr...,Vent,neural,3d433974ce46ca09abbf59ef50b23c04,fasttext,dnnpool,0.020536,0.031562,0.017813,0.027809,0.016795,0.027072,0.020763,0.592551,0.022148,0.380454,0.021086,0.247270
301,../output/Vent/neural/cd0b01d095b96f8a7b3f8935...,{'config': {'data_config': {'raw_path': 'prepr...,Vent,neural,cd0b01d095b96f8a7b3f8935c3b125ec,fasttext,dnnpool,0.020063,0.028314,0.017923,0.026142,0.016516,0.026958,0.020251,0.564646,0.019656,0.373224,0.021301,0.231763
243,../output/Vent/neural/0036bc2fddce4ef1dc6bfc9c...,{'config': {'data_config': {'raw_path': 'prepr...,Vent,neural,0036bc2fddce4ef1dc6bfc9cf84beeb1,fasttext,dnnpool,0.021072,0.030897,0.018890,0.029476,0.017877,0.029394,0.020822,0.630147,0.022004,0.345849,0.022280,0.237447
287,../output/Vent/neural/9995cde7caf6cf2aedf49373...,{'config': {'data_config': {'raw_path': 'prepr...,Vent,neural,9995cde7caf6cf2aedf49373535ee0ae,fasttext,dnnpool,0.022267,0.034091,0.018988,0.030314,0.017469,0.028138,0.021566,0.644220,0.020912,0.427823,0.021098,0.248267
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1458,../output/GoEmotions/neural/0ae5faa3b9dd41390d...,{'config': {'data_config': {'raw_path': 'prepr...,GoEmotions,neural,0ae5faa3b9dd41390d57d7e006254968,bert,dnnpool,0.593322,0.634571,0.515087,0.559888,0.499480,0.556102,0.609850,0.668141,0.510231,0.642633,0.519813,0.621899
4224,../output/GoEmotions/neural/dd9f3603b2210ed1f5...,{'config': {'data_config': {'raw_path': 'prepr...,GoEmotions,neural,dd9f3603b2210ed1f58e963a99102c59,bert,dnnpool,0.556554,0.597283,0.515675,0.558934,0.510514,0.555689,0.553376,0.662760,0.514209,0.646395,0.515199,0.634065
3217,../output/GoEmotions/neural/7e372907c3f585085f...,{'config': {'data_config': {'raw_path': 'prepr...,GoEmotions,neural,7e372907c3f585085f6d037c3849ca63,bert,dnnpool,0.556554,0.597283,0.515675,0.558934,0.510514,0.555689,0.553376,0.662760,0.514209,0.646395,0.515199,0.634065
2000,../output/GoEmotions/neural/f7cacae6393f216913...,{'config': {'data_config': {'raw_path': 'prepr...,GoEmotions,neural,f7cacae6393f2169131c6f7c394e5403,bert,dnnpool,0.564260,0.612822,0.516185,0.562844,0.498440,0.559146,0.580436,0.661801,0.513887,0.652508,0.522387,0.622689


In [16]:
keyed_groups = parsed_df.groupby(['Dataset', 'ModelType', 'Extractor', 'Model'])
best_valid_df = keyed_groups.valid_macro_f1.max().to_frame().reset_index()

results_df = parsed_df.merge(best_valid_df, on=['Dataset', 'ModelType', 'Extractor', 'Model', 'valid_macro_f1'], how='inner')

final_df = results_df[['Dict', 'Dataset', 'ModelType', 'Extractor', 'Model', 'test_macro_f1', 'test_micro_f1', 'test_precision', 'test_recall']].round(2)
final_df = final_df.drop_duplicates(['Dataset', 'Extractor', 'Model']).sort_values(['Dataset', 'Extractor', 'Model'])
final_df

Unnamed: 0,Dict,Dataset,ModelType,Extractor,Model,test_macro_f1,test_micro_f1,test_precision,test_recall
18,{'config': {'data_config': {'raw_path': 'prepr...,GoEmotions,neural,bert,dnnpool,0.5,0.56,0.52,0.63
19,{'config': {'data_config': {'raw_path': 'prepr...,GoEmotions,neural,bert,lstm,0.49,0.57,0.53,0.63
15,{'config': {'data_config': {'raw_path': 'prepr...,GoEmotions,classic,bow,naivebayes,0.35,0.46,0.43,0.53
11,{'config': {'data_config': {'raw_path': 'prepr...,GoEmotions,classic,bow,rf,0.47,0.53,0.5,0.59
17,{'config': {'data_config': {'raw_path': 'prepr...,GoEmotions,classic,bow,sgd,0.46,0.53,0.49,0.61
21,{'config': {'data_config': {'raw_path': 'prepr...,GoEmotions,neural,fasttext,dnnpool,0.45,0.5,0.45,0.62
20,{'config': {'data_config': {'raw_path': 'prepr...,GoEmotions,neural,fasttext,lstm,0.47,0.54,0.52,0.59
13,{'config': {'data_config': {'raw_path': 'prepr...,GoEmotions,classic,tfidf,naivebayes,0.33,0.44,0.43,0.49
10,{'config': {'data_config': {'raw_path': 'prepr...,GoEmotions,classic,tfidf,rf,0.46,0.52,0.48,0.6
12,{'config': {'data_config': {'raw_path': 'prepr...,GoEmotions,classic,tfidf,sgd,0.47,0.53,0.49,0.6


# Generate the final experiment configuration

Create the jsons with a number of seeds for the final experiment with multiple runs.

In [17]:
import sys
import copy
import json
sys.path.append('../src')
from config import ExperimentConfig

NUM_EXPERIMENTS = 5

for experiment in range(NUM_EXPERIMENTS):
    for cfg in final_df[final_df.Dataset == 'GoEmotions'].Dict:
        # Clone the config
        # Not needed for seeds, but if we change something in the future we'll be grateful
        cfg = copy.deepcopy(cfg)
        cfg = cfg['config']
        cfg['seed'] = experiment
        model_family = cfg['output_path'].split('/')[-2]
        cfg['output_path'] = f'output/GoEmotions/replica/'
        
        # Build the experiment object
        exp_cfg = ExperimentConfig.from_dict(cfg)        
        as_json = json.dumps(exp_cfg._as_flat_dict(), indent=2)
        exp_hash = exp_cfg.hash()
        
        # Save as json
        save_path = f'../configs/GoEmotions/replica/{model_family}/{exp_hash}.json' 
        with open(save_path, 'w') as f:
            f.write(as_json)

# Generate the experiment configuration for Sampled Robust Vent

In [19]:
import sys
import copy
import json
sys.path.append('../src')
from config import ExperimentConfig

NUM_EXPERIMENTS = 5

for experiment in range(NUM_EXPERIMENTS):
    for cfg in final_df[final_df.Dataset == 'Vent'].Dict:
        # Clone the config
        # Not needed for seeds, but if we change something in the future we'll be grateful
        cfg = copy.deepcopy(cfg)
        cfg = cfg['config']
        cfg['seed'] = experiment
        model_family = cfg['output_path'].split('/')[-2]
        if model_family == 'replica':
            model_family = 'neural'
        cfg['output_path'] = f'output/Vent/replica/'
        
        # Build the experiment object
        exp_cfg = ExperimentConfig.from_dict(cfg)        
        as_json = json.dumps(exp_cfg._as_flat_dict(), indent=2)
        exp_hash = exp_cfg.hash()
        
        # Save as json
        save_path = f'../configs/Vent/replica/{model_family}/{exp_hash}.json' 
        with open(save_path, 'w') as f:
            f.write(as_json)