# Hyper-parameter Optimization Results

To decide which hyper-parameters to use for our models, we evaluate the hyper-parameter optimization (HPO) results on the GoEmotions dataset.

In [8]:
import glob
import json
from pathlib import PurePath

import pandas as pd

# Load every experiment result file matching ../output/*/*/*.json.
# The pattern contains no `**`, so glob's `recursive` flag would be a no-op
# and is omitted. Sorting makes the row order (and therefore the DataFrame
# index) reproducible: glob returns matches in arbitrary order.
paths = sorted(glob.glob('../output/*/*/*.json'))

results = []
for path in paths:
    # JSON is UTF-8 by specification; do not depend on the platform's
    # default text encoding.
    with open(path, encoding='utf-8') as fp:
        results.append((path, json.load(fp)))

# One row per result file: its path and the parsed JSON payload.
results_df = pd.DataFrame(results, columns=['Path', 'Dict'])

In [24]:
# Default metric names extracted for every split by `extract_macro_stats`.
EXPERIMENT_METRICS = [
    'macro_f1',
    'micro_f1',
]
# Column names for the three values returned by `parse_path`, in order.
EXPERIMENT_COLUMNS = [
    'Dataset',
    'ModelType',
    'Experiment',
]


def parse_path(path):
    """Split a result-file path into ``(dataset, model_type, experiment)``.

    The last three path components are read as
    ``<dataset>/<model_type>/<experiment>.<ext>``.

    Args:
        path: path string of a result file.

    Returns:
        Tuple ``(dataset, model_type, experiment)`` where ``experiment`` is
        the file name with only its final extension removed, so names that
        themselves contain dots (e.g. ``lr_0.01.json``) are kept intact.

    Raises:
        ValueError: if the path has fewer than three components.
    """
    # PurePath splits on the platform's separators, so paths returned by
    # glob on Windows (backslashes) are handled as well as POSIX ones.
    parts = PurePath(path).parts
    if len(parts) < 3:
        raise ValueError(
            'expected <dataset>/<model_type>/<experiment> in path, got: {!r}'.format(path)
        )
    dataset, model_type, filename = parts[-3:]
    # `.stem` drops only the last suffix instead of cutting at the first dot.
    experiment = PurePath(filename).stem
    return dataset, model_type, experiment


def build_experiment_columns(dataframe):
    """Add the ``EXPERIMENT_COLUMNS`` columns derived from each row's ``Path``.

    Every path is run through ``parse_path``; the i-th element of each
    parsed tuple fills the i-th column named in ``EXPERIMENT_COLUMNS``.

    Args:
        dataframe: frame with a ``Path`` column. Modified in place.

    Returns:
        The same ``dataframe`` object, with the new columns added.
    """
    parsed_rows = list(map(parse_path, dataframe.Path))
    for position, column_name in enumerate(EXPERIMENT_COLUMNS):
        column_values = []
        for row in parsed_rows:
            column_values.append(row[position])
        dataframe[column_name] = column_values
    return dataframe


def extract_macro_stats(dataframe, metrics=EXPERIMENT_METRICS):
    """Add one ``<split>_<metric>`` column per split/metric combination.

    Split names are read from the first row's
    ``Dict['config']['data_config']['split_names']``. For each split and
    metric, the column value of a row is ``Dict['results'][split][metric]``.
    NOTE(review): this assumes every experiment reports the same splits as
    the first row; a row missing one raises ``KeyError`` -- confirm.

    Args:
        dataframe: frame with a ``Dict`` column of parsed result payloads.
            Modified in place.
        metrics: iterable of metric names to extract for every split.

    Returns:
        The same ``dataframe`` object, with the metric columns added. An
        empty frame is returned unchanged because there is no row to read
        the split names from.
    """
    experiment_dicts = dataframe['Dict']
    if len(experiment_dicts) == 0:
        return dataframe

    # Positional access: `dataframe['Dict'][0]` is a *label* lookup and
    # raises KeyError when the index has no label 0 (e.g. after filtering).
    split_names = experiment_dicts.iloc[0]['config']['data_config']['split_names']
    for split in split_names:
        for metric in metrics:
            new_col_name = '{}_{}'.format(split, metric)
            dataframe[new_col_name] = [
                exp_dict['results'][split][metric] for exp_dict in experiment_dicts
            ]
    return dataframe


def extract_model_details(dataframe):
    """Add ``Extractor`` and ``Model`` columns read from each row's config.

    ``Extractor`` comes from ``Dict['config']['extractor_config']['ex_type']``
    and ``Model`` from ``Dict['config']['model_config']['model_name']``.

    Args:
        dataframe: frame with a ``Dict`` column of parsed result payloads.
            Modified in place.

    Returns:
        The same ``dataframe`` object, with the two columns added.
    """
    configs = [payload['config'] for payload in dataframe['Dict']]
    dataframe['Extractor'] = [cfg['extractor_config']['ex_type'] for cfg in configs]
    dataframe['Model'] = [cfg['model_config']['model_name'] for cfg in configs]
    return dataframe


def parse_df(dataframe):
    """Run the full parsing pipeline over a raw results frame.

    Applies, in order: ``build_experiment_columns``,
    ``extract_model_details`` and ``extract_macro_stats``, feeding each
    step's return value into the next.

    Args:
        dataframe: frame with ``Path`` and ``Dict`` columns.

    Returns:
        The frame returned by the last step.
    """
    pipeline = (
        build_experiment_columns,
        extract_model_details,
        extract_macro_stats,
    )
    for step in pipeline:
        dataframe = step(dataframe)
    return dataframe


In [36]:
# Derive the experiment, model and per-split metric columns.
parsed_df = parse_df(results_df)

# Show experiments ordered by validation macro-F1, lowest first, so the
# best-performing configurations appear at the bottom of the table.
parsed_df.sort_values(by='valid_macro_f1', ascending=True)

Unnamed: 0,Path,Dict,Dataset,ModelType,Experiment,train_macro_f1,train_micro_f1,valid_macro_f1,valid_micro_f1,test_macro_f1,test_micro_f1,Extractor,Model
37,../output/GoEmotions/classic/98f6bca9a4ab65148...,{'config': {'data_config': {'raw_path': 'prepr...,GoEmotions,classic,98f6bca9a4ab651482d7e1b22f78f86b,0.086467,0.237511,0.086287,0.237473,0.087133,0.243009,tfidf,sgd
33,../output/GoEmotions/classic/bc505ed7a45b78401...,{'config': {'data_config': {'raw_path': 'prepr...,GoEmotions,classic,bc505ed7a45b78401005670115de7804,0.087547,0.219247,0.087490,0.218868,0.089887,0.224112,tfidf,sgd
27,../output/GoEmotions/classic/6fe54fe68812f68c6...,{'config': {'data_config': {'raw_path': 'prepr...,GoEmotions,classic,6fe54fe68812f68c66347614314b3d77,0.087060,0.217988,0.087628,0.218700,0.088405,0.223826,tfidf,sgd
8,../output/GoEmotions/classic/c808bb5b774020981...,{'config': {'data_config': {'raw_path': 'prepr...,GoEmotions,classic,c808bb5b774020981fb49c9981607980,0.093371,0.226716,0.091599,0.225249,0.091479,0.228818,tfidf,sgd
24,../output/GoEmotions/classic/7e768ca12a4c364fe...,{'config': {'data_config': {'raw_path': 'prepr...,GoEmotions,classic,7e768ca12a4c364fe99b225121512b68,0.099974,0.236545,0.096866,0.232470,0.100733,0.238507,tfidf,sgd
...,...,...,...,...,...,...,...,...,...,...,...,...,...
594,../output/GoEmotions/neural/867e906ec3424def23...,{'config': {'data_config': {'raw_path': 'prepr...,GoEmotions,neural,867e906ec3424def23047b0f84b0ad9a,0.558377,0.598313,0.511532,0.558837,0.508289,0.555420,bert,dnnpool
426,../output/GoEmotions/neural/166f622070a0b55d14...,{'config': {'data_config': {'raw_path': 'prepr...,GoEmotions,neural,166f622070a0b55d144984bc6ac61d90,0.573776,0.608303,0.511793,0.556911,0.512084,0.554972,bert,dnnpool
188,../output/GoEmotions/neural/19f0c349f25ed12523...,{'config': {'data_config': {'raw_path': 'prepr...,GoEmotions,neural,19f0c349f25ed125238e9c08c60c55da,0.575695,0.609094,0.512066,0.556384,0.506743,0.554735,bert,dnnpool
347,../output/GoEmotions/neural/066bf75e0a90f449f5...,{'config': {'data_config': {'raw_path': 'prepr...,GoEmotions,neural,066bf75e0a90f449f51743fda65e4f02,0.564646,0.613024,0.512732,0.562093,0.496617,0.557918,bert,dnnpool
