In [1]:
import json

# Load the raw results into memory. JSON is UTF-8 by specification, so the
# encoding is given explicitly instead of relying on the platform default.
with open('training_results.json', encoding='utf-8') as f:
    training_results = json.load(f)


In [2]:
import pandas as pd

def create_df_from_json(json_path):
    """Flatten a nested training-results JSON file into a DataFrame.

    The file must hold a mapping ``{dataset: {algorithm: metrics}}``; one row
    is produced for every (dataset, algorithm) pair.

    Parameters
    ----------
    json_path : str or path-like
        Location of the JSON file to read.

    Returns
    -------
    pandas.DataFrame
        Columns ``dataset`` and ``model_name`` followed by one column per
        entry of ``metric_sources`` (see body). The columns are present even
        when the file contains no results, so downstream column lookups do
        not fail on an empty input.

    Raises
    ------
    KeyError
        If an algorithm entry lacks one of the expected metric keys.
    """
    # Output column name -> key in the per-algorithm metrics dict.
    # Several columns are renamed relative to the JSON keys.
    metric_sources = {
        'mean_bal_acc': 'mean_bal_acc',
        'std_dev_bal_acc': 'std_dev_bal_acc',
        'min_bal_acc': 'min',
        'max_bal_acc': 'max',
        'accuracies': 'accuracies',
        'n_iter': 'n_iter',
        'test_size': 'test_size',
        'train_costs': 'train_costs',
        'training_times': 'training_times',
        'avg_training_time': 'avg_training_time',
        'std_dev_training_time': 'std_dev_training_time',
        'min_training_time': 'min_training_time',
        'max_training_time': 'max_training_time',
        'train_betas': 'iteration_betas',
    }
    columns = ['dataset', 'model_name', *metric_sources]

    # JSON is UTF-8 by specification; do not depend on the locale's default.
    with open(json_path, encoding='utf-8') as f:
        training_results = json.load(f)

    rows = [
        {
            'dataset': dataset,
            'model_name': algorithm,
            **{column: metrics[key] for column, key in metric_sources.items()},
        }
        for dataset, results in training_results.items()
        for algorithm, metrics in results.items()
    ]

    # Passing `columns` keeps the schema stable when `rows` is empty.
    return pd.DataFrame(rows, columns=columns)

In [3]:
# Build the flat results table (one row per dataset/algorithm pair) from the JSON file.
df = create_df_from_json('training_results.json')


In [4]:
# Summarise the individual training times per model.
# `explode` yields one row per element of each `training_times` entry but
# leaves the column with object dtype, so it is cast to float before
# aggregating (assumes every element is numeric — the stats below require it).
df_exploded1 = (
    df[['model_name', 'training_times']]
    .explode('training_times')
    .astype({'training_times': float})
    .groupby('model_name')
    .agg({'training_times': ['mean', 'std', 'min', 'max']})
    .reset_index()
    # Fastest model (lowest mean training time) first.
    .sort_values(('training_times', 'mean'))
)
df_exploded1


Unnamed: 0_level_0,model_name,training_times,training_times,training_times,training_times
Unnamed: 0_level_1,Unnamed: 1_level_1,mean,std,min,max
2,SGD,3.449062,6.356242,0.023609,21.72336
0,ADAM,6.901837,5.662733,0.921828,21.442195
1,IWLS,65.174553,114.273382,0.049723,437.69415


In [9]:
# Summarise, per model, the length of each exploded `train_betas` element
# (presumably the number of epochs a run took — confirm against the trainer).
df_exploded2 = (
    df.explode('train_betas')
    .assign(train_epochs=lambda frame: frame['train_betas'].apply(len))
    .groupby('model_name')
    .agg({'train_epochs': ['mean', 'std', 'min', 'max']})
    .reset_index()
    # Model with the lowest mean `train_epochs` first.
    .sort_values(('train_epochs', 'mean'))
)
df_exploded2


Unnamed: 0_level_0,model_name,train_epochs,train_epochs,train_epochs,train_epochs
Unnamed: 0_level_1,Unnamed: 1_level_1,mean,std,min,max
1,IWLS,44.822222,42.243498,11,144
2,SGD,122.6,135.545095,8,416
0,ADAM,235.044444,137.453621,97,501


In [10]:
# Combine the training-time and epoch summaries into a single table, keeping
# only models present in both (inner join on `model_name`).
df_exploded = pd.merge(
    df_exploded1,
    df_exploded2,
    how='inner',
    on=['model_name'],
)
df_exploded

  df_exploded = df_exploded1.merge(df_exploded2, on=['model_name'], how='inner')


Unnamed: 0_level_0,model_name,training_times,training_times,training_times,training_times,train_epochs,train_epochs,train_epochs,train_epochs
Unnamed: 0_level_1,Unnamed: 1_level_1,mean,std,min,max,mean,std,min,max
0,SGD,3.449062,6.356242,0.023609,21.72336,122.6,135.545095,8,416
1,ADAM,6.901837,5.662733,0.921828,21.442195,235.044444,137.453621,97,501
2,IWLS,65.174553,114.273382,0.049723,437.69415,44.822222,42.243498,11,144
