In [1]:
import pickle
import os
from collections import defaultdict

In [2]:
# Result directories, given relative to the notebook's working directory.
# Each one is listed with os.listdir and then parsed by build_results.
agg_dir = 'gp_results/scaling_agg'
ind_dir = 'gp_results/scaling_ind'
fs_dir = 'gp_results/feature_selection_scale_normalize'
full_dir = 'gp_results/full_model/all_features'

In [3]:
# Entry names found in each result directory, in whatever order
# os.listdir returns them.
agg_files, ind_files, fs_files, full_files = (
    os.listdir(result_dir)
    for result_dir in (agg_dir, ind_dir, fs_dir, full_dir)
)

In [4]:
def rec_dd():
    """Return a ``defaultdict`` whose default factory is ``rec_dd`` itself.

    Looking up a missing key therefore creates another such dictionary, so
    nested paths like ``d['a']['b']['c']`` can be read or assigned without
    building the intermediate levels first.
    """
    nested = defaultdict(rec_dd)
    return nested

def build_results(directory, files):
    """Collect the average MSE / ROC stored in each pickled result file.

    File stems are expected to look like ``<model>_scaling_<scaling>``. A stem
    that does not contain exactly one ``_scaling_`` separator is treated as an
    unscaled result whose model name is the whole stem. Only results whose
    scaling is ``'scale_normalize'`` or empty are kept.

    Parameters
    ----------
    directory : str
        Directory that contains the result files.
    files : iterable of str
        Entry names inside ``directory`` (e.g. the output of ``os.listdir``).

    Returns
    -------
    models : defaultdict
        ``rec_dd()`` mapping model name ->
        ``(results['Avg. mse'], results['Avg. roc'])``.
    best : list of tuple
        ``(model, (mse, roc))`` pairs sorted ascending by the ``(mse, roc)``
        tuple, i.e. lowest average MSE first; equal MSEs are ordered by
        ascending ROC.

    Raises
    ------
    KeyError
        If a kept result lacks the ``'Avg. mse'`` or ``'Avg. roc'`` key.
    OSError, pickle.UnpicklingError
        Propagated from ``open`` / ``pickle.load`` for unreadable files.
    """
    models = rec_dd()
    # os.listdir gives no ordering guarantee; iterate in sorted order so the
    # insertion order of `models` (and tie order in `best`) is reproducible.
    for file in sorted(files):
        path = os.path.join(directory, file)
        # Skip hidden entries (.DS_Store, .ipynb_checkpoints, ...) and
        # sub-directories instead of crashing while trying to unpickle them.
        if file.startswith('.') or os.path.isdir(path):
            continue

        filename = os.path.splitext(file)[0]
        # Slightly different naming conventions for non-scaled results:
        # those have no '_scaling_<scaling>' suffix.
        parts = filename.split('_scaling_')
        if len(parts) == 2:
            model, scaling = parts
        else:
            model, scaling = filename, ''

        # Decide before loading, so discarded files are never deserialized.
        if scaling not in ('scale_normalize', ''):
            continue

        # NOTE(security): pickle.load can execute arbitrary code; only point
        # this function at result directories you trust.
        with open(path, 'rb') as f:
            results = pickle.load(f)
        models[model] = (results['Avg. mse'], results['Avg. roc'])

    best = sorted(models.items(), key=lambda item: item[1])
    return models, best

In [5]:
# Parse each result directory into a (model -> (avg. MSE, avg. ROC)) mapping
# plus a list of those entries ranked by ascending (MSE, ROC).
agg_models, agg_best = build_results(agg_dir, agg_files)
ind_models, ind_best = build_results(ind_dir, ind_files)
fs_models, fs_best = build_results(fs_dir, fs_files)
full_models, full_best = build_results(full_dir, full_files)

In [6]:
# Model name -> (avg. MSE, avg. ROC) for the results read from agg_dir.
agg_models

defaultdict(<function __main__.rec_dd()>,
            {'Matern52_agg_cont_share': (0.05749827869031793,
              0.5529691395635667),
             'Matern52_agg_cont_share_binned': (0.05876195512020229,
              0.6533239962651727),
             'Matern52_agg_disc_share': (0.06064962982502218,
              0.5796146592199224),
             'Matern52_agg_disc_share_binned': (0.06673681794424709,
              0.5603174603174603),
             'RBF_agg_cont_share': (0.059157318533241875, 0.5889441562622676),
             'RBF_agg_cont_share_binned': (0.06090645879157135,
              0.6438001867413632),
             'RBF_agg_disc_share': (0.06224310773357204, 0.5729739096186465),
             'RBF_agg_disc_share_binned': (0.06878995714555314,
              0.5603174603174603)})

In [7]:
# (model, (avg. MSE, avg. ROC)) pairs from agg_dir, lowest MSE first.
agg_best

[('Matern52_agg_cont_share', (0.05749827869031793, 0.5529691395635667)),
 ('Matern52_agg_cont_share_binned', (0.05876195512020229, 0.6533239962651727)),
 ('RBF_agg_cont_share', (0.059157318533241875, 0.5889441562622676)),
 ('Matern52_agg_disc_share', (0.06064962982502218, 0.5796146592199224)),
 ('RBF_agg_cont_share_binned', (0.06090645879157135, 0.6438001867413632)),
 ('RBF_agg_disc_share', (0.06224310773357204, 0.5729739096186465)),
 ('Matern52_agg_disc_share_binned', (0.06673681794424709, 0.5603174603174603)),
 ('RBF_agg_disc_share_binned', (0.06878995714555314, 0.5603174603174603))]

In [8]:
# Model name -> (avg. MSE, avg. ROC) for the results read from ind_dir.
ind_models

defaultdict(<function __main__.rec_dd()>,
            {'Matern52_ind_cont_share': (0.0627665777019118,
              0.5262861811391223),
             'Matern52_ind_cont_share_binned': (0.051557931692521944,
              0.6907952746315917),
             'Matern52_ind_disc_share': (0.06345882214894141,
              0.5040343915343916),
             'Matern52_ind_disc_share_binned': (0.06874470531204888,
              0.48642512077294686),
             'RBF_ind_cont_share': (0.06172024845399351, 0.5445540730912247),
             'RBF_ind_cont_share_binned': (0.053363341577731906,
              0.6808918929890797),
             'RBF_ind_disc_share': (0.06240509320088754, 0.5232229675495929),
             'RBF_ind_disc_share_binned': (0.07205504928064727,
              0.5584200056834328)})

In [9]:
# (model, (avg. MSE, avg. ROC)) pairs from ind_dir, lowest MSE first.
ind_best

[('Matern52_ind_cont_share_binned',
  (0.051557931692521944, 0.6907952746315917)),
 ('RBF_ind_cont_share_binned', (0.053363341577731906, 0.6808918929890797)),
 ('RBF_ind_cont_share', (0.06172024845399351, 0.5445540730912247)),
 ('RBF_ind_disc_share', (0.06240509320088754, 0.5232229675495929)),
 ('Matern52_ind_cont_share', (0.0627665777019118, 0.5262861811391223)),
 ('Matern52_ind_disc_share', (0.06345882214894141, 0.5040343915343916)),
 ('Matern52_ind_disc_share_binned',
  (0.06874470531204888, 0.48642512077294686)),
 ('RBF_ind_disc_share_binned', (0.07205504928064727, 0.5584200056834328))]

In [10]:
# Model name -> (avg. MSE, avg. ROC) for the results read from fs_dir.
fs_models

defaultdict(<function __main__.rec_dd()>,
            {'Matern52_agg_cont_share': (0.05613452013167133,
              0.5391547205517794),
             'Matern52_agg_cont_share_binned': (0.06223876971170592,
              0.4472922502334267),
             'Matern52_agg_disc_share': (0.045098105571889216,
              0.5978984851004975),
             'Matern52_agg_disc_share_binned': (0.049296998909970725,
              0.542530345471522),
             'Matern52_ind_cont_share': (0.057597840802190034,
              0.5124143692564745),
             'Matern52_ind_cont_share_binned': (0.04430813873419949,
              0.5644638197835128),
             'Matern52_ind_disc_share': (0.04972578512503397,
              0.5741708437761069),
             'Matern52_ind_disc_share_binned': (0.034844428308452975,
              0.6014659197012138),
             'RBF_agg_cont_share': (0.055038718225615915, 0.5404173468144057),
             'RBF_agg_cont_share_binned': (0.06248987181772381,
        

In [11]:
# (model, (avg. MSE, avg. ROC)) pairs from fs_dir, lowest MSE first.
fs_best

[('Matern52_ind_disc_share_binned',
  (0.034844428308452975, 0.6014659197012138)),
 ('RBF_ind_disc_share_binned', (0.03550797768372033, 0.6064909600203718)),
 ('RBF_ind_cont_share_binned', (0.04298313462690708, 0.5821108786070424)),
 ('Matern52_ind_cont_share_binned', (0.04430813873419949, 0.5644638197835128)),
 ('RBF_agg_disc_share', (0.04459332906322678, 0.5978984851004975)),
 ('Matern52_agg_disc_share', (0.045098105571889216, 0.5978984851004975)),
 ('RBF_agg_disc_share_binned', (0.049119964970333475, 0.542530345471522)),
 ('Matern52_agg_disc_share_binned', (0.049296998909970725, 0.542530345471522)),
 ('Matern52_ind_disc_share', (0.04972578512503397, 0.5741708437761069)),
 ('RBF_ind_disc_share', (0.05145366220637594, 0.5691708437761069)),
 ('RBF_agg_cont_share', (0.055038718225615915, 0.5404173468144057)),
 ('Matern52_agg_cont_share', (0.05613452013167133, 0.5391547205517794)),
 ('Matern52_ind_cont_share', (0.057597840802190034, 0.5124143692564745)),
 ('RBF_ind_cont_share', (0.060034