In [None]:
import numpy as np
import os
from tqdm import tqdm
from matplotlib import pyplot as plt
import matplotlib
%matplotlib inline
from ovejero import model_trainer, data_tools, bnn_inference

# Modifies the paths in the config to agree with the paths being used on the current computer.
def recursive_str_checker(cfg_dict):
    """
    Patch, in place, the string values of a (possibly nested) config dictionary so that
    paths written for the original author's machine point at the local checkout.

    Parameters:
        cfg_dict (dict): Config dictionary to modify. In every string value each
            occurrence of '/home/swagnercarena/ovejero/' is replaced by root_path.
            Dict values are processed recursively; all other values are left untouched.

    Returns:
        None. The dictionary is modified in place.

    Notes:
        root_path is read from the notebook's global namespace at call time, so it
        must be defined before this function is first called.
    """
    old_root = '/home/swagnercarena/ovejero/'
    for name, value in cfg_dict.items():
        if isinstance(value, dict):
            # Nested section of the config: descend into it.
            recursive_str_checker(value)
        elif isinstance(value, str):
            # Re-assigning an existing key while iterating is safe (the dict size is unchanged).
            cfg_dict[name] = value.replace(old_root, root_path)

# Testing the Performance of Trained Models

__Author:__ Sebastian Wagner-Carena

__Last Run:__ 07/27/2020

__Goals:__ Inspect how the different BNN model types and dropout rates impact performance metrics.

__Before running this notebook:__ You will have to download and unzip the bnn validation samples that can be found here (TODO). Because we already have the BNN samples, the model weights are not necessary. If you would rather regenerate the results from the weights, simply download the weights and not the samples. This pipeline will automatically rerun the sampling and save it to the specified path. I would not recommend this without a GPU (it could easily take over an hour on a CPU).

## Diagonal Posterior Calibration and MAD Comparison

First, we want to compare the calibration and median absolute deviation (MAD) for our diagonal BNN models. Here, we will look at a 30%, 10%, and 5% dropout model. The first step is to load all of the inference classes required to make our plots.

In [None]:
# First specify the config path.
# The repository root is taken to be the parent of the current working directory. The previous
# os.getcwd()[:-5] slice only gave the right answer when the working directory name was exactly
# five characters long; any other folder name silently produced a broken path. The trailing
# separator is kept because every path below is built by plain string concatenation.
root_path = os.path.join(os.path.dirname(os.getcwd()), '')
config_path_nn1_30 = root_path + 'configs/nn1_hr.json'
config_path_nn1_10 = root_path + 'configs/nn1.json'
config_path_nn1_5 = root_path + 'configs/nn1_lr.json'

# Load the configs and then fix the paths.
cfg_nn1_30 = model_trainer.load_config(config_path_nn1_30)
cfg_nn1_10 = model_trainer.load_config(config_path_nn1_10)
cfg_nn1_5 = model_trainer.load_config(config_path_nn1_5)

# recursive_str_checker edits each config in place and reads root_path from the notebook globals.
recursive_str_checker(cfg_nn1_30)
recursive_str_checker(cfg_nn1_10)
recursive_str_checker(cfg_nn1_5)

# We don't need the model, we already have the samples
lite_class = True

# The InferenceClass will do all the heavy lifting of preparing the model from the configuration file,
# initializing the validation dataset, and providing outputs correctly marginalized over the BNN uncertainties.
bnn_infer_nn1_30 = bnn_inference.InferenceClass(cfg_nn1_30,lite_class=lite_class)
bnn_infer_nn1_10 = bnn_inference.InferenceClass(cfg_nn1_10,lite_class=lite_class)
bnn_infer_nn1_5 = bnn_inference.InferenceClass(cfg_nn1_5,lite_class=lite_class)

# Now we just have to ask the InferenceClass to spin up some samples from our BNN. All of these samples should
# already be saved.
num_samples = 1000
save_path_nn1_30 = root_path + 'validation_results/nn1_hr_samps/'
save_path_nn1_10 = root_path + 'validation_results/nn1_samps/'
save_path_nn1_5 = root_path + 'validation_results/nn1_lr_samps/'

bnn_infer_nn1_30.gen_samples(num_samples,save_path_nn1_30)
bnn_infer_nn1_10.gen_samples(num_samples,save_path_nn1_10)
bnn_infer_nn1_5.gen_samples(num_samples,save_path_nn1_5)

Report the MAD for each of our models

In [None]:
# For each diagonal model: print its label, the statistics from report_stats(), and the median
# absolute error on lens_mass_theta_E computed after exponentiating predictions and truths.
# NOTE(review): the exp() and the [-1] index presumably mean theta_E is learned in log space and
# stored as the last parameter -- confirm against the config.
diag_reports = (
    ('Diagonal 30% Model', bnn_infer_nn1_30),
    ('Diagonal 10% Model', bnn_infer_nn1_10),
    ('Diagonal 5% Model', bnn_infer_nn1_5),
)
for report_idx, (report_label, report_infer) in enumerate(diag_reports):
    if report_idx:
        # Blank line between consecutive model reports.
        print('')
    print(report_label)
    report_infer.report_stats()
    theta_e_abs_err = np.abs(np.exp(report_infer.y_pred) - np.exp(report_infer.y_test))
    print('lens_mass_theta_E:', np.median(theta_e_abs_err, axis=0)[-1])

# Remove the loop temporaries so they do not keep extra references to the inference objects alive.
del diag_reports, report_idx, report_label, report_infer, theta_e_abs_err

In the paper, we're interested in comparing the aleatoric and total uncertainty for the Diagonal 30% model.

In [None]:
# Compare the aleatoric and total uncertainty for the diagonal 30% dropout model and save the
# first returned figure.
# For apj, title was set to False.
matplotlib.rcParams.update({'font.size': 11})
figures = bnn_infer_nn1_30.comp_al_ep_unc(norm_diagonal=False,title=False)
# savefig does not create missing directories, so make sure the output folder exists first.
os.makedirs('figures', exist_ok=True)
figures[0].savefig('figures/diag_hr_cov_comp.pdf')

Finally, we can make the calibration plot with our three dropout rates.

In [None]:
# Overlay the calibration curves of the three diagonal models on a single figure.
# NOTE(review): the first call receives the full color list and later calls a shifted slice, so
# presumably entry 0 colors the perfect-calibration line and entry 1 the model curve -- confirm
# against plot_calibration.
color_map = ['#000000','#1b9e77','#32856c','#3d6b5d']
ls_list =['-','--',':']
n_perc_points = 30

matplotlib.rcParams.update({'font.size': 13})
fig = bnn_infer_nn1_5.plot_calibration(color_map=color_map,n_perc_points=n_perc_points,show_plot=False,ls=ls_list[0])
fig = bnn_infer_nn1_10.plot_calibration(color_map=color_map[1:],n_perc_points=n_perc_points,figure=fig,
                                        show_plot=False,ls=ls_list[1])
# The legend and (empty) title are only passed on the last call, once every curve has been drawn.
fig = bnn_infer_nn1_30.plot_calibration(color_map=color_map[2:],n_perc_points=n_perc_points,figure=fig,show_plot=False,
                                        legend=['Perfect Calibration',r'Dropout = 5%',r'Dropout = 10%',
                                                r'Dropout = 30%'],
                                        title='',ls=ls_list[2])

save_cal_path = 'figures/diag_cal.pdf'
# savefig does not create missing directories, so make sure the output folder exists first.
os.makedirs(os.path.dirname(save_cal_path), exist_ok=True)
plt.savefig(save_cal_path)
plt.show()

Since each inference class loads the model weights into memory, we're going to want to delete the ones we're not using.

In [None]:
# Drop the 10% and 5% diagonal inference objects; bnn_infer_nn1_30 is kept for later cells.
del bnn_infer_nn1_10, bnn_infer_nn1_5

## Full Posterior Calibration and MAD Comparison

Focusing now on the full posterior, we can once again load our validation samples for our 1%, 0.5%, 0.1%, and 0% dropout models

In [None]:
# Config files for the four full-posterior models (root_path comes from the diagonal loading cell).
config_path_nn2_1, config_path_nn2_05, config_path_nn2_01, config_path_nn2_0 = (
    root_path + 'configs/nn2.json',
    root_path + 'configs/nn2_lr.json',
    root_path + 'configs/nn2_slr.json',
    root_path + 'configs/nn2_zr.json',
)

# Read every config, then rewrite the stored paths in place so they match this machine.
cfg_nn2_1, cfg_nn2_05, cfg_nn2_01, cfg_nn2_0 = map(
    model_trainer.load_config,
    (config_path_nn2_1, config_path_nn2_05, config_path_nn2_01, config_path_nn2_0),
)
for loaded_cfg in (cfg_nn2_1, cfg_nn2_05, cfg_nn2_01, cfg_nn2_0):
    recursive_str_checker(loaded_cfg)
del loaded_cfg

# One InferenceClass per model, built in the same order as the configs above. lite_class is reused
# from the diagonal loading cell.
bnn_infer_nn2_1, bnn_infer_nn2_05, bnn_infer_nn2_01, bnn_infer_nn2_0 = (
    bnn_inference.InferenceClass(model_cfg, lite_class=lite_class)
    for model_cfg in (cfg_nn2_1, cfg_nn2_05, cfg_nn2_01, cfg_nn2_0)
)

# Ask each InferenceClass for its BNN samples. All of these samples should already be saved at
# the paths below.
num_samples = 1000
save_path_nn2_1, save_path_nn2_05, save_path_nn2_01, save_path_nn2_0 = (
    root_path + 'validation_results/nn2_samps/',
    root_path + 'validation_results/nn2_lr_samps/',
    root_path + 'validation_results/nn2_slr_samps/',
    root_path + 'validation_results/nn2_zr_samps/',
)

bnn_infer_nn2_1.gen_samples(num_samples, save_path_nn2_1)
bnn_infer_nn2_05.gen_samples(num_samples, save_path_nn2_05)
bnn_infer_nn2_01.gen_samples(num_samples, save_path_nn2_01)
bnn_infer_nn2_0.gen_samples(num_samples, save_path_nn2_0)

Print the MAD performance for our parameters on the full posterior BNNs

In [None]:
# For each full-posterior model: print its label, the statistics from report_stats(), and the
# median absolute error on lens_mass_theta_E computed after exponentiating predictions and truths.
# NOTE(review): the exp() and the [-1] index presumably mean theta_E is learned in log space and
# stored as the last parameter -- confirm against the config.
full_reports = (
    ('Full 1% Model', bnn_infer_nn2_1),
    ('Full 0.5% Model', bnn_infer_nn2_05),
    ('Full 0.1% Model', bnn_infer_nn2_01),
    ('Full 0% Model', bnn_infer_nn2_0),
)
for report_idx, (report_label, report_infer) in enumerate(full_reports):
    if report_idx:
        # Blank line between consecutive model reports.
        print('')
    print(report_label)
    report_infer.report_stats()
    theta_e_abs_err = np.abs(np.exp(report_infer.y_pred) - np.exp(report_infer.y_test))
    print('lens_mass_theta_E:', np.median(theta_e_abs_err, axis=0)[-1])

# Remove the loop temporaries so they do not keep extra references to the inference objects alive.
del full_reports, report_idx, report_label, report_infer, theta_e_abs_err

In the paper, we're interested in comparing the aleatoric and total uncertainty for the Full 0.1% model.

In [None]:
# Compare the aleatoric and total uncertainty for the full-posterior 0.1% dropout model and save
# the first returned figure.
matplotlib.rcParams.update({'font.size': 11})
figures = bnn_infer_nn2_01.comp_al_ep_unc(norm_diagonal=False,title=False)
# savefig does not create missing directories, so make sure the output folder exists first.
os.makedirs('figures', exist_ok=True)
figures[0].savefig('figures/full_slr_cov_comp.pdf')

For the full models, we can make the same calibration plot comparison as before.

In [None]:
# Overlay the calibration curves of the four full-posterior models on a single figure.
# NOTE(review): the first call receives the full color list and later calls a shifted slice, so
# presumably entry 0 colors the perfect-calibration line and entry 1 the model curve -- confirm
# against plot_calibration.
color_map = ['#000000','#d95f02','#cc6b21','#c0753b','#b37c52']
ls_list =['-.','-','--',':']
n_perc_points = 30

matplotlib.rcParams.update({'font.size': 13})
fig = bnn_infer_nn2_0.plot_calibration(color_map=color_map,n_perc_points=n_perc_points,show_plot=False,
                                       ls=ls_list[0])
fig = bnn_infer_nn2_01.plot_calibration(color_map=color_map[1:],n_perc_points=n_perc_points,figure=fig,
                                        show_plot=False,ls=ls_list[1])
fig = bnn_infer_nn2_05.plot_calibration(color_map=color_map[2:],n_perc_points=n_perc_points,figure=fig,
                                        show_plot=False,ls=ls_list[2])
# The legend and (empty) title are only passed on the last call, once every curve has been drawn.
fig = bnn_infer_nn2_1.plot_calibration(color_map=color_map[3:],n_perc_points=n_perc_points,figure=fig,show_plot=False,
                                       legend=['Perfect Calibration',r'No Dropout',r'Dropout = 0.1%',
                                               r'Dropout = 0.5%',r'Dropout = 1%'],
                                       title='',ls=ls_list[3])

save_cal_path = 'figures/full_cal.pdf'
# savefig does not create missing directories, so make sure the output folder exists first.
os.makedirs(os.path.dirname(save_cal_path), exist_ok=True)
plt.savefig(save_cal_path)
plt.show()

Again, we will delete the models we will not be using anymore.

In [None]:
# Drop the 0%, 0.5% and 1% full-posterior inference objects; bnn_infer_nn2_01 is kept for later cells.
del bnn_infer_nn2_0, bnn_infer_nn2_05, bnn_infer_nn2_1

## GMM Posterior Calibration and MAD Comparison

Load all the GMM models

In [None]:
# Config files for the four GMM models (root_path comes from the diagonal loading cell).
config_path_nn3_1, config_path_nn3_05, config_path_nn3_01, config_path_nn3_0 = (
    root_path + 'configs/nn3.json',
    root_path + 'configs/nn3_lr.json',
    root_path + 'configs/nn3_slr.json',
    root_path + 'configs/nn3_zr.json',
)

# Read every config, then rewrite the stored paths in place so they match this machine.
cfg_nn3_1, cfg_nn3_05, cfg_nn3_01, cfg_nn3_0 = map(
    model_trainer.load_config,
    (config_path_nn3_1, config_path_nn3_05, config_path_nn3_01, config_path_nn3_0),
)
for loaded_cfg in (cfg_nn3_1, cfg_nn3_05, cfg_nn3_01, cfg_nn3_0):
    recursive_str_checker(loaded_cfg)
del loaded_cfg

# One InferenceClass per model, built in the same order as the configs above. lite_class is reused
# from the diagonal loading cell.
bnn_infer_nn3_1, bnn_infer_nn3_05, bnn_infer_nn3_01, bnn_infer_nn3_0 = (
    bnn_inference.InferenceClass(model_cfg, lite_class=lite_class)
    for model_cfg in (cfg_nn3_1, cfg_nn3_05, cfg_nn3_01, cfg_nn3_0)
)

# Ask each InferenceClass for its BNN samples. All of these samples should already be saved at
# the paths below.
num_samples = 1000
save_path_nn3_1, save_path_nn3_05, save_path_nn3_01, save_path_nn3_0 = (
    root_path + 'validation_results/nn3_samps/',
    root_path + 'validation_results/nn3_lr_samps/',
    root_path + 'validation_results/nn3_slr_samps/',
    root_path + 'validation_results/nn3_zr_samps/',
)

bnn_infer_nn3_1.gen_samples(num_samples, save_path_nn3_1)
bnn_infer_nn3_05.gen_samples(num_samples, save_path_nn3_05)
bnn_infer_nn3_01.gen_samples(num_samples, save_path_nn3_01)
bnn_infer_nn3_0.gen_samples(num_samples, save_path_nn3_0)

Print the MAD performance of the GMM models

In [None]:
# For each GMM model: print its label, the statistics from report_stats(), and the median
# absolute error on lens_mass_theta_E computed after exponentiating predictions and truths.
# NOTE(review): the exp() and the [-1] index presumably mean theta_E is learned in log space and
# stored as the last parameter -- confirm against the config.
gmm_reports = (
    ('GMM 1% Model', bnn_infer_nn3_1),
    ('GMM 0.5% Model', bnn_infer_nn3_05),
    ('GMM 0.1% Model', bnn_infer_nn3_01),
    ('GMM 0% Model', bnn_infer_nn3_0),
)
for report_idx, (report_label, report_infer) in enumerate(gmm_reports):
    if report_idx:
        # Blank line between consecutive model reports.
        print('')
    print(report_label)
    report_infer.report_stats()
    theta_e_abs_err = np.abs(np.exp(report_infer.y_pred) - np.exp(report_infer.y_test))
    print('lens_mass_theta_E:', np.median(theta_e_abs_err, axis=0)[-1])

# Remove the loop temporaries so they do not keep extra references to the inference objects alive.
del gmm_reports, report_idx, report_label, report_infer, theta_e_abs_err

For the GMM models, we can make the same calibration plot comparison as for the full and diagonal models

In [None]:
# Overlay the calibration curves of the four GMM models on a single figure.
# NOTE(review): the first call receives the full color list and later calls a shifted slice, so
# presumably entry 0 colors the perfect-calibration line and entry 1 the model curve -- confirm
# against plot_calibration.
color_map = ['#000000','#7570b3','#554fa6','#393299','#21198d']
ls_list =['-.','-','--',':']
# Defined here (same value as the diagonal and full calibration cells) so this cell no longer
# depends on n_perc_points leaking in from an earlier cell.
n_perc_points = 30

# NOTE(review): the diagonal and full calibration cells use font size 13; 11 is kept unchanged
# here -- confirm this difference is intended.
matplotlib.rcParams.update({'font.size': 11})
fig = bnn_infer_nn3_0.plot_calibration(color_map=color_map,n_perc_points=n_perc_points,show_plot=False,
                                       ls=ls_list[0])
fig = bnn_infer_nn3_01.plot_calibration(color_map=color_map[1:],n_perc_points=n_perc_points,figure=fig,
                                        show_plot=False,ls=ls_list[1])
fig = bnn_infer_nn3_05.plot_calibration(color_map=color_map[2:],n_perc_points=n_perc_points,figure=fig,
                                        show_plot=False,ls=ls_list[2])
# The legend and (empty) title are only passed on the last call, once every curve has been drawn.
fig = bnn_infer_nn3_1.plot_calibration(color_map=color_map[3:],n_perc_points=n_perc_points,figure=fig,
                                       show_plot=False,legend=['Perfect Calibration',r'No Dropout',r'Dropout = 0.1%',
                                                               r'Dropout = 0.5%',r'Dropout = 1%'],
                                       title='',ls=ls_list[3])

save_cal_path = 'figures/gmm_cal.pdf'
# savefig does not create missing directories, so make sure the output folder exists first.
os.makedirs(os.path.dirname(save_cal_path), exist_ok=True)
plt.savefig(save_cal_path)
plt.show()

Again, we will delete any models we're not using for memory conservation

In [None]:
# Drop the 0%, 0.5% and 1% GMM inference objects; bnn_infer_nn3_01 is kept for the final comparison.
del bnn_infer_nn3_0, bnn_infer_nn3_05, bnn_infer_nn3_1

## Comparison of Calibration for All Three Model Types

We already have all three models of interest loaded, so it's just a matter of doing the plotting

In [None]:
# Overlay the calibration curves of one model per posterior type (diagonal 30%, full 0.1%,
# GMM 0.1%) on a single figure. No line style is passed here, so plot_calibration's default is used.
color_map = ['#000000','#1b9e77','#d95f02','#7570b3','#e7298a']
n_perc_points = 30

matplotlib.rcParams.update({'font.size': 13})
fig = bnn_infer_nn1_30.plot_calibration(color_map=color_map,n_perc_points=n_perc_points,show_plot=False)
fig = bnn_infer_nn2_01.plot_calibration(color_map=color_map[1:],n_perc_points=n_perc_points,figure=fig,
                                        show_plot=False)
# The legend and (empty) title are only passed on the last call, once every curve has been drawn.
fig = bnn_infer_nn3_01.plot_calibration(color_map=color_map[2:],n_perc_points=n_perc_points,figure=fig,
                                        show_plot=False,legend=['Perfect Calibration',r'Diagonal Calibration (30%)',
                                                                r'Full Calibration (0.1%)',r'GMM Calibration (0.1%)'],
                                        title='')

save_cal_path = 'figures/all_cal.pdf'
# savefig does not create missing directories, so make sure the output folder exists first.
os.makedirs(os.path.dirname(save_cal_path), exist_ok=True)
plt.savefig(save_cal_path)
plt.show()