# MCMC tutorial publication results and plotting
Part of *Bayesian neural networks via MCMC: a Python-based tutorial*.

This notebook contains the code used to generate the results and plots for the paper.

### Imports

In [1]:
# Enable IPython's autoreload extension in mode 2 so that edits to imported
# modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [16]:
import os
import numpy as np
import pandas as pd
import xarray as xr
import glob
from ipywidgets import interact, fixed, widgets

from publication_results.models.mcmc import MCMC

# All relative paths used below ('publication_results/results', 'data/...') are
# resolved against the project root. It defaults to '/project' but can be
# overridden with the MCMC_PROJECT_ROOT environment variable so the notebook
# also runs from checkouts that live elsewhere.
os.chdir(os.environ.get('MCMC_PROJECT_ROOT', '/project'))


## Load the results from the MCMC runs
Load the results stored in the NetCDF files.

In [14]:
# Model types and datasets for which result files are looked up.
models = ['linear','bnn']
data = ['Sunspot', 'Abalone', 'Iris', 'Ionosphere']
results_dir = os.path.join('publication_results','results')

# results[model][dataset] -> xarray Dataset with the per-file results
# concatenated along a 'chain' dimension.
results = {mm: {dd: None for dd in data} for mm in models}
for this_model in models:
    mod_results_dir = os.path.join(results_dir,'{}_model'.format(this_model))
    for this_data in data:
        results_pattern = os.path.join(mod_results_dir,'mcmc_{}_*.nc'.format(this_data))
        # glob returns matches in arbitrary (filesystem-dependent) order; sort
        # them so the chain IDs assigned below map to the same files on every run.
        results_files = sorted(glob.glob(results_pattern))
        print('Found {} results files for {} model on {} data'.format(len(results_files),this_model,this_data))
        if not results_files:
            # Fail here with the offending pattern rather than deep inside xarray.
            raise FileNotFoundError(
                'No results files matching {!r}'.format(results_pattern)
            )
        tmp_data = xr.open_mfdataset(results_files, combine='nested',concat_dim='chain')
        # re-ID the chains as 0..n_files-1 (in sorted file order)
        tmp_data.coords['chain'] = np.arange(len(results_files))
        results[this_model][this_data] = tmp_data

Found 5 results files for linear model on Sunspot data
Found 5 results files for linear model on Abalone data
Found 5 results files for linear model on Iris data
Found 5 results files for linear model on Ionosphere data
Found 5 results files for bnn model on Sunspot data
Found 5 results files for bnn model on Abalone data
Found 5 results files for bnn model on Iris data
Found 5 results files for bnn model on Ionosphere data


In [22]:
# Read the train/test text files of every dataset and split each table into
# features (all columns but the last) and target (the last column).
x_train, y_train, x_test, y_test = {}, {}, {}, {}

for this_data in data:
    train_data = np.loadtxt("data/{}/train.txt".format(this_data))
    test_data = np.loadtxt("data/{}/test.txt".format(this_data))

    x_train[this_data], y_train[this_data] = train_data[:,:-1], train_data[:,-1]
    x_test[this_data], y_test[this_data] = test_data[:,:-1], test_data[:,-1]

# Task type of each dataset, used to pick the matching performance metric.
data_case = dict.fromkeys(('Sunspot', 'Abalone'), 'regression')
data_case.update(dict.fromkeys(('Iris', 'Ionosphere'), 'classification'))

## Evaluate the model performance
Calculate the model performance metrics for the MCMC samples.

In [30]:
# Summarise the predictive performance of every (model, dataset) pair over all
# stored predictions: RMSE for regression datasets, accuracy for classification
# datasets. The table reports the mean and standard deviation of the metric.

# Metric per task type. The MCMC methods are called unbound, with None passed
# in place of `self`.
metric_for_case = {'regression': MCMC.rmse, 'classification': MCMC.accuracy}

perf_records = []
for this_model in models:
    for this_data in data:
        this_case = data_case[this_data]
        if this_case not in metric_for_case:
            # Without this guard an unknown task type would silently reuse the
            # metrics computed for the previous dataset.
            raise ValueError(
                'Unknown data case {!r} for {} data'.format(this_case, this_data)
            )
        metric = metric_for_case[this_case]

        this_results = results[this_model][this_data]
        train_dim = this_results.coords['train_idx'].shape[0]
        test_dim = this_results.coords['test_idx'].shape[0]
        # Collapse all leading dimensions so each row holds one full set of
        # predictions. NOTE(review): assumes train_idx/test_idx is the last
        # dimension of train_pred/test_pred - confirm against the writer.
        train_pred = this_results.train_pred.values.reshape((-1,train_dim))
        test_pred = this_results.test_pred.values.reshape((-1,test_dim))

        # One metric value per row of predictions.
        train_perf = np.array([metric(None, row_pred, y_train[this_data]) for row_pred in train_pred])
        test_perf = np.array([metric(None, row_pred, y_test[this_data]) for row_pred in test_pred])

        perf_records.append(
            {
                'train_mean':train_perf.mean(),'test_mean':test_perf.mean(),
                'train_std':train_perf.std(),'test_std':test_perf.std(),
                'model':this_model,'data':this_data
            }
        )
results_table = pd.DataFrame(perf_records)
results_table

Unnamed: 0,train_mean,test_mean,train_std,test_std,model,data
0,0.025536,0.021937,0.013265,0.011644,linear,Sunspot
1,0.086159,0.086281,0.007322,0.007165,linear,Abalone
2,89.541917,84.248143,3.681478,6.331859,linear,Iris
3,88.739853,85.393912,1.455964,2.266621,linear,Ionosphere
4,0.026671,0.026011,0.004681,0.004635,bnn,Sunspot
5,0.079517,0.079715,0.001996,0.001845,bnn,Abalone
6,98.062019,95.426151,0.660752,1.019753,bnn,Iris
7,99.631869,92.668491,0.356123,1.890454,bnn,Ionosphere
