In [1]:
import sys
sys.path.append("../") 

In [2]:
from __future__ import division

import argparse, time, logging, random, math

import numpy as np
import mxnet as mx

from mxnet import gluon, nd
from mxnet import autograd as ag
from mxnet.gluon import nn
from mxnet.gluon.data.vision import transforms

In [3]:
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(0.,1.)
])

In [4]:
num_gpus = 1
model_ctx = mx.cpu()

num_workers = 0
batch_size = 128 
train_data = gluon.data.DataLoader(
    gluon.data.vision.MNIST(train=True).transform_first(transform),
    batch_size=batch_size, shuffle=True, last_batch='discard', num_workers=num_workers)

val_data = gluon.data.DataLoader(
    gluon.data.vision.MNIST(train=False).transform_first(transform),
    batch_size=batch_size, shuffle=False, num_workers=num_workers)

In [5]:
for X,y in train_data:
    print(X.shape)
    print(y.shape)
    break

(128, 1, 28, 28)
(128,)


In [6]:
model_ctx

cpu(0)

### Bayesian inference for MNIST

* [Stochastic Gradient Descent](#chapter1)
* [Stochastic Gradient Langevin Dynamics](#chapter2)
* [Bayes By Backprop](#chapter3)
* [Diagnostics](#chapter4)


# Stochastic Gradient Descent <a class="anchor" id="chapter1"></a>

In [7]:
import mxnet as mx
from mxnet import nd, autograd, gluon
hyper={'alpha':10.}
in_units=(1,28,28)
out_units=10

In [8]:
import mxnet as mx
from hamiltonian.inference.sgd import sgd
from hamiltonian.models.softmax import lenet

model=lenet(hyper,in_units,out_units,ctx=model_ctx)
inference=sgd(model,model.par,step_size=0.1,ctx=model_ctx)

In [9]:
import hamiltonian
import importlib

try:
    importlib.reload(hamiltonian.models.lenet)
    importlib.reload(hamiltonian.inference.sgd)
    importlib.reload(hamiltonian.inference.base)
    print('modules re-loaded')
except:
    print('no modules loaded yet')


no modules loaded yet


In [10]:
train_sgd=False
num_epochs=100

if train_sgd:
    par,loss=inference.fit(epochs=num_epochs,batch_size=batch_size,data_loader=train_data,verbose=True)

    fig=plt.figure(figsize=[5,5])
    plt.plot(loss,color='blue',lw=3)
    plt.xlabel('Epoch', size=18)
    plt.ylabel('Loss', size=18)
    plt.title('SGD Lenet MNIST', size=18)
    plt.xticks(size=14)
    plt.yticks(size=14)
    plt.savefig('sgd_lenet.pdf', bbox_inches='tight')
    model.net.save_parameters('../scripts/results/lenet/lenet_sgd_'+str(num_epochs)+'_epochs.params')
else:
    model.net.load_parameters('../scripts/results/lenet/lenet_sgd_'+str(num_epochs)+'_epochs.params',ctx=model_ctx)
    par=dict()
    for name,gluon_par in model.net.collect_params().items():
        par.update({name:gluon_par.data()})
    

In [11]:
model.net.collect_params()

{'0.weight': Parameter (shape=(6, 1, 5, 5), dtype=<class 'numpy.float32'>),
 '0.bias': Parameter (shape=(6,), dtype=<class 'numpy.float32'>),
 '2.weight': Parameter (shape=(16, 6, 5, 5), dtype=<class 'numpy.float32'>),
 '2.bias': Parameter (shape=(16,), dtype=<class 'numpy.float32'>),
 '4.weight': Parameter (shape=(120, 400), dtype=float32),
 '4.bias': Parameter (shape=(120,), dtype=float32),
 '5.weight': Parameter (shape=(84, 120), dtype=float32),
 '5.bias': Parameter (shape=(84,), dtype=float32),
 '6.weight': Parameter (shape=(10, 84), dtype=float32),
 '6.bias': Parameter (shape=(10,), dtype=float32)}

In [12]:
total_samples,total_labels,log_like=inference.predict(par,batch_size=batch_size,num_samples=100,data_loader=val_data)

In [15]:
y_hat=np.quantile(total_samples,.5,axis=0)

In [16]:
from sklearn.metrics import classification_report

print(classification_report(np.int32(total_labels),np.int32(y_hat)))

              precision    recall  f1-score   support

           0       0.97      0.21      0.35       979
           1       0.98      0.35      0.51      1133
           2       0.69      0.59      0.64      1030
           3       0.48      0.77      0.60      1008
           4       0.49      0.52      0.50       980
           5       0.47      0.62      0.53       890
           6       0.69      0.66      0.67       956
           7       0.81      0.51      0.63      1027
           8       0.38      0.87      0.53       973
           9       0.50      0.43      0.46      1008

    accuracy                           0.55      9984
   macro avg       0.65      0.55      0.54      9984
weighted avg       0.65      0.55      0.54      9984



# Stochastic Gradient Langevin Dynamics <a class="anchor" id="chapter2"></a>

In [34]:
from hamiltonian.inference.sgld import sgld
from hamiltonian.models.softmax import lenet

model=lenet(hyper,in_units,out_units,ctx=model_ctx)
inference=sgld(model,model.par,step_size=0.1,ctx=model_ctx)

SyntaxError: invalid syntax (sgld.py, line 134)

In [18]:
import hamiltonian
import importlib

try:
    importlib.reload(hamiltonian.models.softmax)
    importlib.reload(hamiltonian.inference.sgld)
    print('modules re-loaded')
except:
    print('no modules loaded yet')

no modules loaded yet


In [19]:
import matplotlib.pyplot as plt
import seaborn as sns
import glob

train_sgld=False
num_epochs=250

if train_sgld:
    loss,posterior_samples=inference.sample(epochs=num_epochs,batch_size=batch_size,
                                data_loader=train_data,
                                verbose=True,chain_name='chain_nonhierarchical')

    plt.rcParams['figure.dpi'] = 360
    sns.set_style("whitegrid")
    fig=plt.figure(figsize=[5,5])
    plt.plot(loss[0],color='blue',lw=3)
    plt.plot(loss[1],color='red',lw=3)
    plt.xlabel('Epoch', size=18)
    plt.ylabel('Loss', size=18)
    plt.title('SGLD Lenet MNIST', size=18)
    plt.xticks(size=14)
    plt.yticks(size=14)
    plt.savefig('sgld_lenet.pdf', bbox_inches='tight')
else:
    chain1=glob.glob("../scripts/results/lenet/chain_nonhierarchical_0_1_sgld*")
    chain2=glob.glob("../scripts/results/lenet/chain_nonhierarchical_0_sgld*")
    chain1.
    posterior_samples=[chain1,chain2]

In [33]:
posterior_samples_flat=[item for sublist in posterior_samples for item in sublist]

In [30]:
total_samples,total_labels,log_like=inference.predict(posterior_samples_flat,5,data_loader=val_data)

AttributeError: 'list' object has no attribute 'keys'

In [31]:
from sklearn.metrics import classification_report
posterior_samples

y_hat=np.quantile(total_samples,.5,axis=0)

print(classification_report(np.int32(total_labels),np.int32(y_hat)))

              precision    recall  f1-score   support

           0       0.98      0.15      0.26       979
           1       0.99      0.27      0.43      1133
           2       0.77      0.49      0.59      1030
           3       0.55      0.70      0.61      1008
           4       0.64      0.44      0.52       980
           5       0.47      0.60      0.53       890
           6       0.67      0.75      0.71       956
           7       0.74      0.51      0.60      1027
           8       0.34      0.89      0.49       973
           9       0.41      0.55      0.47      1008

    accuracy                           0.53      9984
   macro avg       0.65      0.54      0.52      9984
weighted avg       0.66      0.53      0.52      9984



In [42]:
from sklearn.metrics import f1_score

score=[]
for q in np.arange(.1,.9,.1):
    y_hat=np.quantile(total_samples,q,axis=0)
    score.append(f1_score(np.int32(total_labels),np.int32(y_hat), average='macro'))
print('mean f-1 : {0}, std f-1 : {1}'.format(np.mean(score),2*np.std(score)))

mean f-1 : 0.9748208806697447, std f-1 : 0.024564142639161658


In [29]:
import arviz as az

posterior_samples_multiple_chains=inference.posterior_diagnostics(posterior_samples)
datasets=[az.convert_to_inference_data(sample) for sample in posterior_samples_multiple_chains]
dataset = az.concat(datasets, dim="chain")
mean_r_hat_values={var:float(az.rhat(dataset)[var].mean().data) for var in model.par}
mean_ess_values={var:float(az.ess(dataset)[var].mean().data) for var in model.par}
mean_mcse_values={var:float(az.mcse(dataset)[var].mean().data) for var in model.par}

In [35]:
az.summary(dataset)

Unnamed: 0,mean,sd,hdi_3%,hdi_97%,mcse_mean,mcse_sd,ess_bulk,ess_tail,r_hat
"0.weight[0,0,0,0]",-1.612,1.635,-3.627,1.164,0.985,0.787,3.0,246.0,1.77
"0.weight[0,0,0,1]",-0.462,0.614,-1.657,0.661,0.328,0.254,3.0,166.0,1.59
"0.weight[0,0,0,2]",0.110,1.375,-1.645,2.617,0.845,0.679,3.0,144.0,1.80
"0.weight[0,0,0,3]",-0.200,0.597,-1.099,0.912,0.230,0.171,10.0,120.0,1.23
"0.weight[0,0,0,4]",-0.734,0.995,-2.361,0.737,0.571,0.450,3.0,196.0,1.60
...,...,...,...,...,...,...,...,...,...
6.bias[5],-0.087,0.559,-0.975,0.985,0.083,0.059,30.0,404.0,1.05
6.bias[6],-0.083,0.653,-1.156,1.064,0.397,0.319,3.0,133.0,1.80
6.bias[7],0.034,0.569,-0.937,1.299,0.076,0.054,52.0,282.0,1.05
6.bias[8],0.088,0.814,-1.618,1.821,0.319,0.236,7.0,331.0,1.24


In [30]:
print(mean_r_hat_values)

{'0.weight': 1.4409353657596886, '0.bias': 1.6422148717827536, '2.weight': 1.4883793334949909, '2.bias': 1.20538471476892, '4.weight': 1.477842407782434, '4.bias': 1.4951052840399746, '5.weight': 1.4792690794753212, '5.bias': 1.4814224346622065, '6.weight': 1.4836650804249152, '6.bias': 1.2050696158485181}


In [31]:
print(mean_ess_values)

{'0.weight': 55.79815474787147, '0.bias': 3.955625076894418, '2.weight': 51.283357418128, '2.bias': 156.99221588900593, '4.weight': 56.25778977074355, '4.bias': 58.85076479790838, '5.weight': 55.363197846896846, '5.bias': 67.19298135267451, '6.weight': 57.520367079467874, '6.bias': 186.6966809961362}


In [32]:
print(mean_mcse_values)

{'0.weight': 0.5150988150061814, '0.bias': 0.47465988042427076, '2.weight': 0.677872917280458, '2.bias': 0.22174469742426886, '4.weight': 0.6860386109116371, '4.bias': 0.625784012653258, '5.weight': 0.684771702786955, '5.bias': 0.5720807931282567, '6.weight': 0.6177038148582504, '6.bias': 0.165218720945477}


In [33]:
from hamiltonian.util.psis import *

loo,loos,ks=psisloo(-log_like)

ModuleNotFoundError: No module named 'hamiltonian.psis'

In [41]:
from sklearn.metrics import f1_score

score=[]
for q in np.arange(.1,.9,.1):
    y_hat=np.quantile(total_samples,q,axis=0)
    score.append(f1_score(np.int32(total_labels),np.int32(y_hat), sample_weight=loos,average='weighted'))
print('mean f-1 : {0}, std f-1 : {1}'.format(np.mean(score),2*np.std(score)))

mean f-1 : 0.579054715975662, std f-1 : 0.11209524379518138


# Hierarchical Lenet <a class="anchor" id="chapter3"></a>

In [43]:
from hamiltonian.inference.sgld import sgld
from hamiltonian.models.softmax import hierarchical_lenet

model=hierarchical_lenet(hyper,in_units,out_units,ctx=model_ctx)
inference=sgld(model,model.par,step_size=0.1,ctx=model_ctx)

In [47]:
import matplotlib.pyplot as plt
import seaborn as sns
import glob

train_sgld=False
num_epochs=250

if train_sgld:
    loss,posterior_samples=inference.sample(epochs=num_epochs,batch_size=batch_size,
                                data_loader=train_data,
                                verbose=True,chain_name='chain_hierarchical')

    plt.rcParams['figure.dpi'] = 360
    sns.set_style("whitegrid")
    fig=plt.figure(figsize=[5,5])
    plt.plot(loss[0],color='blue',lw=3)
    plt.plot(loss[1],color='red',lw=3)
    plt.xlabel('Epoch', size=18)
    plt.ylabel('Loss', size=18)
    plt.title('SGLD Hierarchical Lenet MNIST', size=18)
    plt.xticks(size=14)
    plt.yticks(size=14)
    plt.savefig('sgld_hierarchical_lenet.pdf', bbox_inches='tight')
else:
    chain1=glob.glob("../scripts/results/lenet/chain_hierarchical_0_1_sgld*")
    chain2=glob.glob("../scripts/results/lenet/chain_hierarchical_0_sgld*")
    posterior_samples=[chain1,chain2]

In [48]:
posterior_samples_flat=[item for sublist in posterior_samples for item in sublist]

In [49]:
total_samples,total_labels,log_like=inference.predict(posterior_samples_flat,5,data_loader=val_data)

In [50]:
posterior_samples_flat

['../scripts/results/lenet/chain_hierarchical_0_1_sgld_epoch_144_.params',
 '../scripts/results/lenet/chain_hierarchical_0_1_sgld_epoch_115_.params',
 '../scripts/results/lenet/chain_hierarchical_0_1_sgld_epoch_219_.params',
 '../scripts/results/lenet/chain_hierarchical_0_1_sgld_epoch_95_.params',
 '../scripts/results/lenet/chain_hierarchical_0_1_sgld_epoch_244_.params',
 '../scripts/results/lenet/chain_hierarchical_0_1_sgld_epoch_80_.params',
 '../scripts/results/lenet/chain_hierarchical_0_1_sgld_epoch_112_.params',
 '../scripts/results/lenet/chain_hierarchical_0_1_sgld_epoch_195_.params',
 '../scripts/results/lenet/chain_hierarchical_0_1_sgld_epoch_104_.params',
 '../scripts/results/lenet/chain_hierarchical_0_1_sgld_epoch_225_.params',
 '../scripts/results/lenet/chain_hierarchical_0_1_sgld_epoch_160_.params',
 '../scripts/results/lenet/chain_hierarchical_0_1_sgld_epoch_3_.params',
 '../scripts/results/lenet/chain_hierarchical_0_1_sgld_epoch_126_.params',
 '../scripts/results/lenet/ch

# BBB

In [None]:
from sklearn.metrics import classification_report
import mxnet.gluon.probability as mxp

def softplus(x):
    return nd.log(1. + nd.exp(x))

posterior=dict()
for var in par.keys():
    variational_posterior=mxp.normal.Normal(loc=means[var],
                                            scale=softplus(sigmas[var]))
    posterior.update({var:variational_posterior})
    
samples=[]
for _ in range(100):
    post_par=dict()
    for var in par.keys():
        post_par.update({var:posterior[var].sample().as_nd_ndarray()})
    y_pred=model.predict(post_par,nd.array(X_test,ctx=model_ctx))
    samples.append(y_pred.sample().asnumpy())
samples=np.asarray(samples)
y_hat=np.quantile(samples,.5,axis=0)

print(classification_report(np.int32(y_test),np.int32(y_hat)))