In [1]:
import sys
# Add the parent directory to the module search path.
# Presumably this is where the local `hamiltonian` package imported in later
# cells lives — TODO confirm.
sys.path.append("../") 

In [2]:
from __future__ import division

import argparse, time, logging, random, math

import numpy as np
import mxnet as mx

from mxnet import gluon, nd
from mxnet import autograd as ag
from mxnet.gluon import nn
from mxnet.gluon.data.vision import transforms

In [3]:
# Per-image preprocessing pipeline: resize to 150x150, then convert to a tensor.
preprocessing_steps = [
    transforms.Resize([150,150]),
    transforms.ToTensor(),
]
transform = transforms.Compose(preprocessing_steps)

In [4]:
# Root folders of the train and test image splits.
# NOTE(review): these are absolute paths on one specific machine; they must be
# edited before the notebook can run anywhere else.
training_path='/media/sergio/Backup/data/PlantVillage-Dataset/data_distribution_for_SVM/train'
testing_path='/media/sergio/Backup/data/PlantVillage-Dataset/data_distribution_for_SVM/test'

In [5]:
# Execution context and data-loading settings used throughout the notebook.
num_gpus = 1
model_ctx = mx.gpu()
num_workers = 2
batch_size = 32

# Image-folder datasets for both splits; `transform` is attached to each one
# through `transform_first`.
image_folder_dataset = mx.gluon.data.vision.datasets.ImageFolderDataset
train_data = image_folder_dataset(training_path).transform_first(transform)
test_data = image_folder_dataset(testing_path).transform_first(transform)

In [6]:
# Mini-batch loaders: the training loader shuffles, the validation loader
# (built from `test_data`) keeps the dataset order.
train_data_loader = mx.gluon.data.DataLoader(
    train_data,
    batch_size=batch_size,
    shuffle=True,
    num_workers=num_workers,
)
valid_data_loader = mx.gluon.data.DataLoader(
    test_data,
    batch_size=batch_size,
    num_workers=num_workers,
)

In [7]:
# Sanity check: pull a single batch from the training loader and show the
# shapes of the image tensor and the label vector.
first_batch = next(iter(train_data_loader), None)
if first_batch is not None:
    X, y = first_batch
    print(X.shape)
    print(y.shape)

(32, 3, 150, 150)
(32,)


In [8]:
# Display the compute context created with mx.gpu() so the device in use is visible.
model_ctx

gpu(0)

### Bayesian inference for Plant Village

* [Stochastic Gradient Descent](#chapter1)
* [Stochastic Gradient Langevin Dynamics](#chapter2)
* [Bayes By Backprop](#chapter3)
* [Diagnostics](#chapter4)


# Stochastic Gradient Descent <a class="anchor" id="chapter1"></a>

In [9]:
import mxnet as mx
from mxnet import nd, autograd, gluon
# `hyper`, `in_units` and `out_units` are passed to vgg_softmax(...) further down.
# NOTE(review): the meaning of 'alpha' is defined inside the hamiltonian package,
# not in this notebook.
hyper={'alpha':10.}
# Per-image shape; matches the (3, 150, 150) trailing dimensions of the batch printed earlier.
in_units=(3,150,150)
# presumably the number of target classes in the dataset — TODO confirm
out_units=38

In [10]:
import mxnet as mx
from hamiltonian.inference.sgd import sgd
from hamiltonian.models.softmax import vgg_softmax

# Build the vgg_softmax model (n_layers=16) on the selected context, then wrap it
# in the SGD inference object, which receives the model's parameters (model.par)
# and a step size of 0.1.
model=vgg_softmax(hyper,in_units,out_units,n_layers=16,ctx=model_ctx)
inference=sgd(model,model.par,step_size=0.1,ctx=model_ctx)

In [11]:
import hamiltonian
import importlib

# Reload the project modules used by the SGD section so that edits made on disk
# are picked up without restarting the kernel.
#
# The model class is imported from `hamiltonian.models.softmax`, so that is the
# module to reload. Only the "module not imported yet / cannot be found" cases
# are caught; any other failure (e.g. a syntax error in an edited module) is
# allowed to surface instead of being reported as "no modules loaded yet".
try:
    importlib.reload(hamiltonian.models.softmax)
    importlib.reload(hamiltonian.inference.sgd)
    importlib.reload(hamiltonian.inference.base)
    print('modules re-loaded')
except (AttributeError, ImportError):
    # AttributeError: the submodule has not been imported in this kernel yet.
    # ImportError: the module could not be located again on reload.
    print('no modules loaded yet')


no modules loaded yet


In [12]:
# pyplot is imported here because the notebook's other `matplotlib.pyplot` import
# sits in a later cell; without it this cell raises NameError on a fresh kernel.
import matplotlib.pyplot as plt

# Set train_sgd to False to load previously saved parameters instead of training.
train_sgd=True
num_epochs=100
# Checkpoint file shared by the save (training) and load (no training) branches.
sgd_params_file='../scripts/results/vgg/vgg_sgd_'+str(num_epochs)+'_epochs.params'

if train_sgd:
    # Fit with SGD; `par` holds the fitted parameters and `loss` the values
    # plotted against epochs below.
    par,loss=inference.fit(epochs=num_epochs,batch_size=batch_size,data_loader=train_data_loader,verbose=True)

    # Loss curve, saved as a PDF next to the notebook.
    fig=plt.figure(figsize=[5,5])
    plt.plot(loss,color='blue',lw=3)
    plt.xlabel('Epoch', size=18)
    plt.ylabel('Loss', size=18)
    plt.title('SGD VGG Plant Village', size=18)
    plt.xticks(size=14)
    plt.yticks(size=14)
    plt.savefig('sgd_vgg.pdf', bbox_inches='tight')
    model.net.save_parameters(sgd_params_file)
else:
    # Restore the saved network weights and rebuild the name -> array mapping
    # that the prediction cell expects in `par`.
    model.net.load_parameters(sgd_params_file,ctx=model_ctx)
    par={name:gluon_par.data() for name,gluon_par in model.net.collect_params().items()}
    

  1%|          | 1/100 [00:53<1:27:36, 53.10s/it]

loss: 3.7592


  3%|▎         | 3/100 [08:32<4:36:15, 170.89s/it]


KeyboardInterrupt: 

In [16]:
# List the network's parameters (name -> Parameter with shape and dtype).
model.net.collect_params()

{'0.weight': Parameter (shape=(6, 1, 5, 5), dtype=<class 'numpy.float32'>),
 '0.bias': Parameter (shape=(6,), dtype=<class 'numpy.float32'>),
 '2.weight': Parameter (shape=(16, 6, 5, 5), dtype=<class 'numpy.float32'>),
 '2.bias': Parameter (shape=(16,), dtype=<class 'numpy.float32'>),
 '4.weight': Parameter (shape=(120, 400), dtype=float32),
 '4.bias': Parameter (shape=(120,), dtype=float32),
 '5.weight': Parameter (shape=(84, 120), dtype=float32),
 '5.bias': Parameter (shape=(84,), dtype=float32),
 '6.weight': Parameter (shape=(10, 84), dtype=float32),
 '6.bias': Parameter (shape=(10,), dtype=float32)}

In [17]:
# Predict with the fitted SGD parameters on the held-out data.
# `valid_data_loader` is the loader built from `test_data`; the name used here
# before (`val_data`) is never defined in this notebook.
total_samples,total_labels,log_like=inference.predict(par,batch_size=batch_size,num_samples=100,data_loader=valid_data_loader)

In [18]:
# Collapse the predictions to one value per example: the 0.9 quantile along axis 0.
# presumably axis 0 of `total_samples` indexes the predictive draws — TODO confirm
y_hat=np.quantile(total_samples,.9,axis=0)

In [19]:
from sklearn.metrics import classification_report

# Per-class precision/recall/F1; labels and predictions are both cast to int32 first.
print(classification_report(np.int32(total_labels),np.int32(y_hat)))

              precision    recall  f1-score   support

           0       1.00      0.99      0.99       979
           1       1.00      0.99      0.99      1133
           2       0.99      0.99      0.99      1030
           3       0.99      0.99      0.99      1008
           4       0.99      0.98      0.99       980
           5       0.99      0.99      0.99       890
           6       0.99      0.99      0.99       956
           7       0.98      0.98      0.98      1027
           8       0.99      0.98      0.99       973
           9       0.96      0.99      0.98      1008

    accuracy                           0.99      9984
   macro avg       0.99      0.99      0.99      9984
weighted avg       0.99      0.99      0.99      9984



# Stochastic Gradient Langevin Dynamics <a class="anchor" id="chapter2"></a>

In [28]:
from hamiltonian.inference.sgld import sgld
from hamiltonian.models.softmax import vgg_softmax

# Rebind `model` to a new vgg_softmax instance and `inference` to an SGLD sampler
# over its parameters; from here on these names no longer refer to the SGD objects.
model=vgg_softmax(hyper,in_units,out_units,n_layers=16,ctx=model_ctx)
inference=sgld(model,model.par,step_size=0.1,ctx=model_ctx)

In [29]:
import hamiltonian
import importlib

# Reload the project modules used by the SGLD section so that edits made on disk
# are picked up without restarting the kernel. Only the "not imported yet /
# cannot be found" cases are caught; other errors raised while re-executing a
# module are allowed to surface rather than being silently swallowed.
try:
    importlib.reload(hamiltonian.models.softmax)
    importlib.reload(hamiltonian.inference.sgld)
    print('modules re-loaded')
except (AttributeError, ImportError):
    # AttributeError: the submodule has not been imported in this kernel yet.
    # ImportError: the module could not be located again on reload.
    print('no modules loaded yet')

modules re-loaded


In [30]:
import matplotlib.pyplot as plt
import seaborn as sns
import glob

# Set train_sgld to True to run the sampler; otherwise the sample files written
# by an earlier run are located on disk.
train_sgld=False
num_epochs=250

if train_sgld:
    # The sampler is given the mini-batch loader (as the SGD fit is), not the
    # raw dataset object.
    loss,posterior_samples=inference.sample(epochs=num_epochs,batch_size=batch_size,
                                data_loader=train_data_loader,
                                verbose=True,chain_name='chain_nonhierarchical')

    # Loss curves for the first two entries of `loss`
    # (presumably one per chain — TODO confirm).
    plt.rcParams['figure.dpi'] = 360
    sns.set_style("whitegrid")
    fig=plt.figure(figsize=[5,5])
    plt.plot(loss[0],color='blue',lw=3)
    plt.plot(loss[1],color='red',lw=3)
    plt.xlabel('Epoch', size=18)
    plt.ylabel('Loss', size=18)
    # Title and file name describe the model/dataset actually used in this notebook.
    plt.title('SGLD VGG Plant Village', size=18)
    plt.xticks(size=14)
    plt.yticks(size=14)
    plt.savefig('sgld_vgg.pdf', bbox_inches='tight')
else:
    # Collect the saved sample files of each chain.
    # NOTE(review): glob returns paths in arbitrary order; confirm whether the
    # downstream diagnostics depend on the order of the samples within a chain.
    chain1=glob.glob("../scripts/results/vgg/chain_nonhierarchical_0_1_sgld*")
    chain2=glob.glob("../scripts/results/vgg/chain_nonhierarchical_0_sgld*")
    posterior_samples=[chain1,chain2]

In [31]:
# Merge the per-chain collections into one flat list, keeping chain order.
posterior_samples_flat = []
for chain_samples in posterior_samples:
    posterior_samples_flat.extend(chain_samples)

In [32]:
# Predict with the pooled posterior samples on the held-out data.
# `valid_data_loader` is the loader built from `test_data`; the name used here
# before (`val_data`) is never defined in this notebook.
# NOTE(review): the positional 5 fills predict()'s second parameter — confirm
# which one that is (the SGD cell passes batch_size/num_samples by keyword).
total_samples,total_labels,log_like=inference.predict(posterior_samples_flat,5,data_loader=valid_data_loader)

In [33]:
from sklearn.metrics import classification_report

# One prediction per example: the 0.9 quantile of `total_samples` along axis 0
# (presumably the predictive-draw axis — TODO confirm).
y_hat=np.quantile(total_samples,.9,axis=0)

# Per-class precision/recall/F1; labels and predictions are both cast to int32 first.
print(classification_report(np.int32(total_labels),np.int32(y_hat)))

              precision    recall  f1-score   support

           0       1.00      0.95      0.97       980
           1       1.00      0.96      0.98      1135
           2       0.98      0.90      0.94      1032
           3       0.96      0.93      0.95      1010
           4       0.98      0.92      0.95       982
           5       0.95      0.94      0.94       892
           6       0.94      0.99      0.96       958
           7       0.94      0.96      0.95      1028
           8       0.89      0.98      0.93       974
           9       0.89      1.00      0.94      1009

    accuracy                           0.95     10000
   macro avg       0.95      0.95      0.95     10000
weighted avg       0.95      0.95      0.95     10000



In [42]:
from sklearn.metrics import f1_score

# Macro-averaged F1 for predictions taken at each quantile level in
# np.arange(.1,.9,.1), followed by a one-line summary.
# NOTE(review): the figure printed under "std f-1" is 2*np.std(score).
quantile_levels = np.arange(.1,.9,.1)
labels_int = np.int32(total_labels)

score = []
for q in quantile_levels:
    y_hat = np.quantile(total_samples, q, axis=0)
    macro_f1 = f1_score(labels_int, np.int32(y_hat), average='macro')
    score.append(macro_f1)

summary_line = 'mean f-1 : {0}, std f-1 : {1}'.format(np.mean(score), 2*np.std(score))
print(summary_line)

mean f-1 : 0.9748208806697447, std f-1 : 0.024564142639161658


In [34]:
import arviz as az

# Convert each chain returned by posterior_diagnostics into an ArviZ
# InferenceData object and join them along the "chain" dimension.
posterior_samples_multiple_chains=inference.posterior_diagnostics(posterior_samples)
datasets=[az.convert_to_inference_data(sample) for sample in posterior_samples_multiple_chains]
dataset = az.concat(datasets, dim="chain")

# Compute each diagnostic once over the whole dataset, then average it per
# variable. (Calling az.rhat/az.ess/az.mcse inside the per-variable
# comprehension would recompute the full diagnostic for every variable.)
rhat_by_var = az.rhat(dataset)
ess_by_var = az.ess(dataset)
mcse_by_var = az.mcse(dataset)

mean_r_hat_values={var:float(rhat_by_var[var].mean().data) for var in model.par}
mean_ess_values={var:float(ess_by_var[var].mean().data) for var in model.par}
mean_mcse_values={var:float(mcse_by_var[var].mean().data) for var in model.par}

In [35]:
# Tabulate ArviZ summary statistics for every entry of the combined dataset.
az.summary(dataset)

Unnamed: 0,mean,sd,hdi_3%,hdi_97%,mcse_mean,mcse_sd,ess_bulk,ess_tail,r_hat
"0.weight[0,0,0,0]",-1.612,1.635,-3.627,1.164,0.985,0.787,3.0,246.0,1.77
"0.weight[0,0,0,1]",-0.462,0.614,-1.657,0.661,0.328,0.254,3.0,166.0,1.59
"0.weight[0,0,0,2]",0.110,1.375,-1.645,2.617,0.845,0.679,3.0,144.0,1.80
"0.weight[0,0,0,3]",-0.200,0.597,-1.099,0.912,0.230,0.171,10.0,120.0,1.23
"0.weight[0,0,0,4]",-0.734,0.995,-2.361,0.737,0.571,0.450,3.0,196.0,1.60
...,...,...,...,...,...,...,...,...,...
6.bias[5],-0.087,0.559,-0.975,0.985,0.083,0.059,30.0,404.0,1.05
6.bias[6],-0.083,0.653,-1.156,1.064,0.397,0.319,3.0,133.0,1.80
6.bias[7],0.034,0.569,-0.937,1.299,0.076,0.054,52.0,282.0,1.05
6.bias[8],0.088,0.814,-1.618,1.821,0.319,0.236,7.0,331.0,1.24


In [36]:
# R-hat averaged over all entries of each variable in model.par.
print(mean_r_hat_values)

{'0.weight': 1.4433263528009008, '0.bias': 1.648076802874187, '2.weight': 1.4908360672675494, '2.bias': 1.2081504842012865, '4.weight': 1.480469265487481, '4.bias': 1.4979961639966641, '5.weight': 1.4818768559638393, '5.bias': 1.4850130190063169, '6.weight': 1.4859056781765567, '6.bias': 1.2062475720278303}


In [37]:
# Effective sample size averaged over all entries of each variable in model.par.
print(mean_ess_values)

{'0.weight': 56.85196954744781, '0.bias': 3.7896113973529553, '2.weight': 57.84407245511651, '2.bias': 115.52266689594174, '4.weight': 61.9930606333923, '4.bias': 68.92474001737575, '5.weight': 60.49414603670665, '5.bias': 73.17804413230593, '6.weight': 59.562982006241995, '6.bias': 146.29305834568964}


In [38]:
# Monte Carlo standard error averaged over all entries of each variable in model.par.
print(mean_mcse_values)

{'0.weight': 0.5342363969670794, '0.bias': 0.47947446717104025, '2.weight': 0.6957483945243773, '2.bias': 0.24764692875686375, '4.weight': 0.7057690437286156, '4.bias': 0.6422108762806051, '5.weight': 0.7041383246810111, '5.bias': 0.594246269900714, '6.weight': 0.634655255318478, '6.bias': 0.18597643886440535}


In [39]:
# Import only the function that is used, instead of `import *`, so it is clear
# where `psisloo` comes from and no other names are pulled into the notebook.
from hamiltonian.psis import psisloo

# PSIS-LOO on the negated `log_like` returned by the last predict() call.
# NOTE(review): the sign convention of `log_like` is set inside
# inference.predict — confirm that negating it yields what psisloo expects.
loo,loos,ks=psisloo(-log_like)

In [41]:
from sklearn.metrics import f1_score

# Weighted-average F1 for predictions taken at each quantile level in
# np.arange(.1,.9,.1), with every example weighted by its `loos` value.
# NOTE(review): the figure printed under "std f-1" is 2*np.std(score).
quantile_levels = np.arange(.1,.9,.1)
labels_int = np.int32(total_labels)

score = []
for q in quantile_levels:
    y_hat = np.quantile(total_samples, q, axis=0)
    weighted_f1 = f1_score(labels_int, np.int32(y_hat), sample_weight=loos, average='weighted')
    score.append(weighted_f1)

summary_line = 'mean f-1 : {0}, std f-1 : {1}'.format(np.mean(score), 2*np.std(score))
print(summary_line)

mean f-1 : 0.579054715975662, std f-1 : 0.11209524379518138
