In [1]:
from sklearn import datasets


iris = datasets.load_iris()
X = iris.data
y = iris.target

In [2]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y,test_size=0.2)

In [3]:
import sys
sys.path.append("../") 

# Stochastic Gradient Descent

In [4]:
import mxnet as mx
import numpy as np
from mxnet import ndarray as nd
from hamiltonian.inference.sgd import sgd
from hamiltonian.models.softmax import softmax

model_ctx=mx.cpu()
hyper={'alpha':1}
in_units=X.shape[1]
out_units=len(np.unique(y))
model=softmax(hyper,in_units,out_units,ctx=model_ctx)
inference=sgd(model,model.par,step_size=0.01,ctx=model_ctx)

In [5]:
par,loss=inference.fit(epochs=1000,batch_size=60,gamma=0.9,
                       X_train=nd.array(X_train,ctx=model_ctx),y_train=nd.array(y_train,ctx=model_ctx),verbose=False)

100%|██████████| 1000/1000 [00:11<00:00, 89.79it/s]


In [6]:
import hamiltonian
import importlib

try:
    importlib.reload(hamiltonian.models.softmax)
    importlib.reload(hamiltonian.inference.sgd)
    print('modules re-loaded')
except:
    print('no modules loaded yet')

modules re-loaded


In [7]:
model.net.collect_params()

{'0.weight': Parameter (shape=(3, 4), dtype=float32),
 '0.bias': Parameter (shape=(3,), dtype=float32)}

In [8]:
y_pred=model.predict(model.par,nd.array(X_test,ctx=model_ctx))

In [9]:
from sklearn.metrics import classification_report

print(classification_report(np.int32(y_test),np.int32(y_pred.sample().asnumpy())))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00        12
           1       0.78      0.78      0.78         9
           2       0.78      0.78      0.78         9

    accuracy                           0.87        30
   macro avg       0.85      0.85      0.85        30
weighted avg       0.87      0.87      0.87        30



# Hamiltonian Monte Carlo

In [235]:
model_ctx=mx.cpu()
hyper={'alpha':25.}
in_units=X.shape[1]
out_units=len(np.unique(y))

In [236]:
from hamiltonian.inference.hmc import hmc
from hamiltonian.models.softmax import softmax

model=softmax(hyper,in_units,out_units,ctx=model_ctx)
inference=hmc(model,model.par,step_size=0.1,ctx=model_ctx)

In [237]:
import hamiltonian
import importlib

try:
    importlib.reload(hamiltonian.models.softmax)
    importlib.reload(hamiltonian.inference.hmc)
    print('modules re-loaded')
except:
    print('no modules loaded yet')

modules re-loaded


In [238]:
samples=inference.sample(epochs=3000,burn_in=1000,path_length=1.0,chains=4,X_train=nd.array(X_train,ctx=model_ctx),y_train=nd.array(y_train,ctx=model_ctx))

100%|██████████| 4000/4000 [05:16<00:00, 12.65it/s]
100%|██████████| 4000/4000 [05:14<00:00, 12.71it/s]
100%|██████████| 4000/4000 [05:22<00:00, 12.39it/s]
100%|██████████| 4000/4000 [05:17<00:00, 12.58it/s]


In [239]:
len(samples)

4

In [240]:
import arviz as az

datasets=[az.convert_to_inference_data(sample) for sample in samples]

In [241]:
dataset = az.concat(datasets, dim="chain")

In [242]:
az.summary(dataset)

Unnamed: 0,mean,sd,hdi_3%,hdi_97%,mcse_mean,mcse_sd,ess_bulk,ess_tail,r_hat
"0.weight[0,0]",-20.412,9.947,-34.531,-5.143,4.652,3.585,5.0,7.0,2.49
"0.weight[0,1]",-2.11,11.039,-17.662,18.704,5.407,4.128,5.0,13.0,2.69
"0.weight[0,2]",-15.377,7.504,-23.975,1.075,3.554,2.699,5.0,14.0,3.02
"0.weight[0,3]",13.16,18.892,-14.572,37.294,9.346,7.148,5.0,5.0,3.09
"0.weight[1,0]",11.575,6.443,3.8,24.498,3.128,2.385,5.0,11.0,3.12
"0.weight[1,1]",15.294,4.542,7.562,20.344,2.176,1.655,6.0,5.0,1.75
"0.weight[1,2]",-14.648,10.349,-29.156,0.175,5.029,3.835,5.0,11.0,2.7
"0.weight[1,3]",-15.303,11.559,-31.527,2.385,5.533,4.209,5.0,20.0,1.99
"0.weight[2,0]",-7.826,14.872,-35.445,6.964,7.284,5.561,5.0,5.0,2.84
"0.weight[2,1]",-0.7,7.884,-12.365,16.492,3.733,2.835,5.0,7.0,2.61


# PyMC3

In [19]:
import pymc3 as pm
import theano.tensor as tt

In [22]:
with pm.Model() as model:
    alpha = pm.Normal('alpha', mu=0, sd=5, shape=3)
    beta = pm.Normal('beta', mu=0, sd=5, shape=(4,3))
    mu = pm.Deterministic('mu', alpha + pm.math.dot(X_train, beta))
    theta = tt.nnet.softmax(mu)
    y_hat = pm.Categorical('y_hat', p=theta, observed=y_train)
    trace_s = pm.sample(2000)

Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (4 chains in 4 jobs)
NUTS: [beta, alpha]


Sampling 4 chains for 1_000 tune and 2_000 draw iterations (4_000 + 8_000 draws total) took 81 seconds.
There were 266 divergences after tuning. Increase `target_accept` or reparameterize.
There were 272 divergences after tuning. Increase `target_accept` or reparameterize.
There were 317 divergences after tuning. Increase `target_accept` or reparameterize.
There were 427 divergences after tuning. Increase `target_accept` or reparameterize.
The number of effective samples is smaller than 10% for some parameters.


In [24]:
az.summary(trace_s,var_names=["alpha", "beta"])

Unnamed: 0,mean,sd,hdi_3%,hdi_97%,mcse_mean,mcse_sd,ess_bulk,ess_tail,r_hat
alpha[0],1.109,4.913,-7.762,10.432,0.226,0.16,498.0,751.0,1.01
alpha[1],4.529,4.465,-2.828,13.372,0.328,0.232,208.0,585.0,1.02
alpha[2],-5.535,4.041,-13.071,2.113,0.11,0.078,1300.0,1394.0,1.02
"beta[0,0]",1.918,3.54,-4.65,8.76,0.14,0.099,644.0,1237.0,1.02
"beta[0,1]",0.876,3.121,-4.801,6.874,0.114,0.081,755.0,1176.0,1.01
"beta[0,2]",-2.448,3.198,-8.691,3.249,0.141,0.1,513.0,1551.0,1.01
"beta[1,0]",4.83,4.026,-1.753,12.752,0.225,0.159,316.0,1765.0,1.01
"beta[1,1]",-0.782,3.304,-7.066,5.15,0.174,0.123,381.0,1067.0,1.02
"beta[1,2]",-4.24,3.32,-10.575,2.185,0.112,0.079,885.0,1181.0,1.01
"beta[2,0]",-6.674,3.582,-13.903,-0.192,0.119,0.09,895.0,987.0,1.01
