In [1]:
from sklearn import datasets


iris = datasets.load_iris()
X = iris.data
y = iris.target

In [2]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y,test_size=0.2)

In [3]:
import sys
sys.path.append("../") 

# Stochastic Gradient Descent

In [4]:
import mxnet as mx
import numpy as np
from mxnet import ndarray as nd
from hamiltonian.inference.sgd import sgd
from hamiltonian.models.softmax import softmax

model_ctx=mx.cpu()
hyper={'alpha':1}
in_units=[X.shape[1],1]
out_units=len(np.unique(y))


In [5]:
model=softmax(hyper,in_units,out_units,ctx=model_ctx)
inference=sgd(model,model.par,step_size=0.01,ctx=model_ctx)

In [6]:
par,loss=inference.fit(epochs=1000,batch_size=60,gamma=0.9,
                       X_train=nd.array(X_train,ctx=model_ctx),y_train=nd.array(y_train,ctx=model_ctx),verbose=False)

100%|██████████| 1000/1000 [00:08<00:00, 112.47it/s]


In [7]:
import hamiltonian
import importlib

try:
    importlib.reload(hamiltonian.models.softmax)
    importlib.reload(hamiltonian.inference.sgd)
    print('modules re-loaded')
except:
    print('no modules loaded yet')

modules re-loaded


In [8]:
model.net.collect_params()

{'1.weight': Parameter (shape=(3, 4), dtype=float32),
 '1.bias': Parameter (shape=(3,), dtype=float32)}

In [9]:
y_pred=model.predict(model.par,nd.array(X_test,ctx=model_ctx))

In [11]:
from sklearn.metrics import classification_report

print(classification_report(np.int32(y_test),np.int32(y_pred.sample().asnumpy())))

              precision    recall  f1-score   support

           0       0.92      1.00      0.96        11
           1       0.57      0.80      0.67         5
           2       1.00      0.79      0.88        14

    accuracy                           0.87        30
   macro avg       0.83      0.86      0.83        30
weighted avg       0.90      0.87      0.87        30



# Hamiltonian Monte Carlo

In [29]:
model_ctx=mx.cpu()
hyper={'alpha':25.}
in_units=[X.shape[1],1]
out_units=len(np.unique(y))

In [13]:
from hamiltonian.inference.hmc import hmc
from hamiltonian.models.softmax import softmax

model=softmax(hyper,in_units,out_units,ctx=model_ctx)
inference=hmc(model,model.par,step_size=0.1,ctx=model_ctx)

In [14]:
import hamiltonian
import importlib

try:
    importlib.reload(hamiltonian.models.softmax)
    importlib.reload(hamiltonian.inference.hmc)
    print('modules re-loaded')
except:
    print('no modules loaded yet')

modules re-loaded


In [15]:
samples=inference.sample(epochs=3000,burn_in=1000,path_length=10.0,chains=4,X_train=nd.array(X_train,ctx=model_ctx),y_train=nd.array(y_train,ctx=model_ctx))

100%|██████████| 4000/4000 [05:56<00:00, 11.21it/s]
100%|██████████| 4000/4000 [05:49<00:00, 11.45it/s]
100%|██████████| 4000/4000 [05:44<00:00, 11.60it/s]
100%|██████████| 4000/4000 [05:45<00:00, 11.57it/s]


In [140]:
len(samples)

4

In [145]:
samples[0]['1.weight'].shape

(1, 2999, 3, 4)

In [17]:
import arviz as az

datasets=[az.convert_to_inference_data(sample) for sample in samples]

In [18]:
dataset = az.concat(datasets, dim="chain")

In [19]:
az.summary(dataset)

Unnamed: 0,mean,sd,hdi_3%,hdi_97%,mcse_mean,mcse_sd,ess_bulk,ess_tail,r_hat
"1.weight[0,0]",-87.499,74.573,-201.976,-10.587,36.768,28.104,4.0,4.0,3.57
"1.weight[0,1]",-1.314,64.932,-100.949,84.888,31.938,24.402,4.0,5.0,3.87
"1.weight[0,2]",-5.574,51.361,-70.875,74.838,25.036,19.099,5.0,36.0,3.26
"1.weight[0,3]",-54.049,101.399,-138.983,144.311,50.234,38.429,4.0,5.0,3.18
"1.weight[1,0]",29.27,71.39,-84.487,93.299,34.901,26.638,5.0,4.0,3.93
"1.weight[1,1]",40.095,30.206,8.669,80.464,14.482,11.018,5.0,4.0,2.95
"1.weight[1,2]",-14.001,15.572,-36.892,17.055,5.927,4.382,6.0,17.0,1.76
"1.weight[1,3]",-23.361,88.306,-110.986,104.973,43.588,33.323,5.0,5.0,2.89
"1.weight[2,0]",-22.744,81.296,-116.644,65.272,40.439,30.958,5.0,7.0,2.55
"1.weight[2,1]",-15.473,112.401,-189.414,125.882,55.908,42.799,4.0,5.0,5.46


# PyMC3

In [20]:
import pymc3 as pm
import theano.tensor as tt

In [21]:
with pm.Model() as model:
    alpha = pm.Normal('alpha', mu=0, sd=5, shape=3)
    beta = pm.Normal('beta', mu=0, sd=5, shape=(4,3))
    mu = pm.Deterministic('mu', alpha + pm.math.dot(X_train, beta))
    theta = tt.nnet.softmax(mu)
    y_hat = pm.Categorical('y_hat', p=theta, observed=y_train)
    trace_s = pm.sample(2000)

Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (4 chains in 4 jobs)
NUTS: [beta, alpha]


Sampling 4 chains for 1_000 tune and 2_000 draw iterations (4_000 + 8_000 draws total) took 118 seconds.
There were 602 divergences after tuning. Increase `target_accept` or reparameterize.
The acceptance probability does not match the target. It is 0.6508177192963824, but should be close to 0.8. Try to increase the number of tuning steps.
There were 47 divergences after tuning. Increase `target_accept` or reparameterize.
There were 2 divergences after tuning. Increase `target_accept` or reparameterize.
There were 9 divergences after tuning. Increase `target_accept` or reparameterize.
The rhat statistic is larger than 1.05 for some parameters. This indicates slight problems during sampling.
The estimated number of effective samples is smaller than 200 for some parameters.


In [22]:
az.summary(trace_s,var_names=["alpha", "beta"])

Unnamed: 0,mean,sd,hdi_3%,hdi_97%,mcse_mean,mcse_sd,ess_bulk,ess_tail,r_hat
alpha[0],1.484,4.946,-7.465,11.088,0.305,0.216,271.0,558.0,1.02
alpha[1],4.74,4.069,-2.664,12.532,0.098,0.074,1735.0,3535.0,1.01
alpha[2],-5.735,4.039,-13.106,1.89,0.106,0.075,1483.0,2556.0,1.04
"beta[0,0]",2.081,3.814,-5.077,9.388,0.231,0.189,284.0,201.0,1.02
"beta[0,1]",0.737,3.065,-5.31,6.164,0.205,0.145,242.0,1375.0,1.03
"beta[0,2]",-2.494,3.13,-8.463,3.391,0.15,0.106,429.0,810.0,1.02
"beta[1,0]",4.587,4.17,-3.104,11.967,0.471,0.334,83.0,285.0,1.05
"beta[1,1]",-0.686,3.51,-7.02,5.947,0.37,0.263,100.0,213.0,1.04
"beta[1,2]",-4.631,3.568,-10.536,2.429,0.348,0.246,109.0,764.0,1.03
"beta[2,0]",-6.797,3.746,-13.68,0.135,0.178,0.126,434.0,1978.0,1.02


In [28]:
data_loo=az.loo(trace_s,pointwise=True)

In [29]:
data_loo.loo

-10.455271778875396

In [34]:
loo=data_loo.loo_i.data.flatten()

In [40]:
k_hat=data_loo.pareto_k.data.flatten()

In [110]:
from sklearn.metrics import f1_score

score=[]
for i in range(trace_s['mu'].shape[0]):
    y_pred=trace_s['mu'][i,:,:].argmax(axis=1)
    score.append(f1_score(np.int32(y_train),np.int32(y_pred), average='macro'))
print('mean f-1 : {0}, std f-1 : {1}'.format(np.mean(score),2*np.std(score)))

mean f-1 : 0.9725425069936648, std f-1 : 0.02303430770368892


In [100]:
total_samples = pm.sample_posterior_predictive(trace_s,model=model)

In [111]:
score=[]
for q in np.arange(.1,.9,.1):
    y_hat=np.quantile(total_samples['y_hat'],q,axis=0)
    score.append(f1_score(np.int32(y_train),np.int32(y_hat),average='macro'))
print('mean f-1 : {0}, std f-1 : {1}'.format(np.mean(score),2*np.std(score)))

mean f-1 : 0.9619404455625193, std f-1 : 0.04712176330932348


In [139]:
score=[]
for q in np.arange(.1,.9,.1):
    y_hat=np.quantile(total_samples['y_hat'],q,axis=0)
    score.append(f1_score(np.int32(y_train),np.int32(y_hat),sample_weight=1.-np.clip(k_hat,0,1),average='weighted'))
print('mean f-1 : {0}, std f-1 : {1}'.format(np.mean(score),2*np.std(score)))

mean f-1 : 0.9477073784348193, std f-1 : 0.07247340108374999
