In [5]:
!pip install pyro-ppl==1.3.0

Collecting pyro-ppl==1.3.0
  Downloading pyro_ppl-1.3.0-py3-none-any.whl (495 kB)
[K     |████████████████████████████████| 495 kB 658 kB/s eta 0:00:01
Installing collected packages: pyro-ppl
  Attempting uninstall: pyro-ppl
    Found existing installation: pyro-ppl 1.3.1
    Uninstalling pyro-ppl-1.3.1:
      Successfully uninstalled pyro-ppl-1.3.1
Successfully installed pyro-ppl-1.3.0


### Contrasting Pyro HMC Vs. statsmodel results
**For a regression model trained on country topographical rugged index and GDP data.**

#### A. Pyro model training & HMC sampling

In [1]:
import os

import torch
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from torch.distributions import constraints

import pyro
import pyro.distributions as dist
import pyro.optim as optim

DATA_URL = "https://d2hg8soec8ck9v.cloudfront.net/datasets/rugged_data.csv"
rugged_data = pd.read_csv(DATA_URL, encoding="ISO-8859-1")

* **Define pyro model and guide:**
    * Priors are deined in model function for intercept `b`, feature 1 `b_a`, feature 2 `b_r`, feature 3 `b_ar`
    * mean is the expresion `f(feat1, feat2, feat3, b)`

In [2]:
def model(is_cont_africa, ruggedness, log_gdp):
    b = pyro.sample("bias", dist.Normal(0., 10.))
    b_a = pyro.sample("feat_1", dist.Normal(0., 1.))
    b_r = pyro.sample("feat_2", dist.Normal(0., 1.))
    b_ar = pyro.sample("feat_3", dist.Normal(0., 1.))
    sigma = pyro.sample("sigma", dist.Uniform(0., 10.))
    mean = b + b_a * is_cont_africa + b_r * ruggedness + b_ar * is_cont_africa * ruggedness
    with pyro.plate("data", len(ruggedness)):
        pyro.sample("obs", dist.Normal(mean, sigma), obs=log_gdp)

In [None]:
'''
def guide(is_cont_africa, ruggedness, log_gdp):
    b_loc = pyro.param('bias_loc', torch.tensor(0.))
    b_scale = pyro.param('bias_scale', torch.tensor(1.),
                         constraint=constraints.positive)
    sigma_loc = pyro.param('sigma_loc', torch.tensor(1.),
                             constraint=constraints.positive)
    weights_loc = pyro.param('weights_loc', torch.randn(3))
    weights_scale = pyro.param('weights_scale', torch.ones(3),
                               constraint=constraints.positive)
    b = pyro.sample("bias", dist.Normal(b_loc, b_scale))
    b_a = pyro.sample("feat_1", dist.Normal(weights_loc[0], weights_scale[0]))
    b_r = pyro.sample("feat_2", dist.Normal(weights_loc[1], weights_scale[1]))
    b_ar = pyro.sample("feat_3", dist.Normal(weights_loc[2], weights_scale[2]))
    sigma = pyro.sample("sigma", dist.Normal(sigma_loc, torch.tensor(0.05)))
    mean = a + b_a * is_cont_africa + b_r * ruggedness + b_ar * is_cont_africa * ruggedness
'''

* **Loading data**

In [3]:
# Prepare training data
df = rugged_data[["cont_africa", "rugged", "rgdppc_2000"]]
df = df[np.isfinite(df.rgdppc_2000)]
df["rgdppc_2000"] = np.log(df["rgdppc_2000"])
train = torch.tensor(df.values, dtype=torch.float)

is_cont_africa, ruggedness, log_gdp = train[:, 0], train[:, 1], train[:, 2]

* **HMC Sampling**

In [4]:
from pyro.infer import MCMC, NUTS

nuts_kernel = NUTS(model)

mcmc = MCMC(nuts_kernel, num_samples=1000, warmup_steps=200)
mcmc.run(is_cont_africa, ruggedness, log_gdp)

hmc_samples = {k: v.detach().cpu().numpy() for k, v in mcmc.get_samples().items()}


Sample: 100%|██████████| 1200/1200 [00:53, 22.48it/s, step size=4.02e-01, acc. prob=0.941]


* **Summary of hmc samples**

In [5]:
# Utility function to print latent sites' quantile information.
def summary(samples):
    site_stats = {}
    for site_name, values in samples.items():
        marginal_site = pd.DataFrame(values)
        describe = marginal_site.describe(percentiles=[.05, 0.25, 0.5, 0.75, 0.95]).transpose()
        site_stats[site_name] = describe[["mean", "std", "5%", "25%", "50%", "75%", "95%"]]
    return site_stats

hmc_df= pd.DataFrame()
for key in summary(hmc_samples).keys():
    temp = summary(hmc_samples)[key]
    temp.index= [key]
    hmc_df= pd.concat([hmc_df, temp])

hmc_df

Unnamed: 0,mean,std,5%,25%,50%,75%,95%
bias,9.177086,0.132103,8.96432,9.087749,9.177876,9.266782,9.405351
feat_1,-1.833916,0.21121,-2.177543,-1.981985,-1.827741,-1.684364,-1.48171
feat_2,-0.180611,0.072021,-0.297735,-0.22914,-0.182988,-0.133065,-0.056745
feat_3,0.338777,0.126823,0.132223,0.249588,0.339038,0.425461,0.543319
sigma,0.950159,0.050119,0.869279,0.916199,0.948648,0.984792,1.031622


#### B. Statsmodel training

**Defining feature_3 for statsmodel**
* For statsmodel training, there are 3 features (`cont_africa`,`rugged`,`cont_africa_x_rugged`), 1 bias and 1 target label `rgdppc_2000`(GDP_per_capita)

In [6]:
df["cont_africa_x_rugged"] = df["cont_africa"] * df["rugged"]
df.head(3)

Unnamed: 0,cont_africa,rugged,rgdppc_2000,cont_africa_x_rugged
2,1,0.858,7.492609,0.858
4,0,3.427,8.216929,0.0
7,0,0.769,9.933263,0.0


In [7]:
import statsmodels.api as sm
feats, targets= df[['cont_africa', 'rugged', 'cont_africa_x_rugged']].values, df['rgdppc_2000'].values
feats= sm.add_constant(feats, prepend=False)

In [8]:
model_stm = sm.OLS(targets, feats, hasconst=True)
result= model_stm.fit(disp=0)
print(result.summary())

                            OLS Regression Results                            
Dep. Variable:                      y   R-squared:                       0.357
Model:                            OLS   Adj. R-squared:                  0.345
Method:                 Least Squares   F-statistic:                     30.71
Date:                Wed, 22 Apr 2020   Prob (F-statistic):           7.60e-16
Time:                        11:39:01   Log-Likelihood:                -229.37
No. Observations:                 170   AIC:                             466.7
Df Residuals:                     166   BIC:                             479.3
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
x1            -1.9480      0.227     -8.572      0.0

* **Summary of statsmodel result**

In [9]:
stats_summary= result.summary()
statsm_df = pd.DataFrame(stats_summary.tables[1].data[1:], columns=stats_summary.tables[1].data[0])
statsm_df

Unnamed: 0,Unnamed: 1,coef,std err,t,P>|t|,[0.025,0.975]
0,x1,-1.948,0.227,-8.572,0.0,-2.397,-1.499
1,x2,-0.2029,0.077,-2.621,0.01,-0.356,-0.05
2,x3,0.3934,0.132,2.989,0.003,0.134,0.653
3,const,9.2232,0.14,66.044,0.0,8.948,9.499


#### Contrasting resulst from HMC sampling and Statsmodel training

In [10]:
hmc_statsmodel_keys= {'bias':3, 'feat_1':0,'feat_2':1,'feat_3':2}#{0:X1,1:X2,2:X3, 3:const}

stats_hmc_di= dict()
for key, val in hmc_statsmodel_keys.items():
    print("For feature {}:".format(key))#site
    print('. . .val from HMC obtained samples: {}',hmc_df['mean'][key])
    print('. . .val from statsmodel training: {}',statsm_df['coef'][val], "\n")
    stats_hmc_di['param_val_{}'.format(key)]= [hmc_df['mean'][key], statsm_df['coef'][val]]
    stats_hmc_di['std_error_{}'.format(key)]= [hmc_df['std'][key], statsm_df['std err'][val]]

For feature bias:
. . .val from HMC obtained samples: {} 9.177085876464844
. . .val from statsmodel training: {}     9.2232 

For feature feat_1:
. . .val from HMC obtained samples: {} -1.8339163064956665
. . .val from statsmodel training: {}    -1.9480 

For feature feat_2:
. . .val from HMC obtained samples: {} -0.1806105077266693
. . .val from statsmodel training: {}    -0.2029 

For feature feat_3:
. . .val from HMC obtained samples: {} 0.3387771546840668
. . .val from statsmodel training: {}     0.3934 



In [11]:
pd.DataFrame(stats_hmc_di, index= ['hmc_samples','statsmodel_results'])

Unnamed: 0,param_val_bias,std_error_bias,param_val_feat_1,std_error_feat_1,param_val_feat_2,std_error_feat_2,param_val_feat_3,std_error_feat_3
hmc_samples,9.17709,0.132103,-1.83392,0.21121,-0.180611,0.072021,0.338777,0.126823
statsmodel_results,9.2232,0.14,-1.948,0.227,-0.2029,0.077,0.3934,0.132


#### HMC sampling on explicit pytorch model

In [12]:
from pyro.nn import PyroSample
from torch import nn
from pyro.nn import PyroModule

class regression_model(PyroModule):
    def __init__(self, in_features, out_features):
        super().__init__()
        self.linear = PyroModule[nn.Linear](in_features, out_features)
        self.linear.weight = PyroSample(dist.Normal(0., 1.).expand([out_features, in_features]).to_event(2))
        self.linear.bias = PyroSample(dist.Normal(0., 10.).expand([out_features]).to_event(1))

    def forward(self, x, y=None):
        sigma = pyro.sample("sigma", dist.Uniform(0., 10.))
        mean = self.linear(x).squeeze(-1)
        with pyro.plate("data", x.shape[0]):
            obs = pyro.sample("obs", dist.Normal(mean, sigma), obs=y)
        return mean

model2 = regression_model(3, 1)

In [13]:
data = torch.tensor(df[["cont_africa", "rugged", "cont_africa_x_rugged", "rgdppc_2000"]].values,
                        dtype=torch.float)
x_data, y_data = data[:, :-1], data[:, -1]

In [14]:
nuts_kernel = NUTS(model2)

mcmc = MCMC(nuts_kernel, num_samples=1000, warmup_steps=200)
mcmc.run(x_data, y_data)#is_cont_africa, ruggedness, log_gdp)
hmc_samples2 = {k: v.detach().cpu().numpy() for k, v in mcmc.get_samples().items()}

Sample: 100%|██████████| 1200/1200 [00:39, 30.61it/s, step size=4.29e-01, acc. prob=0.910]


In [15]:
hmc_samples_di = {'feat_1':hmc_samples2['linear.weight'].reshape(-1,3)[:,0],'feat_2':hmc_samples2['linear.weight'].reshape(-1,3)[:,1], 'feat_3':hmc_samples2['linear.weight'].reshape(-1,3)[:,2], 'bias':hmc_samples2['linear.bias'].reshape(-1)}#hmc_samples2['linear.weight'].reshape(-1,3)
hmc_df2= pd.DataFrame()
for key in summary(hmc_samples_di).keys():
    temp = summary(hmc_samples_di)[key]
    temp.index= [key]
    hmc_df2= pd.concat([hmc_df2, temp])

hmc_df2

Unnamed: 0,mean,std,5%,25%,50%,75%,95%
feat_1,-1.851923,0.22641,-2.215584,-2.011727,-1.856236,-1.703061,-1.452867
feat_2,-0.187826,0.078391,-0.318569,-0.239004,-0.183455,-0.136097,-0.065415
feat_3,0.353691,0.129144,0.141936,0.266109,0.35882,0.438002,0.564123
bias,9.189334,0.136106,8.966088,9.09313,9.189205,9.282696,9.408723


In [16]:
hmc_statsmodel_keys= {'bias':3, 'feat_1':0,'feat_2':1,'feat_3':2}#{0:X1,1:X2,2:X3, 3:const}

stats_hmc_di_2= dict()
for key, val in hmc_statsmodel_keys.items():
    print("For feature {}:".format(key))#site
    print('. . .val from HMC obtained samples: {}',hmc_df2['mean'][key])
    print('. . .val from statsmodel training: {}',statsm_df['coef'][val], "\n")
    stats_hmc_di_2['param_val_{}'.format(key)]= [hmc_df2['mean'][key], statsm_df['coef'][val]]
    stats_hmc_di_2['std_error_{}'.format(key)]= [hmc_df2['std'][key], statsm_df['std err'][val]]

For feature bias:
. . .val from HMC obtained samples: {} 9.18933391571045
. . .val from statsmodel training: {}     9.2232 

For feature feat_1:
. . .val from HMC obtained samples: {} -1.8519231081008911
. . .val from statsmodel training: {}    -1.9480 

For feature feat_2:
. . .val from HMC obtained samples: {} -0.18782629072666168
. . .val from statsmodel training: {}    -0.2029 

For feature feat_3:
. . .val from HMC obtained samples: {} 0.35369056463241577
. . .val from statsmodel training: {}     0.3934 



In [17]:
pd.DataFrame(stats_hmc_di_2, index= ['hmc_samples','statsmodel_results'])

Unnamed: 0,param_val_bias,std_error_bias,param_val_feat_1,std_error_feat_1,param_val_feat_2,std_error_feat_2,param_val_feat_3,std_error_feat_3
hmc_samples,9.18933,0.136106,-1.85192,0.22641,-0.187826,0.078391,0.353691,0.129144
statsmodel_results,9.2232,0.14,-1.948,0.227,-0.2029,0.077,0.3934,0.132


#### Training Pytorch model with SGD & setting priors from converged values

In [75]:
import torch.nn as nn
#from torch.autograd import Variable
import torch.optim as optim

class toynet(nn.Module):
    def __init__(self, in_features, out_features):
        super(toynet, self).__init__()
        self.fc1 = nn.Linear(in_features, out_features)
        self.sig = nn.Sigmoid()
    def forward(self,  x, y=None):
        x= self.fc1(x)
        return x
model3 = toynet(3, 1)
model3

toynet(
  (fc1): Linear(in_features=3, out_features=1, bias=True)
  (sig): Sigmoid()
)

In [43]:
optimizer3 = optim.Adam(model3.parameters())
loss3 = nn.MSELoss()

In [45]:
import torch
from torch.utils import data as d
import time

class Dataset(d.Dataset):
    def __init__(self, list_IDs, features, labels):
        'Initialization'
        self.labels = labels
        self.features= features
        self.list_IDs = list_IDs
    def __len__(self):
        'Denotes the total number of samples'
        return len(self.list_IDs)

    def __getitem__(self, index):
        'Generates one sample of data'
        # Select sample
        ID = self.list_IDs[index]

        # Load data and get label
        X = self.features[ID]#torch.load('data/' + ID + '.pt')
        y = self.labels[ID]

        return X, y
use_cuda = torch.cuda.is_available()
device = torch.device("cuda:0" if use_cuda else "cpu")
# Parameters
params = {'batch_size': 64,
          'shuffle': True,
          'num_workers': 6}

partition = {'train':list(range(y_data.shape[0]))}# IDs
features= {idx:record for idx, record in enumerate(x_data)}
labels = {idx:record for idx, record in enumerate(y_data)}# Labels
training_set = Dataset(partition['train'],features, labels)
training_generator = d.DataLoader(training_set, **params)

In [46]:
import time
max_epochs = 400
t1=time.time()
for epoch in range(max_epochs):
    # Training
    for local_batch, local_labels in training_generator:
        # Transfer to GPU
        local_batch, local_labels = local_batch.to(device), local_labels.to(device)
        optimizer3.zero_grad()
        y_pred= model3(local_batch)    
        l = loss3(y_pred, local_labels)
        l.backward()
        optimizer3.step()
    if epoch%50==0:
            print('epoch: ', epoch, ' loss: ', l.item());
print('total exe. time:', time.time()-t1)

epoch:  0  loss:  63.119693756103516


  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)


epoch:  50  loss:  62.37919998168945
epoch:  100  loss:  64.35924530029297
epoch:  150  loss:  57.75749969482422
epoch:  200  loss:  62.62089157104492
epoch:  250  loss:  61.59002685546875
epoch:  300  loss:  60.481849670410156
epoch:  350  loss:  60.46846389770508
total exe. time: 17.99188256263733


In [76]:
list(model3.parameters())[0][0]

[Parameter containing:
 tensor([[ 0.5054, -0.2933, -0.5212]], requires_grad=True),
 Parameter containing:
 tensor([0.5618], requires_grad=True)]

In [77]:
class regression_model_2(PyroModule):
    def __init__(self, in_features, out_features):
        super().__init__()
        self.linear = PyroModule[nn.Linear](in_features, out_features)
        self.linear.weight = PyroSample(dist.Normal(list(model3.parameters())[0][0], 1.).expand([out_features, in_features]).to_event(2))
        self.linear.bias = PyroSample(dist.Normal(list(model3.parameters())[1][0], 10.).expand([out_features]).to_event(1))

    def forward(self, x, y=None):
        sigma = pyro.sample("sigma", dist.Uniform(0., 10.))
        mean = self.linear(x).squeeze(-1)
        with pyro.plate("data", x.shape[0]):
            obs = pyro.sample("obs", dist.Normal(mean, sigma), obs=y)
        return mean
model4= regression_model_2(3, 1)

In [78]:
nuts_kernel = NUTS(model4)

mcmc = MCMC(nuts_kernel, num_samples=1000, warmup_steps=200)
mcmc.run(x_data, y_data)#is_cont_africa, ruggedness, log_gdp)
hmc_samples4 = {k: v.detach().cpu().numpy() for k, v in mcmc.get_samples().items()}

Sample: 100%|██████████| 1200/1200 [00:25, 47.05it/s, step size=3.84e-01, acc. prob=0.921]


In [79]:
hmc_samples_di = {'feat_1':hmc_samples4['linear.weight'].reshape(-1,3)[:,0],'feat_2':hmc_samples4['linear.weight'].reshape(-1,3)[:,1], 'feat_3':hmc_samples4['linear.weight'].reshape(-1,3)[:,2], 'bias':hmc_samples4['linear.bias'].reshape(-1)}#hmc_samples2['linear.weight'].reshape(-1,3)
hmc_df4= pd.DataFrame()
for key in summary(hmc_samples_di).keys():
    temp = summary(hmc_samples_di)[key]
    temp.index= [key]
    hmc_df4= pd.concat([hmc_df4, temp])

hmc_df4

Unnamed: 0,mean,std,5%,25%,50%,75%,95%
feat_1,-1.810519,0.217312,-2.16624,-1.962683,-1.803437,-1.660652,-1.44686
feat_2,-0.182261,0.072628,-0.304403,-0.228927,-0.183999,-0.129148,-0.063949
feat_3,0.33103,0.128846,0.124735,0.24323,0.333,0.41823,0.540064
bias,9.174149,0.133759,8.964955,9.080325,9.169611,9.258896,9.398646


In [80]:
hmc_statsmodel_keys= {'bias':3, 'feat_1':0,'feat_2':1,'feat_3':2}#{0:X1,1:X2,2:X3, 3:const}

stats_hmc_di_4= dict()
for key, val in hmc_statsmodel_keys.items():
    print("For feature {}:".format(key))#site
    print('. . .val from HMC obtained samples: {}',hmc_df4['mean'][key])
    print('. . .val from statsmodel training: {}',statsm_df['coef'][val], "\n")
    stats_hmc_di_4['param_val_{}'.format(key)]= [hmc_df4['mean'][key], statsm_df['coef'][val]]
    stats_hmc_di_4['std_error_{}'.format(key)]= [hmc_df4['std'][key], statsm_df['std err'][val]]

For feature bias:
. . .val from HMC obtained samples: {} 9.174148559570312
. . .val from statsmodel training: {}     9.2232 

For feature feat_1:
. . .val from HMC obtained samples: {} -1.810518503189087
. . .val from statsmodel training: {}    -1.9480 

For feature feat_2:
. . .val from HMC obtained samples: {} -0.18226078152656555
. . .val from statsmodel training: {}    -0.2029 

For feature feat_3:
. . .val from HMC obtained samples: {} 0.33103030920028687
. . .val from statsmodel training: {}     0.3934 



In [81]:
pd.DataFrame(stats_hmc_di_4, index= ['hmc_samples','statsmodel_results'])

Unnamed: 0,param_val_bias,std_error_bias,param_val_feat_1,std_error_feat_1,param_val_feat_2,std_error_feat_2,param_val_feat_3,std_error_feat_3
hmc_samples,9.17415,0.133759,-1.81052,0.217312,-0.182261,0.0726282,0.33103,0.128846
statsmodel_results,9.2232,0.14,-1.948,0.227,-0.2029,0.077,0.3934,0.132


#### Wrapping up pytorch & pyro model

In [None]:
class pyrotorch():
    def __init__(self,  in_features, out_features):
        self.in_features= in_features
        self.out_features= out_features
    class ptmodel(nn.Module):
        def __init__(self, in_features, out_features):
            super(toynet, self).__init__()
            self.fc1 = nn.Linear(in_features, out_features)
            self.sig = nn.Sigmoid()
            def forward(self,  x, y=None):
            x= self.fc1(x)
            return x
        def forward(self,  x, y=None):
            x= self.fc1(x)
            return x
    model_pt = ptmodel(self.in_features, self.out_features)
    optimizer = optim.Adam(model_pt.parameters())
    loss = nn.MSELoss()
    
    ###