In [None]:
pip install pyro-ppl

In [None]:
import os
import torch
from torch.distributions import constraints
from torch import tensor

import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

import pyro
import pyro.distributions as dist
from pyro.infer import SVI,Trace_ELBO
from pyro.infer.autoguide  import AutoMultivariateNormal, AutoNormal, init_to_mean
from pyro.optim import ClippedAdam

# The notebook targets the Pyro 1.8 series; fail fast on any other version.
assert pyro.__version__.startswith('1.8')
# Seed Pyro and torch so the stochastic cells below are repeatable.
pyro.set_rng_seed(1)
torch.manual_seed(1)

# Set matplotlib settings: inline rendering, default style, 12x8 figures.
%matplotlib inline
plt.style.use('default')
plt.rcParams['figure.figsize'] = [12, 8]

In [None]:
from sklearn.datasets import load_iris

# Load iris as pandas objects: X_df holds the feature columns, y the labels.
X_df, y = load_iris(as_frame=True, return_X_y=True)

In [None]:
# Species names; presumably listed in the order of the integer labels in `y`
# (label 0 is treated as setosa further down) -- TODO confirm.
target_names = ['setosa', 'versicolor', 'virginica']

In [None]:
# Standardize every feature column to zero mean and unit standard deviation,
# then attach the species label and a binary "is setosa" indicator.
feature_names = ['sepal_length', 'sepal_width', 'petal_length', 'petal_width']


def _zscore(column):
    """Return the column centred on its mean and scaled by its std."""
    return (column - column.mean())/column.std()


X_df.columns = feature_names
X = X_df.apply(_zscore, axis=0)
X.columns = feature_names

X['iris_type'] = y
# Label 0 is setosa; every other label maps to 0 in the indicator.
X['is_setosa'] = np.where(X['iris_type'].values == 0, 1, 0)
X.info()

In [None]:
# Preview the first rows of the standardized features and the two label columns.
X.head()

In [None]:
# Pairwise scatter plots coloured by species: setosa is easy to distinguish,
# versicolor and virginica are harder.
sns.pairplot(data=X, hue='iris_type', palette="colorblind", corner=True);

## Make a logistic regression model of
`is_setosa ~ petal_width`

### Using sklearn

In [None]:
# Sanity check: the single feature and the binary label have the same length.
X['petal_width'].shape, X['is_setosa'].shape

In [None]:
from sklearn.linear_model import LogisticRegression
# C is the inverse regularization strength, so C = 1e9 means no L2 regularization.
# sklearn expects a 2-D design matrix, hence the reshape to a single column.
petal_width_column = X['petal_width'].values.reshape(-1, 1)
clf = LogisticRegression(C=1e9, random_state=0)
clf.fit(petal_width_column, X['is_setosa'])


In [None]:
# Slope and intercept from the sklearn fit: these are the parameters we expect
# the Bayesian model to recover.
print(clf.coef_)
print(clf.intercept_)

In [None]:
# One-column design matrix and the binary setosa labels, both as float32 tensors.
data = torch.tensor(X[['petal_width']].to_numpy(), dtype=torch.float32)
target = torch.tensor(X['is_setosa'].to_numpy(), dtype=torch.float32)

In [None]:
# Confirm the tensor shapes: data is 2-D (rows x 1 column), target is 1-D.
data.size(), target.size()

## Using Pyro

In [None]:
from torch import nn
from pyro.nn import PyroSample, PyroModule

class BayesianLogisticRegression(PyroModule):
    """Bayesian logistic regression with a Bernoulli likelihood.

    A single linear layer maps the features to one logit per row. The weight
    matrix has a Normal(-35, 5) prior and, when ``bias`` is true, the
    intercept has a Uniform(-25, 5) prior.

    Args:
        in_features: number of input columns.
        out_features: width of the linear layer. ``forward`` squeezes the last
            axis, so the Bernoulli likelihood expects the default of 1.
        bias: whether the linear layer has an intercept term.
    """

    def __init__(self, in_features, out_features = 1, bias = True):
        super().__init__()
        # Forward `bias` to nn.Linear so that bias=False really removes the
        # intercept instead of leaving an un-sampled nn.Parameter in the layer.
        self.linear = PyroModule[nn.Linear](in_features, out_features, bias=bias)
        # to_event(2) treats the whole weight matrix as one multivariate site.
        self.linear.weight = PyroSample(
            dist.Normal(-35., 5.).expand([out_features, in_features]).to_event(2)
        )
        if bias:
            self.linear.bias = PyroSample(
                dist.Uniform(-25., 5.).expand([out_features]).to_event(1)
            )

    def forward(self, x, y=None):
        """Score ``x`` and register the Bernoulli observation site ``"obs"``.

        Args:
            x: feature tensor whose first axis indexes observations.
            y: optional labels; when given, ``"obs"`` is conditioned on them,
                otherwise it is sampled.

        Returns:
            The logits with the trailing axis squeezed away.
        """
        logits = self.linear(x).squeeze(-1)

        # One conditionally independent Bernoulli draw per row of x.
        with pyro.plate("data", x.shape[0]):
            pyro.sample("obs", dist.Bernoulli(logits=logits), obs=y)
        return logits

In [None]:
# Number of feature columns in `data`; used as the model's input width.
data.size(1)

In [None]:
# Build the binary model with one input per column of `data`.
model = BayesianLogisticRegression(data.size(1))

In [None]:
from pyro.infer.autoguide import AutoMultivariateNormal

# Variational posterior: a multivariate normal over all latent sites of the
# model, initialised with the init_to_mean strategy.
guide = AutoMultivariateNormal(model, init_loc_fn=init_to_mean)

In [None]:
def train(model, guide, lr=0.01, n_steps=2000, x=None, y=None):
    """Fit ``guide`` to ``model`` with stochastic variational inference.

    Resets the RNG seed and the Pyro parameter store, then runs ``n_steps``
    ClippedAdam updates on the Trace_ELBO loss, printing the loss every
    500 steps.

    Args:
        model: Pyro model called as ``model(x, y)``.
        guide: matching guide.
        lr: initial learning rate.
        n_steps: number of SVI steps; the learning rate decays geometrically
            so that it ends at ``0.01 * lr``.
        x: feature tensor. Defaults to the notebook-level ``data``.
        y: label tensor. Defaults to the notebook-level ``target``.
    """
    # Fall back to the notebook globals so existing `train(model, guide)`
    # calls behave exactly as before.
    x = data if x is None else x
    y = target if y is None else y

    pyro.set_rng_seed(1)
    pyro.clear_param_store()

    gamma = 0.01  # final learning rate will be gamma * initial_lr
    lrd = gamma ** (1 / n_steps)
    adam = pyro.optim.ClippedAdam({'lr': lr, 'lrd': lrd})

    svi = SVI(model, guide, adam, loss=Trace_ELBO())

    for i in range(n_steps):
        elbo = svi.step(x, y)
        if i % 500 == 0:
            print(f"Elbo loss: {elbo}")

In [None]:
%%time
# Fit the binary model with the default learning rate and step count.
train(model, guide)

In [None]:
from pyro.infer import Predictive

# Draw from the fitted guide and flatten each site to (num_samples, n_values).
num_samples = 1000
predictive = Predictive(model, guide=guide, num_samples=num_samples)

posterior_draws = predictive(data, target)
svi_samples = {
    site: draws.reshape((num_samples, -1)).detach().cpu().numpy()
    for site, draws in posterior_draws.items()
    if site != "obs"  # keep only the latent parameters
}

In [None]:
# Names of the latent sites collected from the guide.
svi_samples.keys()

In [None]:
# Mean of the intercept draws from the SVI posterior.
svi_samples['linear.bias'].mean()

In [None]:
# Mean of the weight draws from the SVI posterior, averaged over the draws.
svi_samples['linear.weight'].mean(axis=0)

In [None]:
# 5%, 50% and 95% quantiles of each latent site under the fitted guide.
guide.quantiles([0.05,0.50,0.95])

In [None]:
# Joint view of the SVI draws: one row per draw, one column per parameter.
samples = pd.DataFrame({
    'bias': svi_samples['linear.bias'].squeeze(),
    'weight': svi_samples['linear.weight'].squeeze(),
})
sns.pairplot(data=samples, corner=True);

In [None]:
# sklearn point estimates again, for comparison with the SVI posterior above.
print(clf.intercept_)
print(clf.coef_)

# What about MCMC?

In [None]:
from pyro.infer import MCMC, NUTS
# Sample the same binary model with NUTS instead of fitting a guide.
nuts_kernel = NUTS(model)
# 1000 warm-up steps, then 3000 retained samples.
mcmc = MCMC(nuts_kernel, num_samples=3000, warmup_steps=1000)

In [None]:
%%time
# Run the sampler on the current data/target tensors.
mcmc.run(data, target)

In [None]:
# Convert every sampled site from a torch tensor to a NumPy array.
hmc_samples = {
    site: draws.detach().cpu().numpy()
    for site, draws in mcmc.get_samples().items()
}

In [None]:
# Names of the sites returned by the sampler.
hmc_samples.keys()

In [None]:
# Posterior median of the intercept from the MCMC draws.
np.median(hmc_samples['linear.bias'])

In [None]:
# Distribution of the intercept draws.
sns.displot(hmc_samples['linear.bias']);

In [None]:
# Posterior median of the weight from the MCMC draws.
np.median(hmc_samples['linear.weight'])

In [None]:
# Density of the weight draws; [:, 0, 0] picks the first (output, input) entry
# of the weight matrix from every draw.
sns.kdeplot(hmc_samples['linear.weight'][:,0,0]);

In [None]:
# sklearn point estimates, for comparison with the MCMC posterior above.
print(clf.coef_)
print(clf.intercept_)

## So, this appears to work in 1D 
## Let's try predicting the multinomial logit

### Note: because setosa is perfectly separable, the frequentist (maximum-likelihood) fit fails to converge

In [None]:
import statsmodels.formula.api as smf

In [None]:
# Multinomial logit of the species label on all four standardized features.
mod = smf.mnlogit(formula='iris_type ~ sepal_length + sepal_width + petal_length + petal_width', data=X)

In [None]:
# Fit the statsmodels model and print its summary table.
result = mod.fit()
print(result.summary())

In [None]:
from torch import nn
from pyro.nn import PyroSample, PyroModule

class BayesianMultinomialLogisticRegression(PyroModule):
    def __init__(self, in_features, out_features = 1, bias = True):
        super().__init__()
        self.linear = PyroModule[nn.Linear](in_features, out_features)
        self.linear.weight = PyroSample(dist.Normal(-10., 5.).expand([out_features, in_features]).to_event(2))
        if bias:
          self.linear.bias = PyroSample(dist.Normal(-1., 2.).expand([out_features]).to_event(1))
        
    def forward(self, x, y=None):
        logits = self.linear(x).squeeze(-1)

        with pyro.plate("data", x.shape[0]):
            obs = pyro.sample("obs", dist.Categorical(logits=logits), obs=y)
        return logits

In [None]:
model_2 = BayesianMultinomialLogisticRegression(4,1)

In [None]:
# Multivariate-normal guide over the latent sites of the multinomial model,
# initialised with the init_to_mean strategy.
guide_2 = AutoMultivariateNormal(model_2, init_loc_fn=init_to_mean)

In [None]:
def train(model, guide, lr=0.1, n_steps=4000, x=None, y=None):
    """Fit ``guide`` to ``model`` with stochastic variational inference.

    This redefines the notebook's earlier ``train`` with a larger default
    learning rate and step count. It resets the RNG seed and the Pyro
    parameter store, then runs ``n_steps`` ClippedAdam updates on the
    Trace_ELBO loss, printing the loss every 500 steps.

    Args:
        model: Pyro model called as ``model(x, y)``.
        guide: matching guide.
        lr: initial learning rate.
        n_steps: number of SVI steps; the learning rate decays geometrically
            so that it ends at ``0.01 * lr``.
        x: feature tensor. Defaults to the notebook-level ``data``.
        y: label tensor. Defaults to the notebook-level ``target``.
    """
    # Fall back to the notebook globals so existing `train(model, guide)`
    # calls behave exactly as before.
    x = data if x is None else x
    y = target if y is None else y

    pyro.set_rng_seed(1)
    pyro.clear_param_store()

    gamma = 0.01  # final learning rate will be gamma * initial_lr
    lrd = gamma ** (1 / n_steps)
    adam = pyro.optim.ClippedAdam({'lr': lr, 'lrd': lrd})

    svi = SVI(model, guide, adam, loss=Trace_ELBO())

    for i in range(n_steps):
        elbo = svi.step(x, y)
        if i % 500 == 0:
            print(f"Elbo loss: {elbo}")


In [None]:
# Rebind data/target to the four-feature design matrix and the species labels.
feature_columns = ['sepal_length', 'sepal_width', 'petal_length', 'petal_width']
data = torch.tensor(X[feature_columns].to_numpy(), dtype=torch.float32)
target = torch.tensor(X['iris_type'].to_numpy(), dtype=torch.float32)

data.size(), target.size()

In [None]:
%%time
# Fit the multinomial model with the most recently defined train() defaults.
train(model_2, guide_2)

In [None]:
from pyro.infer import MCMC, NUTS
# Sample the multinomial model with NUTS.
nuts_kernel = NUTS(model_2)
# 1500 warm-up steps, then 2000 retained samples.
mcmc = MCMC(nuts_kernel, num_samples=2000, warmup_steps=1500)
mcmc.run(data, target)

In [None]:
# Convert every sampled site from a torch tensor to a NumPy array.
hmc_samples = {
    site: draws.detach().cpu().numpy()
    for site, draws in mcmc.get_samples().items()
}

In [None]:
# Names of the sites returned by the sampler for the multinomial model.
hmc_samples.keys()

In [None]:
import arviz as az

# Wrap the MCMC run for ArviZ and draw trace plots, one panel per scalar
# component (compact=False).
az_data = az.from_pyro(mcmc)
az.plot_trace(az_data, compact=False);
plt.tight_layout()