## Imports

In [None]:
!pip install pyro-ppl

In [None]:
import os
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import statsmodels.api as sm

import torch
from torch.distributions import constraints
from torch import tensor

import pyro
import pyro.distributions as dist
from pyro.infer import SVI,Trace_ELBO
from pyro.infer.autoguide  import AutoMultivariateNormal, AutoNormal, init_to_mean
from pyro.optim import ClippedAdam

assert pyro.__version__.startswith('1.8')
pyro.set_rng_seed(1)
torch.manual_seed(1)

# Set matplotlib settings
%matplotlib inline
plt.style.use('default')
plt.rcParams['figure.figsize'] = [8, 6]

## Example 1 - Student Awards

In [None]:
# Download the awards dataset and give its two columns descriptive names
df = pd.read_csv(
    "https://raw.githubusercontent.com/seanreed1111/datasets/master/count-regression-datasets/competition_awards_data.csv"
).set_axis(['award_count', 'math_score'], axis=1)

# Work on a copy so the loaded frame stays untouched
X = df.copy()
df.head()

In [None]:
# Histogram of the observed award counts.
# `palette` was dropped: no `hue` variable is assigned, so seaborn ignores it (and warns about it).
sns.displot(data=X, x='award_count');

Let's try to fit this data with a Poisson distribution. A Poisson distribution has equal mean and variance, so the sample mean and sample variance of the data should be roughly equal.

Let's check:

In [None]:
# Sample mean vs. sample variance of the counts: the variance is larger than the mean, i.e. the data is overdispersed.
X.award_count.mean(), X.award_count.var()

In [None]:
# Draw 200 Poisson samples whose rate equals the mean of the observed award counts.
samples_1 = dist.Poisson(X['award_count'].mean()).sample(torch.Size([200]))
# sns.distplot is deprecated; displot draws the same histogram (no KDE by default)
# and matches the plotting call used for the other sample.
sns.displot(samples_1.numpy());

In [None]:
# Draw 200 Poisson samples whose rate equals the variance of the observed award counts
samples_2 = dist.Poisson(X.award_count.var()).sample(torch.Size([200]))
sns.displot(data=samples_2.numpy());

### So, comparing the actual data with the two Poisson sample distributions above, it appears our data is both zero-inflated AND overdispersed!

In [None]:
# Inputs for a Poisson GLM with a log link: log(E[award_count]) ~ math_score
data = X.loc[:, ['math_score']]    # feature frame with a single column
target = X.loc[:, 'award_count']   # response series

### Using sklearn

In [None]:
# Generalized Linear Model with a Poisson distribution and log link.
from sklearn.linear_model import PoissonRegressor

# PoissonRegressor defaults to alpha=1.0 (an L2 penalty on the coefficients).
# alpha=0 turns the penalty off so the fit is the plain maximum-likelihood estimate,
# which is what the statsmodels Poisson fit in this notebook computes as well.
# max_iter is raised above the default as headroom for the unpenalized, unscaled fit.
reg = PoissonRegressor(alpha=0, max_iter=1000).fit(data.values, target.values)

# These are the MLE point estimates we expect the Bayesian model to recover
print(reg.intercept_)
print(reg.coef_)
# score() reports D^2, the fraction of Poisson deviance explained
reg.score(data.values, target.values)

### Using Statsmodels

# Fit the same Poisson regression through the statsmodels formula interface
import statsmodels.formula.api as smf

statsmod = smf.poisson('award_count ~ math_score', data=X)
result = statsmod.fit()

# Text summary of the fitted coefficients and fit statistics
print(result.summary())

### Using Bayesian Regression with SVI

In [None]:
# Convert the features and the target to float32 torch tensors.
# The tensors are built from `X` rather than from the `data`/`target` names that this cell
# rebinds, so the cell can be re-run without failing on already-converted tensors.
data = tensor(X[['math_score']].values, dtype=torch.float)
target = tensor(X['award_count'].values, dtype=torch.float)

In [None]:
# Sanity check of the tensor shapes: data is (n_rows, 1), target is (n_rows,).
# Tensors expose .size() (lowercase); there is no .Size() method.
data.size(), target.size()

In [None]:
# Can build a full PyTorch Model using torch.nn (not always needed)
from torch import nn
from pyro.nn import PyroSample, PyroModule

class BayesianPoissonRegression(PyroModule):
    """Bayesian Poisson regression with a log link.

    A linear layer produces the log-rate; its weight (and optionally its bias) are
    latent variables with zero-mean Normal priors, and the observed counts are
    modelled as Poisson with rate ``exp(linear(x))``.

    Args:
        in_features: number of input columns of ``x``.
        out_features: number of outputs of the linear layer. ``forward`` squeezes the
            last dimension, so the default of 1 is the intended use
            (NOTE(review): other values are not handled specially).
        bias: when True, the intercept is a latent variable with a Normal prior;
            when False, the linear layer has no intercept at all.
        weight_scale: standard deviation of the Normal prior on the weights.
        bias_scale: standard deviation of the Normal prior on the intercept.
    """

    def __init__(self, in_features, out_features = 1, bias = True,
                 weight_scale = 0.05, bias_scale = 5.):
        super().__init__()
        # Forward `bias` to nn.Linear: otherwise bias=False would still leave the layer's
        # default bias in place as an ordinary (non-sampled) parameter.
        self.linear = PyroModule[nn.Linear](in_features, out_features, bias=bias)
        if bias:
            self.linear.bias = PyroSample(
                dist.Normal(0., bias_scale).expand([out_features]).to_event(1))
        self.linear.weight = PyroSample(
            dist.Normal(0., weight_scale).expand([out_features, in_features]).to_event(2))

    def forward(self, x, y=None):
        """Sample the observation site and return the Poisson rate.

        Args:
            x: input tensor; its first dimension is used as the size of the data plate.
            y: observed counts, or None to sample the "obs" site instead of conditioning on it.

        Returns:
            The Poisson rate for each row of ``x``.
        """
        # Log link: the linear predictor is the log-rate, so apply the inverse link (exp).
        rate = self.linear(x).squeeze(-1).exp()
        with pyro.plate("data", x.shape[0]):
            pyro.sample("obs", dist.Poisson(rate), obs=y)
        return rate

In [None]:
# Build the regression with one input per feature column of `data`
model = BayesianPoissonRegression(in_features=data.shape[1])
from pyro.infer.autoguide import AutoMultivariateNormal

# Black-box autoguide for Stochastic Variational Inference, using the init_to_mean strategy
guide = AutoMultivariateNormal(model, init_loc_fn=init_to_mean)

#define training loop using PyTorch infrastructure
# Use PyTorch optimizer to build a Pyro Optimizer
# Use Pyro to define default loss function 'Trace_ELBO'
def train(model, guide, lr=0.01, n_steps=4000, x=None, y=None):
    """Fit `guide` to `model` with SVI, printing the ELBO loss along the way.

    The RNG seed is reset and the Pyro parameter store is cleared on every call.
    Optimisation uses ClippedAdam with an exponentially decaying learning rate that
    ends at 1% of `lr` after `n_steps` steps, and Trace_ELBO as the loss.

    Args:
        model: Pyro model, called as ``model(x, y)``.
        guide: Pyro guide for `model`.
        lr: initial learning rate.
        n_steps: number of SVI steps; must be at least 1.
        x: input tensor. Defaults to the notebook-level `data` tensor.
        y: observed target tensor. Defaults to the notebook-level `target` tensor.

    Raises:
        ValueError: if `n_steps` is smaller than 1, or if only one of `x`/`y` is given.
    """
    # Validate up front: n_steps == 0 would otherwise divide by zero in the decay
    # computation, and a loop that never runs would leave `elbo` undefined.
    if n_steps < 1:
        raise ValueError(f"n_steps must be at least 1, got {n_steps}")
    if (x is None) != (y is None):
        raise ValueError("x and y must be passed together (or both omitted)")

    # Fall back to the notebook-level tensors so that `train(model, guide)` keeps working.
    if x is None:
        x, y = data, target

    pyro.set_rng_seed(1)
    pyro.clear_param_store()

    gamma = 0.01  # final learning rate will be gamma * initial_lr
    lrd = gamma ** (1 / n_steps)
    adam = pyro.optim.ClippedAdam({'lr': lr, 'lrd': lrd})

    svi = SVI(model, guide, adam, loss=Trace_ELBO())

    for i in range(n_steps):
        elbo = svi.step(x, y)
        if i % 500 == 0:
            print(f"Elbo loss: {elbo}")
    print(f"Final Elbo loss: {elbo}")

# Run SVI with the default learning rate and step count; the ELBO loss is printed every 500 steps.
train(model, guide)

In [None]:
from pyro.infer import Predictive

# Number of draws to take from the fitted guide
num_samples = 1000
predictive = Predictive(model, guide=guide, num_samples=num_samples)

# Keep every returned site except the observations, each flattened to a
# (num_samples, -1) NumPy array
svi_samples = {
    site: draws.reshape(num_samples, -1).detach().cpu().numpy()
    for site, draws in predictive(data, target).items()
    if site != "obs"
}

svi_samples.keys()


#### We can now compare the distribution of the coefficients retrieved via SVI to the point estimates above

In [None]:

# Point estimate of the intercept from the sklearn fit, for comparison with the plot
print(reg.intercept_)
# Kernel density estimate of the SVI draws for the intercept (site 'linear.bias')
sns.kdeplot(data = svi_samples['linear.bias']);


In [None]:
# Point estimate of the math_score coefficient from the sklearn fit, for comparison with the plot
print(reg.coef_)
# Kernel density estimate of the SVI draws for the coefficient (site 'linear.weight')
sns.kdeplot(data = svi_samples['linear.weight']);

#### We used a black-box autoguide (`AutoMultivariateNormal`) here, which approximates the posterior over the latent variables with a multivariate normal. That will not always work! We can perform the same analysis using MCMC, and it gives a similar answer in this case. (Yes, I checked!)

#### But remember: given enough samples, MCMC will generally characterize the posterior more accurately. SVI is only an approximation, and the approximation is only as good as your guide.