source: https://github.com/CamDavidsonPilon/Probabilistic-Programming-and-Bayesian-Methods-for-Hackers/blob/master/Chapter1_Introduction/Ch1_Introduction_Pyro.ipynb


In [None]:
pip install pyro-ppl

In [None]:
import os
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import statsmodels.api as sm

import torch
from torch.distributions import constraints
from torch import tensor

import pyro
import pyro.distributions as dist
from pyro.infer import SVI,Trace_ELBO
from pyro.infer import MCMC, NUTS, HMC
from pyro.infer.autoguide  import AutoMultivariateNormal, AutoNormal, init_to_mean
from pyro.optim import ClippedAdam

# Guard: stop immediately unless the installed Pyro reports a 1.8.x version string.
assert pyro.__version__.startswith('1.8')
# Fix the random seeds so the sampling cells below are repeatable across runs.
pyro.set_rng_seed(1)
torch.manual_seed(1)

# Set matplotlib settings
%matplotlib inline
plt.style.use('default')
plt.rcParams['figure.figsize'] = [15, 5]

In [None]:
# Raw daily text-message counts: one value per line, no header row.
DATA_URL = (
    "https://raw.githubusercontent.com/CamDavidsonPilon/"
    "Probabilistic-Programming-and-Bayesian-Methods-for-Hackers/"
    "master/Chapter1_Introduction/data/txtdata.csv"
)

df = pd.read_csv(DATA_URL, header=None, names=['messages_received'])
# 1-based day number derived from the default 0-based row index.
df['day'] = df.index + 1
df.head()

In [None]:
df.describe().T

You are given a series of daily text-message counts from a user of your system. The data, plotted over time, appears in the chart below. How would you model this data?

In [None]:
sns.barplot(y='messages_received', x='day', data=df, palette='colorblind')
plt.xticks(fontsize=8);

Let's hypothesize that there is a switchpoint somewhere in the time series where the rate of texting increases. How can we find it with Pyro?

In [None]:
X = df.copy()

In [None]:
X['messages_received'].mean(), X['messages_received'].std()

In [None]:
# Let's look at an actual poisson distribution fit to the mean of the data
# Draw one synthetic count per observed day. The draw is sized from the frame
# (not a literal day count) so the samples always line up with the 'day' column.
n_days = len(X)
samples = dist.Poisson(X['messages_received'].mean()).sample(torch.Size([n_days])) # create sample distribution where Poisson mean = data mean
sample_df = pd.DataFrame({'sample_texts_received':samples.numpy(), 'day':X['day'].copy()})

In [None]:
sns.barplot(y='sample_texts_received', x='day', data=sample_df, palette='colorblind');
plt.xticks(fontsize=8);

In [None]:
sns.displot(x = sample_df['sample_texts_received']); #poisson distributed samples

In [None]:
sns.displot(x = df['messages_received']); #actuals

### So, comparing the actual data with the two Poisson-sampled plots above, it appears our data is overdispersed. Our data is heavily skewed to the right, i.e., it has many more days where the number of texts received is far from the mean, and its spikes are much bigger than a Poisson distribution with the same mean would suggest.

In probability theory and statistics, the *gamma* distribution is a two-parameter family of continuous probability distributions. The exponential distribution, Erlang distribution, and chi-square distribution are special cases of the gamma distribution. 

There are two different parameterizations in common use:

With a shape parameter k and a scale parameter θ.

With a shape parameter α = k and an inverse scale parameter β = 1/θ, called a rate parameter.
In each of these forms, both parameters are positive real numbers.

with α = shape parameter, β = rate parameter

Mean = α/β

Variance = α/β²

In [None]:
# Both PyTorch and Pyro parameterize the Gamma distribution by concentration (the shape, α) and rate (β).


In [None]:
from pyro.distributions.torch import Gamma

# Spell the two parameters out by keyword so the parameterization is explicit.
m = Gamma(concentration=tensor([1.]), rate=tensor([1.]))
m.sample()  # Gamma distributed with concentration=1 and rate=1

In [None]:
# NOTE(review): these two grids are not read by any later visible cell; the
# plotting cells below pass literal values, and `a`/`b` are reassigned before
# their next use. Candidates for driving the plots in a loop, or for removal.
a = tensor([0.5,1.,1.5,2.,2.5,3.]) #concentration parameters
b = tensor([0.25, 0.5, 1]) # rate parameters

In [None]:
sns.kdeplot(x=Gamma(0.5,0.25).sample(tensor([200])).numpy());

In [None]:
sns.kdeplot(x=Gamma(0.5,0.5).sample(tensor([200])).numpy());

In [None]:
sns.kdeplot(x=Gamma(0.5,1).sample(tensor([200])).numpy());

In [None]:
sns.kdeplot(x=Gamma(1,1).sample(tensor([200])).numpy());

In [None]:
sns.kdeplot(x=Gamma(1.5,1.5).sample(tensor([200])).numpy());

In [None]:
sns.kdeplot(x=Gamma(10,10).sample(tensor([500])).numpy());

In [None]:
sns.kdeplot(x=Gamma(100,100).sample(tensor([500])).numpy());

In [None]:
sns.kdeplot(x=Gamma(1000,1000).sample(tensor([1000])).numpy());

In [None]:
sns.kdeplot(x=Gamma(100000,100000).sample(tensor([1000])).numpy());

In [None]:
sns.kdeplot(x=Gamma(1,10).sample(tensor([1000])).numpy());

In [None]:
samples_nbd = dist.GammaPoisson(concentration=0.1, rate=0.1).sample(tensor([1000])).numpy(); # poisson with gamma parameters of shape and rate

In [None]:
sns.kdeplot(x=samples_nbd);

In [None]:
samples_nbd.mean()

In [None]:
samples_nbd.std()

In [None]:
sns.kdeplot(x = df['messages_received']); #actuals

In [None]:
df['messages_received'].mean()

In [None]:
df['messages_received'].var()

In [None]:
# Moment-match the Gamma mixing distribution to the observed counts, computing
# the statistics from the data instead of hard-coding rounded values.
m_obs = float(df['messages_received'].mean())
v_obs = float(df['messages_received'].var())
a = tensor([m_obs**2 / v_obs])  # concentration: Gamma mean a/b equals the data mean
b = tensor([m_obs / v_obs])     # rate: Gamma variance a/b**2 equals the data variance
# a/b and a/b**2 are the mean and variance of the Gamma *mixing* distribution;
# the Gamma-Poisson counts themselves have variance a/b + a/b**2.
print(a,b,a/b, a/b**2)
# One synthetic count per observed day, sized from the frame so it aligns with 'day'.
samples_nbd = dist.GammaPoisson(concentration=a, rate=b).sample(torch.Size([len(X)])).numpy()
sample_df = pd.DataFrame({'sample_texts_received':samples_nbd.squeeze(), 'day':X['day'].copy()})
print(sample_df['sample_texts_received'].mean(), sample_df['sample_texts_received'].var())
sns.barplot(y='sample_texts_received', x='day', data=sample_df, palette='colorblind');
plt.xticks(fontsize=8);

In [None]:
print(df['messages_received'].mean(), df['messages_received'].var())
sns.barplot(y='messages_received', x='day', data=df, palette='colorblind') #actuals
plt.xticks(fontsize=8);

In [None]:
df['messages_received'].mean(), df['messages_received'].var()

In [None]:
# So, it looks like the data is better fit by a negative binomial distribution (NBD), a.k.a. the Gamma-Poisson distribution.
# Let's try a model whose priors on a and b are centered on the moment-based estimates of those parameters (a = m**2/v, b = m/v).

In [None]:
data = tensor(X['messages_received'].values, dtype=torch.float)
data.size()

In [None]:
data.mean(), data.var()

In [None]:
def model(data, m=None, v=None):
    """Gamma-Poisson (negative binomial) model for a 1-D tensor of counts.

    Args:
        data: 1-D tensor of observed counts; its first dimension sizes the plate.
        m: Value used to centre the priors, nominally the mean of the counts.
            Defaults to ``data.mean()`` of the tensor actually passed in.
        v: Value used to centre the priors, nominally the variance of the counts.
            Defaults to ``data.var()`` of the tensor actually passed in.

    The defaults are resolved inside the call rather than in the signature, so
    the priors always follow the ``data`` argument instead of whatever global
    ``data`` happened to exist when this function was defined.
    """
    if m is None:
        m = data.mean()
    if v is None:
        v = data.var()

    # Exponential(rate) has mean 1/rate, so these priors are centred on the
    # moment-based guesses a = m**2/v and b = m/v.
    a = pyro.sample("a", dist.Exponential(v/m**2)) # prior for a
    b = pyro.sample("b", dist.Exponential(v/m)) # prior for b

    # Observations are conditionally independent given (a, b).
    with pyro.plate("data", data.size(0)):
        pyro.sample("obs", dist.GammaPoisson(a, b), obs=data)

In [None]:
# AutoMultivariateNormal and init_to_mean are already imported in the notebook's
# top import cell, so no re-import is needed here.
guide = AutoMultivariateNormal(model, init_loc_fn=init_to_mean)

In [None]:
def train(model, guide, lr=0.01, n_steps=4000, observations=None):
    """Fit ``guide`` to ``model`` with SVI, printing the ELBO loss as it goes.

    Args:
        model: Pyro model callable; called with the observations as its only argument.
        guide: Pyro guide for ``model``.
        lr: Initial learning rate for ClippedAdam.
        n_steps: Number of SVI steps; must be at least 1.
        observations: Tensor passed to every ``svi.step``. When omitted, the
            notebook-level ``data`` tensor is used, as before.

    Raises:
        ValueError: if ``n_steps`` is less than 1. Previously 0 raised
            ZeroDivisionError and a negative value reached an unbound ``elbo``.

    Side effects: reseeds Pyro's RNG to 1 and clears the global param store.
    """
    if n_steps < 1:
        raise ValueError(f"n_steps must be >= 1, got {n_steps}")
    if observations is None:
        observations = data  # fall back to the notebook-level tensor

    pyro.set_rng_seed(1)
    pyro.clear_param_store()

    gamma = 0.01  # final learning rate will be gamma * initial_lr
    lrd = gamma ** (1 / n_steps)  # per-step decay factor that reaches gamma after n_steps
    adam = pyro.optim.ClippedAdam({'lr': lr, 'lrd': lrd})

    svi = SVI(model, guide, adam, loss=Trace_ELBO())

    for i in range(n_steps):
        elbo = svi.step(observations)
        if i % 500 == 0:
            print(f"Elbo loss: {elbo}")
    print(f"Final Elbo loss: {elbo}")

In [None]:
%%time
train(model, guide)

In [None]:
from pyro.infer import Predictive

num_samples = 1000
predictive = Predictive(model, guide=guide, num_samples=num_samples)

# Run the predictive once, then keep every returned site except the observed
# one, flattened to (num_samples, -1) NumPy arrays.
predictive_draws = predictive(data)
svi_samples = {
    site: draws.reshape((num_samples, -1)).detach().cpu().numpy()
    for site, draws in predictive_draws.items()
    if site != "obs"
}

In [None]:
svi_samples.keys()

In [None]:
guide.quantiles([0.05,0.50,0.95])

In [None]:
data.mean()**2/data.var(), data.mean()/data.var()

## Let's do MCMC

In [None]:
# NOTE(review): max_tree_depth=3 looks like a speed trade-off (Pyro's documented
# default is larger) - confirm the chain still mixes adequately.
kernel = NUTS(
    model,
    jit_compile=True,
    ignore_jit_warnings=True,
    max_tree_depth=3,
)
posterior = MCMC(kernel, num_samples=1500, warmup_steps=500)
posterior.run(data);

In [None]:
# Convert each posterior sample tensor to a NumPy array, keyed by site name.
hmc_samples = {
    site: draws.detach().cpu().numpy()
    for site, draws in posterior.get_samples().items()
}
# Posterior draws of the model's "a" and "b" sites.
alpha_a_samples, alpha_b_samples = hmc_samples['a'], hmc_samples['b']


In [None]:
data.size(0)

In [None]:
print(data.mean()**2/data.var())
sns.displot(data=alpha_a_samples);

In [None]:
print(data.mean()/data.var())
sns.displot(data=alpha_b_samples);