In [1]:
import arviz as az
import pyro
import torch
import pyro.distributions as dist
from pyro.infer import MCMC, NUTS, Predictive
import pandas as pd
from torch.autograd import Variable
import numpy as np
import xarray
xarray.set_options(display_style="html");

pyro.enable_validation(True)
pyro.set_rng_seed(0)

In [2]:
#read data
data = pd.read_csv("linear_regression_data.csv", index_col=0)
time = data.time.values
slack_comments = torch.tensor(data.comments.values)
github_commits = torch.tensor(data.commits.values)
names = data.index.values
N = len(names)
data

Unnamed: 0,comments,commits,time
Alice,7500,25,4.5
Bob,10100,32,6.0
Cole,18600,49,7.0
Danielle,25200,66,12.0
Erika,27500,96,18.0


In [3]:
# data for out of sample predictions
candidate_devs = ["Francis", "Gerard"]
candidate_devs_time = torch.tensor([3.6, 5.1])
time = torch.tensor(time)
predictions_dict = {
    "developer": 2,
    "time_since_joined": candidate_devs_time
}

In [4]:
slack_comments = torch.tensor(data.comments.values)
github_commits = torch.tensor(data.commits.values)
time = torch.tensor(time)

dims={
    "slack_comments": ["developer"],
    "github_commits": ["developer"],
    "time": ["developer"],
}

data_dict = {
    "developer": N,
    "time_since_joined": time
}

def model(developer, time_since_joined, slack_comments=None, github_commits=None):
    b_sigma = abs(pyro.sample('b_sigma', dist.Normal(0, 300)))
    c_sigma = abs(pyro.sample('c_sigma', dist.Normal(0, 6)))
    b0 = pyro.sample("b0", dist.Normal(0, 200))
    b1 = pyro.sample("b1", dist.Normal(0, 200))
    c0 = pyro.sample("c0", dist.Normal(0, 10))
    c1 = pyro.sample("c1", dist.Normal(0, 10))

    with pyro.plate('developer', developer):
        slack = pyro.sample("slack_comments", dist.Normal(b0 + b1 * time_since_joined, b_sigma), obs=slack_comments)
        github = pyro.sample("github_commits", dist.Normal(c0 + c1 * time_since_joined, c_sigma), obs=github_commits)
        return slack, github


nuts_kernel = NUTS(model, jit_compile=True, ignore_jit_warnings=True)
mcmc = MCMC(nuts_kernel, num_samples=400, warmup_steps=400,
            num_chains=4, disable_progbar=True)
mcmc.run(**data_dict, slack_comments=slack_comments, github_commits=github_commits)
posterior_samples = mcmc.get_samples()
posterior_predictive = Predictive(model, posterior_samples).forward(**data_dict)
prior = Predictive(model, num_samples=150).forward(**data_dict)

pyro_data = az.from_pyro(mcmc, prior=prior, posterior_predictive=posterior_predictive, coords={'developer': names}, dims=dims)

  This is separate from the ipykernel package so we can avoid doing imports until


In [15]:
pyro_data

Inference data with groups:
	> posterior
	> sample_stats
	> log_likelihood
	> posterior_predictive
	> observed_data
	> prior
	> prior_predictive

In [16]:
pyro_data.posterior

In [17]:
pyro_data.prior

In [18]:
pyro_data.sample_stats

In [19]:
pyro_data.log_likelihood

In [20]:
pyro_data.posterior_predictive

In [21]:
pyro_data.observed_data

In [22]:
pyro_data.prior_predictive

In [13]:
predictions = Predictive(model, posterior_samples).forward(**predictions_dict)
predictions

{'slack_comments': tensor([[ 6984.6821,  9088.3037],
         [ 4079.7122, 10616.2109],
         [ 2995.8342,  9514.9609],
         ...,
         [ 6400.0327,  7977.3125],
         [ 6662.4097,  7788.4106],
         [ 6118.0713, 10126.9277]]),
 'github_commits': tensor([[18.9739, 31.9662],
         [22.0112, 28.8442],
         [30.2278, 35.6058],
         ...,
         [13.3742, 34.5168],
         [23.4763, 34.9166],
         [23.8936, 37.5543]])}

In [14]:
posterior_predictive

{'slack_comments': tensor([[13135.8870, 11640.0636, 13891.2552, 18302.3505, 30161.7115],
         [ 7774.3666, 10109.0372, 10050.4018, 19114.6338, 31080.6249],
         [ 7303.5594,  8241.8428, 11574.5337, 14694.7891, 27080.9010],
         ...,
         [ 5356.9876,  9725.0256,  8024.7975, 13244.8449, 30010.3011],
         [ 6302.2395, 10671.8654,  8497.1474, 19694.9268, 26186.0900],
         [ 8956.0987, 10367.3051, 12351.3986, 18373.6233, 27116.1491]],
        dtype=torch.float64),
 'github_commits': tensor([[ 28.5939,  42.9981,  37.6637,  60.7692, 103.8419],
         [ 31.0988,  45.5821,  44.0844,  75.1531, 109.7859],
         [ 28.6095,  41.4615,  45.7096,  71.2575, 104.8828],
         ...,
         [ 34.7850,  33.8098,  48.4754,  57.2339, 108.3084],
         [ 34.9200,  29.5217,  35.3537,  68.4338, 106.0278],
         [ 20.8144,  40.6970,  39.5808,  54.6423,  90.4466]],
        dtype=torch.float64)}