In [2]:
import arviz as az
import pyro
import torch
import pyro.distributions as dist
from pyro.infer import MCMC, NUTS, Predictive
import pandas as pd
from torch.autograd import Variable
import numpy as np
import xarray
# Pyro: fix the RNG seed and turn on distribution argument/sample validation.
pyro.set_rng_seed(0)
pyro.enable_validation(True)

# Use the HTML repr for xarray objects; the trailing ";" keeps the returned
# options object from being echoed as cell output.
xarray.set_options(display_style="html");

In [3]:
# Load the developer activity table; the first CSV column becomes the index.
data = pd.read_csv("linear_regression_data.csv", index_col=0)

# Index labels and the number of rows.
names = data.index.values
N = len(names)

# `time` stays a NumPy array in this cell; the two count columns become tensors.
time = data["time"].values
slack_comments = torch.tensor(data["comments"].values)
github_commits = torch.tensor(data["commits"].values)

data

Unnamed: 0,comments,commits,time
Alice,7500,25,4.5
Bob,10100,32,6.0
Cole,18600,49,7.0
Danielle,25200,66,12.0
Erika,27500,96,18.0


In [4]:
# Rebind `time` (defined in an earlier cell) to a torch tensor.
time = torch.tensor(time)

# Data for out-of-sample predictions: candidate names and their time values.
candidate_devs = ["Francis", "Gerard"]
candidate_devs_time = np.array([3.6, 5.1])

time

tensor([ 4.5000,  6.0000,  7.0000, 12.0000, 18.0000], dtype=torch.float64)

In [5]:
# Dimension names attached to per-developer variables when building InferenceData.
# NOTE(review): "time" is not a sample site of `model`; the entry is kept as in the
# original and is presumably only relevant if a variable of that name is added.
dims = {
    "slack_comments": ["developer"],
    "github_commits": ["developer"],
    "time": ["developer"],
}

# Inputs shared by inference and by the predictive passes (no observations here).
data_dict = {
    "developer": N,
    "time_since_joined": time,
}

# Observed outcomes. These are passed to MCMC only; the predictive passes omit
# them so the outcome sites are actually sampled.
observed_dict = {
    "slack_comments": slack_comments,
    "github_commits": github_commits,
}


def model(developer, time_since_joined, slack_comments=None, github_commits=None):
    """Two independent linear regressions of activity counts on time since joining.

    Args:
        developer: Size of the ``developer`` plate (number of developers).
        time_since_joined: Per-developer predictor used in both regressions.
        slack_comments: Optional observed values for the ``slack_comments`` site.
            When ``None`` (the default) the site is sampled instead of conditioned.
        github_commits: Optional observed values for the ``github_commits`` site.
            When ``None`` (the default) the site is sampled instead of conditioned.

    Returns:
        Tuple ``(slack, github)`` holding the values of the two outcome sites.
    """
    # HalfNormal puts the same prior on the scale as |Normal(0, s)| did, but the
    # recorded site itself is non-negative, so posterior draws of the sigmas are
    # directly interpretable as standard deviations.
    b_sigma = pyro.sample("b_sigma", dist.HalfNormal(300.0))
    c_sigma = pyro.sample("c_sigma", dist.HalfNormal(6.0))
    b0 = pyro.sample("b0", dist.Normal(0, 200))
    b1 = pyro.sample("b1", dist.Normal(0, 200))
    c0 = pyro.sample("c0", dist.Normal(0, 10))
    c1 = pyro.sample("c1", dist.Normal(0, 10))

    with pyro.plate('developer', developer):
        # Observations arrive as arguments rather than module-level globals:
        # with obs hard-wired, Predictive would just echo the observed data.
        slack = pyro.sample("slack_comments", dist.Normal(b0 + b1 * time_since_joined, b_sigma), obs=slack_comments)
        github = pyro.sample("github_commits", dist.Normal(c0 + c1 * time_since_joined, c_sigma), obs=github_commits)
        return slack, github


nuts_kernel = NUTS(model, jit_compile=True, ignore_jit_warnings=True)
mcmc = MCMC(nuts_kernel, num_samples=400, warmup_steps=400,
            num_chains=4, disable_progbar=True)
# Condition on the observed outcomes during inference. The MCMC object keeps
# these arguments from the run call.
mcmc.run(**data_dict, **observed_dict)
posterior_samples = mcmc.get_samples()

# Predictive passes run without observations, so the outcome sites are drawn
# from the model (posterior predictive / prior predictive respectively).
posterior_predictive = Predictive(model, posterior_samples).get_samples(**data_dict)
prior = Predictive(model, num_samples=400).get_samples(**data_dict)

pyro_data = az.from_pyro(mcmc, prior=prior, posterior_predictive=posterior_predictive,
                         coords={'developer': names},
                         dims=dims)

In [6]:
# Show the InferenceData summary: the list of groups built by az.from_pyro.
pyro_data

Inference data with groups:
	> posterior
	> sample_stats
	> log_likelihood
	> posterior_predictive
	> observed_data
	> prior
	> prior_predictive

In [7]:
# Inspect the `posterior` group of the InferenceData object.
pyro_data.posterior

In [8]:
# Inspect the `prior` group (populated from the `prior=` samples given to az.from_pyro).
pyro_data.prior

In [9]:
# Inspect the `sample_stats` group (not passed explicitly; presumably derived
# by az.from_pyro from the MCMC object).
pyro_data.sample_stats

In [10]:
# Inspect the `log_likelihood` group (not passed explicitly; presumably computed
# by az.from_pyro from the model and the MCMC samples).
pyro_data.log_likelihood

In [11]:
# Inspect the `posterior_predictive` group (populated from the
# `posterior_predictive=` samples given to az.from_pyro).
pyro_data.posterior_predictive

In [12]:
# Inspect the `observed_data` group (not passed explicitly; presumably taken
# by az.from_pyro from the model's observed sites).
pyro_data.observed_data

In [13]:
# Inspect the `prior_predictive` group (only `prior=` was passed to az.from_pyro,
# so this group is presumably split out of those samples by ArviZ).
pyro_data.prior_predictive