In [1]:
import arviz as az
import pyro
import torch
import pyro.distributions as dist
from pyro.infer import MCMC, NUTS, Predictive
import pandas as pd
from torch.autograd import Variable
import numpy as np
import xarray
# Use the HTML repr when xarray objects are displayed in the notebook.
xarray.set_options(display_style="html")

# Turn on Pyro's validation checks and fix its RNG seed so reruns of the
# notebook draw the same samples.
pyro.enable_validation(True)
pyro.set_rng_seed(0)

In [2]:
# Load the per-developer table; the first CSV column is used as the index.
data = pd.read_csv("linear_regression_data.csv", index_col=0)

# Index labels and their count (the count is wrapped in a 0-d tensor).
names = data.index.values
N = torch.tensor(len(names))

# One tensor per data column, built from the underlying NumPy arrays.
time, slack_comments, github_commits = (
    torch.tensor(data[column].values)
    for column in ("time", "comments", "commits")
)
data

Unnamed: 0,comments,commits,time
Alice,7500,25,4.5
Bob,10100,32,6.0
Cole,18600,49,7.0
Danielle,25200,66,12.0
Erika,27500,96,18.0


In [3]:
# Out-of-sample developers: each name is paired with the time value that is
# later fed to the model as `time_since_joined` for predictions.
_candidate_time_by_dev = {"Francis": 3.6, "Gerard": 5.1}
candidate_devs = list(_candidate_time_by_dev)
candidate_devs_time = torch.tensor(list(_candidate_time_by_dev.values()))

In [4]:
# Keyword arguments passed to `model` for the observed developers.
data_dict = {
    "developer": N,
    "time_since_joined": time
}

# Keyword arguments passed to `model` for the out-of-sample developers.
# The plate size is derived from `candidate_devs` instead of being hard-coded,
# mirroring how `N` is derived from `names`, so it cannot drift out of sync
# with the candidate list.
predictions_dict = {
    "developer": torch.tensor(len(candidate_devs)),
    "time_since_joined": candidate_devs_time
}

# Dimension names for variables of the observed developers.
dims = {
    "slack_comments": ["developer"],
    "github_commits": ["developer"],
    "time_since_joined": ["developer"],
}

# Dimension names for the same variables in the predictions groups.
pred_dims = {
    "slack_comments": ["developer_pred"],
    "github_commits": ["developer_pred"],
    "time_since_joined": ["developer_pred"]
}

# Coordinate labels for each named dimension above.
coords = {
    "developer": names,
    "developer_pred": candidate_devs
}

def model(developer, time_since_joined, slack_comments=None, github_commits=None):
    """Two linear regressions of developer activity on ``time_since_joined``.

    Both responses are modelled as Normal around a straight line in
    ``time_since_joined``: ``slack_comments`` with intercept ``b0``, slope
    ``b1`` and noise scale ``b_sigma``; ``github_commits`` with ``c0``, ``c1``
    and ``c_sigma``.

    Args:
        developer: Size of the ``developer`` plate (passed to ``pyro.plate``).
        time_since_joined: Predictor values used in both regression means.
        slack_comments: Observed values for the ``slack_comments`` site, or
            ``None`` to sample the site instead of conditioning on it.
        github_commits: Observed values for the ``github_commits`` site, or
            ``None`` to sample the site instead of conditioning on it.

    Returns:
        Tuple ``(slack, github)`` with the values of the two likelihood sites.
    """
    # Half-normal priors keep the recorded scale sites positive. Taking abs()
    # of a Normal sample gives the same implied prior on the scale but leaves
    # the sign of the latent site unidentified, so stored draws can be negative
    # and different chains can settle on opposite signs.
    b_sigma = pyro.sample("b_sigma", dist.HalfNormal(300.0))
    c_sigma = pyro.sample("c_sigma", dist.HalfNormal(6.0))

    # Intercepts and slopes of the two regression lines.
    b0 = pyro.sample("b0", dist.Normal(0, 200))
    b1 = pyro.sample("b1", dist.Normal(0, 200))
    c0 = pyro.sample("c0", dist.Normal(0, 10))
    c1 = pyro.sample("c1", dist.Normal(0, 10))

    with pyro.plate('developer', developer):
        slack = pyro.sample("slack_comments", dist.Normal(b0 + b1 * time_since_joined, b_sigma), obs=slack_comments)
        github = pyro.sample("github_commits", dist.Normal(c0 + c1 * time_since_joined, c_sigma), obs=github_commits)
        return slack, github


# Sample the posterior with NUTS (JIT compilation enabled) over three chains.
nuts_kernel = NUTS(model, jit_compile=True, ignore_jit_warnings=True)
mcmc = MCMC(
    nuts_kernel,
    num_samples=50,
    warmup_steps=50,
    num_chains=3,
    disable_progbar=True,
)
mcmc.run(slack_comments=slack_comments, github_commits=github_commits, **data_dict)
posterior_samples = mcmc.get_samples()

# Draw, in this order: posterior predictive for the observed developers,
# prior samples, and predictions for the out-of-sample developers.
posterior_predictive = Predictive(model, posterior_samples)(**data_dict)
prior = Predictive(model, num_samples=50)(**data_dict)
predictions = Predictive(model, posterior_samples)(**predictions_dict)

# Bundle everything into a single ArviZ InferenceData object.
pyro_data = az.from_pyro(
    mcmc,
    prior=prior,
    posterior_predictive=posterior_predictive,
    predictions=predictions,
    constant_data={"time_since_joined": time},
    predictions_constant_data={"time_since_joined": candidate_devs_time},
    coords=coords,
    dims=dims,
    pred_dims=pred_dims,
)

In [5]:
# Display the InferenceData object; its repr lists the groups it contains.
pyro_data

Inference data with groups:
	> posterior
	> sample_stats
	> log_likelihood
	> posterior_predictive
	> observed_data
	> constant_data
	> prior
	> prior_predictive
	> predictions
	> predictions_constant_data

In [6]:
# Display the `posterior` group.
pyro_data.posterior

In [7]:
# Display the `prior` group.
pyro_data.prior

In [8]:
# Display the `sample_stats` group.
pyro_data.sample_stats

In [9]:
# Display the `log_likelihood` group.
pyro_data.log_likelihood

In [10]:
# Display the `posterior_predictive` group.
pyro_data.posterior_predictive

In [11]:
# Display the `observed_data` group.
pyro_data.observed_data

In [12]:
# Display the `prior_predictive` group.
pyro_data.prior_predictive

In [13]:
# Display the `predictions` group.
pyro_data.predictions

In [14]:
# Display the `constant_data` group.
pyro_data.constant_data

In [15]:
# Display the `predictions_constant_data` group.
pyro_data.predictions_constant_data

In [16]:
# Build an InferenceData object from the predictions alone. No MCMC object is
# passed here, so the number of chains is given explicitly.
newdata = az.from_pyro(
    predictions=predictions,
    num_chains=3,
    coords=coords,
    pred_dims=pred_dims,
)
newdata

Inference data with groups:
	> predictions

In [17]:
# Display the `predictions` group of the predictions-only InferenceData.
newdata.predictions