In [None]:
import pandas as pd
import numpy as np
import pymc as pm
import arviz as az

az.style.use(["arviz-darkgrid"])  #, "arviz-doc"])
import matplotlib.pyplot as plt

import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

rng = np.random.default_rng(12345)

# Fit a one-parameter linear regression. `x` is registered as mutable data so
# that later cells can swap it out with `pm.set_data` to make predictions.
with pm.Model() as m:
    # y ~ 2 * x
    x = pm.MutableData("x", [-2, -1, 0, 1, 2])
    y_obs = [-4, -1.7, -0.1, 1.8, 4.1]

    # Slope of the regression, using the default parameters of `pm.Normal`.
    beta = pm.Normal("beta")
    # Likelihood; `shape=x.shape` ties the length of `y` to the current `x`.
    y = pm.Normal("y", mu=beta * x, sigma=0.1, shape=x.shape, observed=y_obs)

    # Draw posterior samples with the default sampler settings.
    idata = pm.sample()

# Render the model graph as the cell output.
pm.model_to_graphviz(m)

In [None]:
# In-sample check: simulate `y` from the fitted model at the observed `x`.
with m:
    pp = pm.sample_posterior_predictive(idata)

# The trailing semicolon suppresses the text repr of the returned object.
pm.plot_ppc(pp);


Once we have run the inference and learned the model parameters, we can simply change the data with ``pm.set_data()`` and condition the model on new predictor values. This is how we post-stratified our estimates with an MRP.

In [None]:
with m:
    # Make predictions conditioned on new Xs
    # (three values instead of five; `y` follows because of `shape=x.shape`).
    pm.set_data({"x": [-1, 3, 5]})
    # `predictions=True` stores the draws in the `predictions` group, which is
    # the group read back by the plot below.
    pp = pm.sample_posterior_predictive(idata, predictions=True)

pm.plot_posterior(pp, group="predictions")

We can also specify a *new model* with new predictor values while still sampling from the posterior of the original model. The posterior samples of the ``beta`` variable stored in the previous ``idata`` are reused when making predictions in this new (purely predictive) model, like this:

In [None]:
with pm.Model() as pred_m:
    # Only x changes
    x = np.array([-1, 0, 1])

    # Keeps the name "beta" so that its posterior draws are taken from `idata`.
    beta = pm.Normal("beta")
    y_pred = pm.Normal("y_pred", mu=beta * x, sigma=0.1, shape=x.shape)

    pp = pm.sample_posterior_predictive(
        idata, 
        var_names=["y_pred"], # y_pred has no `observed=` (it is a prediction, not data), so it is requested explicitly by name
        predictions=True, 
    )
    
pm.plot_posterior(pp, group="predictions")

This is neat and allows us to generate out-of-sample predictions that can certainly be very useful. However, we can use this for more complex and interesting purposes. For example, we can build a new model with a different likelihood and still sample from the posterior of a previous model.

In [None]:
with pm.Model() as pred_t_m:
    # Using the same x as in the last example
    x = np.array([-1, 0, 1])

    beta = pm.Normal("beta")

    # Only the likelihood distribution changes
    y_t = pm.StudentT("y_pred_t", nu=4, mu=beta * x, sigma=0.1)

    pp_t = pm.sample_posterior_predictive(
        idata, 
        var_names=["y_pred_t"], 
        predictions=True, 
    )

# `pp` still holds the Gaussian predictions from the previous cell; the
# Student-t predictions are plotted in a second colour for comparison.
pm.plot_posterior(pp, group="predictions")
pm.plot_posterior(pp_t, group="predictions", color="C1")

In fact, the "transferred variables" do not even need to have the same priors as in the original model. To illustrate this, let's change the ``pm.Normal()`` prior on ``beta`` in the original model to ``pm.Flat()`` in the new one:

In [None]:
with pm.Model() as pred_bern_m:
    x = np.linspace(-1, 1, 25)

    # Same variable name as in the fitted model, but with a flat prior here.
    beta = pm.Flat("beta")

    # We again change the functional form of the model:
    # instead of a linear Gaussian we have a logistic Bernoulli model
    p = pm.Deterministic("p", pm.math.sigmoid(beta * x)) # the sigmoid (logistic function) maps beta * x into (0, 1) so it can serve as the Bernoulli probability
    y = pm.Bernoulli("y", p=p)

    # Request both the probability curve and the simulated 0/1 outcomes.
    pp = pm.sample_posterior_predictive(
        idata, 
        var_names=["p", "y"], 
        predictions=True, 
    )

def jitter(x, rng):
    """Return ``x`` perturbed by Gaussian noise drawn from ``rng``.

    Each value is replaced by a normal draw centred on it with a standard
    deviation of 0.02, which spreads overlapping points apart in a scatter plot.
    """
    noise_scale = 0.02
    return rng.normal(loc=x, scale=noise_scale)

# Overlay the first 25 draws of chain 0: one faint curve per draw for the
# probability `p`, and one jittered point cloud per draw for the outcomes `y`.
x = np.linspace(-1, 1, 25)
chain0 = pp.predictions.sel(chain=0)
for draw in range(25):
    p = chain0["p"].sel(draw=draw)
    y = chain0["y"].sel(draw=draw)

    plt.plot(x, p, color="C0", alpha=.1)
    # Jitter both coordinates so that overlapping 0/1 outcomes remain visible.
    x_jittered = jitter(x, rng)
    y_jittered = jitter(y, rng)
    plt.scatter(x_jittered, y_jittered, s=10, color="k", alpha=.1)

# Empty artists that exist only to give the legend fully opaque handles.
plt.plot([], [], color="C0", label="p")
plt.scatter([], [], color="k", label="y + jitter")
plt.legend(loc=(1.03, 0.75));



We can also do this with hierarchical models and sample estimates for groups that are not represented in the data. In that case, the parameters specific to the new groups have no posterior draws, so their values are sampled from the prior.

To illustrate this let's take the famous [eight schools model](https://www.jstor.org/stable/1164617) illustrating the effectiveness of SAT coaching programs conducted in parallel at eight schools as a baseline. 

In [None]:
# Per-school data for the eight schools example: `y` is used as the observed
# outcome and `sigma` as the fixed likelihood standard deviation of each school.
y = np.array([28, 8, -3, 7, -1, 1, 18, 12])
sigma = np.array([15, 10, 16, 11, 9, 11, 10, 18])
J = len(y)

with pm.Model() as eight_schools:
    # Standardised school-level offsets, one per school.
    eta = pm.Normal("eta", 0, 1, shape=J)

    # Hierarchical mean and SD
    mu = pm.Normal("mu", 0, sigma=10)
    tau = pm.HalfNormal("tau", 10)

    # Non-centered parameterization of random effect
    theta = pm.Deterministic("theta", mu + tau * eta)

    pm.Normal("y", theta, sigma=sigma, observed=y)

    # NOTE: this rebinds `idata`, replacing the regression posterior from above.
    idata = pm.sample(2000, target_accept=0.9)

# One posterior panel per school offset, with 0 marked as the reference value.
pm.plot_posterior(idata, var_names=["eta"], ref_val=0)

So we add two more schools that were not observed.

We set the model up in a way that allows us to get posterior predictive draws for all 10 schools. For this we create two vectors of variables separately, ``eta`` and ``eta_new`` and concatenate them. The ``sample_posterior_predictive`` function will reuse the ``InferenceData`` draws for ``eta`` and take new draws for ``eta_new``. The predictions for new schools are informed by the group-level variables ``mu`` and ``tau``, which were estimated via sampling of the original subset of 8 schools. 

In [None]:
with pm.Model() as ten_schools:
    # Priors for schools 9 and 10
    # We assume that the mean of school 10 is expected to be one std above the mean
    # and have a relatively low measurement error
    # (second entry of each vector: eta mean 1.0, sigma between 5 and 7).
    eta_new = pm.Normal("eta_new", mu=[0, 1.0], sigma=1)
    sigma_new = pm.Uniform("sigma_new", lower=[10, 5], upper=[20, 7])

    # These are unchanged
    eta = pm.Normal("eta", 0, 1, shape=J)
    mu = pm.Normal("mu", 0, sigma=10)
    tau = pm.HalfNormal("tau", 10)

    # We concatenate the variables from the old and new groups
    # (the 8 original schools first, then the 2 new ones).
    theta = pm.Deterministic("theta", mu + tau * pm.math.concatenate([eta, eta_new]))
    # `sigma` is the array of the eight original standard deviations defined earlier.
    pm.Normal("y", theta, sigma=pm.math.concatenate([sigma, sigma_new]))

    # `y` has no `observed=` in this model, so it is requested explicitly by name.
    pp = pm.sample_posterior_predictive(idata, var_names=["y"])

pm.summary(pp, group="posterior_predictive")


In [None]:
import arviz as az
import seaborn as sns

# Pull the posterior predictive draws out of `pp` for plotting.
pps = az.extract(pp, group="posterior_predictive")

# One density panel per school; panels 8 and 9 (the two new schools) use a
# second colour so they stand out from the eight observed ones.
_, ax = plt.subplots(nrows=5, ncols=2, figsize=(8, 8), sharex=True, sharey=True)
y_draws = pps["y"]
for i, axi in enumerate(ax.flat):
    panel_color = "C0" if i < 8 else "C1"
    sns.kdeplot(y_draws[i], fill=True, ax=axi, color=panel_color)
    axi.set_title(f"School {i}")
    axi.axvline(0, ls="--", c="k")
plt.tight_layout()

Instead of -- or in addition to -- predicting model outcomes, we may also be interested in predicting latent variables. We can create an example with censored data, using PyMC's ``pm.Censored``.

In [None]:
# The repeated 5.0 values sit exactly at the upper censoring bound used below.
x_censored_obs = [4.3, 5.0, 5.0, 3.2, 0.7, 5.0]

with pm.Model() as censored_m:
    mu = pm.Normal("mu")
    sigma = pm.HalfNormal("sigma", sigma=1)

    # Unnamed (`.dist`) latent distribution that `pm.Censored` wraps below.
    x = pm.Normal.dist(mu, sigma)
    x_censored = pm.Censored(
        "x_censored", 
        dist=x, 
        lower=None, 
        upper=5.0, 
        observed=x_censored_obs,
    )

    # NOTE: this rebinds `idata` (and `mu`/`sigma` above) at notebook scope;
    # from here on `idata` holds this censored model's posterior.
    idata = pm.sample()


Next we take the same model and add a new variable that is not censored. We can then sample from the posterior predictive distribution for this new latent variable.

In [None]:
with pm.Model() as uncensored_m:
    # Same variable names as in `censored_m`, matching the draws stored in `idata`.
    mu = pm.Normal("mu")
    sigma = pm.HalfNormal("sigma")

    x = pm.Normal.dist(mu, sigma)
    # No `observed=` here: the censored variable is simulated, not conditioned on.
    x_censored = pm.Censored("x_censored", dist=x, lower=None, upper=5.0)

    # This uncensored variable is new
    x_uncensored = pm.Normal("x_uncensored", mu, sigma)

    pp = pm.sample_posterior_predictive(
        idata,
        var_names=["x_censored", "x_uncensored"],
        predictions=True,
    )

# Compare the censored and uncensored predictive distributions.
az.plot_posterior(pp, group="predictions");


## WHAT IF

We can once more return to the eight schools example.

In [None]:
from pymc import do

# Show the structure of the eight schools model before intervening on it.
pm.model_to_graphviz(eight_schools)


In [None]:
# By this point `idata` has been rebound to the censored model's posterior
# (variables `mu` and `sigma`). Passing it here would silently reuse the wrong
# `mu` draws and sample `tau` from its prior. Refit the eight schools model
# into its own name so the counterfactual is conditioned on the right posterior.
with eight_schools:
    idata_schools = pm.sample(2000, target_accept=0.9)

# Intervention: fix every school's standardised offset `eta` to zero, i.e. ask
# what the outcomes would look like with no school-specific effect.
no_effect = do(eight_schools, {"eta": np.zeros(J)})

with no_effect:
    # `mu` and `tau` come from the eight schools posterior; `y` is re-simulated
    # under the intervention and stored in the `predictions` group.
    pp = pm.sample_posterior_predictive(
        idata_schools, var_names=["eta", "y"], predictions=True
    )

pp


In [None]:
# Sanity check: `eta` was fixed to zeros by the intervention, so its mean should be 0.
pp.predictions.eta.mean()

In [None]:
# Pull the counterfactual draws out of the `predictions` group for plotting.
pps = az.extract(pp, group="predictions")

# One density panel per school, with a dashed reference line at zero.
_, ax = plt.subplots(nrows=4, ncols=2, figsize=(6, 6), sharex=True, sharey=True)
y_draws = pps["y"]
for i, axi in enumerate(ax.flat):
    sns.kdeplot(y_draws[i], fill=True, ax=axi, color="C0")
    axi.set_title(f"School {i}")
    axi.axvline(0, ls="--", c="k")
plt.tight_layout()

In [None]:
# Display the predictions container once more as the cell output.
pp