In [None]:
# Notebook bootstrap: fetch the course repository and stage its helper files.
# NOTE(review): `github.com-juselara1` looks like a host alias from a personal
# SSH config; on other machines the clone presumably needs the plain GitHub
# host — confirm.
!git clone git@github.com-juselara1:rramosp/ppdl.dev.git
# Move `init.py` and the `local` entry from the clone into the working directory.
!mv ppdl.dev/content/init.py ppdl.dev/content/local .

Joint Distributions

`tensorflow-probability` contains a `JointDistribution` class that can be used for multilevel and hierarchical Bayesian modeling. Generally, we'll be using two classes:

* `JointDistributionSequential`: this distribution is similar to the `tf.keras.Sequential` class, since it allows building a model from a sequence of elements, in this case distributions or callables.
* `JointDistributionNamed`: this distribution builds a model from a dictionary of distributions or callables.

In [None]:
from ppdl.samplers import LinearRegressionSampler
import tensorflow as tf
import numpy as np
import tensorflow_probability as tfp
import matplotlib.pyplot as plt
import seaborn as sns
# Use matplotlib's "ggplot" style sheet for the figures drawn below.
plt.style.use("ggplot")
# Short alias for the TensorFlow Probability distributions module.
tfd = tfp.distributions

## OLS with Joint Distributions

In this example we'll use `JointDistribution` to build a linear regression model, with the following structure:

$$
w_0 \sim \mathcal{N}(0, 1)\\
w_1 \sim \mathcal{N}(0, 1)\\
y \sim \mathcal{N}(w_1 * x + w_0, 1)
$$

First, let us load the data.

In [None]:
# Build the synthetic-data sampler; `noise_std=1.0` is presumably the standard
# deviation of the observation noise — TODO confirm against `ppdl.samplers`.
sampler = LinearRegressionSampler(noise_std=1.0)
# Request 1000 samples with a fixed seed. Later cells use the result like a
# pandas DataFrame with columns `x_0` and `y` — presumably that is what it is.
data = sampler(1000, seed=42)

Let us visualize the data:

In [None]:
# Scatter the observed targets `y` against the single feature `x_0`.
fig, ax = plt.subplots(figsize=(8, 8))
data.plot.scatter(x="x_0", y="y", ax=ax)

The `JointDistributionNamed` class allows building a model using a syntax similar to its mathematical definition.

In [None]:
def ols_model(x, noise_scale=1.0):
    """Build the Bayesian linear-regression joint distribution for inputs ``x``.

    The generative model is::

        w_0 ~ Normal(0, 1)
        w_1 ~ Normal(0, 1)
        y   ~ Normal(w_1 * x + w_0, noise_scale)

    Parameters
    ----------
    x : array-like
        Input feature values; the mean of ``y`` is computed element-wise
        from them.
    noise_scale : float, optional
        Standard deviation of the observation noise. Defaults to 1.0, which
        matches the stated model ``y ~ N(w_1 * x + w_0, 1)`` and the
        ``noise_std=1.0`` used to generate the data (the previous hard-coded
        value of 0.1 disagreed with both).

    Returns
    -------
    tfd.JointDistributionNamedAutoBatched
        Joint distribution over the keys ``"w_0"``, ``"w_1"`` and ``"y"``.
    """
    model = tfd.JointDistributionNamedAutoBatched({
        # Standard-normal priors on intercept and slope (event shape [1]).
        "w_0": tfd.Normal(loc=tf.zeros(1), scale=1),
        "w_1": tfd.Normal(loc=tf.zeros(1), scale=1),
        # The lambda's argument names select which upstream variables it
        # receives, so they must stay `w_1` and `w_0`.
        "y": lambda w_1, w_0: tfd.Normal(
            loc=w_1 * x + w_0,
            scale=noise_scale
            )
        })
    return model

In this case, the `AutoBatched` variant is used to handle batching automatically, i.e., we didn't have to consider the batch axis in the function that computes `y`. Also, the `JointDistributionNamed` class allows defining a multilevel model from a dictionary.

We can verify that the model is a linear regression model:

In [None]:
# Instantiate the joint model on the observed inputs (the `x_0` column of `data`).
model = ols_model(data.x_0)

In [None]:
# Draw one joint sample from the prior. The seed is fixed so that the draw
# (and the plot title that reports the sampled weights) is reproducible when
# the notebook is re-run from a fresh kernel.
sample = model.sample(1, seed=42)
# Prior draw of the targets for every input value.
y_prior = sample["y"]
# Each weight carries a leading sample axis and a size-1 event axis; index
# both away to get the scalar value.
w_0 = sample["w_0"][0, 0]
w_1 = sample["w_1"][0, 0]

In [None]:
# Scatter the prior draw of `y` against the inputs; the title reports the
# intercept and slope that were sampled together with it.
prior_title = f"$y = {w_1:.3f} x + {w_0:.3f}$"
fig, ax = plt.subplots(figsize=(8, 8))
ax.scatter(data.x_0, y_prior, label="data")
ax.set_title(prior_title)

We can solve the linear regression model using Markov Chain Monte Carlo:

In [None]:
def log_prob(w_0, w_1):
    """Joint log-probability of the weights with ``y`` pinned to the observed targets.

    Reads the notebook-level ``model`` and ``data``; the positional order
    ``(w_0, w_1)`` is the order in which the MCMC state is passed in.
    """
    pinned = {"w_0": w_0, "w_1": w_1, "y": data.y.values}
    return model.log_prob(**pinned)

In [None]:
# Chain length: 10,000 retained draws after 1,000 burn-in steps.
num_results = 10_000
burning_steps = 1_000

# Plain HMC transition kernel driven by `log_prob`.
mcmc_kernel = tfp.mcmc.HamiltonianMonteCarlo(
    target_log_prob_fn=log_prob,
    step_size=1.,
    num_leapfrog_steps=3,
)
# Wrap it so the step size is tuned during the first 80% of the burn-in.
adaptive_hmc = tfp.mcmc.SimpleStepSizeAdaptation(
    inner_kernel=mcmc_kernel,
    num_adaptation_steps=int(burning_steps * 0.8),
)

In [None]:
# Run the adaptive HMC chain. The state order [w_0, w_1] matches the positional
# arguments of `log_prob`, and both weights start at 1. The trace records, for
# every retained step, whether the wrapped HMC kernel accepted its proposal.
# A fixed seed makes the chain reproducible when the notebook is re-run from a
# fresh kernel.
samples, is_accepted = tfp.mcmc.sample_chain(
    num_results=num_results,
    num_burnin_steps=burning_steps,
    current_state=[tf.ones(1), tf.ones(1)],
    kernel=adaptive_hmc,
    trace_fn=lambda _, pkr: pkr.inner_results.is_accepted,
    seed=42,
    )

Let's visualize the posterior distributions for the weights:

In [None]:
# One KDE panel per weight: left is the intercept chain, right is the slope chain.
fig, ax = plt.subplots(1, 2, figsize=(8, 4))
for panel, chain, panel_title in zip(ax, samples, ("$w_0$", "$w_1$")):
    sns.kdeplot(chain.numpy().flatten(), ax=panel)
    panel.set_title(panel_title)
fig.tight_layout()

Now, let's visualize the mean posterior model over the data:

In [None]:
# Posterior means of the intercept and slope, averaged over all retained draws.
w_0_mean, w_1_mean = (tf.reduce_mean(chain).numpy() for chain in samples)

print(f"mean(w_0): {w_0_mean:.3f}")
print(f"mean(w_1): {w_1_mean:.3f}")

In [None]:
# Evaluate the mean-posterior line on 100 evenly spaced points spanning the
# observed input range.
x_low, x_high = data.x_0.min(), data.x_0.max()
x = np.linspace(x_low, x_high, 100)
y_pred = w_0_mean + w_1_mean * x

In [None]:
# Overlay the line built from the mean posterior weights on the observations.
fig, ax = plt.subplots(figsize=(8, 8))
# Semi-transparent markers (alpha=0.5) let overlapping points show through.
ax.scatter(data.x_0, data.y, label="data", alpha=0.5)
# Black line: predictions `y_pred` evaluated on the grid `x`.
ax.plot(x, y_pred, label="prediction", color="k")
ax.legend()