# Posterior predictive checks for the speed of light data

In [None]:
import numpy as np, pandas as pd, matplotlib.pyplot as plt, pymc3 as pm, arviz as az, os
import plot_tools

## Normal model

In [None]:
# From Aki Vehtari's demos
# Read the data from ../data/light.txt, resolved against the current
# working directory.
data_path = os.path.abspath(os.path.join(os.curdir, '../data', 'light.txt'))
y = np.loadtxt(data_path)

# Summary statistics of the loaded data; later cells reuse these names.
n = len(y)
my = y.mean()
s2 = y.var(ddof=1)  # ddof=1 yields the sample (n - 1 denominator) variance
s = np.sqrt(s2)

In [None]:
# Drop the observation at index 5 and recompute the summary statistics on the
# reduced data, so that the replicates simulated from n, s and my describe the
# same data set they are later compared against (same length, same moments).
# NOTE(review): index 5 is presumably the extreme low outlier - confirm
# against light.txt.
# The data are rebuilt from the file rather than from the current y so that
# re-running this cell does not remove one more observation each time.
y_all = np.loadtxt(data_path)
y = np.concatenate((y_all[:5], y_all[6:]))
n = len(y)
s2 = np.var(y, ddof=1)  # ddof=1 yields the sample (n - 1 denominator) variance
s = np.sqrt(s2)
my = np.mean(y)
y

In [None]:
# Nine replicated data sets of n points each: standard t draws with n - 1
# degrees of freedom, scaled by sqrt(1 + 1/n) * s and shifted by my.
t_draws = np.random.standard_t(n - 1, size=(9, n))
replicates = t_draws * np.sqrt(1 + 1 / n) * s + my

In [None]:
# Hide the observed data among the nine replicated data sets and draw all ten
# histograms in a random panel order.
fig, axes = plt.subplots(5, 2, sharex=True, sharey=True, figsize=(9, 12))
fig.subplots_adjust(top=0.95, wspace=0.4)
order = np.random.permutation(10)
bin_edges = np.arange(-45, 65, 5)
for ax, idx in zip(axes.flat, order):
    # Indices 0-8 pick a replicate; index 9 stands for the observed data.
    panel_data = y if idx >= 9 else replicates[idx]
    ax.hist(panel_data, bin_edges)
    plot_tools.modify_axes.only_x(ax)
axes[0, 0].set_xlim([-50, 70])
fig.suptitle(
    "Light speed example: Observed data + Replicated datasets.\n"
    "Can you spot which one is the observed data?"
);

In [None]:
# Histogram of the smallest value in each replicated data set, with the
# smallest observed value marked by a red vertical line.
minimums = replicates.min(axis=1)
min_bins = np.arange(-50, 10, 4)
plt.hist(minimums, bins=min_bins)
plt.vlines(x=np.min(y), ymin=0, ymax=30, color='red')
plt.show()

In [None]:
# Normal model in PyMC3
with pm.Model() as model:
    # Priors for the scale and location of the normal likelihood.
    sigma = pm.HalfCauchy('sigma', 5)
    m = pm.Normal('m', 0, 50)
    # A normal likelihood has no degrees-of-freedom parameter, so no 'nu'
    # variable is declared here; an unconnected free variable would only be
    # sampled from its prior and slow the sampler down.
    y_obs = pm.Normal('y_obs', mu=m, sigma=sigma, observed=y)

    trace_norm = pm.sample()

    # Replicated data sets drawn from the posterior predictive distribution.
    replicates = pm.sample_posterior_predictive(trace_norm)

## Student T model

In [None]:
# Student T Model
# Declare the model first: priors on scale, location and degrees of freedom,
# then the Student-t likelihood on the observed data.
with pm.Model() as model:
    sigma = pm.HalfCauchy('sigma', 5)
    m = pm.Normal('m', mu=0, sigma=50)
    nu = pm.HalfCauchy('nu', 5)
    y_obs = pm.StudentT('y_obs', nu=nu, mu=m, sigma=sigma, observed=y)

# Then sample the posterior and draw posterior predictive replicates.
with model:
    trace_T = pm.sample()
    replicates = pm.sample_posterior_predictive(trace_T)

In [None]:
# Keep the first ten posterior predictive draws of y_obs for plotting and
# display the shape of the full set of draws.
y_obs_draws = replicates['y_obs']
yrep = y_obs_draws[:10]
y_obs_draws.shape

In [None]:
# Hide the observed data among nine posterior predictive data sets and draw
# all ten histograms in a random panel order.
fig, axes = plt.subplots(5, 2, sharex=True, sharey=True, figsize=(9, 12))
fig.subplots_adjust(top=0.95, wspace=0.4)
order = np.random.permutation(10)
bin_edges = np.arange(-75, 100, 5)
for ax, idx in zip(axes.flat, order):
    # Indices 0-8 pick a replicate; index 9 stands for the observed data.
    panel_data = y if idx >= 9 else yrep[idx]
    ax.hist(panel_data, bin_edges)
    plot_tools.modify_axes.only_x(ax)
axes[0, 0].set_xlim([-75, 100])
fig.suptitle(
    "Light speed example: Observed data + Replicated datasets.\n"
    "Can you spot which one is the observed data?"
);

## Cauchy model

In [None]:
# Cauchy Model
# Declare the model first: priors on scale and location, then the Cauchy
# likelihood on the observed data.
with pm.Model() as model:
    beta = pm.HalfCauchy('beta', 5)
    alpha = pm.Normal('alpha', mu=0, sigma=50)
    y_obs = pm.Cauchy('y_obs', alpha=alpha, beta=beta, observed=y)

# Then sample the posterior and draw posterior predictive replicates.
with model:
    trace_C = pm.sample()
    replicates = pm.sample_posterior_predictive(trace_C)

In [None]:
# plot them along with the real data set in random order subplot
# Take the replicates from the most recent posterior predictive sample so the
# panels show draws from the Cauchy model rather than whatever yrep held
# before this cell ran.
yrep = replicates['y_obs'][:10]
fig, axes = plt.subplots(5, 2, sharex=True, sharey=True, figsize=(9, 12))
fig.subplots_adjust(top=0.95, wspace=0.4)
order = np.random.permutation(10)
bin_edges = np.arange(-75, 100, 5)
for ax, idx in zip(axes.flat, order):
    # Indices 0-8 pick a replicate; index 9 stands for the observed data.
    # Values outside the bin range are simply not drawn by hist.
    ax.hist(yrep[idx] if idx < 9 else y, bin_edges)
    plot_tools.modify_axes.only_x(ax)
axes[0, 0].set_xlim([-75, 100])
fig.suptitle(
    "Light speed example: Observed data + Replicated datasets.\n"
    "Can you spot which one is the observed data?"
);

In [None]:
# Compare the three fitted models by WAIC, reported on the deviance scale.
pm.compare(
    {'Normal': trace_norm, 'StudentT': trace_T, 'Cauchy': trace_C},
    ic='waic',
    scale='deviance',
)

In [None]:
# Compare the three fitted models by LOO, reported on the deviance scale.
pm.compare(
    {'Normal': trace_norm, 'StudentT': trace_T, 'Cauchy': trace_C},
    ic='loo',
    scale='deviance',
)