## 【線形回帰】新生児の体重

線形回帰モデルでの事後予測チェックを行ってみる。

In [None]:
import pymc as pm
import arviz as az

import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Global matplotlib defaults applied to every figure in this notebook.
plt.rcParams.update({
    'font.size': 12,
    'figure.figsize': [8, 6],
})

## Load & Check Data

In [None]:
# Read the newborn dataset from a CSV file; the relative path is resolved
# against the current working directory.
data = pd.read_csv('babies_weight.csv')

In [None]:
# Show the first 10 rows to check the columns and values.
data.head(10)

## Preprocess & Scale Data

In [None]:
# Pull the modelling columns out of the DataFrame as arrays:
# 'weeks' -> x (predictor), 'weight' -> y (response), 'gender' -> gender.
x, y, gender = (data[column].values for column in ('weeks', 'weight', 'gender'))

In [None]:
# Standardize the predictor: subtract its mean and divide by its standard
# deviation. Both statistics are kept under their own names for reuse.
x_mu, x_sd = x.mean(), x.std()

x_scaled = (x - x_mu) / x_sd

In [None]:
# Standardize the response: subtract its mean and divide by its standard
# deviation.
y_mu, y_sd = y.mean(), y.std()

y_scaled = (y - y_mu) / y_sd

## Define Model & Inference

In [None]:
# Linear regression on the standardized data: one slope shared by all
# observations plus one intercept per gender, with Normal observation noise.
with pm.Model() as model:

    # Data containers are declared mutable so their contents can be replaced
    # later with pm.set_data.
    shared_y_scaled = pm.Data('y_scaled', y_scaled, mutable=True)
    shared_x_scaled = pm.Data('x_scaled', x_scaled, mutable=True)

    # Used below as an index into `b` (shape 2), so values must be 0 or 1.
    shared_gender = pm.Data('gender', gender, mutable=True)

    # Priors: `a` is the slope, `b` holds the two gender-specific intercepts.
    a = pm.Normal('a', mu=0, sigma=10)
    b = pm.Normal('b', mu=0, sigma=10, shape=2)

    # Expected standardized weight for each row.
    mu = a * shared_x_scaled + b[shared_gender]

    # Prior on the scale of the observation noise.
    sd = pm.HalfCauchy('sd', 5)

    # Likelihood of the observed standardized weights.
    obs = pm.Normal('obs', mu=mu, sigma=sd, observed=shared_y_scaled)

In [None]:
with model:

    # Draw 3000 posterior samples with the raw (non-InferenceData) return
    # type, then convert the result to InferenceData for ArviZ.
    trace = pm.sample(draws=3000, return_inferencedata=False)
    idata = pm.to_inference_data(trace=trace)

In [None]:
# Trace plots for the sampled parameters, with extra horizontal and
# vertical spacing between the subplots of the current figure.
az.plot_trace(idata)
plt.subplots_adjust(wspace=0.5, hspace=0.5)

In [None]:
# Tabulate the ArviZ summary of the inference data.
az.summary(idata)

## Posterior Predictive Check / Case : Baby Boy

In [None]:
# Predictor values at which to predict: 35, 36, ..., 43 (same units as the
# 'weeks' column the model was fitted on).
x_new = np.arange(35, 44)

# Put the new inputs on the scale used for fitting.
x_scaled_new = (x_new - x_mu) / x_sd

# Placeholder for the 'y_scaled' data container: only its length matters when
# sampling predictions. It is created as float so it has the same dtype as the
# standardized weights the container was built from, rather than relying on an
# implicit int -> float upcast inside pm.set_data.
y_scaled_dum = np.zeros(x_new.shape, dtype=float)

In [None]:
# Case: baby boy -- every prediction point uses gender index 0.
gender_new = np.zeros(x_new.shape, dtype=int)

In [None]:
with model:

    # Swap the prediction inputs into the model's data containers. The dummy
    # 'y_scaled' has the same length as the new inputs.
    pm.set_data(
        {
            'x_scaled': x_scaled_new,
            'y_scaled': y_scaled_dum,
            'gender': gender_new,
        }
    )

    # Sample only 'obs' from the posterior predictive, returned as a dict.
    ppc = pm.sample_posterior_predictive(
        idata,
        var_names=['obs'],
        return_inferencedata=False,
    )

In [None]:
# Posterior predictive draws of 'obs', taken from the returned dict.
obs_pred = ppc['obs']

In [None]:
# Check the array dimensions of the predictive draws.
obs_pred.shape

In [None]:
# Collapse the two leading axes (presumably chain and draw -- confirm with the
# shape) to leave the predictive mean at each new x value.
obs_mean = np.mean(np.mean(obs_pred, axis=0), axis=0)

In [None]:
# Posterior predictive distribution: HDI band plus the predictive mean line.
ax = az.plot_hdi(x_scaled_new, obs_pred)
ax.plot(x_scaled_new, obs_mean)

# Observed data, coloured by gender.
sns.scatterplot(x=x_scaled, y=y_scaled, hue=gender, s=80, ax=ax)

ax.set_xlabel('Period (Standardized)')
ax.set_ylabel('Weight (Standardized)');

## Posterior Predictive Check / Case : Baby Girl

In [None]:
# Case: baby girl -- every prediction point uses gender index 1.
gender_new = np.ones(x_new.shape, dtype=int)

In [None]:
with model:

    # Swap the prediction inputs into the model's data containers. The dummy
    # 'y_scaled' has the same length as the new inputs.
    pm.set_data(
        {
            'x_scaled': x_scaled_new,
            'y_scaled': y_scaled_dum,
            'gender': gender_new,
        }
    )

    # Sample only 'obs' from the posterior predictive, returned as a dict.
    ppc = pm.sample_posterior_predictive(
        idata,
        var_names=['obs'],
        return_inferencedata=False,
    )

In [None]:
# Posterior predictive draws of 'obs', taken from the returned dict.
obs_pred = ppc['obs']

In [None]:
# Collapse the two leading axes (presumably chain and draw -- confirm with the
# shape) to leave the predictive mean at each new x value.
obs_mean = np.mean(np.mean(obs_pred, axis=0), axis=0)

In [None]:
# Posterior predictive distribution: HDI band plus the predictive mean line.
ax = az.plot_hdi(x_scaled_new, obs_pred)
ax.plot(x_scaled_new, obs_mean)

# Observed data, coloured by gender.
sns.scatterplot(x=x_scaled, y=y_scaled, hue=gender, s=80, ax=ax)

ax.set_xlabel('Period (Standardized)')
ax.set_ylabel('Weight (Standardized)');