## 【線形回帰】新生児の体重

新生児の性別と妊娠期間の両方を組み込んだ線形回帰モデルを構成してみる。このモデルでは、回帰直線の傾きと切片が男児と女児とで異なるものとしている。

In [None]:
import pymc as pm
import arviz as az

import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Notebook-wide plotting defaults: base font size and figure size in inches.
plt.rcParams.update({
    'font.size': 12,
    'figure.figsize': [8, 6],
})

## Load & Check Data

In [None]:
# Read the dataset; the path is resolved against the current working directory.
csv_path = 'babies_weight.csv'
data = pd.read_csv(csv_path)

In [None]:
# Show the first 10 rows as a quick sanity check of the loaded table.
data.head(10)

In [None]:
# Scatter plot of weight against weeks with one fitted line per gender value.
g = sns.lmplot(x='weeks', y='weight', hue='gender', data=data)

# Resize the resulting figure to 8 x 6 inches in a single call.
g.fig.set_size_inches(8, 6)

# Label the axes of the current plot; the trailing semicolon suppresses the
# notebook's echo of the return value.
plt.gca().set(xlabel='Period [weeks]', ylabel='Weight [g]');

## Preprocess & Scale Data

In [None]:
def standardize(x):
    """Return ``x`` centred on its mean and scaled by its standard deviation.

    The mean and the (population, ``ddof=0``) standard deviation are computed
    with ``np.mean`` and ``np.std`` over all of ``x``.

    Parameters
    ----------
    x : array-like
        Numeric values to standardize.

    Returns
    -------
    Same kind of object that ``(x - mean) / std`` produces for ``x``
    (an ``ndarray`` for NumPy input).

    Raises
    ------
    ValueError
        If the standard deviation is zero (e.g. all values are identical),
        because the scaled values would otherwise be NaN/inf.
    """
    sigma = np.std(x)

    # A zero spread would turn the division below into 0/0 and silently
    # propagate NaNs into whatever consumes the result; fail loudly instead.
    if np.any(sigma == 0):
        raise ValueError(
            'cannot standardize data with zero standard deviation'
        )

    return (x - np.mean(x)) / sigma

In [None]:
# Pull the model inputs out of the DataFrame as plain NumPy arrays.
# `to_numpy()` is the accessor pandas recommends over `.values`.
x = data['weeks'].to_numpy()
y = data['weight'].to_numpy()

# NOTE(review): presumably integer group codes (0/1) -- confirm against the CSV.
gender = data['gender'].to_numpy()

In [None]:
# Standardize the predictor and the response with the same helper.
x_scaled, y_scaled = map(standardize, (x, y))

## Define Model & Inference

In [None]:
# Linear regression of the standardized weight on the standardized gestation
# period, with a separate slope and intercept for each gender group.
with pm.Model() as model:

    # a: per-group slopes, b: per-group intercepts. shape=2 gives one entry
    # per group; both use Normal(0, 10) priors.
    a = pm.Normal('a', mu=0, sigma=10, shape=2)
    b = pm.Normal('b', mu=0, sigma=10, shape=2)

    # Select each observation's slope/intercept by indexing with `gender`.
    # NOTE(review): this assumes `gender` holds integer codes 0/1 -- confirm
    # against the CSV.
    mu = a[gender] * x_scaled + b[gender]

    # Observation noise scale, shared by both groups.
    sd = pm.HalfCauchy('sd', 5)

    # Likelihood of the standardized weights.
    obs = pm.Normal('obs', mu=mu, sigma=sd, observed=y_scaled)

    # Between-group differences (index 1 minus index 0), recorded so their
    # posteriors can be inspected directly.
    a_diff = pm.Deterministic('a_diff', a[1] - a[0])
    b_diff = pm.Deterministic('b_diff', b[1] - b[0])

In [None]:
with model:
    # Draw 3000 samples as a raw trace first, then convert it to
    # InferenceData with the pointwise log-likelihood included.
    trace = pm.sample(draws=3000, return_inferencedata=False)
    idata = pm.to_inference_data(trace=trace, log_likelihood=True)

In [None]:
# Trace plots for every variable in the inference data.
az.plot_trace(idata)

# Add spacing between the subplots of the current figure.
plt.subplots_adjust(hspace=0.5, wspace=0.5)

In [None]:
# Tabulate summary statistics for the variables stored in `idata`.
az.summary(idata)

In [None]:
# Posterior of the between-group differences in slope (a_diff) and intercept
# (b_diff), with 0 marked as the reference value.
az.plot_posterior(idata, var_names=['a_diff', 'b_diff'], ref_val=0);

## Export Inference Data

In [None]:
# Persist the inference data to a netCDF file in the working directory.
idata.to_netcdf('idata.nc')