## 【ポアソン分布】野菜の収穫量

野菜の収穫量のデータをポアソン分布へあてはめてみる。

In [None]:
import pymc as pm
import arviz as az

import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Use a base font size of 12 for the figures drawn in this notebook.
plt.rcParams.update({'font.size': 12})

## Load & Check Data

In [None]:
# Load the dataset from data.csv (path is relative to the working directory).
data = pd.read_csv('data.csv')

In [None]:
# Display the loaded DataFrame.
data

In [None]:
# Keep only the rows where group == 0, then take their 'yield' column as an
# array. This array is what the model below is fitted to.
group0 = data.query('group == 0')
x = group0['yield'].values

print(x)

In [None]:
# Shape of the extracted array (number of observations).
x.shape

In [None]:
# Sample mean of the yields.
np.mean(x)

In [None]:
# Variance of the yields, computed with np.var's defaults (ddof=0).
# Displayed right after the mean so the two values can be compared.
np.var(x)

In [None]:
# Histogram of the yields at two bin widths (1 and 3), side by side.
fig, axes = plt.subplots(1, 2, figsize=(12, 4))

for ax, bin_width in zip(axes, (1, 3)):
    sns.histplot(x, binwidth=bin_width, ax=ax)
    ax.set_xlabel('Yield')

plt.tight_layout()

## Define Model & Inference

In [None]:
# Poisson model with a single mean parameter shared by all observations.
model = pm.Model()

with model:

    # Prior on the Poisson mean: HalfNormal with sigma=10.
    mu = pm.HalfNormal('mu', sigma=10)

    # Likelihood: the observed yields x are Poisson-distributed with mean mu.
    obs = pm.Poisson('obs', mu=mu, observed=x)

In [None]:
with model:

    # Request 3000 draws and explicitly opt out of the InferenceData return
    # type, so the raw trace object can be inspected as well.
    trace = pm.sample(draws=3000, return_inferencedata=False)
    # Convert the same samples into an InferenceData object for ArviZ.
    idata = pm.to_inference_data(trace=trace)

pm.sample 関数は、return_inferencedata を False に指定しないと、戻り値が InferenceData Object になります。

**pm.sample 関数**：
https://www.pymc.io/projects/docs/en/stable/api/generated/pymc.sample.html

## Check MultiTrace Object & InferenceData Object

In [None]:
# Display the object returned by pm.sample(..., return_inferencedata=False).
trace

In [None]:
# Samples of 'mu' looked up by variable name on the trace.
trace['mu']

In [None]:
# Shape of the 'mu' samples as stored on the trace.
trace['mu'].shape

In [None]:
# Display the InferenceData object built from the trace.
idata

In [None]:
# The same 'mu' samples, read from the posterior group of the InferenceData.
idata.posterior['mu']

In [None]:
# Shape of 'mu' in the posterior group, for comparison with trace['mu'].shape.
idata.posterior['mu'].shape

## Check MCMC-samples

In [None]:
# Trace plot of the sampled variables; the trailing semicolon suppresses the
# cell's text output so only the figure is shown.
az.plot_trace(idata);

In [None]:
# Tabular summary of the posterior samples.
az.summary(idata)

In [None]:
# Plot the posterior of the sampled variables; the trailing semicolon
# suppresses the cell's text output.
az.plot_posterior(idata);
# Alternative call that sets the HDI probability explicitly to 0.95:
#az.plot_posterior(idata, hdi_prob=0.95);

## Posterior Predictive Check

In [None]:
with model:

    # Simulate observations from the fitted model, asking for the plain
    # (non-InferenceData) return value so the raw arrays can be used directly.
    ppc = pm.sample_posterior_predictive(trace=idata, return_inferencedata=False)
    # Wrap the simulated data in an InferenceData object for az.plot_ppc.
    idata_ppc = pm.to_inference_data(posterior_predictive=ppc)

pm.sample_posterior_predictive 関数には、InferenceData Object である idata を渡しても、MultiTrace Object である trace を渡しても、同じように動きます。

**pm.sample_posterior_predictive 関数**：
https://www.pymc.io/projects/docs/en/stable/api/generated/pymc.sample_posterior_predictive.html

In [None]:
# Simulated values for the observed variable 'obs'; show the array shape
# before it is reshaped in the next cell.
ppc_samples = ppc['obs']
ppc_samples.shape

In [None]:
# Collapse all leading axes (presumably chain and draw - confirm against the
# shape shown in the previous cell) into one, so each row is one simulated
# dataset. The row length is read from the array's last axis instead of being
# hard-coded to 50, so the cell keeps working when the number of observations
# changes.
ppc_samples = ppc_samples.reshape(-1, ppc_samples.shape[-1])
ppc_samples.shape

In [None]:
# Check via descriptive statistics (mean and variance):
# reduce along axis 1, giving one value of each statistic per row.

ppc_mean = np.mean(ppc_samples, axis=1)
ppc_var = np.var(ppc_samples, axis=1)

In [None]:
# For each statistic, draw the KDE of its simulated values and mark the value
# computed from the observed data x with a dashed red vertical line.
fig, axes = plt.subplots(1, 2, figsize=(12, 4))

panels = (
    (ppc_mean, x.mean(), 'stats = mean'),
    (ppc_var, x.var(), 'stats = var'),
)
for ax, (simulated, observed, label) in zip(axes, panels):
    sns.kdeplot(simulated, ax=ax)
    ax.axvline(observed, color='r', linestyle='dashed')
    ax.set_xlabel(label)

plt.tight_layout()

In [None]:
# Check via distributions: KDE-based posterior predictive plot using
# 50 posterior predictive samples.

az.plot_ppc(idata_ppc, kind='kde', num_pp_samples=50, figsize=(12, 4));

In [None]:
# Same plot as above, but using 3000 posterior predictive samples.
az.plot_ppc(idata_ppc, kind='kde', num_pp_samples=3000, figsize=(12, 4));