## 【ポアソン分布】出張回数

出張回数のデータをポアソン分布にあてはめてみる。

In [None]:
import pymc as pm
import arviz as az

import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Use a 12pt base font for every figure drawn in this notebook.
plt.rc('font', size=12)

## Load & Check Data

In [None]:
# Read the raw table from the CSV file in the working directory.
data = pd.read_csv(filepath_or_buffer='data.csv')

In [None]:
# Display the loaded DataFrame to inspect its contents.
data

In [None]:
# Extract the 'count' column as a plain NumPy array for use as the observed data.
# `.to_numpy()` is the pandas-recommended accessor: unlike `.values`, it always
# returns an ndarray, even if the column uses a pandas extension dtype.
x = data['count'].to_numpy()

print(x)

In [None]:
# Sample mean of the observed counts.
x.mean()

In [None]:
# Variance of the observed counts, computed with ddof=0 (NumPy's default),
# i.e. the sum of squared deviations divided by N.
np.var(x, ddof=0)

In [None]:
# Histogram of the counts with one (narrowed) bar per integer value.
sns.histplot(x, discrete=True, shrink=0.7)

# Set the visible x-range and the axis label on the axes just drawn.
plt.gca().set(xlim=(-1, 5), xlabel='Number of Trips');

## Define Model & Inference

In [None]:
# Build the model object first, then register its variables inside its context.
model = pm.Model()

with model:
    # Prior on the Poisson rate: half-normal with sigma=10.
    mu = pm.HalfNormal('mu', sigma=10)

    # Likelihood: the observed counts `x` are modelled as Poisson(mu).
    obs = pm.Poisson('obs', mu=mu, observed=x)

In [None]:
with model:

    # Draw 3000 samples as a raw trace (return_inferencedata=False) so the
    # conversion below can be done explicitly. The fixed random_seed makes the
    # draws reproducible across "Restart Kernel -> Run All".
    trace = pm.sample(3000, return_inferencedata=False, random_seed=42)
    # Convert to InferenceData and request the log-likelihood group.
    idata = pm.to_inference_data(trace, log_likelihood=True)

pm.to_inference_data 関数は、log_likelihood を True に指定しないと InferenceData Object に対数尤度が定義されません。対数尤度がないと、WAIC等が計算できないため、WAIC等が必要な場合はオプションで指定しておく必要があります。

**pm.to_inference_data 関数**：https://www.pymc.io/projects/docs/en/stable/api/generated/pymc.to_inference_data.html#pymc.to_inference_data

In [None]:
# Display the InferenceData object to check which groups it contains.
idata

## Check MCMC-samples

In [None]:
# Trace plot of the MCMC samples; the trailing ';' suppresses the returned repr.
az.plot_trace(idata);

In [None]:
# Summary table for the sampled variables.
az.summary(idata)

In [None]:
# Posterior distribution plot; the trailing ';' suppresses the returned repr.
az.plot_posterior(idata);

## Posterior Predictive Check

In [None]:
with model:

    # Simulate new observations from the posterior draws in `idata`.
    # The fixed random_seed makes the simulated data reproducible across
    # "Restart Kernel -> Run All".
    idata_ppc = pm.sample_posterior_predictive(idata, random_seed=42)

In [None]:
# Posterior predictive check plot, drawn with num_pp_samples=1000;
# the trailing ';' suppresses the returned repr.
az.plot_ppc(idata_ppc, num_pp_samples=1000);

## Save Inference Data

In [None]:
# Write `idata` to 'idata.nc' in the working directory.
# NOTE(review): only `idata` is saved here; `idata_ppc` is a separate object and is not written.
idata.to_netcdf('idata.nc')