## 【一般化線形混合モデル】肉まんの販売個数

各店舗での肉まんの販売個数を事後予測分布として予測させてみる。実際のデータと比較することで、モデルの妥当性をチェックしてみる。

In [None]:
import pymc as pm
import arviz as az

import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Notebook-wide matplotlib defaults: 12 pt text and an 8 x 6 inch figure.
plt.rcParams.update({
    'font.size': 12,
    'figure.figsize': [8, 6],
})

## Load Data

In [None]:
# Read the sales records from the CSV file in the current working directory.
data = pd.read_csv(filepath_or_buffer='data.csv')

In [None]:
# Show the first 20 rows as a quick sanity check of the loaded frame.
data.head(n=20)

## Preprocess & Scale Data

In [None]:
# Predictor (temperature) and response (units sold) as plain NumPy arrays.
x, y = (data[column].values for column in ('temperature', 'num_sold'))

# Grouping index: which store each row belongs to.
store_id = data['store_id'].values

# Length of the per-store effect vector used by the model.
# NOTE(review): this presumes store IDs are integers starting at 0 —
# confirm against data.csv.
num_stores = store_id.max() + 1

In [None]:
# Standardise the temperature. The mean and standard deviation are kept so
# that new temperatures can be put on the same scale at prediction time.
# ndarray.std() uses ddof=0 (population standard deviation), like np.std().
x_mu = x.mean()
x_sd = x.std()

x_scaled = (x - x_mu) / x_sd

## Define Model & Inference

In [None]:
# Poisson mixed model with a log link:
#     log(mu_i) = a * x_scaled_i + b + r[store_id_i]
# where r holds one Normal(0, s) offset per store.
with pm.Model() as model:

    # Inputs are registered as named data containers so that they can be
    # replaced later through pm.set_data() when predicting on new inputs.
    # NOTE(review): newer PyMC releases treat pm.Data as always mutable and
    # warn on the `mutable` kwarg — confirm against the installed version
    # before removing it.
    shared_x_scaled = pm.Data('x_scaled', x_scaled, mutable=True)
    shared_store_id = pm.Data('store_id', store_id, mutable=True)

    # The observed counts are a data container as well, so their length can
    # be changed together with the predictors.
    shared_y = pm.Data('y', y, mutable=True)

    # Fixed effects: slope on the scaled temperature (a) and intercept (b),
    # both with Normal(0, 10) priors.
    a = pm.Normal('a', mu=0, sigma=10)
    b = pm.Normal('b', mu=0, sigma=10)

    # Random effects: one offset per store, sharing the scale s, which has
    # a half-Cauchy prior.
    s = pm.HalfCauchy('s', 5)
    r = pm.Normal('r', mu=0, sigma=s, shape=(num_stores,))

    # Linear predictor; indexing r by store id picks, for every row, the
    # offset of the store that row belongs to.
    theta = a * shared_x_scaled + r[shared_store_id] + b

    # exp() maps the linear predictor to a strictly positive Poisson rate.
    mu = pm.math.exp(theta)

    # Likelihood of the observed sales counts.
    obs = pm.Poisson('obs', mu=mu, observed=shared_y)

In [None]:
with model:

    # 3000 tuning iterations followed by 3000 retained draws per chain.
    # NOTE(review): target_accept=0.99 is far above the sampler default,
    # presumably to suppress divergences from the hierarchical prior —
    # confirm.
    trace = pm.sample(
        draws=3000,
        tune=3000,
        target_accept=0.99,
        return_inferencedata=False,
    )
    # Convert the raw trace into an InferenceData object, which is what the
    # ArviZ diagnostics and the later cells consume.
    idata = pm.to_inference_data(trace=trace)

## Check MCMC-samples

In [None]:
# Trace plots for the variables stored in idata; the extra spacing keeps
# neighbouring panels from overlapping.
az.plot_trace(idata)
plt.subplots_adjust(wspace=0.5, hspace=0.5)

In [None]:
# Tabular summary of the posterior samples held in idata.
az.summary(data=idata)

## Posterior Predictive Check

気温を0度から19度まで1度刻みで変化させたときに、ある店舗（ID=4）において肉まんが何個売れるかを予測してみる。

In [None]:
# Temperatures to predict for: the integers 0, 1, ..., 19.
x_new = np.arange(20)

# Put them on the training scale by reusing the training mean and standard
# deviation (not statistics of x_new itself).
x_scaled_new = (x_new - x_mu) / x_sd

In [None]:
# Store to predict for, repeated once per new temperature so that the store
# index has the same length as x_new.
store_id_new = 4
store_id_dup = np.full_like(x_new, store_id_new, dtype=int)

In [None]:
# Placeholder counts with the same shape as x_new; the zeros themselves are
# arbitrary and only give the 'y' container a matching length.
y_dummy = np.zeros(x_new.shape, dtype=int)

In [None]:
with model:

    # Swap the training inputs for the prediction grid. All three containers
    # are replaced together so their lengths stay consistent.
    # NOTE: this change persists — after this cell the model holds the
    # prediction inputs, not the training data.
    pm.set_data(new_data={
        'x_scaled': x_scaled_new,
        'store_id': store_id_dup,
        'y': y_dummy
    })

    # Simulate 'obs' from the posterior samples in idata; the result is
    # requested as a plain dict rather than an InferenceData.
    ppc = pm.sample_posterior_predictive(
        idata,
        var_names=['obs'],
        return_inferencedata=False,
    )

In [None]:
# Inspect the dimensions of the simulated observations.
np.shape(ppc['obs'])

In [None]:
# Simulated 'obs' values from the posterior predictive run, under a shorter
# name for the cells that follow.
obs_pred = ppc['obs']

In [None]:
# Average over the two leading axes (presumably chain and draw — confirm
# with the shape printed above), leaving one mean per new temperature.
obs_mean = np.mean(np.mean(obs_pred, axis=0), axis=0)

In [None]:
# Posterior predictive distribution: HDI band plus the mean curve.
az.plot_hdi(x_new, obs_pred)
plt.plot(x_new, obs_mean)

# Observed data for the selected store, overlaid for comparison.
idx = data['store_id'] == store_id_new
plt.scatter(
    data.loc[idx, 'temperature'],
    data.loc[idx, 'num_sold'],
    s=100,
)

# One tick per predicted temperature; the trailing semicolon suppresses the
# notebook's echo of the last return value.
plt.xticks(x_new)
plt.xlabel('Temperature')
plt.ylabel('Number of Sales');

## Export Inference Data

In [None]:
# Persist the inference data to a NetCDF file in the working directory.
idata.to_netcdf('idata.nc')