# Mental Health Problem

In [None]:
# Enable the commands below when running this program on Google Colab.
# !pip install arviz==0.7
# !pip install pymc3==3.8
# !pip install Theano==1.0.4

import numpy as np
import pandas as pd
from scipy import stats
import matplotlib.pyplot as plt
import seaborn as sns

import pymc3 as pm
import theano.tensor as tt

# Notebook-wide presentation settings: show floats with three decimal places
# in printed NumPy arrays and pandas tables, and use the dark-grid plot theme.
np.set_printoptions(precision=3)
pd.options.display.precision = 3
plt.style.use('seaborn-darkgrid')

In [None]:
# Paired scores: element i of AFTER and element i of BEFORE are modelled
# jointly as one bivariate observation further down (50 pairs in total).
AFTER = [
    73, 72, 56, 58, 71, 42, 78, 77, 75, 72,
    56, 71, 69, 77, 84, 51, 62, 88, 56, 58,
    84, 91, 71, 82, 81, 77, 65, 78, 79, 60,
    66, 70, 65, 57, 64, 61, 56, 67, 75, 64,
    68, 67, 80, 55, 48, 85, 56, 62, 65, 79,
]
BEFORE = [
    62, 54, 19, 54, 47, 22, 35, 77, 64, 60,
    27, 41, 41, 44, 57, 16, 42, 89, 40, 67,
    69, 46, 74, 62, 60, 87, 32, 42, 73, 25,
    42, 57, 31, 35, 33, 38, 43, 53, 55, 62,
    67, 56, 76, 5, 31, 70, 66, 65, 34, 48,
]

In [None]:
# Data visualization: side-by-side box plots of the two score lists.
fig, ax = plt.subplots()
ax.boxplot([AFTER, BEFORE], labels=['After', 'Before'])
ax.set_ylabel('Score')
plt.show()

In [None]:
# Summary: one column per condition ('After' first, 'Before' second), one row
# per pair. The column order matters because the model below reads
# `data.values` positionally.
data = pd.DataFrame({'After': AFTER, 'Before': BEFORE})
data.describe()

## Bayesian analysis

In [None]:
# Fixed sampler seed so that Restart Kernel -> Run All reproduces the same draws.
RANDOM_SEED = 42

with pm.Model() as model:
    # Prior distribution: one mean and one standard deviation per column of
    # `data` (index 0 = first column, index 1 = second column).
    mu = pm.Normal('mu', 0, 100, shape=2)
    sigma = pm.Uniform('sigma', 0, 100, shape=2)

    # LKJ prior on the correlation between the two columns. The explicit
    # `eta`/`n` keywords replace the deprecated `n`/`p` pair (old n -> eta,
    # old p -> n), so the prior itself is unchanged.
    C_triu = pm.LKJCorr('omega', eta=2, n=2)
    # Build the 2x2 correlation matrix: the all-zero index array copies the
    # single off-diagonal correlation into every cell, then the diagonal is
    # overwritten with 1. This shortcut is only valid for the 2x2 case.
    C = tt.fill_diagonal(C_triu[np.zeros((2, 2), dtype=np.int64)], 1)
    # Covariance = diag(sigma) @ C @ diag(sigma).
    sigma_diag = tt.nlinalg.diag(sigma)
    cov = tt.nlinalg.matrix_dot(sigma_diag, C, sigma_diag)

    # Likelihood: each row of `data` is one bivariate normal observation.
    y_pred = pm.MvNormal('y_pred', mu=mu, cov=cov, observed=data.values)

    # Difference of average values, tracked under the name 'mu1 - mu2'.
    delta_mu = pm.Deterministic('mu1 - mu2', mu[0] - mu[1])

    # 21000 draws in each of 5 chains; later cells drop extra burn-in draws.
    trace = pm.sample(21000, chains=5, random_seed=RANDOM_SEED)

In [None]:
# Drop the first N_BURN_IN retained draws as additional burn-in, then plot
# the remaining samples to inspect the posterior and the mixing.
N_BURN_IN = 1000
chain = trace[N_BURN_IN:]
pm.traceplot(chain)
plt.show()

In [None]:
# Tabulate posterior summary statistics for the post-burn-in samples in `chain`.
pm.summary(chain)

### RQ1: 第1群の平均値が第2群の平均値より高い確率

In [None]:
# Share of posterior draws in which the first mean exceeds the second.
mu_samples = chain['mu']
prob_positive = (mu_samples[:, 0] - mu_samples[:, 1] > 0).mean()
print('p(mu1 - mu2 > 0) = {:.3f}'.format(prob_positive))
# Author's reading of the result: the probability that the research hypothesis
# "the mental health score after support is higher than the score before
# support" is correct is 100%.

### RQ2: 第1群と第2群の平均値の差の点推定、平均値の差の区間推定

In [None]:
delta_samples = chain['mu1 - mu2']

# EAP (expected a posteriori) estimate of the difference of means.
print('Point estimation (difference of mean): {:.3f}'.format(delta_samples.mean()))

# Equal-tailed 95% interval from the 2.5% and 97.5% posterior quantiles.
# Despite the `hpd_` prefix these are percentile bounds, not an HPD interval.
hpd_0025, hpd_0975 = np.quantile(delta_samples, [0.025, 0.975])
print('Credible Interval (95%): ({:.3f}, {:.3f})'.format(hpd_0025, hpd_0975))
# The difference of means lies in the interval above with 95% probability.

### RQ3: 平均値の差の片側区間推定の下限・上限

In [None]:
# One-sided 95% bounds from the 5% and 95% posterior quantiles of the
# difference of means (percentile bounds, despite the `hpd_` prefix).
hpd_005, hpd_0950 = np.quantile(chain['mu1 - mu2'], [0.05, 0.95])
# With 95% certainty the difference is at most this large.
print('At most (95%): {:.3f}'.format(hpd_0950))
# With 95% certainty the difference is at least this large.
print('At least (95%): {:.3f}'.format(hpd_005))

### RQ4: 平均値の差が基準点cより大きい確率

In [None]:
# Posterior probability that the difference of means exceeds each reference
# point c. The printed label is built from the same value used in the
# comparison, so the two cannot drift apart.
# NOTE(review): the original labels read "> 15" and "> 20" while the code
# compared against 12 and 14. The computed thresholds are kept here; change
# the tuple below if 15 and 20 were the intended reference points.
delta_samples = chain['mu'][:, 0] - chain['mu'][:, 1]
for threshold in (10, 12, 14):
    prob_above = (delta_samples > threshold).mean()
    print('p(mu1 - mu2 > {}) = {:.3f}'.format(threshold, prob_above))