# Adviser for High School Students

In [None]:
# Enable the commands below when running this program on Google Colab.
# !pip install arviz==0.7
# !pip install pymc3==3.8
# !pip install Theano==1.0.4
 
import numpy as np
import pandas as pd
from scipy import stats
import matplotlib.pyplot as plt
import seaborn as sns

import pymc3 as pm

import math

# Matplotlib 3.6 renamed its bundled seaborn styles to 'seaborn-v0_8-*' and
# newer releases no longer accept the old names, so pick whichever spelling
# this installation actually provides instead of hard-coding one.
_darkgrid_style = next(
    (name for name in ('seaborn-darkgrid', 'seaborn-v0_8-darkgrid')
     if name in plt.style.available),
    None,
)
if _darkgrid_style is not None:
    plt.style.use(_darkgrid_style)
# If neither name exists the default style is kept: styling is cosmetic and
# should not stop the analysis from running.

# Show three digits of precision when printing NumPy arrays and pandas objects.
np.set_printoptions(precision=3)
pd.set_option('display.precision', 3)

In [None]:
# Answer categories and the observed count for each, held as a one-row table.
columns = ['Parent', 'Friend', 'Brother', 'Teacher', 'None', 'Others']
data = pd.DataFrame(
    data=[[30, 12, 4, 20, 22, 8]],
    columns=columns,
)
display(data)

# The single row of counts as a 1-D array, in the same order as `columns`.
observed = data.iloc[0].values

In [None]:
# Fixed seed so that re-running the notebook gives repeatable posterior draws.
SEED = 42

with pm.Model() as model:
    # Prior: one Uniform(0, 1) weight per category, normalised so that the
    # category proportions `p` sum to one.
    p_ = pm.Uniform('p_', 0, 1, shape=len(columns))
    p = pm.Deterministic('p', p_ / pm.math.sum(p_))

    # Likelihood: the observed counts are a single multinomial outcome whose
    # number of trials is the total count over all categories.
    y_pred = pm.Multinomial('y_pred', n=observed.sum(), p=p, observed=observed)

    # 21000 draws per chain on 5 chains, seeded for reproducibility.
    trace = pm.sample(21000, chains=5, random_seed=SEED)

In [None]:
# Drop the first 1000 draws from the trace, then plot what remains.
burn_in_slice = slice(1000, None)
chain = trace[burn_in_slice]

pm.traceplot(chain)
plt.show()

In [None]:
# Summary table restricted to the normalised proportions 'p'
# (the raw weights 'p_' are left out of the table).
pm.summary(chain, ['p'])

### RQ1: どのカテゴリとどのカテゴリの間に差があるのか？

In [None]:
# Probability that the row category's proportion is larger than the column
# category's, estimated as the fraction of posterior draws where
# p[row] > p[column]. The diagonal is 0 because a value never exceeds itself.
#
# The draws are fetched once up front; the nested comprehension would
# otherwise look up chain['p'] twice for each of the 36 comparisons.
p_draws = chain['p']
n_categories = len(columns)
result = [
    [(p_draws[:, i] > p_draws[:, j]).mean() for j in range(n_categories)]
    for i in range(n_categories)
]

print('row (-) > column (|)')
comp_table = pd.DataFrame(result, index=columns, columns=columns)
display(comp_table)

### RQ2: 「親」は他の誰よりも相談される比率が高い

In [None]:
# Posterior draws of the proportions, indexed as p[draw, category].
p = chain['p']

# Iterating over the transpose yields one 1-D array of draws per category, in
# the order Parent, Friend, Brother, Teacher, None, Others.
p_p, p_f, p_b, p_t, p_n, p_o = p.T

In [None]:
# Parent > Friend, Brother, Teacher, None, Others
# All five comparisons share p_p, so they are not independent events and the
# product of their separate probabilities is not the probability that they
# hold together. Estimate the joint probability directly: the fraction of
# posterior draws in which every comparison is true at the same time.
val_1 = (
    (p_p > p_f) & (p_p > p_b) & (p_p > p_t) & (p_p > p_n) & (p_p > p_o)
).mean()
print('Parent > Friend, Brother, Teacher, None, Others: {:.3f} %'.format(val_1 * 100))

### RQ3: 「相談しない」を除いて、「親」は「友達」「兄弟」「先生」「その他」よりも相談される比率が高い

In [None]:
# Parent > Friend, Brother, Teacher, Others
# The comparisons share p_p and are therefore dependent; multiplying their
# separate probabilities does not give the probability that all hold. Use the
# fraction of posterior draws in which all four comparisons are true together.
val_2 = ((p_p > p_f) & (p_p > p_b) & (p_p > p_t) & (p_p > p_o)).mean()
print('Parent > Friend, Brother, Teacher, Others: {:.3f} %'.format(val_2 * 100))

### RQ4: 「先生」を除いて、「親」は「友達」「兄弟」「その他」よりも相談される比率が高い

In [None]:
# Parent > Friend, Brother, Others
# The comparisons share p_p and are therefore dependent; multiplying their
# separate probabilities does not give the probability that all hold. Use the
# fraction of posterior draws in which all three comparisons are true together.
val_3 = ((p_p > p_f) & (p_p > p_b) & (p_p > p_o)).mean()
print('Parent > Friend, Brother, Others: {:.3f} %'.format(val_3 * 100))