# Estimation of population proportion (Multinomial distribution)
A Bayesian alternative to the z-test and the chi-square test

In [None]:
# Enable the commands below when running this program on Google Colab.
# !pip install arviz==0.7
# !pip install pymc3==3.8
# !pip install Theano==1.0.4
 
import numpy as np
import pandas as pd
from scipy import stats
import matplotlib.pyplot as plt
import seaborn as sns

import pymc3 as pm

import math

# Notebook-wide display settings: show three significant decimals in both
# pandas and NumPy output, and use the seaborn dark-grid look for all figures.
pd.options.display.precision = 3
np.set_printoptions(precision=3)
plt.style.use('seaborn-darkgrid')

## Q: Students in a junior high school were asked whom they consult most often about their problems. Estimate the population proportion of each answer category.

In [None]:
# Answer categories and the number of students who chose each one.
columns = ['Parent', 'Friend', 'Brother', 'Teacher', 'None', 'Others']
counts = (26, 40, 8, 2, 23, 8)

# One-row table: a single survey, one column per category.
data = pd.DataFrame([dict(zip(columns, counts))], columns=columns)
display(data)

# Observed counts as a flat array, in the same order as `columns`.
observed = data.iloc[0].to_numpy()

In [None]:
with pm.Model() as model:
    # Prior distribution: flat Dirichlet(1, ..., 1), i.e. uniform over the
    # probability simplex. Normalising independent Uniform(0, 1) variables
    # (p_ / sum(p_)) is NOT uniform on the simplex and leaves the overall
    # scale of p_ unidentified by the likelihood, so the Dirichlet is used
    # directly instead.
    p = pm.Dirichlet('p', a=np.ones(len(columns)))

    # Likelihood: the observed category counts are one multinomial draw
    # with the total number of respondents as the number of trials.
    y_pred = pm.Multinomial('y_pred', n=observed.sum(), p=p, observed=observed)

    # Fixed seed so that "Restart & Run All" reproduces the same posterior draws.
    trace = pm.sample(21000, chains=5, random_seed=1234)

In [None]:
# Drop the leading draws as burn-in, then inspect the remaining samples
# visually (posterior densities and sampling paths).
n_burn_in = 1000
chain = trace[n_burn_in:]

pm.traceplot(chain)
plt.show()

In [None]:
# Disabled debugging helpers: uncomment to inspect the number of draws,
# the number of chains, and the shape of the sampled 'p' values.
# print(len(trace))
# print(len(chain))
# print(trace.nchains)
# print(trace.get_values('p'))
# print(chain.get_values('p').shape)
# for samples in trace.get_values('p', combine=False):
#     print(samples.shape)
#     print(samples)

In [None]:
# Posterior summary table restricted to the category proportions.
pm.summary(chain, var_names=['p'])

### RQ1: どのカテゴリとどのカテゴリの間に差があるのか？ (Between which pairs of categories is there a difference?)

In [None]:
# Posterior draws of the proportions: one row per draw, one column per category.
p_draws = chain['p']

# Compare every category with every other one in a single broadcast:
# entry [i, j] is the fraction of draws in which p[i] exceeds p[j].
exceed_freq = (p_draws[:, :, np.newaxis] > p_draws[:, np.newaxis, :]).mean(axis=0)
result = [list(row) for row in exceed_freq]

print('row (-) > column (|)')
comp_table = pd.DataFrame(data=result, columns=columns, index=columns)
display(comp_table)

### RQ2: 「友達」は他の誰よりも相談される比率が高い ("Friend" is consulted at a higher rate than anyone else)

In [None]:
def prob_highest(samples, target, others):
    """Posterior probability that one category exceeds all listed categories.

    The comparison is evaluated jointly within each posterior draw and the
    resulting indicator is averaged over draws. Multiplying the separate
    pairwise probabilities would only be valid if the comparisons were
    independent, which they are not: they all share the target column.

    Parameters
    ----------
    samples : 2-D array, one row per draw and one column per category.
    target : int, column index of the category claimed to be the largest.
    others : list of int, column indices it is compared against.

    Returns
    -------
    float in [0, 1]: fraction of draws in which the target column is
    strictly greater than every column in `others`.
    """
    target_draws = samples[:, [target]]  # keep 2-D so it broadcasts per row
    return (samples[:, others] < target_draws).all(axis=1).mean()


p = chain['p']
p_f = p[:, 1]  # column 1 = 'Friend'

# Friend > Parent, Brother, Teacher, None, Others
val_1 = prob_highest(p, 1, [0, 2, 3, 4, 5])
print('Friend > Parent, Brother, Teacher, None, Others: {:.3f} %'.format(val_1 * 100))

# Friend > Brother, Teacher, None, Others
val_2 = prob_highest(p, 1, [2, 3, 4, 5])
print('Friend > Brother, Teacher, None, Others: {:.3f} %'.format(val_2 * 100))

### RQ3: 「先生」は他の誰よりも相談される比率が低い ("Teacher" is consulted at a lower rate than anyone else)


In [None]:
def prob_lowest(samples, target, others):
    """Posterior probability that one category is below all listed categories.

    The comparison is evaluated jointly within each posterior draw and the
    resulting indicator is averaged over draws. Multiplying the separate
    pairwise probabilities would only be valid if the comparisons were
    independent, which they are not: they all share the target column.

    Parameters
    ----------
    samples : 2-D array, one row per draw and one column per category.
    target : int, column index of the category claimed to be the smallest.
    others : list of int, column indices it is compared against.

    Returns
    -------
    float in [0, 1]: fraction of draws in which the target column is
    strictly smaller than every column in `others`.
    """
    target_draws = samples[:, [target]]  # keep 2-D so it broadcasts per row
    return (target_draws < samples[:, others]).all(axis=1).mean()


p_t = p[:, 3]  # column 3 = 'Teacher'

# Teacher < Parent, Friend, Brother, None, Others
val_3 = prob_lowest(p, 3, [0, 1, 2, 4, 5])
print('Teacher < Parent, Friend, Brother, None, Others: {:.3f} %'.format(val_3 * 100))

# Teacher < Parent, Friend, None, Others
val_4 = prob_lowest(p, 3, [0, 1, 4, 5])
print('Teacher < Parent, Friend, None, Others: {:.3f} %'.format(val_4 * 100))

# Teacher < Parent, Friend, Brother, None
val_5 = prob_lowest(p, 3, [0, 1, 2, 4])
print('Teacher < Parent, Friend, Brother, None: {:.3f} %'.format(val_5 * 100))