# Independent g x 2 cross table
A Bayesian alternative to the z-test and the chi-square test

In [None]:
# Enable the commands below when running this program on Google Colab.
# !pip install arviz==0.7
# !pip install pymc3==3.8
# !pip install Theano==1.0.4

import numpy as np
import pandas as pd
from scipy import stats
import matplotlib.pyplot as plt
import seaborn as sns

import pymc3 as pm

import math

# Plot style. Matplotlib 3.6 renamed its bundled seaborn style sheets to
# 'seaborn-v0_8-*' and later removed the old names, so fall back to the new
# name when the legacy one is not installed.
plot_style = 'seaborn-darkgrid'
if plot_style not in plt.style.available:
    plot_style = 'seaborn-v0_8-darkgrid'
plt.style.use(plot_style)

# Use a precision of 3 for NumPy array printing and pandas display output.
np.set_printoptions(precision=3)
pd.set_option('display.precision', 3)

## Q. People in four groups (junior high school students, high school students, university students, and adults) were asked whether they had ever been on a date with the opposite sex. Estimate the population proportion of "Yes" answers for each group.

In [None]:
# Observed counts of 'Yes' / 'No' answers, one row per respondent group.
index = ['Junior High', 'High', 'University', 'Adult']
yes_counts = [38, 51, 66, 79]
no_counts = [63, 48, 34, 23]
data = pd.DataFrame({'Yes': yes_counts, 'No': no_counts}, index=index)
display(data)

In [None]:
# Fixed seed so that a fresh "Restart & Run All" reproduces the same draws.
RANDOM_SEED = 42

with pm.Model() as model_multi:
    # Prior distribution: an independent Uniform(0, 1) prior on the "Yes"
    # proportion of each group.
    p1 = pm.Uniform('p1', 0, 1)
    p2 = pm.Uniform('p2', 0, 1)
    p3 = pm.Uniform('p3', 0, 1)
    p4 = pm.Uniform('p4', 0, 1)

    # Likelihood: the number of "Yes" answers in each group is Binomial with
    # n = the group's total respondents (row sum of Yes + No).
    # A single .loc[row, column] lookup replaces the chained .loc[row][column].
    y1_pred = pm.Binomial('y1_pred', n=data.loc['Junior High'].sum(), p=p1, observed=data.loc['Junior High', 'Yes'])
    y2_pred = pm.Binomial('y2_pred', n=data.loc['High'].sum(), p=p2, observed=data.loc['High', 'Yes'])
    y3_pred = pm.Binomial('y3_pred', n=data.loc['University'].sum(), p=p3, observed=data.loc['University', 'Yes'])
    y4_pred = pm.Binomial('y4_pred', n=data.loc['Adult'].sum(), p=p4, observed=data.loc['Adult', 'Yes'])

    # 21000 draws from each of 5 chains; seeded for reproducibility.
    trace = pm.sample(21000, chains=5, random_seed=RANDOM_SEED)

In [None]:
# Number of leading draws dropped from the trace before any analysis
# (treated here as burn-in).
BURN_IN = 1000

chain = trace[BURN_IN:]

# Visual check of the retained draws.
pm.traceplot(chain)
plt.show()

In [None]:
# Summary table of the retained posterior draws; shown as the cell output.
posterior_summary = pm.summary(chain)
posterior_summary

### RQ1: Which categories differ from which? (どのカテゴリとどのカテゴリに差があるのか？)

In [None]:
# Pull the posterior draws of p1..pK once, in the same order as `index`.
draws = [chain['p' + str(k)] for k in range(1, len(index) + 1)]

# Entry [i][j] is the fraction of draws in which group i's proportion exceeds
# group j's (the diagonal is therefore 0).
result = [[(row_draws > col_draws).mean() for col_draws in draws] for row_draws in draws]

print('row (-) > column (|)')
comp_table = pd.DataFrame(result, index=index, columns=index)
display(comp_table)

### RQ2: Does the proportion with dating experience increase with age group? (デートの経験比率は年代とともに上昇するのか？)

In [None]:
# Posterior probability of the joint hypothesis p1 < p2 < p3 < p4.
# The three inequalities share parameters and are not independent, so their
# marginal probabilities must not be multiplied. Instead, count the fraction
# of draws in which all of them hold at the same time.
all_increasing = (
    (chain['p1'] < chain['p2'])
    & (chain['p2'] < chain['p3'])
    & (chain['p3'] < chain['p4'])
)
val_1 = all_increasing.mean()
print('Adult > University > High > Junior High: {:.3f} %'.format(val_1 * 100))

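### RQ3: The proportion with dating experience increases with age group, ignoring any difference between university students and adults. (デートの経験比率は年代とともに上昇するが、大学生と社会人の差は問わない。)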

In [None]:
# Posterior probability of the joint hypothesis p1 < p2, p2 < p3 and p2 < p4
# (the order of University vs. Adult is left unconstrained).
# The inequalities share parameters and are not independent, so the joint
# probability is the fraction of draws satisfying all of them simultaneously,
# not the product of the marginal probabilities.
ordered_up_to_high = (
    (chain['p1'] < chain['p2'])
    & (chain['p2'] < chain['p3'])
    & (chain['p2'] < chain['p4'])
)
val_2 = ordered_up_to_high.mean()
print('Adult, University > High > Junior High: {:.3f} %'.format(val_2 * 100))

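### RQ4: Junior high school students have a lower proportion of dating experience than every other age group. (中学生はどの年代よりもデートの経験比率が低い)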

In [None]:
# Posterior probability of the joint hypothesis that p1 is below p2, p3 and p4.
# All three inequalities involve p1 and are not independent, so the joint
# probability is the fraction of draws in which they all hold at once, not the
# product of the marginal probabilities.
junior_high_lowest = (
    (chain['p1'] < chain['p2'])
    & (chain['p1'] < chain['p3'])
    & (chain['p1'] < chain['p4'])
)
val_3 = junior_high_lowest.mean()
print('Junior High < High, University, Adult: {:.3f} %'.format(val_3 * 100))