# Independent 2 x 2 cross table
A Bayesian alternative to the z-test and the chi-square test

In [None]:
# Enable the commands below when running this program on Google Colab.
# !pip install arviz==0.7
# !pip install pymc3==3.8
# !pip install Theano==1.0.4

import numpy as np
import pandas as pd
from scipy import stats
import matplotlib.pyplot as plt
import seaborn as sns

import pymc3 as pm

import math

# Plot style: Matplotlib 3.6 renamed the bundled seaborn styles to
# 'seaborn-v0_8-*' and later releases dropped the old names, so fall back to
# the new name when the legacy one is not registered.
plt.style.use(
    'seaborn-darkgrid' if 'seaborn-darkgrid' in plt.style.available
    else 'seaborn-v0_8-darkgrid'
)
# Show three decimal places in NumPy array and pandas DataFrame output.
np.set_printoptions(precision=3)
pd.set_option('display.precision', 3)

## Q. The numbers of respondents who agreed or disagreed with Proposition A were tallied by gender. Estimate the population proportion of agreement for each gender.

In [None]:
# Observed 2 x 2 table: one row per gender, one column per response.
data = pd.DataFrame(
    {'Agree': [71, 42], 'Disagree': [49, 83]},
    index=['Male', 'Female'],
)
display(data)

## Bayesian analysis

In [None]:
# Two independent binomial proportions (Male = p1, Female = p2) plus the
# derived quantities used to compare them.
with pm.Model() as model:
    # Prior distribution: uniform on [0, 1] for each agree rate
    p1 = pm.Uniform('p1', 0, 1)
    p2 = pm.Uniform('p2', 0, 1)

    # Likelihood: observed 'Agree' count out of each row total
    # (.loc[row, col] does a single lookup instead of chained indexing)
    y1_pred = pm.Binomial('y1_pred', n=data.loc['Male'].sum(), p=p1, observed=data.loc['Male', 'Agree'])
    y2_pred = pm.Binomial('y2_pred', n=data.loc['Female'].sum(), p=p2, observed=data.loc['Female', 'Agree'])

    # Difference of ratios
    diff = pm.Deterministic('diff', p1 - p2)

    # Ratio of ratios
    ratio = pm.Deterministic('ratio', p1 / p2)

    # Odds
    odds1 = pm.Deterministic('odds1', p1 / (1 - p1))
    odds2 = pm.Deterministic('odds2', p2 / (1 - p2))

    # Odds ratio
    odds_ratio = pm.Deterministic('odds_ratio', odds1 / odds2)

    # Fixed seed so that Restart & Run All reproduces the same posterior draws.
    trace = pm.sample(21000, chains=5, random_seed=42)

In [None]:
# Drop the first draws as burn-in, then plot the retained samples.
burn_in = 1000
chain = trace[burn_in:]
pm.traceplot(chain)
plt.show()

In [None]:
# Summary table of the retained posterior draws (displayed as the cell output).
pm.summary(chain)

In [None]:
# Posterior mean and standard deviation (in parentheses) of each agree rate.
# Both numbers are scaled to percent so they share the unit printed on the line;
# previously the SD was left as a raw proportion next to a percent mean.
for label, var_name in (('Male', 'p1'), ('Female', 'p2')):
    samples = chain[var_name]
    print('Agree rate ({}): {:.3f} % ({:.3f})'.format(label, samples.mean() * 100, samples.std() * 100))

### RQ1: How many times the female agree rate is the male agree rate?

In [None]:
# Posterior mean of p1 / p2.
mean_ratio = chain['ratio'].mean()
print('Agree rate of Male is {:.3f} times higher than that of Female.'.format(mean_ratio))

### RQ2: The difference between the male and female agree rates is greater than 0.1

In [None]:
# Share of posterior draws in which p1 - p2 exceeds 0.1, as a percentage.
prob_diff_pct = np.mean(chain['diff'] > 0.1) * 100
print('Probability of agree rate of Male is 0.1 higher than that of Female: {:.3f} %'.format(prob_diff_pct))

### RQ3: The male agree rate is more than 1.5 times the female agree rate

In [None]:
# Share of posterior draws in which p1 / p2 exceeds 1.5, as a percentage.
prob_ratio_pct = np.mean(chain['ratio'] > 1.5) * 100
print('Probability of agree rate of Male is 1.5 times higher than that of Female: {:.3f} %'.format(prob_ratio_pct))