# Estimation of a population proportion (Binomial distribution)
A Bayesian alternative to the z-test and the chi-square test.

In [None]:
# Uncomment the commands below when running this notebook on Google Colab.
# !pip install arviz==0.7
# !pip install pymc3==3.8
# !pip install Theano==1.0.4

import numpy as np
import pandas as pd
from scipy import stats
import matplotlib.pyplot as plt
import seaborn as sns

import pymc3 as pm

import math

# Numeric display: show 3 digits of precision in NumPy and pandas output.
np.set_printoptions(precision=3)
pd.options.display.precision = 3

# Plot appearance: use the dark-grid seaborn style for all figures.
plt.style.use('seaborn-darkgrid')

## Q. Is the proportion of Soba lovers higher than that of Udon lovers?

In [None]:
# Observed survey counts.
SOBA_LOVER = 220  # respondents who prefer Soba
UDON_LOVER = 180  # respondents who prefer Udon

# Total number of respondents.
N = SOBA_LOVER + UDON_LOVER

# Sample proportion of Soba lovers.
soba_ratio = SOBA_LOVER / N
print(soba_ratio)

## Bayesian analysis

In [None]:
# Binomial model for the number of Soba lovers among all N respondents.
with pm.Model() as model:
    # Prior distribution: uniform on [0, 1] for the proportion of Soba lovers.
    p = pm.Uniform('p', 0, 1)

    # Likelihood: the observed number of Soba lovers out of N trials.
    # The trial count is taken from N (SOBA_LOVER + UDON_LOVER) rather than a
    # hard-coded literal so that it stays consistent with the observed counts.
    y_pred = pm.Binomial('y_pred', n=N, p=p, observed=SOBA_LOVER)

    # Odds of preferring Soba, tracked as a derived quantity in the trace.
    odds = pm.Deterministic('odds', p / (1 - p))

    # Draw 21000 samples in each of 5 chains.
    trace = pm.sample(21000, chains=5)

In [None]:
# Discard the first draws as burn-in, then inspect the remaining samples.
BURN_IN = 1000
chain = trace[BURN_IN:]

pm.traceplot(chain)
plt.show()

In [None]:
# Summary table of the posterior samples kept after burn-in.
posterior_summary = pm.summary(chain)
posterior_summary

In [None]:
# Report the posterior mean of the odds (Soba lovers relative to Udon lovers).
odds_mean = chain['odds'].mean()
odds_message = 'There are {:.3f} times as many Soba lovers than as Udon lovers.'
print(odds_message.format(odds_mean))

### RQ1: Are there more Soba lovers than Udon lovers? (「蕎麦好き」が「うどん好き」より多いか？)

In [None]:
# Fraction of posterior draws with p above 0.5, expressed as a percentage.
is_soba_majority = chain['p'] > 0.5
rq1_percent = is_soba_majority.mean() * 100
rq1_message = 'Probability of there are more Soba lovers than Udon lovers: {:.3f} %'
print(rq1_message.format(rq1_percent))

### RQ2: Are there more than 1.4 times as many Soba lovers as Udon lovers? (「蕎麦好き」は「うどん好き」の1.4倍より多くいるか？)

In [None]:
# Fraction of posterior draws whose odds exceed 1.4, expressed as a percentage.
exceeds_threshold = chain['odds'] > 1.4
rq2_percent = exceeds_threshold.mean() * 100
rq2_message = 'Probability of there are 1.4 times as many Soba lovers than as Udon lovers: {:.3f} %'
print(rq2_message.format(rq2_percent))

In [None]:
# 5th percentile of the sampled odds: 95% of the draws lie at or above it.
odds_lower_bound = np.quantile(chain['odds'], 0.05)
bound_message = 'At least {:.3f} times as many (95%).'
print(bound_message.format(odds_lower_bound))

In [None]:
# Posterior of the odds with a 95% credible interval, using the mode as the
# point estimate.
odds_samples = chain['odds']
pm.plot_posterior(
    odds_samples,
    credible_interval=0.95,
    point_estimate='mode',
)
plt.xlabel('Odds')
plt.show()