In [None]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import pymc3 as pm
import arviz as az

# Activate seaborn's default styling for the matplotlib figures drawn below.
sns.set()

In [None]:
# Data for a hierarchical model that demonstrates shrinkage
# (binomial likelihood, logit link).

N_data = [30, 30, 30]   # number of trials in each group
G_data = [15, 3, 3]     # number of successes (water == 1) in each group

# When np.repeat is given a sequence of counts, element i of the input is
# repeated counts[i] times; here that labels every trial with its group index.
group_idx = np.repeat(range(len(N_data)), N_data)

# Expand the per-group counts into one 0/1 outcome per trial:
# for each group, all successes first, followed by all failures.
data = [
    outcome
    for n_trials, n_hits in zip(N_data, G_data)
    for outcome in np.repeat([1, 0], [n_hits, n_trials - n_hits])
]

# Long-format table: one row per trial.
df = pd.DataFrame({'water': data, 'group_idx': group_idx})

df

In [None]:
# Pooled success rate across all groups.
df.water.mean()

In [None]:
# Raw (unpooled) success rate of each group.
df.groupby(by='group_idx').mean()

In [None]:
def logit_pure(x):
    """Map log-odds ``x`` to a probability in [0, 1].

    Despite its name, this is the *inverse* logit (logistic sigmoid),
    ``exp(x) / (1 + exp(x))``.

    It is evaluated as ``exp(-log(1 + exp(-x)))`` via ``np.logaddexp`` so that
    large positive ``x`` returns 1.0 instead of overflowing to ``inf / inf = nan``.

    Parameters
    ----------
    x : scalar, sequence, numpy array or pandas Series/DataFrame
        Value(s) on the log-odds scale.

    Returns
    -------
    Same container kind that numpy ufuncs return for ``x``, holding probabilities.
    """
    # log(sigmoid(x)) == -log(1 + exp(-x)) == -logaddexp(0, -x)
    return np.exp(-np.logaddexp(0, np.negative(x)))

def logistic_pure(p):
    """Return the log-odds ``log(p / (1 - p))`` of probability ``p``.

    Despite its name, this is the logit function, i.e. the inverse of
    ``exp(x) / (1 + exp(x))``.
    """
    odds = p / (1 - p)
    return np.log(odds)

In [None]:
# Integer group label of every observation; used to pick each row's intercept.
grp_idx = df['group_idx'].values
n_groups = len(np.unique(grp_idx))

model = pm.Model()
with model:
    # Hyper-priors for the population of group intercepts (log-odds scale):
    # their common mean and their spread.
    alpha_bar = pm.Normal('alpha_bar', 0, 1.5)
    sigma = pm.Exponential('sigma', 1)

    # Group-level intercepts drawn from the shared population distribution.
    # `sigma` must be passed here: without it the prior falls back to a fixed
    # unit scale and the hyper-parameter never influences the groups.
    alpha = pm.Normal('alpha', alpha_bar, sigma, shape=n_groups)

    # Per-observation success probability (inverse logit of the group intercept).
    # The variable keeps its historical name `logit`, but it holds probabilities.
    # pm.math.invlogit replaces the hand-written exp(a) / (1 + exp(a)).
    logit = pm.math.invlogit(alpha[grp_idx])

    obs = pm.Bernoulli('obs', logit, observed=df['water'])

    trace = pm.sample(500, tune=500, random_seed=4711)

    # Posterior summary with 89% HDIs, plus the raw draws as a DataFrame
    # (columns such as alpha_bar, alpha__0, ...) for the cells below.
    summary = az.summary(trace, hdi_prob=0.89)
    result = pm.trace_to_dataframe(trace)

In [None]:
summary

In [None]:
# Draw ArviZ trace plots for the sampled trace, inside the model context.
with model:
    az.plot_trace(trace)

In [None]:
# Histogram of the posterior draws of alpha_bar (still on the log-odds scale).
plt.hist(trace['alpha_bar'])

In [None]:
# Convert the posterior draws from the log-odds scale to probabilities.
# Every per-group intercept column (alpha__0, alpha__1, ...) is picked up
# automatically, so this keeps working if the number of groups changes.
alpha_cols = ['alpha_bar'] + [
    col for col in result.columns if col.startswith('alpha__')
]
p_result = result[alpha_cols].apply(logit_pure)

# Posterior mean probability for the population and for each group.
p_result.mean()

In [None]:
# Histogram of the population-level intercept after conversion to a probability.
p_result['alpha_bar'].plot.hist()