In [None]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import pymc as pm

# Apply seaborn's default styling to all matplotlib figures created below.
sns.set()

In [None]:
# NOTE: the two helper names defined below are swapped relative to the usual
# definitions: `logit_pure` computes the logistic (inverse-logit) function and
# `logistic_pure` computes the logit (log-odds). The names were chosen while
# trying to follow McElreath's text and are kept as they are.

alpha = beta = 1

# 20 evenly spaced plotting positions on [0, 1] and 20 integer inputs -10..9.
x = np.linspace(0, 1, num=20)
y = np.arange(-10, 10, 1)

def logit_pure(x):
    """Map real-valued input(s) into the interval [0, 1].

    Despite its name this is the logistic (inverse-logit, sigmoid) function
    ``exp(x) / (1 + exp(x))``; the name is kept so existing calls keep working.

    Parameters
    ----------
    x : scalar or array-like of numbers
        Value(s) on the unbounded (log-odds) scale.

    Returns
    -------
    Same shape as ``x``: ``exp(x) / (1 + exp(x))`` evaluated element-wise.
    """
    # exp(x) / (1 + exp(x)) == exp(-log(1 + exp(-x))). np.logaddexp evaluates
    # log(exp(0) + exp(-x)) without overflowing, so very large |x| saturates
    # at 0 or 1 instead of producing inf / inf = nan.
    return np.exp(-np.logaddexp(0, -x))

In [None]:
def logistic_pure(p):
    """Return the log-odds ``log(p / (1 - p))`` of probability ``p``.

    Despite its name this is the logit function, i.e. the inverse of
    ``exp(x) / (1 + exp(x))``. Works element-wise on arrays.
    """
    odds = p / (1 - p)
    return np.log(odds)

In [None]:
# Three stacked panels sharing the x axis:
#   top    - the raw inputs y,
#   middle - y pushed through logit_pure (values between 0 and 1),
#   bottom - the middle curve pushed back through logistic_pure.
fig, axes = plt.subplots(nrows=3, ncols=1, sharex=True, figsize=(18, 12))
top_ax, middle_ax, bottom_ax = axes

squashed = logit_pure(x=y)

top_ax.plot(x, y)
middle_ax.plot(x, squashed)
bottom_ax.plot(x, logistic_pure(squashed))

In [None]:
# Read the semicolon-separated admissions table and add a 0/1 indicator
# column that is 1 where 'applicant.gender' equals 'male'.
df = pd.read_csv('UCBAdmit.csv', sep=';')
df['male'] = (df['applicant.gender'] == 'male').astype('int64')
df

In [None]:
# Model (binomial regression with a logit link):
#   admit    ~ Binomial(applications, p)
#   logit(p) = alpha + beta * male    <=>    p = logistic(alpha + beta * male)
#
# The admission probability depends on gender only: for female applicants
# (male == 0) alpha alone determines p, and beta is the male offset on the
# log-odds scale. The logistic function maps the unbounded linear predictor
# into the interval 0..1.

# PyMC2's Normal takes a precision tau = 1 / sd**2 rather than a standard
# deviation. Float literals keep the value at 0.01 even where `/` performs
# integer division (Python 2), which would otherwise yield tau = 0.
PRIOR_SD = 10.0
prior_tau = 1.0 / PRIOR_SD ** 2

alpha = pm.Normal('alpha', mu=0, tau=prior_tau)
beta = pm.Normal('beta', mu=0, tau=prior_tau)

x = df['male']

@pm.deterministic
def logit(alpha=alpha, beta=beta, x=x):
    """Admission probability for each row: exp(eta) / (1 + exp(eta)).

    eta = alpha + beta * x is the linear predictor. The node keeps the name
    `logit` although the expression is the logistic (inverse-logit) function.
    """
    eta = alpha + beta * x
    return np.exp(eta) / (1 + np.exp(eta))

# Observed likelihood: admitted counts out of the number of applications.
lkh = pm.Binomial('lkh', n=df['applications'], p=logit, observed=True, value=df['admit'])

model = pm.Model([alpha, beta, logit, lkh])

mcmc = pm.MCMC(model)
# 50000 iterations, the first 10000 discarded as burn-in, keeping every 2nd
# draw. The draws are read back later through mcmc.trace(...).
mcmc.sample(iter=50000, burn=10000, thin=2)

In [None]:
# Collect the sampled values of both parameters from the MCMC traces.
post_alpha = mcmc.trace('alpha')[:]
post_beta = mcmc.trace('beta')[:]

result = pd.DataFrame({'post_alpha': post_alpha, 'post_beta': post_beta})

# Convert each draw to admission probabilities: males use alpha + beta,
# females (male == 0) use alpha alone; the advantage is their difference.
result = result.assign(male_p=logit_pure(result['post_alpha'] + result['post_beta']))
result = result.assign(female_p=logit_pure(result['post_alpha']))
result = result.assign(male_advantage=result['male_p'] - result['female_p'])

print(result.head())
result.describe()



In [None]:
# Raw data: the 'admit' column plotted against the 0/1 'male' indicator.
plt.scatter(x=df['male'], y=df['admit'])

In [None]:
# Simulate admissions for equally sized male and female applicant pools using
# posterior draws of the admission probabilities.
nr_rows = 100000
nr_applications = 4000

# Resample posterior draws by *position* (with replacement) so the lookup
# through .iloc is correct whatever index `result` carries. Both genders use
# the same resampled rows, so each simulated pair shares one posterior draw.
rows = np.random.choice(len(result), replace=True, size=nr_rows)
draws = result.iloc[rows]

m_admitted = pm.rbinomial(n=nr_applications, p=draws['male_p'].values, size=nr_rows)
f_admitted = pm.rbinomial(n=nr_applications, p=draws['female_p'].values, size=nr_rows)

# The simulated counts are integers; np.true_divide guarantees a real-valued
# ratio even where `/` on integer arrays means floor division (Python 2).
# A simulated female count of 0 would give inf here.
male_advantage = np.true_divide(m_admitted, f_admitted)
print (male_advantage.mean()) # same as 0.44 / 0.30 above
print (m_admitted.mean() / nr_applications)
print (f_admitted.mean() / nr_applications)
plt.hist(male_advantage)