In [None]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import pymc as pm

from pymc.Matplot import plot as pmplot

# Switch on seaborn's default theme so the figures drawn later in the notebook use it.
sns.set()

In [None]:
# Load the semicolon-separated Howell1 table.
df = pd.read_csv('Howell1.csv', sep=';')

# 1-based category index: 1 where `male` equals 1, 2 for every other row.
is_male = df['male'].eq(1)
df['index'] = is_male.map({True: 1, False: 2})

# Number of distinct levels the `index` column above can take.
number_of_categories = 2

df.head()

In [None]:
def regression(x, y, nr_categories=None):
    """Fit a per-category intercept model for ``y`` and return posterior draws.

    Model as built below (``pm.Normal`` receives a precision, 1 / sd ** 2):

        height_sigma ~ Uniform(0, 50)
        a[k]         ~ Normal(178, sd=20)            one per category k
        height_mu    ~ Normal(a[x - 1], sd=height_sigma)
        obs          ~ Normal(height_mu, sd=height_sigma), observed at ``y``

    The sampler is started from the MAP estimate, then run for 300000
    iterations with a burn-in of 100000 and thinning of 3. Trace plots for
    ``height_sigma`` and the first two intercepts are drawn as a side effect.

    Parameters
    ----------
    x : array-like of int
        1-based category index of every observation (e.g. male/female).
    y : array-like
        Observed outcome (height), aligned with ``x``.
    nr_categories : int, optional
        Number of intercepts to estimate. Defaults to the largest index
        found in ``x``.

    Returns
    -------
    pandas.DataFrame
        One row per retained draw with columns ``height_sigma_post``,
        ``a_post_1`` and ``a_post_2`` (intercepts of categories 1 and 2).

    Raises
    ------
    ValueError
        If ``x`` is empty, fewer than two categories are requested, or ``x``
        holds an index outside ``1..nr_categories``.
    """
    # Zero-based positions into `a`; the data's category index starts at 1.
    category_pos = np.asarray(x, dtype=int) - 1
    if category_pos.size == 0:
        raise ValueError('x must contain at least one observation')

    # Previously an undefined name was used here, so every call failed with
    # NameError; derive the count from the data unless the caller overrides it.
    if nr_categories is None:
        nr_categories = int(category_pos.max()) + 1
    if nr_categories < 2:
        # The returned frame always reports the first two intercepts.
        raise ValueError('at least two categories are required, got {}'.format(nr_categories))
    if category_pos.min() < 0 or category_pos.max() >= nr_categories:
        raise ValueError('category indices in x must lie in 1..{}'.format(nr_categories))

    height_sigma = pm.Uniform('height_sigma', 0, 50)

    # One intercept per category; 1.0 keeps the precision a float even under
    # integer division.
    a = pm.Normal('a', 178, 1.0 / 20 ** 2, size=nr_categories)

    mu = a[category_pos]

    # NOTE(review): height_mu is itself a Normal draw around mu with the same
    # precision as the likelihood, so the observations sit two noise layers
    # away from `a` -- confirm this extra latent layer is intended.
    height_mu = pm.Normal('height_mu', mu, 1 / height_sigma ** 2)

    obs = pm.Normal('obs', height_mu, 1 / height_sigma ** 2, observed=True, value=y)

    model = pm.Model([height_sigma, a, height_mu, obs])

    # Move the chain's starting point to the MAP estimate before sampling.
    map_ = pm.MAP(model)
    map_.fit()

    mcmc = pm.MCMC(model)
    mcmc.sample(iter=300000, burn=100000, thin=3)

    height_sigma_post = mcmc.trace('height_sigma')[:]
    a_post_1 = mcmc.trace('a')[:, 0]
    a_post_2 = mcmc.trace('a')[:, 1]

    pmplot(height_sigma_post, 'height_sigma')
    pmplot(a_post_1, 'a_1')
    pmplot(a_post_2, 'a_2')

    result = pd.DataFrame({'height_sigma_post': height_sigma_post,
                           'a_post_1': a_post_1,
                           'a_post_2': a_post_2})

    return result

In [None]:
# Fit the model on category index vs. height; the returned frame holds the posterior draws.
result = regression(x=df['index'], y=df['height'])



In [None]:

# Posterior contrast between the two intercepts: second category minus first.
result['alpha_diff'] = result['a_post_2'] - result['a_post_1']

# Summary statistics, transposed so each posterior column becomes a row.
desc = result.describe().transpose()

# Lower and upper bounds of the central 89% interval for every column.
CI_89_low, CI_89_high = (result.quantile(q) for q in (0.055, 0.945))

CI = pd.concat([CI_89_low.rename('5.5%'), CI_89_high.rename('94.5%')], axis=1)

# Append the interval bounds to the summary table and display it.
stats = desc.join(CI)

stats