In [17]:
import matplotlib.pyplot as plt
import numpy as np
import pymc3 as pm
import pandas as pd
import theano

# Read the radon data set located via pm.get_data and cast the response
# column to Theano's configured float type.
data = pd.read_csv(pm.get_data('radon.csv')).assign(
    log_radon=lambda frame: frame['log_radon'].astype(theano.config.floatX)
)

# Distinct county labels, and the per-row county code that the models use
# to index their per-county parameter vectors.
# NOTE(review): presumably county_code is a 0-based integer code -- confirm.
county_names = data['county'].unique()
county_idx = data['county_code'].values

# Number of distinct counties (length of the unique-label array above).
n_counties = len(county_names)

In [11]:
# Preview the first rows of the three columns used by the models below.
data.loc[:, ['county', 'log_radon', 'floor']].head()

Unnamed: 0,county,log_radon,floor
0,AITKIN,0.832909,1.0
1,AITKIN,0.832909,0.0
2,AITKIN,1.098612,0.0
3,AITKIN,0.09531,0.0
4,ANOKA,1.163151,0.0


In [None]:
with pm.Model() as unpooled_model:

    # One independent intercept (a) and one independent slope (b) per
    # county; nothing is shared between counties in this model.
    a = pm.Normal('a', mu=0, sd=100, shape=n_counties)
    b = pm.Normal('b', mu=0, sd=100, shape=n_counties)

    # Scale of the observation noise.
    eps = pm.HalfCauchy('eps', beta=5)

    # Linear predictor: each row picks its own county's intercept and slope.
    radon_est = a[county_idx] + b[county_idx] * data['floor'].values

    # Likelihood of the observed log-radon values.
    y = pm.Normal('y', mu=radon_est, sd=eps, observed=data['log_radon'])

    # Draw posterior samples with the automatically assigned sampler.
    unpooled_trace = pm.sample(draws=200)

Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
  if not np.issubdtype(var.dtype, float):
Multiprocess sampling (4 chains in 4 jobs)
NUTS: [eps_log__, b, a]


In [None]:
with pm.Model() as hierarchical_model:
    # Hyperpriors: group-level location and spread shared by all counties.
    # `sd` is a standard deviation, so use 100 (matching the unpooled
    # model's priors) rather than 100**2, which is a variance-sized value
    # and made the prior scale 10000.
    mu_a = pm.Normal('mu_a', mu=0., sd=100)
    sigma_a = pm.HalfCauchy('sigma_a', 5)
    mu_b = pm.Normal('mu_b', mu=0., sd=100)
    sigma_b = pm.HalfCauchy('sigma_b', 5)

    # Per-county intercepts and slopes, drawn from the shared group-level
    # distributions defined above.
    a = pm.Normal('a', mu=mu_a, sd=sigma_a, shape=n_counties)
    b = pm.Normal('b', mu=mu_b, sd=sigma_b, shape=n_counties)

    # Model error (scale of the observation noise).
    eps = pm.HalfCauchy('eps', 5)

    # Linear predictor: each row picks its own county's intercept and slope.
    radon_est = a[county_idx] + b[county_idx]*data.floor.values

    # Likelihood of the observed log-radon values.
    radon_like = pm.Normal('radon_like', mu=radon_est, sd=eps, observed=data.log_radon)

    # Inference: draw posterior samples.
    hierarchical_trace = pm.sample(draws=2000, n_init=1000)