In [5]:
import pymc3 as pm
import numpy as np
import pandas as pd
from theano import shared

In [3]:
# reading in data
campaign_data = pd.read_csv('spend_data_distribute_short.csv')
campaign_data.head()

Unnamed: 0,business_id,billed amount,log_billed_amount,product,product_idx,size,size_idx,region,region_idx,industry,industry_idx
0,b03051,896.413102,6.798401,Display,0,size_less_50,1,northeast,0,other_ind,0
1,b10014,818.637745,6.707642,Display,0,size_50_100,3,midwest,2,other_ind,0
2,b01342,452.298445,6.114342,Display,0,unavailable,2,northeast,0,other_ind,0
3,b11218,871.714101,6.770462,Email,1,size_less_50,1,south,1,other_ind,0
4,b00181,1831.665464,7.512981,Display,0,size_less_50,1,midwest,2,other_ind,0


In [4]:
model_dict = {}
# non-hierarchical / pooled
with pm.Model() as model:
    product_betas = pm.Normal('product_betas', mu=0., sd=1.0, 
                              shape=campaign_data['product'].nunique()-1)
    size_betas = pm.Normal('size_betas', mu=0., sd=1.0, 
                           shape=campaign_data['size'].nunique()-1)
    region_betas = pm.Normal('region_betas', mu=0., sd=1.0,
                             shape=campaign_data['region'].nunique()-1)
    industry_betas = pm.Normal('industry_betas', mu=0., sd=1.0,
                               shape=campaign_data['industry'].nunique()-1)
    intercept = pm.Normal('Intercept', 0., sd=1.0)
    sigma = pm.HalfCauchy('sigma', beta=1)
    
    mu = intercept+pm.math.dot(product_betas, shared_vars['product'].T)+\
        pm.math.dot(size_betas, shared_vars['size'].T)+\
        pm.math.dot(region_betas, shared_vars['region'].T)+\
        pm.math.dot(industry_betas, shared_vars['industry'].T)
    mu_pred = pm.Normal('spend', mu=mu, sd=sigma, 
                        observed=campaign_data['log_billed_amount'])
model_dict['pooled'] = model

NameError: name 'shared_vars' is not defined

In [None]:
# hiearchical - product
with pm.Model() as model:
    # Hyperpriors
    mu_a = pm.Normal('mu_alpha', mu=0., sd=1)
    sigma_a = pm.HalfCauchy('sigma_alpha', beta=1)
    mu_b = pm.Normal('mu_beta', mu=0., sd=1)
    sigma_b = pm.HalfCauchy('sigma_beta', beta=1)
    
    # product-level intercept
    product_alphas = pm.Normal('product_alphas', mu=mu_a, sd=sigma_a, 
                              shape=campaign_data['product'].nunique())
    
    # betas
    region_betas = pm.Normal('region_betas', mu=mu_b, sd=sigma_b,
                             shape=campaign_data['region'].nunique()-1)
    size_betas = pm.Normal('size_betas', mu=mu_b, sd=sigma_b, 
                           shape=campaign_data['size'].nunique()-1)
    industry_betas = pm.Normal('industry_betas', mu=mu_b, sd=sigma_b,
                               shape=campaign_data['industry'].nunique()-1)
    sigma = pm.HalfCauchy('sigma', beta=1)
    
    mu = product_alphas[shared_vars['product_idx']]+\
        pm.math.dot(size_betas, shared_vars['size'].T)+\
        pm.math.dot(region_betas, shared_vars['region'].T)+\
        pm.math.dot(industry_betas, shared_vars['industry'].T)
    mu_pred = pm.Normal('spend', mu=mu, sd=sigma, 
                        observed=campaign_data['log_billed_amount'])
model_dict['product_alphas'] = model

In [None]:
# hiearchical - product and region
with pm.Model() as model:
    # Hyperpriors
    mu_a_product = pm.Normal('mu_a_product', mu=0., sd=1)
    sigma_a_product = pm.HalfCauchy('sigma_a_product', beta=1)
    # removed due to poor performance
    #mu_a_region = pm.Normal('mu_a_region', mu=0., sd=1)
    #sigma_a_region = pm.HalfCauchy('sigma_a_region', beta=1)
    mu_b = pm.Normal('mu_beta', mu=0., sd=1)
    sigma_b = pm.HalfCauchy('sigma_beta', beta=1)    
    
    # product and region-level intercepts
    product_alphas = pm.Normal('product_alphas', mu=mu_a_product, sd=sigma_a_product, 
                              shape=campaign_data['product'].nunique())
    region_alphas = pm.Normal('region_alphas', mu=0, sd=1.0,
                         shape=campaign_data['region'].nunique())
    
    # betas
    size_betas = pm.Normal('size_betas', mu=mu_b, sd=sigma_b, 
                           shape=campaign_data['size'].nunique()-1)
    industry_betas = pm.Normal('industry_betas', mu=mu_b, sd=sigma_b,
                               shape=campaign_data['industry'].nunique()-1)
    sigma = pm.HalfCauchy('sigma', beta=1)
    
    mu = product_alphas[shared_vars['product_idx']]+\
        region_alphas[shared_vars['region_idx']]+\
        pm.math.dot(size_betas, shared_vars['size'].T)+\
        pm.math.dot(industry_betas, shared_vars['industry'].T)
    mu_pred = pm.Normal('spend', mu=mu, sd=sigma, 
                        observed=campaign_data['log_billed_amount'])
model_dict['product_region_alphas'] = model