## 8.3 An introduction to Hierarchical Models

### Ratings-Based Conjoint Analysis for the Amusement Park

In [0]:
import pandas as pd
# Load the conjoint ratings from the remote CSV behind this short link
# (needs network access).
conjoint_df = pd.read_csv('http://bit.ly/PMR-ch8pt3')
# Only the last expression of a cell is rendered, so head() produces no
# output here; describe(include='all') summarises every column.
conjoint_df.head() # Not shown
conjoint_df.describe(include='all') # Not shown

In [0]:
import pandas as pd
import numpy as np
# Seed NumPy's global generator so the simulated design is repeatable.
np.random.seed(89745)

# Study dimensions.
n_questions = 16           # number of conjoint ratings per respondent
response_id = range(200)   # respondent ids

# Levels available for each roller-coaster attribute.
speed_options = ['40', '50', '60', '70']
height_options = ['200', '300', '400']
const_options = ['Steel', 'Wood']
theme_options = ['Dragon', 'Eagle']

# Draw one level per question for every attribute (sampling with
# replacement, which is np.random.choice's default). The draws consume the
# seeded random stream, so they must stay in the order
# speed -> height -> const -> theme.
speed = np.random.choice(speed_options, n_questions)
height = np.random.choice(height_options, n_questions)
const = np.random.choice(const_options, n_questions)
theme = np.random.choice(theme_options, n_questions)

In [0]:
# Stack the four attribute draws as labelled rows, then flip the frame so
# each attribute becomes a column and each row is one conjoint question.
profiles_df = pd.DataFrame(
    data=[speed, height, const, theme],
    index=['speed', 'height', 'const', 'theme'],
).transpose()
profiles_df

In [0]:
# Dummy-code every attribute level. dtype=int pins the indicators to 0/1
# integers; the pandas default has varied by release (uint8 in older
# versions, bool from 2.0), which changes how the design matrix displays.
# One reference level per attribute is then dropped so the remaining columns
# are contrasts against that baseline.
baseline_levels = ['speed_40', 'height_200', 'const_Steel', 'theme_Dragon']
profile_dummies = (
    pd.get_dummies(profiles_df, dtype=int)
    .drop(columns=baseline_levels))

# Prepend a constant column for the intercept. Its length and index are
# taken from the dummies (not a hard-coded 16) so the concat stays aligned
# if the number of questions changes.
profiles_model = pd.concat(
    [pd.Series(np.ones(len(profile_dummies), dtype=int),
               index=profile_dummies.index,
               name='Intercept'),
     profile_dummies],
    axis=1)
profiles_model

In [0]:
# Draw one weight vector per respondent from a multivariate normal whose
# components are independent (the covariance matrix is diagonal).
weight_means = [-3, 0.5, 1, 3, 2, 1, -0.2, -0.5]
weight_variances = [0.2, 0.1, 0.1, 0.1, 0.2, 0.3, 1, 1]
weights = np.random.multivariate_normal(
    weight_means,
    np.diag(weight_variances),
    len(response_id),
)

In [0]:
# Simulate every respondent's ratings of the shared set of profiles.
# Per-respondent frames are collected in a list and concatenated once:
# DataFrame.append was removed in pandas 2.0, and growing a frame inside the
# loop re-copies all accumulated rows on every iteration.
n_profiles = len(profiles_model)  # rows in the design matrix (was a literal 16)
resp_frames = []
for i in response_id:
  # Utility = design matrix weighted by this respondent's coefficients,
  # summed across columns, plus standard-normal noise per profile.
  utility = (
      (profiles_model * weights[i]).sum(axis=1)
      + np.random.normal(size=n_profiles))
  # Bin this respondent's utilities into 10 equal-width intervals labelled
  # 1..10, so the rating scale is relative to the respondent's own range.
  ratings = pd.cut(utility, 10, labels=range(1,11))
  conjoint_resp = profiles_df.copy()
  conjoint_resp['rating'] = pd.to_numeric(ratings)
  conjoint_resp['resp_id'] = i
  resp_frames.append(conjoint_resp)
# pd.concat raises on an empty list; fall back to the empty frame the
# original loop would have left behind when there are no respondents.
conjoint_df = (pd.concat(resp_frames, ignore_index=True)
               if resp_frames else pd.DataFrame())
conjoint_df.head()

In [0]:
# Summary statistics for every column (include='all' covers non-numeric
# columns as well as numeric ones).
conjoint_df.describe(include='all')

### 8.3.4 An Initial Linear Model

In [0]:
# Mean rating at each level of the height attribute.
conjoint_df.groupby('height').rating.mean()

In [0]:
import statsmodels.formula.api as smf
# Baseline model: ordinary least squares of rating on the four attributes,
# pooling all rows together (resp_id does not enter the formula).
ols_formula = 'rating ~ speed + height + const + theme'
ride_lm = smf.ols(formula=ols_formula, data=conjoint_df).fit()
ride_lm.summary()

### 8.3.5 Hierarchical Linear Model with statsmodels

In [0]:
# Mixed model with the same fixed effects as the OLS fit, grouped by
# respondent, with an intercept-only random-effects formula ('~ 1').
ride_hlm_1 = smf.mixedlm(
    formula='rating ~ speed + height + const + theme',
    data=conjoint_df,
    groups=conjoint_df['resp_id'],
    re_formula='~ 1',
)
ride_hlm_1_f = ride_hlm_1.fit()
ride_hlm_1_f.summary()

In [0]:
# Fixed-effect estimates from the random-intercept fit.
ride_hlm_1_f.fe_params

In [0]:
# Build a frame from the fitted random effects and transpose it. The
# notebook later treats the result as one row per respondent with a 'Group'
# column.
re_by_group = pd.DataFrame(ride_hlm_1_f.random_effects)
re_params = re_by_group.transpose()
re_params.head()

In [0]:
# Per-respondent coefficients for the random-intercept model: every
# respondent shares the fixed effects, and only the intercept is shifted by
# that respondent's random effect.
fixed_effects = ride_hlm_1_f.fe_params

# Repeat the fixed-effect row once per respondent. np.tile replaces
# .iloc[np.zeros(n)], which passed a float64 array as positional indices and
# depended on pandas casting it implicitly.
ride_hlm_1_f_coef = pd.DataFrame(
    np.tile(fixed_effects.values, (len(re_params), 1)),
    columns=fixed_effects.index)

# Add the random intercepts positionally (.values). The rows here are
# numbered 0..n-1 while re_params is indexed by group label, so a
# label-aligned add would yield NaN whenever the labels are not exactly
# 0..n-1.
ride_hlm_1_f_coef['Intercept'] = (
    ride_hlm_1_f_coef['Intercept'] + re_params['Group'].values)

ride_hlm_1_f_coef.head()

### 8.3.6 The Complete Hierarchical Linear Model

In [0]:
# Re-seed NumPy's global generator before fitting.
np.random.seed(89745)

# Full mixed model: the random-effects formula repeats all four attributes,
# so each has a random component per respondent in addition to the fixed
# effect.
ride_hlm_2 = smf.mixedlm(
    formula='rating ~ speed + height + const + theme',
    data=conjoint_df,
    groups=conjoint_df['resp_id'],
    re_formula='~ speed + height + const + theme',
)
# Fit with method 'nm' and allow up to 1000 iterations.
ride_hlm_2_f = ride_hlm_2.fit(method='nm', maxiter=1000)

In [0]:
# Fixed-effect estimates from the full hierarchical fit.
ride_hlm_2_f.fe_params

In [0]:
# Frame of the fitted random effects, transposed, with the 'Group' column
# relabelled 'Intercept'.
ride_hlm_2_f_re_df = (
    pd.DataFrame(ride_hlm_2_f.random_effects)
    .transpose()
    .rename(columns={'Group': 'Intercept'}))
ride_hlm_2_f_re_df.head()

In [0]:
# Per-respondent coefficients = random effects + fixed effects. The fixed
# effects Series is matched to the frame's columns by label and added to
# every row.
hlm_2_f_coef = ride_hlm_2_f_re_df.add(ride_hlm_2_f.fe_params,
                                      axis='columns')
hlm_2_f_coef.head()

### 8.3.7 Interpreting random effects

In [0]:
import seaborn as sns

# Heatmap of pairwise correlations between the per-respondent coefficient
# columns, skipping the first column (presumably the intercept); the colour
# scale is capped at 0.3.
sns.heatmap(hlm_2_f_coef.iloc[:,1:].corr(), vmax=0.3)

In [0]:
# Same heatmap computed on the random effects alone (first column skipped,
# colour scale capped at 0.3).
sns.heatmap(ride_hlm_2_f_re_df.iloc[:,1:].corr(), vmax=0.3)

In [0]:
import matplotlib.pyplot as plt
import seaborn as sns

# Clustered heatmap of the correlations between random-effect columns
# (first column skipped), on a blue-white-red scale centred at 0 and
# limited to [-0.5, 0.5].
re_corr = ride_hlm_2_f_re_df.iloc[:, 1:].corr()
cg = sns.clustermap(re_corr, vmin=-0.5, vmax=0.5,
                    center=0, cmap=plt.cm.bwr)
# Keep the row labels horizontal and tilt the column labels by 45 degrees.
plt.setp(cg.ax_heatmap.yaxis.get_majorticklabels(), rotation=0)
plt.setp(cg.ax_heatmap.xaxis.get_majorticklabels(), rotation=45)

In [0]:
import matplotlib.pyplot as plt
import seaborn as sns

# NOTE(review): this cell repeats the preceding clustermap cell verbatim and
# rebinds `cg` to a new figure; it looks like a leftover duplicate.
# Clustered heatmap of the random-effect correlations (first column
# skipped), blue-white-red scale centred at 0 and limited to [-0.5, 0.5].
cg = sns.clustermap(ride_hlm_2_f_re_df.iloc[:,1:].corr(), vmax=0.5,
                    vmin=-0.5,cmap=plt.cm.bwr, center=0)
# Row labels horizontal, column labels rotated 45 degrees.
plt.setp(cg.ax_heatmap.yaxis.get_majorticklabels(), rotation=0)
plt.setp(cg.ax_heatmap.xaxis.get_majorticklabels(), rotation=45)