In [1]:
from collections import OrderedDict    # For recording the model specification 

import pandas as pd                    # For file input/output
import numpy as np                     # For vectorized math operations

import pylogit as pl                   # For choice model estimation

In [7]:
# Read the catsup data and discard the 'Unnamed: 0' column in one step.
# NOTE(review): 'Unnamed: 0' is presumably a row index that was written
# out together with the CSV — confirm.
catsup = pd.read_csv('data/catsup.csv').drop(columns='Unnamed: 0')

# Preview the first rows.
catsup.head()

Unnamed: 0,id,disp.heinz41,disp.heinz32,disp.heinz28,disp.hunts32,feat.heinz41,feat.heinz32,feat.heinz28,feat.hunts32,price.heinz41,price.heinz32,price.heinz28,price.hunts32,choice
0,1,0,0,0,0,0,0,0,0,4.6,3.7,5.2,3.4,2
1,1,0,0,0,0,0,0,0,0,4.6,4.3,5.2,4.4,2
2,1,0,0,0,0,0,1,0,0,4.6,2.5,4.6,4.8,2
3,1,0,0,0,0,0,0,0,0,4.6,3.7,5.2,3.4,2
4,1,0,0,0,0,0,0,1,0,4.6,3.0,4.6,4.8,2


In [22]:
# Index variables: every column except the first and the last one.
index_var_names = catsup.columns[1:-1].tolist()

# Store all index variables as floats.
catsup = catsup.astype({name: float for name in index_var_names})

In [None]:
# Model specification, one entry per index variable (in column order).
#
# Each variable is given a single nested list holding the alternative ids
# 0-3, and its coefficient is named after the variable's own column.
# NOTE(review): a nested list such as [[0, 1, 2, 3]] presumably requests one
# coefficient shared by all of the listed alternatives — confirm against the
# pylogit documentation.
example_specification = OrderedDict(
    (name, [[0, 1, 2, 3]]) for name in index_var_names
)
example_names = OrderedDict((name, [name]) for name in index_var_names)

In [None]:
# Build and estimate a Mixed Logit model of brand choice.
#
# The columns displayed by `catsup.head()` hold one row per choice situation
# with a separate disp.*, feat.* and price.* column for every brand, and
# include neither an "alt" nor a "chid" column. pylogit estimates from
# long-format data (one row per alternative per choice situation), so the
# wide frame is reshaped before the model is created.

# Alternative ids 0-3 match the ids used by the specification cell
# ([[0, 1, 2, 3]]).
# NOTE(review): assumes the integer codes in "choice" follow the brand order
# of the columns (0 = heinz41, 1 = heinz32, 2 = heinz28, 3 = hunts32) —
# confirm against the source of data/catsup.csv before trusting estimates.
brand_by_alt_id = {0: "heinz41", 1: "heinz32", 2: "heinz28", 3: "hunts32"}
alt_ids = list(brand_by_alt_id)

# Long-format variables; each one is assembled from its per-brand columns.
# The wide column names in `index_var_names` do not exist after reshaping,
# so the long-format variables get their own specification below.
long_var_names = ["disp", "feat", "price"]

# "chid" is the choice situation id: one id per row of the wide data.
# A new frame is created so that re-running this cell leaves `catsup` as is.
catsup_wide = catsup.assign(chid=np.arange(1, len(catsup) + 1))

# convert_wide_to_long needs one availability column per alternative.
# NOTE(review): every brand is assumed available in every choice
# situation — confirm.
availability_cols = {}
for alt_id, brand in brand_by_alt_id.items():
    avail_col = "avail." + brand
    catsup_wide[avail_col] = 1
    availability_cols[alt_id] = avail_col

# Map each long-format variable to its wide column for every alternative,
# e.g. "price" -> {0: "price.heinz41", 1: "price.heinz32", ...}.
alt_specific_cols = {
    var: {alt_id: var + "." + brand
          for alt_id, brand in brand_by_alt_id.items()}
    for var in long_var_names
}

# Positional arguments, in order: wide data, individual-specific variables,
# alternative-specific variables, availability variables, observation id
# column, choice column. "id" is carried along for use as mixing_id_col.
catsup_long = pl.convert_wide_to_long(catsup_wide,
                                      ["id"],
                                      alt_specific_cols,
                                      availability_cols,
                                      "chid",
                                      "choice",
                                      new_alt_id_name="alt")

# One coefficient per long-format variable, shared by all alternatives and
# named after the variable itself.
long_specification = OrderedDict(
    (var, [list(alt_ids)]) for var in long_var_names
)
long_names = OrderedDict((var, [var]) for var in long_var_names)

# Note that "chid" is used as the obs_id_col because "chid" is
# the choice situation id.

# Currently, the obs_id_col argument name is unfortunate because
# in the most general of senses, it refers to the situation id.
# In panel data settings, the mixing_id_col argument is what one
# would generally think of as an "observation id".

# For mixed logit models, the "mixing_id_col" argument specifies
# the units of observation that the coefficients are randomly
# distributed over.
example_mixed = pl.create_choice_model(data=catsup_long,
                                       alt_id_col="alt",
                                       obs_id_col="chid",
                                       choice_col="choice",
                                       specification=long_specification,
                                       model_type="Mixed Logit",
                                       names=long_names,
                                       mixing_id_col="id",
                                       mixing_vars=long_var_names)

# Note 2 * len(long_var_names) is used because we are estimating
# both the mean and standard deviation of each of the random coefficients
# for the listed variables.
example_mixed.fit_mle(init_vals=np.zeros(2 * len(long_var_names)),
                      num_draws=600,
                      seed=123)

# Look at the estimated results
example_mixed.get_statsmodels_summary()