In [1]:
from collections import OrderedDict    # For recording the model specification 

import pandas as pd                    # For file input/output
import numpy as np                     # For vectorized math operations

import pylogit as pl                   # For choice model estimation

In [7]:
# Load the catsup choice data and, in the same expression, discard the
# 'Unnamed: 0' column (the label pandas gives a header-less CSV column --
# presumably an index that was written out with the file).
catsup = (
    pd.read_csv('data/catsup_trainformat.csv')
    .drop(columns='Unnamed: 0')
)

# Preview the first few rows.
catsup.head()

Unnamed: 0,chosen,id,alt,display,feature,price,chid
0,0.0,1.0,0,0.0,0.0,4.6,0
1,0.0,1.0,1,0.0,0.0,3.7,0
2,1.0,1.0,2,0.0,0.0,5.2,0
3,0.0,1.0,3,0.0,0.0,3.4,0
4,0.0,1.0,0,0.0,0.0,4.6,1


In [3]:
# Explanatory variables that enter the model's index (utility) function.
index_var_names = ['display', 'feature', 'price']

# Cast every index variable to float in a single multi-column assignment.
catsup[index_var_names] = catsup[index_var_names].astype(float)

In [4]:
# Model specification.
#
# Every index variable is mapped to [[0, 1, 2, 3]], i.e. one inner group
# containing all four alternative ids. Per pylogit's specification
# convention this requests a single coefficient per variable that is
# shared by the alternatives listed in the group.
example_specification = OrderedDict(
    (var_name, [[0, 1, 2, 3]]) for var_name in index_var_names
)

# Display names: each coefficient is labelled with its own column name.
example_names = OrderedDict(
    (var_name, [var_name]) for var_name in index_var_names
)

In [8]:
# Arguments needed to instantiate the Mixed Logit model class.
#
# Column roles:
#   * "chid" is passed as obs_id_col because it is the choice situation id.
#     The argument name is somewhat unfortunate: in the most general sense
#     it refers to the situation id, whereas in panel data settings what one
#     would usually call the "observation id" is the mixing_id_col.
#   * mixing_id_col ("id") identifies the units of observation over which
#     the coefficients are randomly distributed.
#   * mixing_vars lists the index variables that get random coefficients.
mixed_logit_settings = dict(
    data=catsup,
    alt_id_col="alt",
    obs_id_col="chid",
    choice_col="chosen",
    specification=example_specification,
    model_type="Mixed Logit",
    names=example_names,
    mixing_id_col="id",
    mixing_vars=index_var_names,
)
example_mixed = pl.create_choice_model(**mixed_logit_settings)

# Starting values: a mean and a standard deviation are estimated for each
# random coefficient, hence two parameters per listed index variable.
start_values = np.zeros(2 * len(index_var_names))

# Estimate the model with 600 draws and a fixed seed.
example_mixed.fit_mle(init_vals=start_values,
                      num_draws=600,
                      seed=123)

# Show the estimation results.
example_mixed.get_statsmodels_summary()

Log-likelihood at zero: -3,878.8516
Initial Log-likelihood: -3,878.8516




Estimation Time for Point Estimation: 1.71 minutes.
Final log-likelihood: -2,815.2232


  self._store_inferential_results(np.sqrt(np.diag(self.robust_cov)),
  return (self.a < x) & (x < self.b)
  return (self.a < x) & (x < self.b)
  cond2 = cond0 & (x <= self.a)


0,1,2,3
Dep. Variable:,chosen,No. Observations:,2798.0
Model:,Mixed Logit Model,Df Residuals:,2792.0
Method:,MLE,Df Model:,6.0
Date:,"Mon, 27 May 2019",Pseudo R-squ.:,0.274
Time:,17:35:06,Pseudo R-bar-squ.:,0.273
AIC:,5642.446,Log-Likelihood:,-2815.223
BIC:,5678.066,LL-Null:,-3878.852

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
display,1.4197,0.110,12.950,0.000,1.205,1.635
feature,1.0881,0.121,9.022,0.000,0.852,1.324
price,-1.0520,0.087,-12.109,0.000,-1.222,-0.882
Sigma display,-0.5360,0.240,-2.233,0.026,-1.006,-0.065
Sigma feature,0.1091,1.050,0.104,0.917,-1.949,2.167
Sigma price,1.1598,0.073,15.905,0.000,1.017,1.303


In [None]:
# Inspect the estimated parameters of the fitted model.
# NOTE(review): this cell originally held only a dangling `example_mixed.`
# (an unfinished attribute access, which is a SyntaxError and stops a full
# notebook run). `params` is a guess at the intended attribute -- confirm,
# or delete the cell if nothing further was meant to be shown.
example_mixed.params