In [1]:
from collections import OrderedDict    # For recording the model specification 

import pandas as pd                    # For file input/output
import numpy as np                     # For vectorized math operations

import pylogit as pl                   # For choice model estimation
import pickle

In [2]:
# Load the catsup choice data from CSV and preview its first rows.
catsup_csv_path = 'data/catsup_trainformat.csv'
catsup = pd.read_csv(catsup_csv_path)
catsup.head()

Unnamed: 0,chosen,id,alt,display,feature,price,chid
0,0,1,0,0,0,4.6,0
1,0,1,1,0,0,3.7,0
2,1,1,2,0,0,5.2,0
3,0,1,3,0,0,3.4,0
4,0,1,0,0,0,4.6,1


In [10]:
# Overwrite the `chosen` column of `catsup` with the choices stored in the
# first column of the pickled data-generating-process (DGP) array.
N_ALTS = 4  # consecutive rows per choice situation in `catsup` (alternatives 0..3)

# NOTE(review): pickle.load can execute arbitrary code while unpickling; only
# load files that come from a trusted source.
with open('./data/10_dgp.p', 'rb') as infile:  # handle is closed even on error
    big_dict = pickle.load(infile)

Y_dgp = big_dict['theta: [ 1.5  1.  -1.1  0.8  0.1  1.2]']
print(Y_dgp.shape)

# One entry per choice situation: the position (0..N_ALTS-1) of the chosen row
# inside that situation's block of N_ALTS consecutive rows.
choices = np.asarray(Y_dgp[:, 0])
if choices.size * N_ALTS != len(catsup):
    # Fail loudly instead of leaving trailing rows silently flagged as 0.
    raise ValueError(
        'Expected {} rows in catsup ({} choices x {} alternatives), found {}.'
        .format(choices.size * N_ALTS, choices.size, N_ALTS, len(catsup))
    )

# Broadcast comparison: row k * N_ALTS + i becomes 1.0 exactly when
# choices[k] == i, and 0.0 otherwise (same values the nested loop produced).
chosen = (choices[:, None] == np.arange(N_ALTS)).astype(float).ravel()
catsup['chosen'] = chosen
catsup.head(30)

(2798, 10)


Unnamed: 0,chosen,id,alt,display,feature,price,chid
0,0.0,1,0,0,0,4.6,0
1,0.0,1,1,0,0,3.7,0
2,0.0,1,2,0,0,5.2,0
3,1.0,1,3,0,0,3.4,0
4,0.0,1,0,0,0,4.6,1
5,1.0,1,1,0,0,4.3,1
6,0.0,1,2,0,0,5.2,1
7,0.0,1,3,0,0,4.4,1
8,0.0,1,0,0,0,4.6,2
9,1.0,1,1,0,1,2.5,2


In [3]:
# Columns used as index variables; cast all of them to float in one assignment.
index_var_names = ['display', 'feature', 'price']
catsup[index_var_names] = catsup[index_var_names].astype(float)

In [4]:
# Model specification and coefficient labels, keyed by index variable.
# Each variable maps to the nested list [[0, 1, 2, 3]]; in pylogit's
# specification format the inner list groups those alternatives under one
# coefficient. Each coefficient is labelled with the variable's own name.
all_alternatives = [0, 1, 2, 3]

example_specification = OrderedDict(
    (var_name, [list(all_alternatives)]) for var_name in index_var_names
)
example_names = OrderedDict(
    (var_name, [var_name]) for var_name in index_var_names
)

In [7]:
# Create a Mixed Logit model instance for the catsup data and estimate it.
#
# Column roles handed to pylogit:
#   * obs_id_col="chid": "chid" is the choice situation id. The argument
#     name is unfortunate, because in the most general sense it refers to
#     the situation id rather than to the decision maker.
#   * mixing_id_col="id": in panel data settings this is what one would
#     generally think of as an "observation id". For mixed logit models it
#     specifies the units of observation that the coefficients are randomly
#     distributed over.

# --- Disabled Monte Carlo variant (kept for reference; not executed) ---
# It loads a different DGP pickle and rebuilds `chosen` from each of the
# first 100 columns of Y_dgp before the model is created.
# infile = open('./data/500_MC_dgp_uts.p', 'rb')
# big_dict = pickle.load(infile)
# Y_dgp = big_dict['theta: [ 1.5  1.  -1.1  0.4  0.1  0.6]']
# print(Y_dgp.shape)
# results = []
# for row in range(100):
#     chosen = np.zeros([11192,1])
#     j = 0
#     for choice in Y_dgp[:,row]:
#         for i in range(4):
#             if i == choice:
#                 chosen[i+j] = True
#             else:
#                 chosen[i+j] = False
#         j+=4
#     catsup['chosen'] = chosen

example_mixed = pl.create_choice_model(
    data=catsup,
    model_type="Mixed Logit",
    specification=example_specification,
    names=example_names,
    alt_id_col="alt",
    obs_id_col="chid",
    choice_col="chosen",
    mixing_id_col="id",
    mixing_vars=index_var_names,
)

# Two parameters per index variable: the mean and the standard deviation of
# its random coefficient. All of them start at zero.
n_params = 2 * len(index_var_names)
start_values = np.zeros(n_params)

example_mixed.fit_mle(init_vals=start_values, num_draws=10000, seed=10)

# Look at the estimated results
print(example_mixed.get_statsmodels_summary())
# results.append(example_mixed.get_statsmodels_summary())

Log-likelihood at zero: -3,878.8516
Initial Log-likelihood: -3,878.8516
Estimation Time for Point Estimation: 105.87 minutes.
Final log-likelihood: -2,813.8720
                     Mixed Logit Model Regression Results                     
Dep. Variable:                 chosen   No. Observations:                2,798
Model:              Mixed Logit Model   Df Residuals:                    2,792
Method:                           MLE   Df Model:                            6
Date:                Thu, 27 Jun 2019   Pseudo R-squ.:                   0.275
Time:                        20:14:39   Pseudo R-bar-squ.:               0.273
AIC:                        5,639.744   Log-Likelihood:             -2,813.872
BIC:                        5,675.364   LL-Null:                    -3,878.852
                    coef    std err          z      P>|z|      [0.025      0.975]
---------------------------------------------------------------------------------
display           1.4129      0.110     12.8

In [9]:
# Persist the fitted model so the slow estimation above does not have to be
# repeated. pickle.dump needs both the object to serialise and an open binary
# file handle; calling it with no arguments raises TypeError.
# NOTE(review): the output path is a reviewer's choice -- adjust as needed,
# and confirm the fitted pylogit model pickles cleanly in your environment.
with open('./data/example_mixed.p', 'wb') as outfile:
    pickle.dump(example_mixed, outfile)

# Display the model object as the cell's output.
example_mixed

<pylogit.mixed_logit.MixedLogit at 0x250099fe128>