In [262]:
from collections import OrderedDict    # For recording the model specification 

import pandas as pd                    # For file input/output
import numpy as np                     # For vectorized math operations

import pylogit as pl                   # For MNL model estimation
                                       # To convert from wide to long format
import numpy.matlib

# 1. Read and Reshape Data

In [263]:
data = pd.read_csv("TrafficModeData12.csv")
data["Group"] = np.matlib.repmat(np.array([x+1 for x in range(46)]).reshape(46,1),1,2).reshape(92,1)
data["ALT"] = np.matlib.repmat(np.array([1,2]).reshape(2,1),46,1)
data.head(8)
# Variables:
# ASC(alternative-specific coefficients)
# TTME(Travel Time）；
# FEE(Congestion fee cost）；
# COST(For driving, it includes gas fee, parking fee. For not driving, it includes public transportation fee)
# AGE（Age）；
# INC(Individual Income)
# GEN(Gender)

Unnamed: 0,MODE,FEE,COST,TTME,AGE,GEN,INC,Group,ALT
0,1,5.5,17.2046,56.5,21,1,193631.3389,1,1
1,0,0.0,10.25,86.3,21,1,193631.3389,1,2
2,1,5.5,15.16557,49.06,22,1,70019.2248,2,1
3,0,0.0,2.75,50.67,22,1,70019.2248,2,2
4,1,5.5,14.80658,38.27,33,1,125358.6178,3,1
5,0,0.0,2.75,39.72,33,1,125358.6178,3,2
6,1,5.5,16.76483,63.42,48,0,130802.3121,4,1
7,0,0.0,10.25,78.32,48,0,130802.3121,4,2


In [264]:
data.columns

Index([u'MODE', u'FEE', u'COST', u'TTME', u'AGE', u'GEN', u'INC', u'Group',
       u'ALT'],
      dtype='object')

In [265]:
# data['FEE'] = data['FEE'].astype(dtype=int) 

In [266]:
# data['AGE'] = data['AGE'].astype(dtype=int) 
# data['INC'] = data['INC'].astype(dtype=int) 

In [267]:
# data['GEN'] = data['GEN'].astype(dtype=int) 

In [268]:
# data.drop([u'GEN'], axis=1, inplace=True)

###  Variables:
#### TTME(Travel Time）； FEE(Congestion fee cost）;   AGE（Age）； INC(Individual Income);  GEN(Gender)

### Utility Functions：
#### V drive = βTTME1 * TTMEdrive + βFEE1 * FEEdrive + βAGE1 * AGEdrive + βINC1 * INCdrive + βGEN1 * GENdrive
#### V not-drive = βTTME2 * TTMEnotdrive + βFEE2 * FEEnotdrive + βAGE2 * AGEnotdrive + βGEN2* GENnotdrive

# 2. Set Variables

In [269]:
spec = OrderedDict()
variable_names = OrderedDict()
# Vars = ["TTME", "FEE", "COST", "INC", "GEN"]
Vars = ["TTME", "FEE", "COST"]
# Vars = ["TTME",  "AGE", "INC", "FEE"]
# Vars = ["TTME", "FEE", "AGE", "GEN"]
# Vars = ["TTME", "AGE", "GEN"]
# spec["intercept"] = [1, 2]
# variable_names["intercept"] = ["ASC Drive", "ASC Not Drive"]
for var in Vars:
   spec[var] = [[1,2]]
   variable_names[var] = [var]
# spec["intercept"] = [1, 2]
# variable_names["intercept"] = ["ASC Drive", "ASC Not Drive"]
spec["TTME"] = [[1],[2]]
variable_names["TTME"] = ["TTME for Drive", "TTME for Not Drive"]
spec["FEE"] = [1]
variable_names["FEE"] = ["FEEdrive"]
spec["COST"] = [[1],[2]]
variable_names["COST"] = ["COST for Drive", "COST for Not Drive"]
# spec["AGE"] = [1,2]
# variable_names["AGE"] = ["AGEdrive", "AGEnotdrive"]
# spec["INC"] = [1,2]
# variable_names["INC"] = ["INCdrive", "INCnotdrive"]
# spec["income"] = range(1, 5)
# variable_names["income"] = ["income_{}".format(x)
#                            for x in ["gc", "gr", "ec", "er"]]
# spec["GEN"] = [[1], [2]]
# variable_names["GEN"] = ["GENdrive", "GENnotdrive"]

# 3. Set Model

In [270]:
model = pl.create_choice_model(data = data,
                    alt_id_col="ALT",
                    obs_id_col="Group",
                    choice_col="MODE",
                    specification=spec,
                    model_type = "MNL",
                    names = variable_names
)

# 4. Fit Model

In [273]:
# model.fit_mle(np.zeros(10))
model.fit_mle(np.zeros(5))

Log-likelihood at zero: -31.8848
Initial Log-likelihood: -31.8848
Estimation Time for Point Estimation: 0.01 seconds.
Final log-likelihood: -17.9161


In [276]:
model.get_statsmodels_summary()

0,1,2,3
Dep. Variable:,MODE,No. Observations:,46.0
Model:,Multinomial Logit Model,Df Residuals:,41.0
Method:,MLE,Df Model:,5.0
Date:,"Wed, 12 Dec 2018",Pseudo R-squ.:,0.438
Time:,21:35:13,Pseudo R-bar-squ.:,0.281
AIC:,45.832,Log-Likelihood:,-17.916
BIC:,54.975,LL-Null:,-31.885

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
TTME for Drive,-0.0954,0.052,-1.829,0.067,-0.198,0.007
TTME for Not Drive,-0.0685,0.054,-1.270,0.204,-0.174,0.037
FEEdrive,0.8714,0.557,1.564,0.118,-0.221,1.964
COST for Drive,-0.3670,0.260,-1.411,0.158,-0.877,0.143
COST for Not Drive,-0.5262,0.303,-1.737,0.082,-1.120,0.068


In [275]:
model.fit_mle(np.zeros(8))
model.print_summaries()model.print_summaries()

SyntaxError: invalid syntax (<ipython-input-275-d8702d68dde2>, line 2)