In [1]:
# PS4 - CE264

# importing the requried libraries
from collections import OrderedDict    # For recording the model specification 

import pandas as pd                    # For file input/output
import numpy as np                     # For vectorized math operations

import pylogit as pl                   # For MNL model estimation and
                                       # conversion from wide to long format
from pylogit import nested_logit as nl

# reading the data file 
data_wide  = pd.read_csv("Cleaned BayPass Survey_4.24.csv",sep=",")
pd.set_option('display.max_columns', None)

In [2]:
# Converting null values to "Did Not Response"
data_wide.fillna("Did Not Response", inplace = True)
data_wide['Percent_TOD-y'] = data_wide['Percent_TOD_y'].fillna(0)

In [3]:
# Converting data from wide to long format
#print(data_wide.columns)

#Create the list of indiviudal specific variables
ind_variables = data_wide.columns.tolist()[1:12]
ind_variables.remove('Gender (Other)')
ind_variables.remove('Race/Ethnicity (Other)')
ind_variables.append('BayPass')
ind_variables.append('student_dummy')
ind_variables.append('employed_dummy')
ind_variables.append('Percent_TOD_y')
#print(ind_variables)

In [4]:
alt_varying_variables = {u'travel_time': dict([(1, 'tt_walk'),
                                              (2, 'tt_bike'),
                                              (3, 'tt_bus'),
                                              (4, 'tt_rail'),
                                              (5, 'tt_drive'),
                                              (6, 'tt_rideshare')]), 
                        u'waiting_time': dict([(1, 'wt_walk'),
                                              (2, 'wt_bike'),
                                               (3, 'wt_bus'),
                                               (4, 'wt_rail'),
                                              (5, 'wt_drive'),
                                              (6, 'wt_rideshare')]),
                         u'cost': dict([(1, 'cost_walk'),
                                       (2, 'cost_bike'),
                                       (3, 'cost_bus'),
                                       (4, 'cost_rail'),
                                       (5, 'cost_drive'),
                                       (6,'cost_rideshare')])
                        }

In [48]:
#Specify the avaialiblity variables
#All options are available
listAvail = ['walk_av','bike_av', 'bus_av', 'rail_av', 'drive_av', 'rideshare_av']
data_wide[[listAvail]] = 1


availability_variables = {1:'walk_av' ,
                         2:'bike_av',
                         3:'bus_av',
                         4:'rail_av',
                         5:'drive_av',
                         6:'rideshare_av'}

#Determine the columns for: alternative ids, observation ids, and the choice
custom_alt_id = "mode_id"

obs_id_column = 'column_id'
data_wide[obs_id_column] = np.arange(data_wide.shape[0], dtype = int)+1
choice_column = "choice"


       0  obs_id    Introduction     Campus Afflication  Gender  \
0      0       1  Yes, I consent       Graduate student  Female   
1      1       1  Yes, I consent       Graduate student  Female   
2      2       1  Yes, I consent       Graduate student  Female   
3      3       1  Yes, I consent       Graduate student  Female   
4      4       1  Yes, I consent       Graduate student  Female   
..   ...     ...             ...                    ...     ...   
758  758     148  Yes, I consent  Undergraduate student  Female   
759  759     148  Yes, I consent  Undergraduate student  Female   
760  760     148  Yes, I consent  Undergraduate student  Female   
761  761     148  Yes, I consent  Undergraduate student  Female   
762  762     148  Yes, I consent  Undergraduate student  Female   

       Gender (Other) Race/Ethnicity Race/Ethnicity (Other)  \
0    Did Not Response          Asian       Did Not Response   
1    Did Not Response          Asian       Did Not Response   
2    D

In [6]:
data_long = pl.convert_wide_to_long(data_wide,
                                   ind_variables,
                                   alt_varying_variables,
                                    availability_variables,
                                   obs_id_column,
                                   choice_column,
                                   new_alt_id_name = custom_alt_id)
data_long.head()



Unnamed: 0,column_id,mode_id,choice,obs_id,Introduction,Campus Afflication,Gender,Race/Ethnicity,Employment,Zipcode,Modes,BayPass Access,BayPass,student_dummy,employed_dummy,Percent_TOD_y,travel_time,waiting_time,cost
0,1,1,1,1,"Yes, I consent",Graduate student,Female,Asian,"Yes, employed off campus",94704,"AC Transit,BART,Walk,Personal bike",No,0,1,1,98.486878,10,0,0
1,1,2,0,1,"Yes, I consent",Graduate student,Female,Asian,"Yes, employed off campus",94704,"AC Transit,BART,Walk,Personal bike",No,0,1,1,98.486878,40,0,0
2,1,3,0,1,"Yes, I consent",Graduate student,Female,Asian,"Yes, employed off campus",94704,"AC Transit,BART,Walk,Personal bike",No,0,1,1,98.486878,30,10,10
3,1,4,0,1,"Yes, I consent",Graduate student,Female,Asian,"Yes, employed off campus",94704,"AC Transit,BART,Walk,Personal bike",No,0,1,1,98.486878,40,5,1
4,1,5,0,1,"Yes, I consent",Graduate student,Female,Asian,"Yes, employed off campus",94704,"AC Transit,BART,Walk,Personal bike",No,0,1,1,98.486878,5,20,4


In [7]:
data_long.head().T

Unnamed: 0,0,1,2,3,4
column_id,1,1,1,1,1
mode_id,1,2,3,4,5
choice,1,0,0,0,0
obs_id,1,1,1,1,1
Introduction,"Yes, I consent","Yes, I consent","Yes, I consent","Yes, I consent","Yes, I consent"
Campus Afflication,Graduate student,Graduate student,Graduate student,Graduate student,Graduate student
Gender,Female,Female,Female,Female,Female
Race/Ethnicity,Asian,Asian,Asian,Asian,Asian
Employment,"Yes, employed off campus","Yes, employed off campus","Yes, employed off campus","Yes, employed off campus","Yes, employed off campus"
Zipcode,94704,94704,94704,94704,94704


In [8]:
data_long['travel_time_hr'] = data_long['travel_time']/60
data_long['waiting_time_hr'] = data_long['waiting_time']/60

data_long['in-vehicle travel time'] = (data_long['travel_time_hr']+data_long['waiting_time_hr'])

data_long['travel_time_BayPass'] = data_long['travel_time_hr']*data_long['BayPass']
data_long['in-vehicle travel time_BayPass'] = data_long['travel_time_BayPass']+data_long['waiting_time_hr']

data_long['cost_employed'] = data_long['cost']*data_long['employed_dummy']

## Specifying Utility Equations
basic_specification = OrderedDict()
basic_names = OrderedDict()

basic_specification['intercept'] = [1,2,3,4,6]
basic_names['intercept'] = ['ASC Walk','ASC Bike', 'ASC Bus', 'ASC Rail', 'ASC SR']


basic_specification['in-vehicle travel time'] = [[5,6]]
basic_names['in-vehicle travel time'] = ['In-Vehicle Travel Time (DA, SR) (hrs)']

# basic_specification['in-vehicle travel time_BayPass'] = [[5,6]]
# basic_names['in-vehicle travel time_BayPass'] = ['In-Vehicle Travel Time * Baypass(DA, SR) (hrs)']

basic_specification['travel_time_hr'] = [1,2,3,4]
basic_names['travel_time_hr'] = ['Travel Time (Walk) (hrs)', 'Travel Time (Bike) (hrs)', 'Travel Time (Bus) (hrs)', 
                                    'Travel Time (Rail) (hrs)']

# basic_specification['travel_time_BayPass'] = [1,2,3,4]
# basic_names['travel_time_BayPass'] = ['Travel Time (Walk)*BayPass (hrs)', 'Travel Time (Bike)* BayPass (hrs)', 'Travel Time (Bus)*BayPass (hrs)', 
#                                     'Travel Time (Rail)*BayPass (hrs)']

basic_specification['waiting_time_hr'] = [[3,4]]
basic_names['waiting_time_hr']= ["Waiting Time (Bus and Rail) (hrs)"]


# basic_specification['BayPass'] = [2,[3,4],[5,6]]
# basic_names['BayPass'] = ['BayPass Access (Bike)', 'BayPass Access (Transit)', 'BayPass Access (Vehicles)']

basic_specification['BayPass'] = [[3,4]]
basic_names['BayPass'] = ['BayPass']

basic_specification['Percent_TOD_y'] = [[1,2,3,4,6]]
basic_names['Percent_TOD_y'] = ['Percent TOD (Compared to Drive)']

basic_specification['cost'] = [[1,2,3,4,5,6]]
basic_names['cost'] = ['Cost ($)']

# basic_specification['cost_employed']=[[1,2,3,4,5,6]]
# basic_names['cost_employed'] = ['Cost if Employed ($)']

In [9]:
data_mnl1 = pl.create_choice_model(data = data_long,
                                 alt_id_col = custom_alt_id,
                                 obs_id_col = obs_id_column,
                                 choice_col = choice_column,
                                 specification = basic_specification,
                                 model_type = "MNL",
                                 names = basic_names)

data_mnl1.fit_mle(np.zeros(14))
data_mnl1.get_statsmodels_summary()

Log-likelihood at zero: -1,367.1125
Initial Log-likelihood: -1,367.1125
Estimation Time for Point Estimation: 8.49 seconds.
Final log-likelihood: -786.3771


  warn('Method %s does not use Hessian information (hess).' % method,


0,1,2,3
Dep. Variable:,choice,No. Observations:,763.0
Model:,Multinomial Logit Model,Df Residuals:,749.0
Method:,MLE,Df Model:,14.0
Date:,"Sun, 30 Apr 2023",Pseudo R-squ.:,0.425
Time:,17:09:06,Pseudo R-bar-squ.:,0.415
AIC:,1600.754,Log-Likelihood:,-786.377
BIC:,1665.676,LL-Null:,-1367.112

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
ASC Walk,2.2558,0.574,3.932,0.000,1.131,3.380
ASC Bike,0.1728,0.572,0.302,0.763,-0.948,1.294
ASC Bus,0.1035,0.524,0.198,0.843,-0.923,1.130
ASC Rail,-0.3641,0.533,-0.684,0.494,-1.408,0.680
ASC SR,-1.1810,0.414,-2.852,0.004,-1.993,-0.369
"In-Vehicle Travel Time (DA, SR) (hrs)",-7.0225,0.996,-7.048,0.000,-8.975,-5.069
Travel Time (Walk) (hrs),-9.6035,0.696,-13.794,0.000,-10.968,-8.239
Travel Time (Bike) (hrs),-8.9732,0.931,-9.641,0.000,-10.797,-7.149
Travel Time (Bus) (hrs),-6.4340,0.684,-9.406,0.000,-7.775,-5.093


In [10]:
# Interacting with Students
data_long['travel_time_hr'] = data_long['travel_time']/60
data_long['waiting_time_hr'] = data_long['waiting_time']/60
data_long['in-vehicle travel time_hr'] = data_long['travel_time_hr']+data_long['waiting_time_hr']
data_long['in-vehicle travel time_student'] = data_long['in-vehicle travel time_hr']*data_long['student_dummy']
data_long['travel_time_student'] = data_long['travel_time_hr']*data_long['student_dummy']
data_long['waiting_time_student'] = data_long['waiting_time_hr']*data_long['student_dummy']
#data_long['cost_student'] = data_long['cost']*data_long['student_dummy']

## Specifying Utility Equations
basic_specification = OrderedDict()
basic_names = OrderedDict()

basic_specification['intercept'] = [1,2,3,4,6]
basic_names['intercept'] = ['ASC Walk','ASC Bike', 'ASC Bus', 'ASC Rail', 'ASC SR']

basic_specification['in-vehicle travel time_hr'] = [[5,6]]
basic_names['in-vehicle travel time_hr'] = ['In-Vehicle Travel Time (DA, SR)']

basic_specification['in-vehicle travel time_student'] = [[5,6]]
basic_names['in-vehicle travel time_student'] = ['In-Vehicle Travel Time (DA, SR) (Students)']

basic_specification['travel_time_hr'] = [1,2,3,4]
basic_names['travel_time_hr'] = ['Travel Time (Walk)', 'Travel Time (Bike)', 'Travel Time (Bus)', 
                                    'Travel Time (Rail)']

basic_specification['travel_time_student'] = [1,2,3,4]
basic_names['travel_time_student'] = ['Travel Time (Walk) (Students)', 'Travel Time (Bike) (Students)', 'Travel Time (Bus) (Students)', 
                                    'Travel Time (Rail) (Students)']


basic_specification['waiting_time_hr'] = [[3,4]]
basic_names['waiting_time_hr']= ["Waiting Time (Bus and Rail)"]


basic_specification['waiting_time_student'] = [[3,4]]
basic_names['waiting_time_student']= ["Waiting Time (Bus and Rail) (Students)"]


basic_specification['BayPass'] = [[3,4]]
basic_names['BayPass'] = ['BayPass']

basic_specification['Percent_TOD_y'] = [[1,2,3,4,6]]
basic_names['Percent_TOD_y'] = ['Percent TOD (Compared to Drive)']

# basic_specification['total travel time'] = [1,2,3,4,5,6]
# basic_names['total travel time'] = ['Travel Time (Walk)', 'Travel Time (Bike)', 'Travel Time (Bus)', 
#                                     'Travel Time (Rail)','Travel Time (Drive)','Travel Time (Shared Ride)']

# basic_specification['cost'] = [2,3,4,5,6]
# basic_names['cost'] = ['Cost (Bike)', 'Cost (Bus)','Cost (Rail)', 'Cost (Drive)', 'Cost (SR)']

basic_specification['cost'] = [[1,2,3,4,5,6]]
basic_names['cost'] = ['Cost ($)']


In [11]:
data_mnl = pl.create_choice_model(data = data_long,
                                 alt_id_col = custom_alt_id,
                                 obs_id_col = obs_id_column,
                                 choice_col = choice_column,
                                 specification = basic_specification,
                                 model_type = "MNL",
                                 names = basic_names)

data_mnl.fit_mle(np.zeros(20))
data_mnl.get_statsmodels_summary()

Log-likelihood at zero: -1,367.1125
Initial Log-likelihood: -1,367.1125
Estimation Time for Point Estimation: 0.18 seconds.

  warn('Method %s does not use Hessian information (hess).' % method,



Final log-likelihood: -779.5977


0,1,2,3
Dep. Variable:,choice,No. Observations:,763.0
Model:,Multinomial Logit Model,Df Residuals:,743.0
Method:,MLE,Df Model:,20.0
Date:,"Sun, 30 Apr 2023",Pseudo R-squ.:,0.43
Time:,17:09:07,Pseudo R-bar-squ.:,0.415
AIC:,1599.195,Log-Likelihood:,-779.598
BIC:,1691.941,LL-Null:,-1367.112

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
ASC Walk,2.3689,0.586,4.044,0.000,1.221,3.517
ASC Bike,0.2053,0.581,0.353,0.724,-0.934,1.344
ASC Bus,0.1091,0.532,0.205,0.837,-0.933,1.151
ASC Rail,-0.3351,0.540,-0.621,0.535,-1.393,0.723
ASC SR,-1.1884,0.419,-2.838,0.005,-2.009,-0.368
"In-Vehicle Travel Time (DA, SR)",-8.5573,1.465,-5.840,0.000,-11.429,-5.685
"In-Vehicle Travel Time (DA, SR) (Students)",2.1257,1.346,1.580,0.114,-0.512,4.763
Travel Time (Walk),-11.9953,1.102,-10.880,0.000,-14.156,-9.834
Travel Time (Bike),-11.1285,1.513,-7.356,0.000,-14.094,-8.163


## Forecasting and Predictions

In [33]:
# Weights

# Weights for students only

#student_weights = pd.newDataFrame()
student_weights_value = [0.0015, 0.0163, 0.022]
student_index = ["Graduate student", "Undergraduate student", "Not affiliated"]

#Weights for students and non-students 

weights_index = [0,1]
weights_value = [0.0166,0.0043]
weights = pd.DataFrame(index = weights_index, data=weights_value)
weights['student_dummy'] = weights_index
weights.rename(columns={0:"weight"}, inplace = True)
display(weights)

Unnamed: 0,weight,student_dummy
0,0.0166,0
1,0.0043,1


In [34]:
# Sample with only students
analysis_data = data_long.merge(weights, on = 'student_dummy', how = 'left')
#analysis_data = data_long[data_long['student_dummy'] == 1]
analysis_data.reset_index(drop = True, inplace = True)
analysis_data.head()

Unnamed: 0,column_id,mode_id,choice,obs_id,Introduction,Campus Afflication,Gender,Race/Ethnicity,Employment,Zipcode,Modes,BayPass Access,BayPass,student_dummy,employed_dummy,Percent_TOD_y,travel_time,waiting_time,cost,travel_time_hr,waiting_time_hr,in-vehicle travel time,travel_time_BayPass,in-vehicle travel time_BayPass,cost_employed,intercept,in-vehicle travel time_hr,in-vehicle travel time_student,travel_time_student,waiting_time_student,cost_student,weight
0,1,1,1,1,"Yes, I consent",Graduate student,Female,Asian,"Yes, employed off campus",94704,"AC Transit,BART,Walk,Personal bike",No,0,1,1,98.486878,10,0,0,0.166667,0.0,0.166667,0.0,0.0,,1.0,0.166667,0.166667,0.166667,0.0,0,0.0043
1,1,2,0,1,"Yes, I consent",Graduate student,Female,Asian,"Yes, employed off campus",94704,"AC Transit,BART,Walk,Personal bike",No,0,1,1,98.486878,40,0,0,0.666667,0.0,0.666667,0.0,0.0,,1.0,0.666667,0.666667,0.666667,0.0,0,0.0043
2,1,3,0,1,"Yes, I consent",Graduate student,Female,Asian,"Yes, employed off campus",94704,"AC Transit,BART,Walk,Personal bike",No,0,1,1,98.486878,30,10,10,0.5,0.166667,0.666667,0.0,0.166667,1111111111.0,1.0,0.666667,0.666667,0.5,0.166667,10,0.0043
3,1,4,0,1,"Yes, I consent",Graduate student,Female,Asian,"Yes, employed off campus",94704,"AC Transit,BART,Walk,Personal bike",No,0,1,1,98.486878,40,5,1,0.666667,0.083333,0.75,0.0,0.083333,1.0,1.0,0.75,0.75,0.666667,0.083333,1,0.0043
4,1,5,0,1,"Yes, I consent",Graduate student,Female,Asian,"Yes, employed off campus",94704,"AC Transit,BART,Walk,Personal bike",No,0,1,1,98.486878,5,20,4,0.083333,0.333333,0.416667,0.0,0.333333,1111.0,1.0,0.416667,0.416667,0.083333,0.333333,4,0.0043


In [13]:
len(analysis_data['column_id'].value_counts())

541

In [35]:
predictions = pd.DataFrame(index = [1,2,3,4,5,6])
prediction_array = data_mnl.predict(analysis_data)
shares = pd.DataFrame(prediction_array) 
display(shares)

Unnamed: 0,0
0,0.977439
1,0.001984
2,0.003097
3,0.008272
4,0.008756
...,...
4573,0.015699
4574,0.096756
4575,0.774177
4576,0.004333


In [36]:
analysis_data['prediction'] = prediction_array
analysis_data['prediction_weight'] = analysis_data['prediction']*analysis_data['weight']

In [37]:
analysis_data

Unnamed: 0,column_id,mode_id,choice,obs_id,Introduction,Campus Afflication,Gender,Race/Ethnicity,Employment,Zipcode,Modes,BayPass Access,BayPass,student_dummy,employed_dummy,Percent_TOD_y,travel_time,waiting_time,cost,travel_time_hr,waiting_time_hr,in-vehicle travel time,travel_time_BayPass,in-vehicle travel time_BayPass,cost_employed,intercept,in-vehicle travel time_hr,in-vehicle travel time_student,travel_time_student,waiting_time_student,cost_student,weight,prediction,prediction_weight
0,1,1,1,1,"Yes, I consent",Graduate student,Female,Asian,"Yes, employed off campus",94704,"AC Transit,BART,Walk,Personal bike",No,0,1,1,98.486878,10,0,0,0.166667,0.000000,0.166667,0.000000,0.000000,,1.0,0.166667,0.166667,0.166667,0.000000,0,0.0043,0.977439,0.004203
1,1,2,0,1,"Yes, I consent",Graduate student,Female,Asian,"Yes, employed off campus",94704,"AC Transit,BART,Walk,Personal bike",No,0,1,1,98.486878,40,0,0,0.666667,0.000000,0.666667,0.000000,0.000000,,1.0,0.666667,0.666667,0.666667,0.000000,0,0.0043,0.001984,0.000009
2,1,3,0,1,"Yes, I consent",Graduate student,Female,Asian,"Yes, employed off campus",94704,"AC Transit,BART,Walk,Personal bike",No,0,1,1,98.486878,30,10,10,0.500000,0.166667,0.666667,0.000000,0.166667,1111111111,1.0,0.666667,0.666667,0.500000,0.166667,10,0.0043,0.003097,0.000013
3,1,4,0,1,"Yes, I consent",Graduate student,Female,Asian,"Yes, employed off campus",94704,"AC Transit,BART,Walk,Personal bike",No,0,1,1,98.486878,40,5,1,0.666667,0.083333,0.750000,0.000000,0.083333,1,1.0,0.750000,0.750000,0.666667,0.083333,1,0.0043,0.008272,0.000036
4,1,5,0,1,"Yes, I consent",Graduate student,Female,Asian,"Yes, employed off campus",94704,"AC Transit,BART,Walk,Personal bike",No,0,1,1,98.486878,5,20,4,0.083333,0.333333,0.416667,0.000000,0.333333,1111,1.0,0.416667,0.416667,0.083333,0.333333,4,0.0043,0.008756,0.000038
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4573,763,2,0,148,"Yes, I consent",Undergraduate student,Female,White,"Yes, employed on campus as a student",94704,"AC Transit,BART,Walk,Personal bike,Carpool",Yes,1,1,1,98.486878,35,0,0,0.583333,0.000000,0.583333,0.583333,0.583333,,1.0,0.583333,0.583333,0.583333,0.000000,0,0.0043,0.015699,0.000068
4574,763,3,0,148,"Yes, I consent",Undergraduate student,Female,White,"Yes, employed on campus as a student",94704,"AC Transit,BART,Walk,Personal bike,Carpool",Yes,1,1,1,98.486878,20,20,0,0.333333,0.333333,0.666667,0.333333,0.666667,,1.0,0.666667,0.666667,0.333333,0.333333,0,0.0043,0.096756,0.000416
4575,763,4,1,148,"Yes, I consent",Undergraduate student,Female,White,"Yes, employed on campus as a student",94704,"AC Transit,BART,Walk,Personal bike,Carpool",Yes,1,1,1,98.486878,10,5,0,0.166667,0.083333,0.250000,0.166667,0.250000,,1.0,0.250000,0.250000,0.166667,0.083333,0,0.0043,0.774177,0.003329
4576,763,5,0,148,"Yes, I consent",Undergraduate student,Female,White,"Yes, employed on campus as a student",94704,"AC Transit,BART,Walk,Personal bike,Carpool",Yes,1,1,1,98.486878,20,20,8,0.333333,0.333333,0.666667,0.333333,0.666667,11111111,1.0,0.666667,0.666667,0.333333,0.333333,8,0.0043,0.004333,0.000019


### Mode share predictions by mode ID:

In [17]:
analysis_data.groupby('mode_id')['prediction'].sum()/analysis_data.groupby('mode_id')['prediction'].sum().sum()

mode_id
1    0.391205
2    0.137619
3    0.209922
4    0.181416
5    0.057568
6    0.022271
Name: prediction, dtype: float64

In [38]:
analysis_data.groupby('mode_id')['prediction_weight'].sum()/analysis_data.groupby('mode_id')['prediction_weight'].sum().sum()

mode_id
1    0.365001
2    0.137610
3    0.231542
4    0.180255
5    0.063300
6    0.022291
Name: prediction_weight, dtype: float64

## Forecasting 
Assuming BayPass (BayPass parameter is 1 for all and cost for bus and rail is 0)


In [41]:
analysis_data['BayPass'] = 1
analysis_data['cost'] = np.where((analysis_data['mode_id'] == 3) | (analysis_data['mode_id'] == 4), 0, analysis_data['cost'])
analysis_data.head()

Unnamed: 0,column_id,mode_id,choice,obs_id,Introduction,Campus Afflication,Gender,Race/Ethnicity,Employment,Zipcode,Modes,BayPass Access,BayPass,student_dummy,employed_dummy,Percent_TOD_y,travel_time,waiting_time,cost,travel_time_hr,waiting_time_hr,in-vehicle travel time,travel_time_BayPass,in-vehicle travel time_BayPass,cost_employed,intercept,in-vehicle travel time_hr,in-vehicle travel time_student,travel_time_student,waiting_time_student,cost_student,weight,prediction,prediction_weight
0,1,1,1,1,"Yes, I consent",Graduate student,Female,Asian,"Yes, employed off campus",94704,"AC Transit,BART,Walk,Personal bike",No,1,1,1,98.486878,10,0,0,0.166667,0.0,0.166667,0.0,0.0,,1.0,0.166667,0.166667,0.166667,0.0,0,0.0043,0.977439,0.004203
1,1,2,0,1,"Yes, I consent",Graduate student,Female,Asian,"Yes, employed off campus",94704,"AC Transit,BART,Walk,Personal bike",No,1,1,1,98.486878,40,0,0,0.666667,0.0,0.666667,0.0,0.0,,1.0,0.666667,0.666667,0.666667,0.0,0,0.0043,0.001984,9e-06
2,1,3,0,1,"Yes, I consent",Graduate student,Female,Asian,"Yes, employed off campus",94704,"AC Transit,BART,Walk,Personal bike",No,1,1,1,98.486878,30,10,0,0.5,0.166667,0.666667,0.0,0.166667,1111111111.0,1.0,0.666667,0.666667,0.5,0.166667,10,0.0043,0.003097,1.3e-05
3,1,4,0,1,"Yes, I consent",Graduate student,Female,Asian,"Yes, employed off campus",94704,"AC Transit,BART,Walk,Personal bike",No,1,1,1,98.486878,40,5,0,0.666667,0.083333,0.75,0.0,0.083333,1.0,1.0,0.75,0.75,0.666667,0.083333,1,0.0043,0.008272,3.6e-05
4,1,5,0,1,"Yes, I consent",Graduate student,Female,Asian,"Yes, employed off campus",94704,"AC Transit,BART,Walk,Personal bike",No,1,1,1,98.486878,5,20,4,0.083333,0.333333,0.416667,0.0,0.333333,1111.0,1.0,0.416667,0.416667,0.083333,0.333333,4,0.0043,0.008756,3.8e-05


In [42]:
predictions = pd.DataFrame(index = [1,2,3,4,5,6])
prediction_array = data_mnl.predict(analysis_data)
shares = pd.DataFrame(prediction_array) 
display(shares)

Unnamed: 0,0
0,0.948751
1,0.001926
2,0.020927
3,0.019459
4,0.008499
...,...
4573,0.015699
4574,0.096756
4575,0.774177
4576,0.004333


In [43]:
analysis_data['prediction'] = prediction_array
analysis_data['prediction_weight'] = analysis_data['prediction']*analysis_data['weight']

In [44]:
analysis_data.groupby('mode_id')['prediction_weight'].sum()/analysis_data.groupby('mode_id')['prediction_weight'].sum().sum()

mode_id
1    0.315896
2    0.108875
3    0.284701
4    0.227621
5    0.046949
6    0.015957
Name: prediction_weight, dtype: float64

### TOD Analysis
Analysis if zip codes were all 80% TOD

In [45]:
analysis_data['Percent_TOD_y'].unique()

array([ 98.48687783,  99.99999964,   0.        ,  93.77581209,
        99.9999985 ,  25.80839951,   1.38026858,  99.16200905,
        15.98691431, 100.0000002 ,  99.84363557, 100.0000003 ,
        99.99999836, 100.0000006 , 100.0000005 ,  93.06029311,
       100.        ,  88.45883676,  62.48696622,  33.00207106,
        44.63458575,  49.20335043,  64.85666278,  54.08034902,
        77.28897427,  44.31339478,  59.08893153,  99.99999972,
        99.99999877,   8.45738834,  99.19124876,  99.43540229,
        61.56853093,  23.69920849,  99.9999986 ,  43.03479958,
        87.20459277, 100.0000004 ,  93.62248949,  14.54575493])