In [1]:
import sys, os, copy, itertools
import numpy as np
import pandas as pd
import statsmodels.api as st
import statsmodels.formula.api as smf
import matplotlib.pyplot as plt

In [2]:
INDIR = r'Q:\Model Research\Swiftly\processed\v0.6'
INFILE = r'choices_with_valid_rideshare.csv'
choices = pd.read_csv(os.path.join(INDIR,INFILE))
# drop duplicate listings within a search_id
choices = choices.drop_duplicates(subset=['search_id','travel_provider','travel_product'])
# filter to just records where a choice is made
chosen_search_ids = choices.loc[choices['chosen']==1,'search_id'].tolist()
choices = choices[choices['search_id'].isin(chosen_search_ids)]

In [3]:
# clean data using 'drop' flag
choices['drop'] = 0
# flag records with invalid walk time or travel time
choices.loc[(choices['travel_mode'] == 'transit') & ((choices['travel_initial_walk_time'] == -1) | (pd.isnull(choices['travel_initial_walk_time']))),'drop'] = 1
choices.loc[choices['travel_time'] == 0,'drop'] = 1
# flag records with both trip-ends outside of 9 counties
choices.loc[(choices['county_start']=='Other') & (choices['county_end']=='Other'),'drop'] = 1
# flag records with invalid cost
choices.loc[(choices['travel_mode'] == 'transit') & (choices['travel_price']==-1),'drop'] = 1
# drop flagged records
choices = choices[choices['drop']==0]

In [4]:
# get walk and wait time in minutes to next 3 arrivals
choices['search_time'] = pd.to_datetime(choices['timestamp'])
choices['arr1'] = pd.to_datetime(choices['travel_arrival_time_1'])
choices['arr2'] = pd.to_datetime(choices['travel_arrival_time_2'])
choices['arr3'] = pd.to_datetime(choices['travel_arrival_time_3'])
choices['wait1'] = choices['arr1'] - choices['search_time']
choices['wait2'] = choices['arr2'] - choices['search_time']
choices['wait3'] = choices['arr3'] - choices['search_time']
choices['wait1'] = choices['wait1'].map(lambda x:x.total_seconds() / 60.0)
choices['wait2'] = choices['wait2'].map(lambda x:x.total_seconds() / 60.0)
choices['wait3'] = choices['wait3'].map(lambda x:x.total_seconds() / 60.0)
choices['walk_time'] = choices['travel_initial_walk_time'] / 60.0
# calc headways in minutes
choices['h1_2'] = choices['wait2'] - choices['wait1']
choices['h2_3'] = choices['wait3'] - choices['wait2']
choices['h3_3'] = choices['wait3'] - choices['wait1']
# pick first wait time that the user has time to walk to
choices['wait'] = choices['wait1']
choices.loc[(choices['wait'] < choices['walk_time']) & (pd.notnull(choices['wait2'])),'wait'] = choices['wait2']
choices.loc[(choices['wait'] < choices['walk_time']) & (pd.notnull(choices['wait3'])),'wait'] = choices['wait3']
choices['wait'] = choices['wait'] - choices['walk_time']
choices['headway'] = 99
choices.loc[pd.notnull(choices['h1_2']),'headway'] = choices['h1_2']
choices['n_arrivals'] = 0
choices.loc[pd.notnull(choices['arr1']),'n_arrivals'] += 1
choices.loc[pd.notnull(choices['arr2']),'n_arrivals'] += 1
choices.loc[pd.notnull(choices['arr3']),'n_arrivals'] += 1
# rename pool; it shows up as pool and uberpool in the data
choices.loc[choices['travel_product']=='pool','travel_product'] = 'uberpool'
# classify uber products into 4 types
choices['uber_mode_class'] = np.nan
choices.loc[choices['travel_product'].isin(['uberx','uberxl']),'uber_mode_class'] = 'basic'
choices.loc[choices['travel_product'].isin(['uberselect','uberblack','ubersuv']),'uber_mode_class'] = 'premium'
choices.loc[choices['travel_product']=='uberpool','uber_mode_class'] = 'pool'
choices.loc[choices['travel_product']=='ubertaxi','uber_mode_class'] = 'taxi'
choices.loc[(pd.isnull(choices['uber_mode_class'])) & (choices['travel_provider']=='Uber'),'uber_mode_class'] = 'other'
# classify uber products into 2 types
choices['uber_mode_class2'] = np.nan
choices.loc[choices['travel_product'].isin(['uberx','uberxl','pool','taxi']),'uber_mode_class2'] = 'basic'
choices.loc[choices['travel_product'].isin(['uberselect','uberblack','ubersuv']),'uber_mode_class2'] = 'premium'
choices.loc[(pd.isnull(choices['uber_mode_class'])) & (choices['travel_provider']=='Uber'),'uber_mode_class2'] = 'basic'

In [5]:
user_info = pd.read_csv(r'Q:\Model Research\Swiftly\processed\v0.6\user_data\users_with_demographics.csv')

  interactivity=interactivity, compiler=compiler, result=result)


In [6]:
user_info = user_info.set_index('user_id')
choices = choices.set_index('user_id')
choices['master_user_id'] = user_info['master_user_id']
choices['gender'] = user_info['gender']
choices['age'] = user_info['age']
choices['age_cat'] = user_info['age_cat']
choices['hhinc_cat'] = user_info['hhinc_cat']
choices['race'] = user_info['race']
choices['employ'] = user_info['employ']
choices['hhinc'] = user_info['hhinc']
choices['hhsize'] = user_info['hhsize']
choices['hhvehs'] = user_info['hhvehs']
choices['female'] = 0
choices.loc[choices['gender']==2,'female'] = 1
choices = choices.reset_index()

In [7]:
# set up estimation set.
estimation_set = pd.DataFrame(index=choices['search_id'].drop_duplicates())
chosen_mode = choices[choices['chosen']==1]
chosen_mode = chosen_mode.set_index('search_id')
estimation_set['chosen_mode'] = chosen_mode['travel_mode']
estimation_set['chosen_mode2'] = chosen_mode['travel_mode']
estimation_set['chosen_mode3'] = chosen_mode['travel_mode']
estimation_set.loc[estimation_set['chosen_mode']=='rideshare','chosen_mode'] = chosen_mode['uber_mode_class']
estimation_set.loc[estimation_set['chosen_mode2']=='rideshare','chosen_mode2'] = chosen_mode['uber_mode_class2']
estimation_set.loc[estimation_set['chosen_mode3']=='rideshare','chosen_mode3'] = "uber"

estimation_set = estimation_set.dropna()
print estimation_set.groupby(['chosen_mode3']).count().max(axis=1)

estimation_set['start_is_near_home'] = chosen_mode['start_is_near_home'].replace(-1, np.nan)
estimation_set['start_is_near_work'] = chosen_mode['start_is_near_work'].replace(-1, np.nan)
estimation_set['end_is_near_home'] = chosen_mode['end_is_near_home'].replace(-1, np.nan)
estimation_set['end_is_near_work'] = chosen_mode['end_is_near_work'].replace(-1, np.nan)


chosen_mode3
biking       107
scoot        348
transit    19189
uber         779
walking     1571
dtype: int64


In [8]:
# add demographcis to estimation set
estimation_set['master_user_id'] = chosen_mode['master_user_id']
estimation_set['age_cat'] = chosen_mode['age_cat']
estimation_set['age'] = chosen_mode['age']
estimation_set['hhinc_cat'] = chosen_mode['hhinc_cat']
estimation_set['hhinc'] = chosen_mode['hhinc']
estimation_set['employ'] = chosen_mode['employ']
estimation_set['hhvehs'] = chosen_mode['hhvehs']
estimation_set['female'] = chosen_mode['female']

# convert to dummies
estimation_set = pd.get_dummies(estimation_set,columns=['master_user_id','age_cat','hhinc_cat','employ'], dummy_na=True, drop_first=True)

# get dummy columns
user_dummy_cols = []
age_dummy_cols = []
inc_dummy_cols = []
employ_dummy_cols = []
for col in estimation_set.columns.tolist():
    if col[:14] == 'master_user_id':
        user_dummy_cols.append(col)
    elif col[:7] == 'age_cat':
        age_dummy_cols.append(col)
    elif col[:9] == 'hhinc_cat':
        inc_dummy_cols.append(col)
    elif col[:6] == 'employ':
        employ_dummy_cols.append(col)
        
estimation_set['master_user_id'] = chosen_mode['master_user_id']
print age_dummy_cols
print inc_dummy_cols
print employ_dummy_cols

['age_cat_18-24', 'age_cat_25-34', 'age_cat_35-44', 'age_cat_45-54', 'age_cat_55-64', 'age_cat_65+', 'age_cat_nan']
['hhinc_cat_$25-$50', 'hhinc_cat_$50-$75', 'hhinc_cat_$75+', 'hhinc_cat_nan']
['employ_2.0', 'employ_3.0', 'employ_4.0', 'employ_5.0', 'employ_nan']


In [9]:
transit_options = choices[choices['travel_mode']=='transit']
transit_options.groupby(['travel_provider'])['chosen'].sum()

travel_provider
Alcatraz Hornblower Ferry                            0.0
Altamont Commuter Express                            0.0
Bay Area Rapid Transit                            1917.0
Bear Transit - UC Berkeley Shuttle                   6.0
Blue & Gold Fleet                                    1.0
Caltrain                                           109.0
Commute.org Shuttle                                  6.0
County Connection                                   13.0
Golden Gate Ferry                                    0.0
Marin Transit                                        6.0
Mountain View Community Shuttle                      3.0
PresidiGo Shuttle                                   37.0
SamTrans                                           151.0
San Francisco Bay Ferry                              2.0
San Francisco Municipal Transportation Agency    16428.0
Santa Cruz Metro                                     5.0
SolTrans                                             2.0
VTA            

In [10]:
trn_sch_group = transit_options.groupby(['search_id'])
best_transit = transit_options.loc[trn_sch_group['travel_time'].idxmin(),:]
best_transit = best_transit.set_index('search_id')

In [11]:
estimation_set['transit_cost'] = best_transit['travel_price']
estimation_set['transit_time'] = best_transit['travel_time'] / 60.0
estimation_set['transit_dist'] = best_transit['travel_distance'] / 5280.0
estimation_set['wait'] = best_transit['wait']
estimation_set['headway'] = best_transit['headway']
estimation_set['walk'] = best_transit['walk_time']
estimation_set['n_arr'] = best_transit['n_arrivals']
n_transit = transit_options.groupby(['search_id']).count().max(axis=1)
estimation_set['n_transit'] = n_transit
estimation_set = estimation_set.dropna(subset=['transit_cost','transit_time','transit_dist'])
estimation_set['wait'] = estimation_set['wait'].fillna(999)

In [12]:
choices.groupby('uber_mode_class')['chosen'].sum()

uber_mode_class
basic      444.0
other       43.0
pool       363.0
premium    335.0
taxi        49.0
Name: chosen, dtype: float64

In [13]:
g = choices.groupby(['travel_provider','travel_product'])
s = g['chosen'].sum()
s.loc[['Uber',]]

travel_provider  travel_product
Uber             assist             37.0
                 uberblack         154.0
                 uberpool          363.0
                 uberselect        103.0
                 ubersuv            78.0
                 ubertaxi           49.0
                 uberwav             6.0
                 uberx             373.0
                 uberxl             71.0
Name: chosen, dtype: float64

In [14]:
uber_grouped = choices[choices['travel_provider']=='Uber'].groupby(['search_id','uber_mode_class'])
best_uber = uber_grouped['travel_price'].idxmin()
best_uber_idx = copy.deepcopy(best_uber)
best_uber = choices.loc[best_uber_idx,['search_id','travel_provider','uber_mode_class','travel_product','travel_price','travel_distance','travel_time']]
price = best_uber.pivot(index='search_id',columns='uber_mode_class',values='travel_price')
dist = best_uber.pivot(index='search_id',columns='uber_mode_class',values='travel_distance')
time = best_uber.pivot(index='search_id',columns='uber_mode_class',values='travel_time')

In [15]:
best_uber

Unnamed: 0,search_id,travel_provider,uber_mode_class,travel_product,travel_price,travel_distance,travel_time
224920,309474,Uber,basic,uberx,6.55,2144.0,393.0
224921,309474,Uber,premium,uberselect,13.49,2144.0,393.0
224925,309474,Uber,taxi,ubertaxi,6.55,2144.0,393.0
100855,309475,Uber,basic,uberx,7.48,3335.0,422.0
100856,309475,Uber,premium,uberselect,15.77,3335.0,422.0
78546,309477,Uber,basic,uberx,38.95,39564.0,1945.0
78547,309477,Uber,premium,uberselect,90.36,39564.0,1945.0
59904,309481,Uber,basic,uberx,22.74,20872.0,1166.0
59907,309481,Uber,premium,uberselect,51.93,20872.0,1166.0
310828,309487,Uber,basic,uberx,15.29,10647.0,1128.0


In [16]:
estimation_set['uber_basic_cost'] = price['basic']
estimation_set['uber_basic_dist'] = dist['basic'] / 5280.0
estimation_set['uber_basic_time'] = time['basic'] / 60.0
estimation_set['uber_premium_cost'] = price['premium']
estimation_set['uber_premium_dist'] = dist['premium'] / 5280.0
estimation_set['uber_premium_time'] = time['premium'] / 60.0
estimation_set['uber_taxi_cost'] = price['taxi']
estimation_set['uber_taxi_dist'] = dist['taxi'] / 5280.0
estimation_set['uber_taxi_time'] = time['taxi'] / 60.0
estimation_set['uber_other_cost'] = price['other']
estimation_set['uber_other_dist'] = dist['other'] / 5280.0
estimation_set['uber_other_time'] = time['other'] / 60.0
estimation_set['timestamp'] = choices.groupby('search_id')['timestamp'].first()

In [17]:
dti = pd.DatetimeIndex(estimation_set['timestamp'])
dti = dti.tz_localize('UTC')
dti_uspac =dti.tz_convert('US/Pacific')
estimation_set['dayofweek'] = dti_uspac.dayofweek
estimation_set['time'] = dti_uspac.time
estimation_set['date'] = dti_uspac.date
estimation_set['hour'] = dti_uspac.hour
estimation_set['late_night'] = 0
estimation_set.loc[estimation_set['hour'].isin([22,23,0,1,2,3]),'late_night'] = 1

In [18]:
estimation_subset = estimation_set[estimation_set['chosen_mode3'].isin(['transit','uber'])]

In [19]:
print estimation_subset.groupby('chosen_mode').count().max(axis=1)
print estimation_subset.groupby(['chosen_mode3']).count().max(axis=1)

chosen_mode
basic        255
premium      202
transit    19189
dtype: int64
chosen_mode3
transit    19189
uber         457
dtype: int64


In [20]:
subset2 = estimation_set[estimation_set['chosen_mode3'].isin(['transit','uber'])]
subset2['walkwait'] = subset2['walk'] + subset2['wait']
subset2['ln_wait'] = np.log(subset2['wait']+1)
subset2['ln_walk'] = np.log(subset2['walk']+1)
subset2['ln_walkwait'] = np.log(subset2['walkwait']+1)
subset2['ln_trn_cost'] = np.log(subset2['transit_cost']+1)
subset2['ln_transit_time'] = np.log(subset2['transit_time']+1)
subset2['ln_uber_basic_time'] = np.log(subset2['uber_basic_time']+1)
subset2['ln_headway'] = np.log(subset2['headway']+1)
subset2['tot_trn_time'] = subset2['walk'] + subset2['wait'] + subset2['transit_time']
subset2['uber_time_save'] = subset2['tot_trn_time'] - subset2['uber_basic_time']
subset2['uber_pct_time_save'] = subset2['uber_time_save'] / subset2['tot_trn_time']
subset2['start_or_end_near_home'] = np.nan
subset2.loc[(subset2['start_is_near_home']==0) & (subset2['end_is_near_home']==0),'start_or_end_near_home'] = 0
subset2.loc[(subset2['start_is_near_home']==1) | (subset2['end_is_near_home']==1),'start_or_end_near_home'] = 1
subset2['start_or_end_near_work'] = np.nan
subset2.loc[(subset2['start_is_near_work']==0) & (subset2['end_is_near_work']==0),'start_or_end_near_work'] = 0
subset2.loc[(subset2['start_is_near_work']==1) | (subset2['end_is_near_work']==1),'start_or_end_near_work'] = 1

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  from ipykernel import kernelapp as app
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  app.launch_new_instance()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas

In [21]:
# --------------------------
# - SERIES 1               - 
# --------------------------
# try everything I think might matter except late_night
# n_transit: Number of transit options presented to the user
# n_arr:     Number of arrivals for the best transit option
# walk:      Walk time (in minutes) of the best transit option
# wait:      Wait time (in minutes) of the best transit option for the first arrival that the user has enough time to walk to
# headway:   Headway estimate based on 3 presented arrival times
# transit_time: IVT for best transit option
# uber_basic_time: IVT for best uberx option (same travel time for any uber service...)
# transit_cost: Fare (USD) for entire transit trip (includes transfers)
# uber_basic_cost: Fare (USD) for uberx (other uber service prices vary, does not include surge)

exog_cols = ['n_transit','n_arr','walk','wait','headway','transit_time','uber_basic_time','transit_cost','uber_basic_cost']
defn = subset2.dropna(subset=exog_cols)
y = defn['chosen_mode3']
x = defn.loc[:,exog_cols]
x['const'] = 1

s2_mdl = st.MNLogit(y, x)
s2_fit = s2_mdl.fit(maxiter=100)
print s2_fit.summary()

Optimization terminated successfully.
         Current function value: 0.105631
         Iterations 8
                          MNLogit Regression Results                          
Dep. Variable:           chosen_mode3   No. Observations:                19641
Model:                        MNLogit   Df Residuals:                    19631
Method:                           MLE   Df Model:                            9
Date:                Wed, 20 Jul 2016   Pseudo R-squ.:                 0.04404
Time:                        17:55:32   Log-Likelihood:                -2074.7
converged:                       True   LL-Null:                       -2170.3
                                        LLR p-value:                 2.363e-36
chosen_mode3=uber       coef    std err          z      P>|z|      [95.0% Conf. Int.]
-------------------------------------------------------------------------------------
n_transit            -0.5788      0.054    -10.705      0.000        -0.685    -0.473
n_arr   

In [22]:
# --------------------------
# - SERIES 1               - 
# --------------------------
# drop n_arr
exog_cols = ['n_transit','walk','wait','headway','transit_time','uber_basic_time','transit_cost','uber_basic_cost']
defn = subset2.dropna(subset=exog_cols)
y = defn['chosen_mode3']
x = defn.loc[:,exog_cols]
x['const'] = 1

s2_mdl = st.MNLogit(y, x)
s2_fit = s2_mdl.fit(maxiter=100)
print s2_fit.summary()

Optimization terminated successfully.
         Current function value: 0.105634
         Iterations 8
                          MNLogit Regression Results                          
Dep. Variable:           chosen_mode3   No. Observations:                19641
Model:                        MNLogit   Df Residuals:                    19632
Method:                           MLE   Df Model:                            8
Date:                Wed, 20 Jul 2016   Pseudo R-squ.:                 0.04402
Time:                        17:55:32   Log-Likelihood:                -2074.8
converged:                       True   LL-Null:                       -2170.3
                                        LLR p-value:                 4.887e-37
chosen_mode3=uber       coef    std err          z      P>|z|      [95.0% Conf. Int.]
-------------------------------------------------------------------------------------
n_transit            -0.5794      0.054    -10.719      0.000        -0.685    -0.473
walk    

In [23]:
# --------------------------
# - SERIES 1               - 
# --------------------------
# drop walk
exog_cols = ['n_transit','wait','headway','transit_time','uber_basic_time','transit_cost','uber_basic_cost']
defn = subset2.dropna(subset=exog_cols)
y = defn['chosen_mode3']
x = defn.loc[:,exog_cols]
x['const'] = 1

s2_mdl = st.MNLogit(y, x)
s2_fit = s2_mdl.fit(maxiter=100)
print s2_fit.summary()

Optimization terminated successfully.
         Current function value: 0.105636
         Iterations 8
                          MNLogit Regression Results                          
Dep. Variable:           chosen_mode3   No. Observations:                19641
Model:                        MNLogit   Df Residuals:                    19633
Method:                           MLE   Df Model:                            7
Date:                Wed, 20 Jul 2016   Pseudo R-squ.:                 0.04399
Time:                        17:55:32   Log-Likelihood:                -2074.8
converged:                       True   LL-Null:                       -2170.3
                                        LLR p-value:                 9.391e-38
chosen_mode3=uber       coef    std err          z      P>|z|      [95.0% Conf. Int.]
-------------------------------------------------------------------------------------
n_transit            -0.5804      0.054    -10.763      0.000        -0.686    -0.475
wait    

In [24]:
# --------------------------
# - SERIES 1               - 
# --------------------------
# drop headway
exog_cols = ['n_transit','wait','transit_time','uber_basic_time','transit_cost','uber_basic_cost']
defn = subset2.dropna(subset=exog_cols)
y = defn['chosen_mode3']
x = defn.loc[:,exog_cols]
x['const'] = 1

s2_mdl = st.MNLogit(y, x)
s2_fit = s2_mdl.fit(maxiter=100)
print s2_fit.summary()

Optimization terminated successfully.
         Current function value: 0.105652
         Iterations 8
                          MNLogit Regression Results                          
Dep. Variable:           chosen_mode3   No. Observations:                19641
Model:                        MNLogit   Df Residuals:                    19634
Method:                           MLE   Df Model:                            6
Date:                Wed, 20 Jul 2016   Pseudo R-squ.:                 0.04385
Time:                        17:55:32   Log-Likelihood:                -2075.1
converged:                       True   LL-Null:                       -2170.3
                                        LLR p-value:                 2.180e-38
chosen_mode3=uber       coef    std err          z      P>|z|      [95.0% Conf. Int.]
-------------------------------------------------------------------------------------
n_transit            -0.5801      0.054    -10.754      0.000        -0.686    -0.474
wait    

In [25]:
# --------------------------
# - SERIES 1               - 
# --------------------------
# drop uber time
exog_cols = ['n_transit','wait','transit_time','transit_cost','uber_basic_cost']
defn = subset2.dropna(subset=exog_cols)
y = defn['chosen_mode3']
x = defn.loc[:,exog_cols]
x['const'] = 1

s2_mdl = st.MNLogit(y, x)
s2_fit = s2_mdl.fit(maxiter=100)
print s2_fit.summary()

Optimization terminated successfully.
         Current function value: 0.105678
         Iterations 8
                          MNLogit Regression Results                          
Dep. Variable:           chosen_mode3   No. Observations:                19641
Model:                        MNLogit   Df Residuals:                    19635
Method:                           MLE   Df Model:                            5
Date:                Wed, 20 Jul 2016   Pseudo R-squ.:                 0.04362
Time:                        17:55:33   Log-Likelihood:                -2075.6
converged:                       True   LL-Null:                       -2170.3
                                        LLR p-value:                 5.468e-39
chosen_mode3=uber       coef    std err          z      P>|z|      [95.0% Conf. Int.]
-------------------------------------------------------------------------------------
n_transit            -0.5738      0.053    -10.730      0.000        -0.679    -0.469
wait    

In [26]:
# --------------------------
# - SERIES 1               - 
# --------------------------
# drop transit_time and uber_cost
exog_cols = ['n_transit','wait','transit_cost']
defn = subset2.dropna(subset=exog_cols)
y = defn['chosen_mode3']
x = defn.loc[:,exog_cols]
x['const'] = 1

s2_mdl = st.MNLogit(y, x)
s2_fit = s2_mdl.fit(maxiter=100)
print s2_fit.summary()

Optimization terminated successfully.
         Current function value: 0.105683
         Iterations 8
                          MNLogit Regression Results                          
Dep. Variable:           chosen_mode3   No. Observations:                19646
Model:                        MNLogit   Df Residuals:                    19642
Method:                           MLE   Df Model:                            3
Date:                Wed, 20 Jul 2016   Pseudo R-squ.:                 0.04338
Time:                        17:55:33   Log-Likelihood:                -2076.2
converged:                       True   LL-Null:                       -2170.4
                                        LLR p-value:                 1.420e-40
chosen_mode3=uber       coef    std err          z      P>|z|      [95.0% Conf. Int.]
-------------------------------------------------------------------------------------
n_transit            -0.5690      0.053    -10.787      0.000        -0.672    -0.466
wait    

In [27]:
# --------------------------
# - SERIES 2               - 
# --------------------------
# start again with everything; add is_near_home and work
exog_cols = ['n_transit','n_arr','walk','wait','headway','transit_time',
             'uber_basic_time','transit_cost','uber_basic_cost',
             'start_is_near_home','end_is_near_home','start_is_near_work','end_is_near_work']
defn = subset2.dropna(subset=exog_cols)
y = defn['chosen_mode3']
x = defn.loc[:,exog_cols]
x['const'] = 1

s2_mdl = st.MNLogit(y, x)
s2_fit = s2_mdl.fit(maxiter=100)
print s2_fit.summary()

Optimization terminated successfully.
         Current function value: 0.106525
         Iterations 8
                          MNLogit Regression Results                          
Dep. Variable:           chosen_mode3   No. Observations:                16026
Model:                        MNLogit   Df Residuals:                    16012
Method:                           MLE   Df Model:                           13
Date:                Wed, 20 Jul 2016   Pseudo R-squ.:                 0.05213
Time:                        17:55:33   Log-Likelihood:                -1707.2
converged:                       True   LL-Null:                       -1801.1
                                        LLR p-value:                 4.387e-33
 chosen_mode3=uber       coef    std err          z      P>|z|      [95.0% Conf. Int.]
--------------------------------------------------------------------------------------
n_transit             -0.5427      0.060     -9.098      0.000        -0.660    -0.426
n_arr

In [28]:
# --------------------------
# - SERIES 2               - 
# --------------------------
# drop walk and n_arr
exog_cols = ['n_transit','wait','headway','transit_time',
             'uber_basic_time','transit_cost','uber_basic_cost',
             'start_is_near_home','end_is_near_home','start_is_near_work','end_is_near_work']
defn = subset2.dropna(subset=exog_cols)
y = defn['chosen_mode3']
x = defn.loc[:,exog_cols]
x['const'] = 1

s2_mdl = st.MNLogit(y, x)
s2_fit = s2_mdl.fit(maxiter=100)
print s2_fit.summary()

Optimization terminated successfully.
         Current function value: 0.106526
         Iterations 8
                          MNLogit Regression Results                          
Dep. Variable:           chosen_mode3   No. Observations:                16026
Model:                        MNLogit   Df Residuals:                    16014
Method:                           MLE   Df Model:                           11
Date:                Wed, 20 Jul 2016   Pseudo R-squ.:                 0.05212
Time:                        17:55:33   Log-Likelihood:                -1707.2
converged:                       True   LL-Null:                       -1801.1
                                        LLR p-value:                 2.576e-34
 chosen_mode3=uber       coef    std err          z      P>|z|      [95.0% Conf. Int.]
--------------------------------------------------------------------------------------
n_transit             -0.5424      0.059     -9.121      0.000        -0.659    -0.426
wait 

In [29]:
# --------------------------
# - SERIES 2               - 
# --------------------------
# drop headway, transit_cost, transit_time
exog_cols = ['n_transit','wait',
             'uber_basic_time','uber_basic_cost',
             'start_is_near_home','end_is_near_home','start_is_near_work','end_is_near_work']
defn = subset2.dropna(subset=exog_cols)
y = defn['chosen_mode3']
x = defn.loc[:,exog_cols]
x['const'] = 1

s2_mdl = st.MNLogit(y, x)
s2_fit = s2_mdl.fit(maxiter=100)
print s2_fit.summary()

Optimization terminated successfully.
         Current function value: 0.106550
         Iterations 8
                          MNLogit Regression Results                          
Dep. Variable:           chosen_mode3   No. Observations:                16026
Model:                        MNLogit   Df Residuals:                    16017
Method:                           MLE   Df Model:                            8
Date:                Wed, 20 Jul 2016   Pseudo R-squ.:                 0.05191
Time:                        17:55:33   Log-Likelihood:                -1707.6
converged:                       True   LL-Null:                       -1801.1
                                        LLR p-value:                 3.524e-36
 chosen_mode3=uber       coef    std err          z      P>|z|      [95.0% Conf. Int.]
--------------------------------------------------------------------------------------
n_transit             -0.5437      0.059     -9.182      0.000        -0.660    -0.428
wait 

In [30]:
# --------------------------
# - SERIES 2               - 
# --------------------------
# drop uber_basic_time
exog_cols = ['n_transit','wait','uber_basic_cost',
             'start_is_near_home','end_is_near_home','start_is_near_work','end_is_near_work']
defn = subset2.dropna(subset=exog_cols)
y = defn['chosen_mode3']
x = defn.loc[:,exog_cols]
x['const'] = 1

s2_mdl = st.MNLogit(y, x)
s2_fit = s2_mdl.fit(maxiter=100)
print s2_fit.summary()

Optimization terminated successfully.
         Current function value: 0.106597
         Iterations 8
                          MNLogit Regression Results                          
Dep. Variable:           chosen_mode3   No. Observations:                16026
Model:                        MNLogit   Df Residuals:                    16018
Method:                           MLE   Df Model:                            7
Date:                Wed, 20 Jul 2016   Pseudo R-squ.:                 0.05149
Time:                        17:55:33   Log-Likelihood:                -1708.3
converged:                       True   LL-Null:                       -1801.1
                                        LLR p-value:                 1.363e-36
 chosen_mode3=uber       coef    std err          z      P>|z|      [95.0% Conf. Int.]
--------------------------------------------------------------------------------------
n_transit             -0.5512      0.059     -9.334      0.000        -0.667    -0.435
wait 

In [31]:
# --------------------------
# - SERIES 2               - 
# --------------------------
# drop uber_basic_cost
exog_cols = ['n_transit','wait',
             'start_is_near_home','end_is_near_home','start_is_near_work','end_is_near_work']
defn = subset2.dropna(subset=exog_cols)
y = defn['chosen_mode3']
x = defn.loc[:,exog_cols]
x['const'] = 1

s2_mdl = st.MNLogit(y, x)
s2_fit = s2_mdl.fit(maxiter=100)
print s2_fit.summary()

Optimization terminated successfully.
         Current function value: 0.106609
         Iterations 8
                          MNLogit Regression Results                          
Dep. Variable:           chosen_mode3   No. Observations:                16031
Model:                        MNLogit   Df Residuals:                    16024
Method:                           MLE   Df Model:                            6
Date:                Wed, 20 Jul 2016   Pseudo R-squ.:                 0.05115
Time:                        17:55:34   Log-Likelihood:                -1709.0
converged:                       True   LL-Null:                       -1801.2
                                        LLR p-value:                 4.217e-37
 chosen_mode3=uber       coef    std err          z      P>|z|      [95.0% Conf. Int.]
--------------------------------------------------------------------------------------
n_transit             -0.5629      0.058     -9.684      0.000        -0.677    -0.449
wait 

In [32]:
# --------------------------
# - SERIES 2               - 
# --------------------------
# drop start_is_near_home
exog_cols = ['n_transit','wait',
             'end_is_near_home','start_is_near_work','end_is_near_work']
defn = subset2.dropna(subset=exog_cols)
y = defn['chosen_mode3']
x = defn.loc[:,exog_cols]
x['const'] = 1

s2_mdl = st.MNLogit(y, x)
s2_fit = s2_mdl.fit(maxiter=100)
print s2_fit.summary()

Optimization terminated successfully.
         Current function value: 0.106682
         Iterations 8
                          MNLogit Regression Results                          
Dep. Variable:           chosen_mode3   No. Observations:                16031
Model:                        MNLogit   Df Residuals:                    16025
Method:                           MLE   Df Model:                            5
Date:                Wed, 20 Jul 2016   Pseudo R-squ.:                 0.05050
Time:                        17:55:34   Log-Likelihood:                -1710.2
converged:                       True   LL-Null:                       -1801.2
                                        LLR p-value:                 2.077e-37
 chosen_mode3=uber       coef    std err          z      P>|z|      [95.0% Conf. Int.]
--------------------------------------------------------------------------------------
n_transit             -0.5587      0.058     -9.630      0.000        -0.672    -0.445
wait 

In [33]:
# --------------------------
# - SERIES 2               - 
# --------------------------
# drop end_is_near_home
exog_cols = ['n_transit','wait',
             'start_is_near_work','end_is_near_work']
defn = subset2.dropna(subset=exog_cols)
y = defn['chosen_mode3']
x = defn.loc[:,exog_cols]
x['const'] = 1

s2_mdl = st.MNLogit(y, x)
s2_fit = s2_mdl.fit(maxiter=100)
print s2_fit.summary()

Optimization terminated successfully.
         Current function value: 0.106315
         Iterations 8
                          MNLogit Regression Results                          
Dep. Variable:           chosen_mode3   No. Observations:                16259
Model:                        MNLogit   Df Residuals:                    16254
Method:                           MLE   Df Model:                            4
Date:                Wed, 20 Jul 2016   Pseudo R-squ.:                 0.04909
Time:                        17:55:34   Log-Likelihood:                -1728.6
converged:                       True   LL-Null:                       -1817.8
                                        LLR p-value:                 1.592e-37
 chosen_mode3=uber       coef    std err          z      P>|z|      [95.0% Conf. Int.]
--------------------------------------------------------------------------------------
n_transit             -0.5597      0.058     -9.692      0.000        -0.673    -0.446
wait 

In [34]:
# --------------------------
# - SERIES 3               - 
# --------------------------
# start again with everything including is_near_home and work.  
# combine walk and wait to walkwait (access time to transit).
# combine start_is_near_home and end_is_near_home into start_or_end_near_home.
# combine start_is_near_work and end_is_near_work into start_or_end_near_work
exog_cols = ['n_transit','n_arr','walkwait','headway','transit_time',
             'uber_basic_time','transit_cost','uber_basic_cost',
             'start_or_end_near_home','start_or_end_near_work']
defn = subset2.dropna(subset=exog_cols)
y = defn['chosen_mode3']
x = defn.loc[:,exog_cols]
x['const'] = 1

s2_mdl = st.MNLogit(y, x)
s2_fit = s2_mdl.fit(maxiter=100)
print s2_fit.summary()

Optimization terminated successfully.
         Current function value: 0.106676
         Iterations 8
                          MNLogit Regression Results                          
Dep. Variable:           chosen_mode3   No. Observations:                16026
Model:                        MNLogit   Df Residuals:                    16015
Method:                           MLE   Df Model:                           10
Date:                Wed, 20 Jul 2016   Pseudo R-squ.:                 0.05079
Time:                        17:55:35   Log-Likelihood:                -1709.6
converged:                       True   LL-Null:                       -1801.1
                                        LLR p-value:                 5.710e-34
     chosen_mode3=uber       coef    std err          z      P>|z|      [95.0% Conf. Int.]
------------------------------------------------------------------------------------------
n_transit                 -0.5447      0.059     -9.174      0.000        -0.661    

In [35]:
# --------------------------
# - SERIES 3               - 
# --------------------------
# drop headway, n_arr
exog_cols = ['n_transit','walkwait','transit_time',
             'uber_basic_time','transit_cost','uber_basic_cost',
             'start_or_end_near_home','start_or_end_near_work']
defn = subset2.dropna(subset=exog_cols)
y = defn['chosen_mode3']
x = defn.loc[:,exog_cols]
x['const'] = 1

s2_mdl = st.MNLogit(y, x)
s2_fit = s2_mdl.fit(maxiter=100)
print s2_fit.summary()

Optimization terminated successfully.
         Current function value: 0.106683
         Iterations 8
                          MNLogit Regression Results                          
Dep. Variable:           chosen_mode3   No. Observations:                16026
Model:                        MNLogit   Df Residuals:                    16017
Method:                           MLE   Df Model:                            8
Date:                Wed, 20 Jul 2016   Pseudo R-squ.:                 0.05073
Time:                        17:55:35   Log-Likelihood:                -1709.7
converged:                       True   LL-Null:                       -1801.1
                                        LLR p-value:                 2.758e-35
     chosen_mode3=uber       coef    std err          z      P>|z|      [95.0% Conf. Int.]
------------------------------------------------------------------------------------------
n_transit                 -0.5442      0.059     -9.167      0.000        -0.661    

In [36]:
# --------------------------
# - SERIES 3               - 
# --------------------------
# drop transit_time, transit_cost
exog_cols = ['n_transit','walkwait',
             'uber_basic_time','uber_basic_cost',
             'start_or_end_near_home','start_or_end_near_work']
defn = subset2.dropna(subset=exog_cols)
y = defn['chosen_mode3']
x = defn.loc[:,exog_cols]
x['const'] = 1

s2_mdl = st.MNLogit(y, x)
s2_fit = s2_mdl.fit(maxiter=100)
print s2_fit.summary()

Optimization terminated successfully.
         Current function value: 0.106699
         Iterations 8
                          MNLogit Regression Results                          
Dep. Variable:           chosen_mode3   No. Observations:                16026
Model:                        MNLogit   Df Residuals:                    16019
Method:                           MLE   Df Model:                            6
Date:                Wed, 20 Jul 2016   Pseudo R-squ.:                 0.05059
Time:                        17:55:35   Log-Likelihood:                -1710.0
converged:                       True   LL-Null:                       -1801.1
                                        LLR p-value:                 1.150e-36
     chosen_mode3=uber       coef    std err          z      P>|z|      [95.0% Conf. Int.]
------------------------------------------------------------------------------------------
n_transit                 -0.5462      0.059     -9.245      0.000        -0.662    

In [37]:
# --------------------------
# - SERIES 3               - 
# --------------------------
# drop uber_basic_time, uber_basic_cost
exog_cols = ['n_transit','walkwait',
             'start_or_end_near_home','start_or_end_near_work']
defn = subset2.dropna(subset=exog_cols)
y = defn['chosen_mode3']
x = defn.loc[:,exog_cols]
x['const'] = 1

s2_mdl = st.MNLogit(y, x)
s2_fit = s2_mdl.fit(maxiter=100)
print s2_fit.summary()

Optimization terminated successfully.
         Current function value: 0.106765
         Iterations 8
                          MNLogit Regression Results                          
Dep. Variable:           chosen_mode3   No. Observations:                16031
Model:                        MNLogit   Df Residuals:                    16026
Method:                           MLE   Df Model:                            4
Date:                Wed, 20 Jul 2016   Pseudo R-squ.:                 0.04976
Time:                        17:55:35   Log-Likelihood:                -1711.6
converged:                       True   LL-Null:                       -1801.2
                                        LLR p-value:                 1.081e-37
     chosen_mode3=uber       coef    std err          z      P>|z|      [95.0% Conf. Int.]
------------------------------------------------------------------------------------------
n_transit                 -0.5659      0.058     -9.758      0.000        -0.680    

In [38]:
# --------------------------
# - SERIES 4               - 
# --------------------------
# Start with SERIES 3
# log the walkwait, headway, transit_time, uber_basic_time
exog_cols = ['n_transit','n_arr','ln_walkwait','ln_headway','ln_transit_time',
             'ln_uber_basic_time','transit_cost','uber_basic_cost',
             'start_or_end_near_home','start_or_end_near_work']
defn = subset2.dropna(subset=exog_cols)
y = defn['chosen_mode3']
x = defn.loc[:,exog_cols]
x['const'] = 1

s2_mdl = st.MNLogit(y, x)
s2_fit = s2_mdl.fit(maxiter=100)
print s2_fit.summary()

Optimization terminated successfully.
         Current function value: 0.106574
         Iterations 8
                          MNLogit Regression Results                          
Dep. Variable:           chosen_mode3   No. Observations:                15921
Model:                        MNLogit   Df Residuals:                    15910
Method:                           MLE   Df Model:                           10
Date:                Wed, 20 Jul 2016   Pseudo R-squ.:                 0.05267
Time:                        17:55:35   Log-Likelihood:                -1696.8
converged:                       True   LL-Null:                       -1791.1
                                        LLR p-value:                 3.696e-35
     chosen_mode3=uber       coef    std err          z      P>|z|      [95.0% Conf. Int.]
------------------------------------------------------------------------------------------
n_transit                 -0.5553      0.060     -9.289      0.000        -0.673    

In [39]:
# --------------------------
# - SERIES 4               - 
# --------------------------
# drop n_arr, headway
exog_cols = ['n_transit','ln_walkwait','ln_transit_time',
             'ln_uber_basic_time','transit_cost','uber_basic_cost',
             'start_or_end_near_home','start_or_end_near_work']
defn = subset2.dropna(subset=exog_cols)
y = defn['chosen_mode3']
x = defn.loc[:,exog_cols]
x['const'] = 1

s2_mdl = st.MNLogit(y, x)
s2_fit = s2_mdl.fit(maxiter=100)
print s2_fit.summary()

Optimization terminated successfully.
         Current function value: 0.106509
         Iterations 8
                          MNLogit Regression Results                          
Dep. Variable:           chosen_mode3   No. Observations:                15977
Model:                        MNLogit   Df Residuals:                    15968
Method:                           MLE   Df Model:                            8
Date:                Wed, 20 Jul 2016   Pseudo R-squ.:                 0.05259
Time:                        17:55:36   Log-Likelihood:                -1701.7
converged:                       True   LL-Null:                       -1796.2
                                        LLR p-value:                 1.371e-36
     chosen_mode3=uber       coef    std err          z      P>|z|      [95.0% Conf. Int.]
------------------------------------------------------------------------------------------
n_transit                 -0.5534      0.060     -9.275      0.000        -0.670    

In [40]:
# --------------------------
# - SERIES 4               - 
# --------------------------
# drop uber_basic_time, transit_cost
exog_cols = ['n_transit','ln_walkwait','ln_transit_time',
             'uber_basic_cost',
             'start_or_end_near_home','start_or_end_near_work']
defn = subset2.dropna(subset=exog_cols)
y = defn['chosen_mode3']
x = defn.loc[:,exog_cols]
x['const'] = 1

s2_mdl = st.MNLogit(y, x)
s2_fit = s2_mdl.fit(maxiter=100)
print s2_fit.summary()

Optimization terminated successfully.
         Current function value: 0.106521
         Iterations 8
                          MNLogit Regression Results                          
Dep. Variable:           chosen_mode3   No. Observations:                15977
Model:                        MNLogit   Df Residuals:                    15970
Method:                           MLE   Df Model:                            6
Date:                Wed, 20 Jul 2016   Pseudo R-squ.:                 0.05249
Time:                        17:55:36   Log-Likelihood:                -1701.9
converged:                       True   LL-Null:                       -1796.2
                                        LLR p-value:                 5.147e-38
     chosen_mode3=uber       coef    std err          z      P>|z|      [95.0% Conf. Int.]
------------------------------------------------------------------------------------------
n_transit                 -0.5496      0.059     -9.278      0.000        -0.666    

In [41]:
# --------------------------
# - SERIES 4               - 
# --------------------------
# replace ln_walkwait with walkwait
exog_cols = ['n_transit','ln_walkwait',
             'start_or_end_near_home','start_or_end_near_work']
defn = subset2.dropna(subset=exog_cols)
y = defn['chosen_mode3']
x = defn.loc[:,exog_cols]
x['const'] = 1

s2_mdl = st.MNLogit(y, x)
s2_fit = s2_mdl.fit(maxiter=100)
print s2_fit.summary()

Optimization terminated successfully.
         Current function value: 0.106640
         Iterations 8
                          MNLogit Regression Results                          
Dep. Variable:           chosen_mode3   No. Observations:                15982
Model:                        MNLogit   Df Residuals:                    15977
Method:                           MLE   Df Model:                            4
Date:                Wed, 20 Jul 2016   Pseudo R-squ.:                 0.05120
Time:                        17:55:36   Log-Likelihood:                -1704.3
converged:                       True   LL-Null:                       -1796.3
                                        LLR p-value:                 1.064e-38
     chosen_mode3=uber       coef    std err          z      P>|z|      [95.0% Conf. Int.]
------------------------------------------------------------------------------------------
n_transit                 -0.5744      0.058     -9.908      0.000        -0.688    

In [42]:
# --------------------------
# - SERIES 5               - 
# --------------------------
# Start with SERIES 4
# add late_night
exog_cols = ['n_transit','n_arr','ln_walkwait','ln_headway','ln_transit_time',
             'ln_uber_basic_time','transit_cost','uber_basic_cost',
             'start_or_end_near_home','start_or_end_near_work','late_night']
defn = subset2.dropna(subset=exog_cols)
y = defn['chosen_mode3']
x = defn.loc[:,exog_cols]
x['const'] = 1

s2_mdl = st.MNLogit(y, x)
s2_fit = s2_mdl.fit(maxiter=100)
print s2_fit.summary()

Optimization terminated successfully.
         Current function value: 0.106372
         Iterations 8
                          MNLogit Regression Results                          
Dep. Variable:           chosen_mode3   No. Observations:                15921
Model:                        MNLogit   Df Residuals:                    15909
Method:                           MLE   Df Model:                           11
Date:                Wed, 20 Jul 2016   Pseudo R-squ.:                 0.05447
Time:                        17:55:36   Log-Likelihood:                -1693.5
converged:                       True   LL-Null:                       -1791.1
                                        LLR p-value:                 7.661e-36
     chosen_mode3=uber       coef    std err          z      P>|z|      [95.0% Conf. Int.]
------------------------------------------------------------------------------------------
n_transit                 -0.5496      0.060     -9.197      0.000        -0.667    

In [43]:
# --------------------------
# - SERIES 5               - 
# --------------------------
# drop n_arr, headeay
exog_cols = ['n_transit','ln_walkwait','ln_transit_time',
             'ln_uber_basic_time','transit_cost','uber_basic_cost',
             'start_or_end_near_home','start_or_end_near_work','late_night']
defn = subset2.dropna(subset=exog_cols)
y = defn['chosen_mode3']
x = defn.loc[:,exog_cols]
x['const'] = 1

s2_mdl = st.MNLogit(y, x)
s2_fit = s2_mdl.fit(maxiter=100)
print s2_fit.summary()

Optimization terminated successfully.
         Current function value: 0.106316
         Iterations 8
                          MNLogit Regression Results                          
Dep. Variable:           chosen_mode3   No. Observations:                15977
Model:                        MNLogit   Df Residuals:                    15967
Method:                           MLE   Df Model:                            9
Date:                Wed, 20 Jul 2016   Pseudo R-squ.:                 0.05431
Time:                        17:55:36   Log-Likelihood:                -1698.6
converged:                       True   LL-Null:                       -1796.2
                                        LLR p-value:                 3.537e-37
     chosen_mode3=uber       coef    std err          z      P>|z|      [95.0% Conf. Int.]
------------------------------------------------------------------------------------------
n_transit                 -0.5482      0.060     -9.194      0.000        -0.665    

In [44]:
# --------------------------
# - SERIES 5               - 
# --------------------------
# drop uber_basic_time, transit_cost
exog_cols = ['n_transit','ln_walkwait','ln_transit_time',
             'uber_basic_cost',
             'start_or_end_near_home','start_or_end_near_work','late_night']
defn = subset2.dropna(subset=exog_cols)
y = defn['chosen_mode3']
x = defn.loc[:,exog_cols]
x['const'] = 1

s2_mdl = st.MNLogit(y, x)
s2_fit = s2_mdl.fit(maxiter=100)
print s2_fit.summary()

Optimization terminated successfully.
         Current function value: 0.106338
         Iterations 8
                          MNLogit Regression Results                          
Dep. Variable:           chosen_mode3   No. Observations:                15977
Model:                        MNLogit   Df Residuals:                    15969
Method:                           MLE   Df Model:                            7
Date:                Wed, 20 Jul 2016   Pseudo R-squ.:                 0.05411
Time:                        17:55:37   Log-Likelihood:                -1699.0
converged:                       True   LL-Null:                       -1796.2
                                        LLR p-value:                 1.761e-38
     chosen_mode3=uber       coef    std err          z      P>|z|      [95.0% Conf. Int.]
------------------------------------------------------------------------------------------
n_transit                 -0.5426      0.059     -9.166      0.000        -0.659    

In [45]:
# --------------------------
# - SERIES 5               - 
# --------------------------
# drop transit_time, uber_basic_cost
exog_cols = ['n_transit','ln_walkwait',
             'start_or_end_near_home','start_or_end_near_work','late_night']
defn = subset2.dropna(subset=exog_cols)
y = defn['chosen_mode3']
x = defn.loc[:,exog_cols]
x['const'] = 1

s2_mdl = st.MNLogit(y, x)
s2_fit = s2_mdl.fit(maxiter=100)
print s2_fit.summary()

Optimization terminated successfully.
         Current function value: 0.106467
         Iterations 8
                          MNLogit Regression Results                          
Dep. Variable:           chosen_mode3   No. Observations:                15982
Model:                        MNLogit   Df Residuals:                    15976
Method:                           MLE   Df Model:                            5
Date:                Wed, 20 Jul 2016   Pseudo R-squ.:                 0.05274
Time:                        17:55:37   Log-Likelihood:                -1701.5
converged:                       True   LL-Null:                       -1796.3
                                        LLR p-value:                 5.073e-39
     chosen_mode3=uber       coef    std err          z      P>|z|      [95.0% Conf. Int.]
------------------------------------------------------------------------------------------
n_transit                 -0.5681      0.058     -9.803      0.000        -0.682    

In [46]:
# --------------------------
# - SERIES 6               - 
# --------------------------
# start with SERIES 3, add late_night
exog_cols = ['n_transit','n_arr','walkwait','headway','transit_time',
             'uber_basic_time','transit_cost','uber_basic_cost',
             'start_or_end_near_home','start_or_end_near_work','late_night']
defn = subset2.dropna(subset=exog_cols)
y = defn['chosen_mode3']
x = defn.loc[:,exog_cols]
x['const'] = 1

s2_mdl = st.MNLogit(y, x)
s2_fit = s2_mdl.fit(maxiter=100)
print s2_fit.summary()

Optimization terminated successfully.
         Current function value: 0.106499
         Iterations 8
                          MNLogit Regression Results                          
Dep. Variable:           chosen_mode3   No. Observations:                16026
Model:                        MNLogit   Df Residuals:                    16014
Method:                           MLE   Df Model:                           11
Date:                Wed, 20 Jul 2016   Pseudo R-squ.:                 0.05236
Time:                        17:55:37   Log-Likelihood:                -1706.8
converged:                       True   LL-Null:                       -1801.1
                                        LLR p-value:                 1.708e-34
     chosen_mode3=uber       coef    std err          z      P>|z|      [95.0% Conf. Int.]
------------------------------------------------------------------------------------------
n_transit                 -0.5396      0.059     -9.088      0.000        -0.656    

In [47]:
# --------------------------
# - SERIES 6               - 
# --------------------------
# drop n_arr, headway
exog_cols = ['n_transit','walkwait','transit_time',
             'uber_basic_time','transit_cost','uber_basic_cost',
             'start_or_end_near_home','start_or_end_near_work','late_night']
defn = subset2.dropna(subset=exog_cols)
y = defn['chosen_mode3']
x = defn.loc[:,exog_cols]
x['const'] = 1

s2_mdl = st.MNLogit(y, x)
s2_fit = s2_mdl.fit(maxiter=100)
print s2_fit.summary()

Optimization terminated successfully.
         Current function value: 0.106500
         Iterations 8
                          MNLogit Regression Results                          
Dep. Variable:           chosen_mode3   No. Observations:                16026
Model:                        MNLogit   Df Residuals:                    16016
Method:                           MLE   Df Model:                            9
Date:                Wed, 20 Jul 2016   Pseudo R-squ.:                 0.05235
Time:                        17:55:37   Log-Likelihood:                -1706.8
converged:                       True   LL-Null:                       -1801.1
                                        LLR p-value:                 8.197e-36
     chosen_mode3=uber       coef    std err          z      P>|z|      [95.0% Conf. Int.]
------------------------------------------------------------------------------------------
n_transit                 -0.5394      0.059     -9.088      0.000        -0.656    

In [48]:
# --------------------------
# - SERIES 6               - 
# --------------------------
# drop transit_time, transit_cost
exog_cols = ['n_transit','walkwait',
             'uber_basic_time','uber_basic_cost',
             'start_or_end_near_home','start_or_end_near_work','late_night']
defn = subset2.dropna(subset=exog_cols)
y = defn['chosen_mode3']
x = defn.loc[:,exog_cols]
x['const'] = 1

s2_mdl = st.MNLogit(y, x)
s2_fit = s2_mdl.fit(maxiter=100)
print s2_fit.summary()

Optimization terminated successfully.
         Current function value: 0.106524
         Iterations 8
                          MNLogit Regression Results                          
Dep. Variable:           chosen_mode3   No. Observations:                16026
Model:                        MNLogit   Df Residuals:                    16018
Method:                           MLE   Df Model:                            7
Date:                Wed, 20 Jul 2016   Pseudo R-squ.:                 0.05214
Time:                        17:55:37   Log-Likelihood:                -1707.1
converged:                       True   LL-Null:                       -1801.1
                                        LLR p-value:                 4.330e-37
     chosen_mode3=uber       coef    std err          z      P>|z|      [95.0% Conf. Int.]
------------------------------------------------------------------------------------------
n_transit                 -0.5421      0.059     -9.175      0.000        -0.658    

In [88]:
# --------------------------
# - SERIES 6               - 
# --------------------------
# drop uber_basic_time, uber_basic_cost
exog_cols = ['n_transit','walkwait',
             'start_or_end_near_home','start_or_end_near_work','late_night']
defn = subset2.dropna(subset=exog_cols)
y = defn['chosen_mode3']
x = defn.loc[:,exog_cols]
x['const'] = 1

s2_mdl = st.MNLogit(y, x)
s2_fit = s2_mdl.fit(maxiter=100)
print s2_fit.summary()

Optimization terminated successfully.
         Current function value: 0.106564
         Iterations 8
                          MNLogit Regression Results                          
Dep. Variable:           chosen_mode3   No. Observations:                16031
Model:                        MNLogit   Df Residuals:                    16025
Method:                           MLE   Df Model:                            5
Date:                Wed, 20 Jul 2016   Pseudo R-squ.:                 0.05155
Time:                        18:05:24   Log-Likelihood:                -1708.3
converged:                       True   LL-Null:                       -1801.2
                                        LLR p-value:                 3.235e-38
     chosen_mode3=uber       coef    std err          z      P>|z|      [95.0% Conf. Int.]
------------------------------------------------------------------------------------------
n_transit                 -0.5596      0.058     -9.654      0.000        -0.673    

In [50]:
# FINAL MODELS:
# 1.
exog_cols = ['n_transit','wait','transit_cost']
defn = subset2.dropna(subset=exog_cols)
y = defn['chosen_mode3']; x = defn.loc[:,exog_cols]; x['const'] = 1
s1_mdl = st.MNLogit(y, x) 
s1_fit = s1_mdl.fit(maxiter=100)
# 2.
exog_cols = ['n_transit','wait',
             'start_is_near_work','end_is_near_work']
defn = subset2.dropna(subset=exog_cols)
y = defn['chosen_mode3']; x = defn.loc[:,exog_cols]; x['const'] = 1
s2_mdl = st.MNLogit(y, x) 
s2_fit = s2_mdl.fit(maxiter=100)
# 3.
exog_cols = ['n_transit','walkwait',
             'start_or_end_near_home','start_or_end_near_work']
defn = subset2.dropna(subset=exog_cols)
y = defn['chosen_mode3']; x = defn.loc[:,exog_cols]; x['const'] = 1
s3_mdl = st.MNLogit(y, x) 
s3_fit = s3_mdl.fit(maxiter=100)
# 4.
exog_cols = ['n_transit','ln_walkwait',
             'start_or_end_near_home','start_or_end_near_work']
defn = subset2.dropna(subset=exog_cols)
y = defn['chosen_mode3']; x = defn.loc[:,exog_cols]; x['const'] = 1
s4_mdl = st.MNLogit(y, x) 
s4_fit = s4_mdl.fit(maxiter=100)
# 5.
exog_cols = ['n_transit','ln_walkwait',
             'start_or_end_near_home','start_or_end_near_work','late_night']
defn = subset2.dropna(subset=exog_cols)
y = defn['chosen_mode3']; x = defn.loc[:,exog_cols]; x['const'] = 1
s5_mdl = st.MNLogit(y, x) 
s5_fit = s5_mdl.fit(maxiter=100)
# 6.
exog_cols = ['n_transit','walkwait',
             'start_or_end_near_home','start_or_end_near_work','late_night']
defn = subset2.dropna(subset=exog_cols)
y = defn['chosen_mode3']; x = defn.loc[:,exog_cols]; x['const'] = 1
s6_mdl = st.MNLogit(y, x) 
s6_fit = s6_mdl.fit(maxiter=100)

print "SERIES 1 FINAL MODEL"
print s1_fit.summary()
print "SERIES 2 FINAL MODEL"
print s2_fit.summary()
print "SERIES 3 FINAL MODEL"
print s3_fit.summary()
print "SERIES 4 FINAL MODEL"
print s4_fit.summary()
print "SERIES 5 FINAL MODEL"
print s5_fit.summary()
print "SERIES 6 FINAL MODEL"
print s6_fit.summary()

Optimization terminated successfully.
         Current function value: 0.105683
         Iterations 8
Optimization terminated successfully.
         Current function value: 0.106315
         Iterations 8
Optimization terminated successfully.
         Current function value: 0.106765
         Iterations 8
Optimization terminated successfully.
         Current function value: 0.106640
         Iterations 8
Optimization terminated successfully.
         Current function value: 0.106467
         Iterations 8
Optimization terminated successfully.
         Current function value: 0.106564
         Iterations 8
SERIES 1 FINAL MODEL
                          MNLogit Regression Results                          
Dep. Variable:           chosen_mode3   No. Observations:                19646
Model:                        MNLogit   Df Residuals:                    19642
Method:                           MLE   Df Model:                            3
Date:                Wed, 20 Jul 2016   Pseudo R-sq

In [97]:
# --------------------------
# - SERIES 7               - 
# --------------------------
# Start with n_transit and late_night
# added variables one at a time, and only kept them if all variables remained significant
exog_cols = ['n_transit','late_night','transit_time','wait','start_or_end_near_home']
defn = subset2.dropna(subset=exog_cols)
y = defn['chosen_mode3']
x = defn.loc[:,exog_cols]
x['const'] = 1

s2_mdl = st.MNLogit(y, x)
s2_fit = s2_mdl.fit(maxiter=100)
print s2_fit.summary()

Optimization terminated successfully.
         Current function value: 0.106366
         Iterations 8
                          MNLogit Regression Results                          
Dep. Variable:           chosen_mode3   No. Observations:                18207
Model:                        MNLogit   Df Residuals:                    18201
Method:                           MLE   Df Model:                            5
Date:                Thu, 21 Jul 2016   Pseudo R-squ.:                 0.04687
Time:                        08:19:55   Log-Likelihood:                -1936.6
converged:                       True   LL-Null:                       -2031.8
                                        LLR p-value:                 3.080e-39
     chosen_mode3=uber       coef    std err          z      P>|z|      [95.0% Conf. Int.]
------------------------------------------------------------------------------------------
n_transit                 -0.5647      0.054    -10.364      0.000        -0.671    

In [86]:
# --------------------------
# - SERIES 7               - 
# --------------------------
# based mainly on Uber attributes
exog_cols = ['late_night','start_or_end_near_work']
defn = subset2.dropna(subset=exog_cols)
y = defn['chosen_mode3']
x = defn.loc[:,exog_cols]
x['const'] = 1

s2_mdl = st.MNLogit(y, x)
s2_fit = s2_mdl.fit(maxiter=500)
print s2_fit.summary()

Optimization terminated successfully.
         Current function value: 0.110295
         Iterations 8
                          MNLogit Regression Results                          
Dep. Variable:           chosen_mode3   No. Observations:                16254
Model:                        MNLogit   Df Residuals:                    16249
Method:                           MLE   Df Model:                            4
Date:                Wed, 20 Jul 2016   Pseudo R-squ.:                 0.01372
Time:                        18:04:51   Log-Likelihood:                -1792.7
converged:                       True   LL-Null:                       -1817.7
                                        LLR p-value:                 3.798e-10
     chosen_mode3=uber       coef    std err          z      P>|z|      [95.0% Conf. Int.]
------------------------------------------------------------------------------------------
late_night                 0.6245      0.190      3.289      0.001         0.252    