# PROJECT:

In [15]:
import pandas as pd
import biogeme.database as db
import biogeme.biogeme as bio
from biogeme.expressions import Beta, Variable
from biogeme import models
from biogeme import results as res

In [16]:
df = pd.read_table('lpmc11.dat')
database = db.Database('lpmc', df)
df

Unnamed: 0,trip_id,household_id,person_n,trip_n,travel_mode,purpose,fueltype,faretype,bus_scale,survey_year,...,dur_pt_access,dur_pt_rail,dur_pt_bus,dur_pt_int,pt_interchanges,dur_driving,cost_transit,cost_driving_fuel,cost_driving_ccharge,driving_traffic_percent
0,1,0,0,1,4,3,1,1,1.0,1,...,0.109444,0.000000,0.055556,0.000000,0,0.059444,1.5,0.15,0.0,0.112150
1,13,1,1,1,4,3,1,5,0.0,1,...,0.241389,0.000000,0.122222,0.000000,0,0.132222,0.0,0.50,0.0,0.065126
2,19,4,0,1,3,3,6,5,0.0,1,...,0.222500,0.000000,0.312222,0.000000,0,0.221667,0.0,0.56,0.0,0.086466
3,20,5,1,0,4,3,1,5,0.0,1,...,0.381667,0.000000,0.062222,0.000000,0,0.117222,0.0,0.41,0.0,0.097156
4,39,9,2,0,4,3,1,5,0.0,1,...,0.146944,0.000000,0.225000,0.000000,0,0.200833,0.0,0.48,0.0,0.378976
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4995,81035,17603,1,1,3,1,1,1,1.0,3,...,0.278056,0.216667,0.000000,0.000000,0,0.392222,2.4,0.98,0.0,0.547450
4996,81040,17604,2,0,3,1,1,2,0.0,3,...,0.264444,0.000000,0.353333,0.176667,1,0.288889,0.0,0.85,0.0,0.175000
4997,81045,17605,0,1,4,3,1,5,0.0,3,...,0.128889,0.000000,0.045833,0.000000,0,0.067778,0.0,0.17,0.0,0.024590
4998,81066,17608,0,3,3,3,6,1,1.0,3,...,0.092222,0.000000,0.389444,0.000000,0,0.193333,1.5,0.61,0.0,0.485632


# Model 0

**Variables:**

In [17]:
dur_walking = Variable('dur_walking') #time for walking
dur_cycling = Variable('dur_cycling') #time for cycling

dur_pt_access = Variable('dur_pt_access')
dur_pt_rail = Variable('dur_pt_rail')
dur_pt_bus = Variable('dur_pt_bus')
dur_pt_int = Variable('dur_pt_int')

dur_pt = dur_pt_access + dur_pt_rail + dur_pt_bus + dur_pt_int #time for public transport
dur_driving = Variable('dur_driving')

cost_transit = Variable('cost_transit') #cost for public transport
cost_driving_fuel = Variable('cost_driving_fuel')
cost_driving_ccharge = Variable('cost_driving_ccharge')

cost_driving = cost_driving_fuel + cost_driving_ccharge #cost for driving

travel_mode = Variable('travel_mode') #Choice

**Parameters:**

In [18]:
constant_walking = Beta('constant_walking', 0, None, None, 0)
constant_cycling = Beta('constant_cycling', 0, None, None, 0)
constant_pt = Beta('constant_pt', 0, None, None, 0)
beta_cost = Beta('beta_cost', 0, None, None, 0)
beta_time = Beta('beta_time', 0, None, None, 0)

In [19]:
Opt1_Model0 = (
    constant_walking
    + beta_time * dur_walking
)

Opt2_Model0 = (
    constant_cycling
    + beta_time * dur_cycling
)

Opt3_Model0 = (
    constant_pt
    + beta_cost * cost_transit
    + beta_time * dur_pt
)

Opt4_Model0 = (
    beta_cost * cost_driving
    + beta_time * dur_driving
)

In [20]:
V_Model0 = {1: Opt1_Model0, 2: Opt2_Model0, 3: Opt3_Model0, 4: Opt4_Model0}
logprob_Model0 = models.loglogit(V_Model0, None, travel_mode)
biogeme_Model0 = bio.BIOGEME(database, logprob_Model0)
biogeme_Model0.modelName = 'logit_Model0'
res_Model0 = biogeme_Model0.estimate()

In [21]:
print(res_Model0.printGeneralStatistics())

Number of estimated parameters:	5
Sample size:	5000
Excluded observations:	0
Init log likelihood:	-4676.875
Final log likelihood:	-4676.875
Likelihood ratio test for the init. model:	-0
Rho-square for the init. model:	0
Rho-square-bar for the init. model:	-0.00107
Akaike Information Criterion:	9363.751
Bayesian Information Criterion:	9396.337
Final gradient norm:	2.6771E-04
Nbr of threads:	16



In [22]:
res_Model0.getEstimatedParameters()

Unnamed: 0,Value,Rob. Std err,Rob. t-test,Rob. p-value
beta_cost,-0.156585,0.012667,-12.361233,0.0
beta_time,-5.226955,0.191905,-27.237207,0.0
constant_cycling,-2.485329,0.086252,-28.814839,0.0
constant_pt,0.726783,0.047157,15.411971,0.0
constant_walking,1.187012,0.078799,15.063738,0.0


# Model 1

In [23]:
constant_walking = Beta('constant_walking', 0, None, None, 0)
constant_cycling = Beta('constant_cycling', 0, None, None, 0)
constant_pt = Beta('constant_pt', 0, None, None, 0)
beta_cost = Beta('beta_cost', 0, None, None, 0)
beta_time_walking = Beta('beta_time_walking', 0, None, None, 0)
beta_time_cycling = Beta('beta_time_cycling', 0, None, None, 0)
beta_time_pt = Beta('beta_time_pt', 0, None, None, 0)
beta_time_driving = Beta('beta_time_driving', 0, None, None, 0)

In [24]:
Opt1_Model1 = (
    constant_walking
    + beta_time_walking * dur_walking
)

Opt2_Model1 = (
    constant_cycling
    + beta_time_cycling * dur_cycling
)

Opt3_Model1 = (
    constant_pt
    + beta_cost * cost_transit
    + beta_time_pt * dur_pt
)

Opt4_Model1 = (
    beta_cost * cost_driving
    + beta_time_driving * dur_driving
)

In [25]:
V_Model1 = {1: Opt1_Model1, 2: Opt2_Model1, 3: Opt3_Model1, 4: Opt4_Model1}
logprob_Model1 = models.loglogit(V_Model1, None, travel_mode)
biogeme_Model1 = bio.BIOGEME(database, logprob_Model1)
biogeme_Model1.modelName = 'logit_Model1'
res_Model1 = biogeme_Model1.estimate()

In [26]:
print(res_Model1.printGeneralStatistics())

Number of estimated parameters:	8
Sample size:	5000
Excluded observations:	0
Init log likelihood:	-4372.911
Final log likelihood:	-4372.911
Likelihood ratio test for the init. model:	5.455222e-07
Rho-square for the init. model:	6.24e-11
Rho-square-bar for the init. model:	-0.00183
Akaike Information Criterion:	8761.822
Bayesian Information Criterion:	8813.959
Final gradient norm:	2.0215E-02
Nbr of threads:	16



In [27]:
res_Model1.getEstimatedParameters()

Unnamed: 0,Value,Rob. Std err,Rob. t-test,Rob. p-value
beta_cost,-0.136284,0.013922,-9.789202,0.0
beta_time_cycling,-5.435232,0.49287,-11.02773,0.0
beta_time_driving,-5.792767,0.384527,-15.064659,0.0
beta_time_pt,-3.126067,0.243833,-12.820516,0.0
beta_time_walking,-8.160375,0.386256,-21.126839,0.0
constant_cycling,-2.560479,0.150686,-16.992175,0.0
constant_pt,-0.406022,0.069442,-5.846891,5.008459e-09
constant_walking,1.933296,0.129137,14.970911,0.0


In [29]:
res_Model1.likelihood_ratio_test(res_Model0, 0.01)

LRTuple(message='H0 can be rejected at level 1.0%', statistic=607.9289532954572, threshold=11.344866730144373)

# Model 2:

- Interaction of age with ASCs:

In [30]:
age = Variable('age')

In [33]:
constant_walking = Beta('constant_walking', 0, None, None, 0)
constant_cycling = Beta('constant_cycling', 0, None, None, 0)
constant_pt = Beta('constant_pt', 0, None, None, 0)
beta_cost = Beta('beta_cost', 0, None, None, 0)
beta_time_walking = Beta('beta_time_walking', 0, None, None, 0)
beta_time_cycling = Beta('beta_time_cycling', 0, None, None, 0)
beta_time_pt = Beta('beta_time_pt', 0, None, None, 0)
beta_time_driving = Beta('beta_time_driving', 0, None, None, 0)

interacted_constant_walking = constant_walking * age
interacted_constant_cycling = constant_cycling * age
interacted_constant_pt = constant_pt * age

In [36]:
Opt1_Model2_ASC = (
    interacted_constant_walking
    + beta_time_walking * dur_walking
)

Opt2_Model2_ASC = (
    interacted_constant_cycling
    + beta_time_cycling * dur_cycling
)

Opt3_Model2_ASC = (
    interacted_constant_pt
    + beta_cost * cost_transit
    + beta_time_pt * dur_pt
)

Opt4_Model2_ASC = (
    beta_cost * cost_driving
    + beta_time_driving * dur_driving
)

In [37]:
V_Model2_ASC = {1: Opt1_Model2_ASC, 2: Opt2_Model2_ASC, 3: Opt3_Model2_ASC, 4: Opt4_Model2_ASC}
logprob_Model2_ASC = models.loglogit(V_Model2_ASC, None, travel_mode)
biogeme_Model2_ASC = bio.BIOGEME(database, logprob_Model2_ASC)
biogeme_Model2_ASC.modelName = 'logit_Model2_ASC'
res_Model2_ASC = biogeme_Model2_ASC.estimate()

In [38]:
print(res_Model2_ASC.printGeneralStatistics())

Number of estimated parameters:	8
Sample size:	5000
Excluded observations:	0
Init log likelihood:	-6931.472
Final log likelihood:	-4615.576
Likelihood ratio test for the init. model:	4631.793
Rho-square for the init. model:	0.334
Rho-square-bar for the init. model:	0.333
Akaike Information Criterion:	9247.151
Bayesian Information Criterion:	9299.289
Final gradient norm:	2.0723E-02
Nbr of threads:	16



In [39]:
res_Model2_ASC.getEstimatedParameters()

Unnamed: 0,Value,Rob. Std err,Rob. t-test,Rob. p-value
beta_cost,-0.150735,0.014564,-10.349605,0.0
beta_time_cycling,-8.102043,0.665425,-12.175743,0.0
beta_time_driving,-5.750553,0.391533,-14.687289,0.0
beta_time_pt,-3.123072,0.24445,-12.775895,0.0
beta_time_walking,-5.264886,0.197646,-26.637894,0.0
constant_cycling,-0.049277,0.003781,-13.031048,0.0
constant_pt,-0.010085,0.001345,-7.499115,6.417089e-14
constant_walking,0.015473,0.001602,9.659048,0.0


Need to find a test! (Probably a Cox test would be ok)

- Interaction of age with time:

In [40]:
constant_walking = Beta('constant_walking', 0, None, None, 0)
constant_cycling = Beta('constant_cycling', 0, None, None, 0)
constant_pt = Beta('constant_pt', 0, None, None, 0)
beta_cost = Beta('beta_cost', 0, None, None, 0)
beta_time_walking = Beta('beta_time_walking', 0, None, None, 0)
beta_time_cycling = Beta('beta_time_cycling', 0, None, None, 0)
beta_time_pt = Beta('beta_time_pt', 0, None, None, 0)
beta_time_driving = Beta('beta_time_driving', 0, None, None, 0)

int_beta_time_walking = beta_time_walking * age
int_beta_time_cycling = beta_time_cycling * age
int_beta_time_pt = beta_time_pt * age
int_beta_time_driving = beta_time_driving * age

In [41]:
Opt1_Model2_time = (
    constant_walking
    + int_beta_time_walking * dur_walking
)

Opt2_Model2_time = (
    constant_cycling
    + int_beta_time_cycling * dur_cycling
)

Opt3_Model2_time= (
    constant_pt
    + beta_cost * cost_transit
    + int_beta_time_pt * dur_pt
)

Opt4_Model2_time = (
    beta_cost * cost_driving
    + int_beta_time_driving * dur_driving
)

In [42]:
V_Model2_time = {1: Opt1_Model2_time, 2: Opt2_Model2_time, 3: Opt3_Model2_time, 4: Opt4_Model2_time}
logprob_Model2_time = models.loglogit(V_Model2_time, None, travel_mode)
biogeme_Model2_time = bio.BIOGEME(database, logprob_Model2_time)
biogeme_Model2_time.modelName = 'logit_Model2_time'
res_Model2_time = biogeme_Model2_time.estimate()

In [44]:
print(res_Model2_time.printGeneralStatistics())

Number of estimated parameters:	8
Sample size:	5000
Excluded observations:	0
Init log likelihood:	-6931.472
Final log likelihood:	-4608.252
Likelihood ratio test for the init. model:	4646.44
Rho-square for the init. model:	0.335
Rho-square-bar for the init. model:	0.334
Akaike Information Criterion:	9232.503
Bayesian Information Criterion:	9284.641
Final gradient norm:	1.7907E-03
Nbr of threads:	16



In [45]:
res_Model2_time.getEstimatedParameters()

Unnamed: 0,Value,Rob. Std err,Rob. t-test,Rob. p-value
beta_cost,-0.135695,0.013238,-10.25061,0.0
beta_time_cycling,-0.143847,0.012788,-11.248969,0.0
beta_time_driving,-0.147227,0.011303,-13.02606,0.0
beta_time_pt,-0.093446,0.006574,-14.214623,0.0
beta_time_walking,-0.148702,0.007218,-20.60078,0.0
constant_cycling,-2.496561,0.140597,-17.756801,0.0
constant_pt,-0.122893,0.059507,-2.065189,0.038905
constant_walking,0.927586,0.091251,10.165217,0.0
