In [1]:
import os
import shutil
import pandas as pd
import biogeme.biogeme as bio
from biogeme.models import loglogit
import pycmtensor as cmt
from pycmtensor.expressions import Beta

In [2]:
nb_path = os.path.abspath("")
df_rp = pd.read_csv("data/model_average_RP.csv")
df_rp.columns = df_rp.columns.str.upper()

df_rp["PURPOSE_WORK"] = 0
df_rp["PURPOSE_HOME"] = 0
df_rp["PURPOSE_LEISURE"] = 0
df_rp.loc[df_rp["PURPOSE"].str.split(" ", expand=True)[0] == "Work", "PURPOSE_WORK"] = 1
df_rp.loc[df_rp["PURPOSE"].str.split(" ", expand=True)[0] == "Home", "PURPOSE_HOME"] = 1
df_rp.loc[
    df_rp["PURPOSE"].isin(
        [
            "Cinema or other night out",
            "Clothes shopping",
            "Family Activity",
            "Leisure Other",
            "Museum/cultural",
            "Social",
            "Restaurant",
            "Sports activity",
            "Shopping - Major",
        ]
    ),
    "PURPOSE_LEISURE",
] = 1

df_rp["WEEKEND"] = 0
df_rp.loc[df_rp["DAY_OF_THE_WEEK_START"].isin(["Saturday", "Sunday"]), "WEEKEND"] = 1
df_rp.drop(["PURPOSE", "DAY_OF_THE_WEEK_START"], axis=1, inplace=True)
df_rp.fillna(0, inplace=True)

db = cmt.Database("model_average_rp", df_rp, choiceVar="CHOICE")
globals().update(db.variables)
globals()["CITY_LEEDS"] = globals()["CITY.LEEDS"]

# additional steps to format database
db.data["CHOICE"] -= 1
for i in [
    "TOTAL_CAR_COST",
    "BUS_COST_TOTAL_PER_LEG",
    "RAIL_COST_TOTAL_PER_LEG",
    "TAXI_COST",
]:
    db.data[i] /= 100.0

for i in [
    "CAR_DISTANCE_KM",
    "BUS_DISTANCE_KM",
    "RAIL_DISTANCE_KM",
    "TAXI_DISTANCE_KM",
    "CYCLING_DISTANCE_KM",
    "WALKING_DISTANCE_KM",
]:
    db.data[i] /= 100.0

for i in [
    "CAR_TRAVEL_TIME_MIN",
    "BUS_TRAVEL_TIME_MIN",
    "RAIL_TRAVEL_TIME_MIN",
    "TAXI_TRAVEL_TIME_MIN",
    "CYCLING_TRAVEL_TIME_MIN",
    "WALKING_TRAVEL_TIME_MIN",
    "BUS_IVT_TIME_MIN",
    "BUS_ACCESS_EGRESS_TIME_MIN",
    "RAIL_IVT_TIME_MIN",
    "RAIL_ACCESS_EGRESS_TIME_MIN",
]:
    db.data[i] /= 60.0



In [3]:
# specify Beta parameters
b_cost = Beta("b_cost", 0.0, None, None, 0)
b_time = Beta("b_time", 0.0, None, None, 0)
b_dist = Beta("b_dist", 0.0, None, None, 0)

b_IVT = Beta("b_IVT", 0.0, None, None, 0)
b_egress = Beta("b_egress", 0.0, None, None, 0)
b_transfers = Beta("b_transfers", 0.0, None, None, 0)

b_ncar = Beta("b_ncar", 0.0, None, None, 0)
b_nbicycle = Beta("b_nbicycle", 0.0, None, None, 0)

b_weekend_car = Beta("b_weekend_car", 0.0, None, None, 0)
b_weekend_pt = Beta("b_weekend_pt", 0.0, None, None, 0)
b_weekend_phys = Beta("b_weekend_phys", 0.0, None, None, 0)

b_cleeds_car = Beta("b_cleeds_car", 0.0, None, None, 0)
b_cleeds_pt = Beta("b_cleeds_pt", 0.0, None, None, 0)
b_cleeds_phys = Beta("b_cleeds_phys", 0.0, None, None, 0)

b_female_car = Beta("b_female_car", 0.0, None, None, 0)
b_female_pt = Beta("b_female_pt", 0.0, None, None, 0)
b_female_phys = Beta("b_female_phys", 0.0, None, None, 0)

b_age_car = Beta("b_age_car", 0.0, None, None, 0)
b_age_pt = Beta("b_age_pt", 0.0, None, None, 0)
b_age_phys = Beta("b_age_phys", 0.0, None, None, 0)

b_uk_car = Beta("b_uk_car", 0.0, None, None, 0)
b_uk_pt = Beta("b_uk_pt", 0.0, None, None, 0)
b_uk_phys = Beta("b_uk_phys", 0.0, None, None, 0)

b_edu_gba_car = Beta("b_edu_gba_car", 0.0, None, None, 0)
b_edu_gba_pt = Beta("b_edu_gba_pt", 0.0, None, None, 0)
b_edu_gba_phys = Beta("b_edu_gba_phys", 0.0, None, None, 0)

b_n_emp_car = Beta("b_n_emp_car", 0.0, None, None, 0)
b_n_emp_pt = Beta("b_n_emp_pt", 0.0, None, None, 0)
b_n_emp_phys = Beta("b_n_emp_phys", 0.0, None, None, 0)

b_mar_car = Beta("b_mar_car", 0.0, None, None, 0)
b_mar_pt = Beta("b_mar_pt", 0.0, None, None, 0)
b_mar_phys = Beta("b_mar_phys", 0.0, None, None, 0)

b_hhsize_car = Beta("b_hhsize_car", 0.0, None, None, 0)
b_hhsize_pt = Beta("b_hhsize_pt", 0.0, None, None, 0)
b_hhsize_phys = Beta("b_hhsize_phys", 0.0, None, None, 0)

b_ppinc_lt30k_car = Beta("b_ppinc_lt30k_car", 0.0, None, None, 0)
b_ppinc_lt30k_pt = Beta("b_ppinc_lt30k_pt", 0.0, None, None, 0)
b_ppinc_lt30k_phys = Beta("b_ppinc_lt30k_phys", 0.0, None, None, 0)

b_hhinc_lt50k_car = Beta("b_hhinc_lt50k_car", 0.0, None, None, 0)
b_hhinc_lt50k_pt = Beta("b_hhinc_lt50k_pt", 0.0, None, None, 0)
b_hhinc_lt50k_phys = Beta("b_hhinc_lt50k_phys", 0.0, None, None, 0)

b_ft_car = Beta("b_ft_car", 0.0, None, None, 0)
b_ft_pt = Beta("b_ft_pt", 0.0, None, None, 0)
b_ft_phys = Beta("b_ft_phys", 0.0, None, None, 0)

asc_car = Beta("asc_car", 0.0, None, None, 1)
asc_bus = Beta("asc_bus", 0.0, None, None, 0)
asc_rail = Beta("asc_rail", 0.0, None, None, 0)
asc_taxi = Beta("asc_taxi", 0.0, None, None, 0)
asc_cycling = Beta("asc_cycling", 0.0, None, None, 0)
asc_walking = Beta("asc_walking", 0.0, None, None, 0)

U_1 = (
	b_cost * TOTAL_CAR_COST + b_time * CAR_TRAVEL_TIME_MIN
	+ b_dist * CAR_DISTANCE_KM
	+ b_ncar * N_CAR 
	+ b_weekend_car * WEEKEND 
	+ b_cleeds_car * CITY_LEEDS 
	+ b_female_car * FEMALE
	+ b_age_car * (AGE <= 3)
	+ b_uk_car * (BRITISH_ORIGIN)
	# + b_edu_gba_car * ((MASTERS + PHD) > 0)
	+ b_n_emp_car * NB_HH_EMPLOYED
	# + b_mar_car * (MAR == 2)
	+ b_hhsize_car * HH_SIZE
	# + b_hhinc_lt50k_car * (INCOME_HH <= 5)
	# + b_ppinc_lt30k_car * (INCOME_PERSO <= 3)
	+ b_ft_car * OCC_FULL_TIME
	+ asc_car
)

U_2 = (
	b_cost * BUS_COST_TOTAL_PER_LEG + b_time * BUS_TRAVEL_TIME_MIN
	+ b_dist * BUS_DISTANCE_KM
	+ b_IVT * BUS_IVT_TIME_MIN + b_egress * BUS_ACCESS_EGRESS_TIME_MIN
	+ b_transfers * BUS_TRANSFERS
	+ b_weekend_pt * WEEKEND 
	+ b_cleeds_pt * CITY_LEEDS
	# + b_female_pt * FEMALE
	+ b_age_pt * (AGE <= 3)
	+ b_uk_pt * (BRITISH_ORIGIN)
	+ b_edu_gba_pt * ((MASTERS + PHD) > 0)
	+ b_n_emp_pt * NB_HH_EMPLOYED
	+ b_mar_pt * (MAR == 2)
	+ b_hhsize_pt * HH_SIZE
	+ b_hhinc_lt50k_pt * (INCOME_HH <= 5)
	# + b_ppinc_lt30k_pt * (INCOME_PERSO <= 3)
	+ b_ft_pt * OCC_FULL_TIME
	+ asc_bus 
)

U_3 = (
	b_cost * RAIL_COST_TOTAL_PER_LEG + b_time * CAR_TRAVEL_TIME_MIN
	+ b_dist * RAIL_DISTANCE_KM
	+ b_IVT * RAIL_IVT_TIME_MIN + b_egress * RAIL_ACCESS_EGRESS_TIME_MIN
	+ b_transfers * RAIL_TRANSFERS
	+ b_weekend_pt * WEEKEND 
	+ b_cleeds_pt * CITY_LEEDS
	# + b_female_pt * FEMALE
	+ b_age_pt * (AGE <= 3)
	+ b_uk_pt * (BRITISH_ORIGIN)
	+ b_edu_gba_pt * ((MASTERS + PHD) > 0)
	+ b_n_emp_pt * NB_HH_EMPLOYED
	+ b_mar_pt * (MAR == 2)
	+ b_hhsize_pt * HH_SIZE
	+ b_hhinc_lt50k_pt * (INCOME_HH <= 5)
	# + b_ppinc_lt30k_pt * (INCOME_PERSO <= 3)
	+ b_ft_pt * OCC_FULL_TIME
	+ asc_rail
)

U_4 = (
	b_cost * TAXI_COST + b_time * TAXI_TRAVEL_TIME_MIN
	+ b_dist * TAXI_DISTANCE_KM
	+ asc_taxi
)

U_5 = (
	b_time * CYCLING_TRAVEL_TIME_MIN + b_dist * CYCLING_DISTANCE_KM
	+ b_nbicycle * N_BICYCLE 
	+ b_weekend_phys * WEEKEND
	+ b_cleeds_phys * CITY_LEEDS 
	+ b_female_phys * FEMALE
	+ b_age_phys * (AGE <= 3)
	# + b_uk_phys * (BRITISH_ORIGIN)
	+ b_edu_gba_phys * ((MASTERS + PHD) > 0)
	+ b_n_emp_phys * NB_HH_EMPLOYED
	+ b_mar_phys * (MAR == 2)
	# + b_hhsize_phys * HH_SIZE
	+ b_hhinc_lt50k_phys * (INCOME_HH <= 5)
	+ b_ppinc_lt30k_phys * (INCOME_PERSO <= 3)
	+ b_ft_phys * OCC_FULL_TIME
	+ asc_cycling
)

U_6 = (
	b_time * WALKING_TRAVEL_TIME_MIN + b_dist * WALKING_DISTANCE_KM
	+ b_weekend_phys * WEEKEND
	+ b_cleeds_phys * CITY_LEEDS 
	+ b_female_phys * FEMALE
	+ b_age_phys * (AGE <= 3)
	# + b_uk_phys * (BRITISH_ORIGIN)
	+ b_edu_gba_phys * ((MASTERS + PHD) > 0)
	+ b_n_emp_phys * NB_HH_EMPLOYED
	+ b_mar_phys * (MAR == 2)
	# + b_hhsize_phys * HH_SIZE
	+ b_hhinc_lt50k_phys * (INCOME_HH <= 5)
	+ b_ppinc_lt30k_phys * (INCOME_PERSO <= 3)
	+ b_ft_phys * OCC_FULL_TIME
	+ asc_walking
)

In [4]:
# Associate utility functions with the numbering of alternatives
V = {0: U_1, 1: U_2, 2: U_3, 3:U_4, 4:U_5, 5:U_6}

# Associate the availability conditions with the alternatives
av = {0: AVAIL_CAR, 1: AVAIL_BUS, 2: AVAIL_RAIL, 3: AVAIL_TAXI, 4: AVAIL_CYCLING, 5: AVAIL_WALKING}

# Definition of the model. This is the contribution of each
# observation to the log likelihood function.
logprob = loglogit(V, av, CHOICE)

# Create the Biogeme object
biogeme = bio.BIOGEME(db, logprob)
biogeme.modelName = 'logit_RP'

# Calculate the null log likelihood for reporting.
biogeme.calculateNullLoglikelihood(av)

if os.path.isdir(biogeme.modelName):
    shutil.rmtree(biogeme.modelName)
try:
    os.mkdir(biogeme.modelName)
    os.chdir(biogeme.modelName)
    
    # Estimate the parameters
    results = biogeme.estimate()
    os.chdir("..")
except:
  print("An exception occurred")

# Get the results in a pandas table
pandasResults = results.getEstimatedParameters()

In [5]:
print(results.printGeneralStatistics())
print(pandasResults)

Number of estimated parameters:	41
Sample size:	10120
Excluded observations:	0
Null log likelihood:	-12112.24
Init log likelihood:	-12112.24
Final log likelihood:	-4126.621
Likelihood ratio test for the null model:	15971.23
Rho-square for the null model:	0.659
Rho-square-bar for the null model:	0.656
Likelihood ratio test for the init. model:	15971.23
Rho-square for the init. model:	0.659
Rho-square-bar for the init. model:	0.656
Akaike Information Criterion:	8335.242
Bayesian Information Criterion:	8631.355
Final gradient norm:	3.1528E-02
Nbr of threads:	8

                        Value   Std err     t-test       p-value  \
asc_bus             -1.820452  0.401122  -4.538398  5.668312e-06   
asc_cycling         -4.067429  0.396702 -10.253112  0.000000e+00   
asc_rail            -0.695897  0.425567  -1.635222  1.020025e-01   
asc_taxi            -4.401003  0.412875 -10.659417  0.000000e+00   
asc_walking         -0.219277  0.378096  -0.579952  5.619467e-01   
b_IVT                3.1170