# sim dataset

Biogeme Logit model


In [1]:
import shutil
import pycmtensor as cmt
from pycmtensor.expressions import Beta


In [2]:
import os
import pandas as pd

# Load the raw CSV and normalise every column header to UPPERCASE.
nb_path = os.path.abspath("")
model_average_sim = pd.read_csv("data/model_average_sim.csv").rename(
    columns=str.upper
)

# Wrap the frame in a database object, then publish every entry of
# db.variables as a notebook-level name; the utility functions further down
# refer to them directly (CAR_COST, INCOME, CHOICE, ...).
db = cmt.Database("model_average_sim", model_average_sim, choiceVar="CHOICE")
globals().update(db.variables)

# Shift CHOICE down by one; the utility dictionary below keys the four
# alternatives 0-3.
db.data["CHOICE"] -= 1

# Column groups, listed mode by mode (car, air, rail, hsr).
time_columns = [
    "CAR_TIME",
    "AIR_TIME", "AIR_ACCESS",
    "RAIL_TIME", "RAIL_ACCESS",
    "HSR_TIME", "HSR_ACCESS",
]
cost_columns = ["CAR_COST", "AIR_COST", "RAIL_COST", "HSR_COST"]
income_columns = ["INCOME"]

# One autoscale call per group, in the order time -> cost -> income.
# NOTE(review): `default` is presumably the scaling factor applied to each
# listed column -- confirm against the pycmtensor Database.autoscale docs.
for _columns, _default in (
    (time_columns, 60.0),
    (cost_columns, 100.0),
    (income_columns, 1000.0),
):
    db.autoscale(variables=_columns, default=_default, verbose=True)


In [3]:
# specify Beta parameters
def _beta(name, status=0):
    """Build a Beta called ``name`` with the arguments shared by this model.

    Every parameter in this notebook passes 0.0 as the second argument and
    None as the third and fourth; only the trailing flag varies (1 for
    asc_hsr, 0 for all others).
    NOTE(review): these are presumably starting value, lower bound, upper
    bound and a fixed/estimated status flag -- confirm against the Beta
    signature.
    """
    return Beta(name, 0.0, None, None, status)


# Generic coefficients shared by several alternatives.
b_cost = _beta("b_cost")
b_time = _beta("b_time")
b_access = _beta("b_access")

# Alternative-specific income coefficients.
b_income_rail = _beta("b_income_rail")
b_income_hsr = _beta("b_income_hsr")
b_income_car = _beta("b_income_car")

# Alternative-specific FEMALE coefficients.
b_female_car = _beta("b_female_car")
b_female_rail = _beta("b_female_rail")
b_female_air = _beta("b_female_air")

# Alternative-specific constants; asc_hsr is the only one created with flag 1.
asc_car = _beta("asc_car")
asc_air = _beta("asc_air")
asc_rail = _beta("asc_rail")
asc_hsr = _beta("asc_hsr", status=1)

# Utility of each alternative. b_cost and b_time enter all four utilities;
# b_access enters every utility except car. The income, female and asc_*
# terms use alternative-specific parameters.

# Car: cost, time, income, female and its own constant (no access term).
U_car = (
    b_cost * CAR_COST
    + b_time * CAR_TIME
    + b_income_car * INCOME
    + b_female_car * FEMALE
    + asc_car
)
# Air: cost, time, access, female and its own constant (no income term).
U_air = (
    b_cost * AIR_COST
    + b_time * AIR_TIME
    + b_access * AIR_ACCESS
    + b_female_air * FEMALE
    + asc_air
)
# Rail: cost, time, access, income, female and its own constant.
U_rail = (
    b_cost * RAIL_COST
    + b_time * RAIL_TIME
    + b_access * RAIL_ACCESS
    + b_income_rail * INCOME
    + b_female_rail * FEMALE
    + asc_rail
)
# HSR: cost, time, access, income and its own constant (no female term).
# asc_hsr is the one parameter declared with a trailing flag of 1.
U_hsr = (
    b_cost * HSR_COST
    + b_time * HSR_TIME
    + b_access * HSR_ACCESS
    + b_income_hsr * INCOME
    + asc_hsr
)

# Associate utility functions with the numbering of alternatives
# (0 = car, 1 = air, 2 = rail, 3 = hsr; CHOICE was shifted down by one in the
# data-loading cell).
V = {0: U_car, 1: U_air, 2: U_rail, 3: U_hsr}

# Associate the availability conditions with the alternatives
# (a constant 1 for each of the four alternatives).
av = {0: 1, 1: 1, 2: 1, 3: 1}


In [4]:
import biogeme.biogeme as bio
from biogeme.models import loglogit

# Definition of the model. This is the contribution of each
# observation to the log likelihood function.
# V maps alternative ids to utilities, av maps them to availability, and
# CHOICE is the chosen-alternative variable exposed from db.variables.
logprob = loglogit(V, av, CHOICE)

# Create the Biogeme object
# (db is the database built in the data-loading cell).
biogeme = bio.BIOGEME(db, logprob)
# The model name is also used below as the name of the output directory.
biogeme.modelName = "logit_sim"

# Calculate the null log likelihood for reporting.
# It is computed from the availability dictionary only; the estimation
# report shows it as "Null log likelihood".
biogeme.calculateNullLoglikelihood(av)

# Start from an empty output directory named after the model, removing any
# directory left over from a previous run.
if os.path.isdir(biogeme.modelName):
    shutil.rmtree(biogeme.modelName)

# Remember where we started so the working directory can always be restored,
# even when estimation fails part-way through.
_original_cwd = os.getcwd()
try:
    os.mkdir(biogeme.modelName)
    # Estimation runs from inside the model directory
    # (presumably so the files Biogeme writes land there -- confirm).
    os.chdir(biogeme.modelName)

    # Estimate the parameters
    results = biogeme.estimate()
except Exception as err:
    # Report what went wrong, then re-raise: later cells need `results`, so
    # continuing silently would only surface as a confusing NameError.
    print(f"An exception occurred: {err!r}")
    raise
finally:
    # Always return to the starting directory so that relative paths such as
    # "data/..." keep working when cells are re-run.
    os.chdir(_original_cwd)


In [9]:
# Summary of the estimation run (sample size, log likelihoods, fit measures).
general_statistics = results.printGeneralStatistics()
print(general_statistics)

# Estimated parameters as a table: value, standard error, t-test, p-value, ...
estimated_parameters = results.getEstimatedParameters()
print(estimated_parameters)


Number of estimated parameters:	12
Sample size:	4000
Excluded observations:	0
Null log likelihood:	-5545.177
Init log likelihood:	-5545.177
Final log likelihood:	-3717.08
Likelihood ratio test for the null model:	3656.195
Rho-square for the null model:	0.33
Rho-square-bar for the null model:	0.328
Likelihood ratio test for the init. model:	3656.195
Rho-square for the init. model:	0.33
Rho-square-bar for the init. model:	0.328
Akaike Information Criterion:	7458.16
Bayesian Information Criterion:	7533.688
Final gradient norm:	2.0456E-02
Nbr of threads:	8

                  Value   Std err     t-test       p-value  Rob. Std err  \
asc_air       -1.167891  0.216253  -5.400574  6.642803e-08      0.211729   
asc_car        0.792019  0.194766   4.066510  4.772250e-05      0.197371   
asc_rail      -1.070130  0.287826  -3.717971  2.008296e-04      0.277322   
b_access      -0.424674  0.106047  -4.004570  6.213036e-05      0.105954   
b_cost        -2.977958  0.115245 -25.840130  0.000000e+00  