# SP dataset
Biogeme Logit model

In [1]:
import shutil
import pycmtensor as cmt
from pycmtensor.expressions import Beta


In [2]:
import os
import pandas as pd

# Absolute path of the directory the notebook kernel is running in.
nb_path = os.path.abspath("")

# Read the survey CSV and normalise every column header to UPPERCASE.
model_average_sp = pd.read_csv("data/model_average_SP.csv")
model_average_sp.columns = model_average_sp.columns.str.upper()

# Wrap the frame in a pycmtensor Database (CHOICE is the choice column) and
# expose the database variables as notebook-level names (TT1, TC1, ...).
db = cmt.Database("model_average_sp", model_average_sp, choiceVar="CHOICE")
globals().update(db.variables)

# Shift the choice codes down by one (presumably 1-based -> 0-based, which
# matches the 0/1 alternative keys used for the utilities later on).
db.data["CHOICE"] -= 1

# Record the sorted set of distinct choice codes. Note these are the codes
# *after* the shift above, not the raw values found in the CSV.
db.choices = sorted(db.data["CHOICE"].unique())

# Let the database autoscale every column from TT1 through HH_INC_ABS
# (label slices with .loc include both end points).
columns_to_scale = db.data.loc[:, "TT1":"HH_INC_ABS"].columns.tolist()
db.autoscale(columns_to_scale, verbose=False)


In [3]:
# --- Beta parameters ---------------------------------------------------------
# The last Beta argument is the status flag: 0 -> the parameter is estimated,
# 1 -> it is held fixed at its starting value. The 11 parameters flagged 0
# below are the ones reported as estimated in the results.
# The flag names are lowercase on purpose: the dataset columns were injected
# into globals() under UPPERCASE names, so lowercase cannot shadow a column.
status_free = 0
status_fixed = 1

# Level-of-service coefficients, shared by both alternatives.
b_cost = Beta("b_cost", 0.0, None, None, status_free)
b_time = Beta("b_time", 0.0, None, None, status_free)
b_hw = Beta("b_hw", 0.0, None, None, status_free)
b_ch = Beta("b_ch", 0.0, None, None, status_free)

# Alternative-specific socio-economic coefficients; the alternative-2 versions
# are fixed, which makes alternative 2 the reference.
b_hh_inc1 = Beta("b_hh_inc1", 0.0, None, None, status_free)
b_hh_inc2 = Beta("b_hh_inc2", 0.0, None, None, status_fixed)
b_carav1 = Beta("b_carav1", 0.0, None, None, status_free)
b_carav2 = Beta("b_carav2", 0.0, None, None, status_fixed)

# Trip-purpose coefficients (they only enter the utility of alternative 1).
b_com = Beta("b_com", 0.0, None, None, status_free)
b_shop = Beta("b_shop", 0.0, None, None, status_free)
b_bis = Beta("b_bis", 0.0, None, None, status_free)
b_lei = Beta("b_lei", 0.0, None, None, status_free)

# Alternative-specific constants; the constant of alternative 2 is fixed.
asc_alt1 = Beta("asc_alt1", 0.0, None, None, status_free)
asc_alt2 = Beta("asc_alt2", 0.0, None, None, status_fixed)

# --- Utility functions -------------------------------------------------------
# One term per line; terms are summed left to right.
U_1 = (
    b_cost * TC1
    + b_time * TT1
    + b_hw * HW1
    + b_ch * CH1
    + b_hh_inc1 * HH_INC_ABS
    + b_carav1 * CAR_AVAILABILITY
    + b_com * COMMUTE
    + b_shop * SHOPPING
    + b_bis * BUSINESS
    + b_lei * LEISURE
    + asc_alt1
)
U_2 = (
    b_cost * TC2
    + b_time * TT2
    + b_hw * HW2
    + b_ch * CH2
    + b_hh_inc2 * HH_INC_ABS
    + b_carav2 * CAR_AVAILABILITY
    + asc_alt2
)

# Map each alternative number (as coded in CHOICE) to its utility function.
V = {0: U_1, 1: U_2}

# Availability conditions: every alternative in V is always available.
av = {alternative: 1 for alternative in V}


In [4]:
import biogeme.biogeme as bio
from biogeme.models import loglogit

# Definition of the model. This is the contribution of each
# observation to the log likelihood function.
logprob = loglogit(V, av, CHOICE)

# Create the Biogeme object
biogeme = bio.BIOGEME(db, logprob)
biogeme.modelName = 'logit_SP'

# Calculate the null log likelihood for reporting.
biogeme.calculateNullLoglikelihood(av)

# Estimation is run from inside a fresh sub-directory named after the model
# (presumably so that the files Biogeme writes end up there -- TODO confirm).
output_dir = biogeme.modelName
start_dir = os.getcwd()

# Start from an empty output directory on every run.
if os.path.isdir(output_dir):
    shutil.rmtree(output_dir)
os.mkdir(output_dir)

os.chdir(output_dir)
try:
    # Estimate the parameters. Any failure is allowed to propagate so the
    # notebook shows the real traceback instead of a later NameError on
    # `results`.
    results = biogeme.estimate()
finally:
    # Always return to the starting directory, even when estimation fails or
    # is interrupted; otherwise every relative path used by later cells (or by
    # a re-run of this notebook) would resolve inside the model directory.
    os.chdir(start_dir)


In [5]:
# Overall fit statistics of the estimated model, printed as text.
general_statistics = results.printGeneralStatistics()
print(general_statistics)

# Estimated parameter values and their statistics as a pandas table.
estimated_parameters = results.getEstimatedParameters()
print(estimated_parameters)


Number of estimated parameters:	11
Sample size:	2790
Excluded observations:	0
Null log likelihood:	-1933.881
Init log likelihood:	-1933.881
Final log likelihood:	-1324.212
Likelihood ratio test for the null model:	1219.337
Rho-square for the null model:	0.315
Rho-square-bar for the null model:	0.31
Likelihood ratio test for the init. model:	1219.337
Rho-square for the init. model:	0.315
Rho-square-bar for the init. model:	0.31
Akaike Information Criterion:	2670.425
Bayesian Information Criterion:	2735.697
Final gradient norm:	7.1856E-03
Nbr of threads:	8

               Value   Std err     t-test       p-value  Rob. Std err  \
asc_alt1    0.088713  0.091370   0.970926  3.315852e-01      0.093049   
b_bis      -0.113828  0.145422  -0.782745  4.337766e-01      0.146682   
b_carav1   -0.280896  0.103336  -2.718273  6.562376e-03      0.102303   
b_ch       -1.146564  0.048602 -23.591000  0.000000e+00      0.050669   
b_com       0.121408  0.089838   1.351410  1.765642e-01      0.087093   
