In [1]:
import pandas as pd
import pickle
from urllib.request import urlopen
import biogeme.database as db
import biogeme.biogeme as bio
import biogeme.results as res
from biogeme.expressions import (
    Beta,
    log,
    DefineVariable,
)
import biogeme.messaging as msg
from biogeme import models


Consider the logit model introduced below. The data are stated-preference data, where each individual has responded to several questions. Because the responses of a given individual share the same unobserved factors, this creates serial correlation. You are asked to modify the specification in order to capture this serial correlation, and to estimate the parameters of the new model.

Note that the identifier for each individual is found in column ``ID``. 

In [2]:
# Common prefix of the course asset URLs. A file name is appended directly to
# this prefix to obtain the address of a data file.
url_root = ''.join(
    (
        'https://courses.edx.org/',
        'asset-v1:EPFLx+ChoiceModels2x+3T2021+type@asset+block@',
    )
)

# Read the data

In [3]:
# Fetch the Swissmetro data file located under ``url_root`` and wrap the
# resulting data frame in a Biogeme database named 'swissmetro'.
theData = pd.read_table(url_root + 'swissmetro.dat')
database = db.Database('swissmetro', theData)

# Copy every entry of ``database.variables`` into the notebook namespace, so
# that later cells can refer to them by bare name (e.g. ``PURPOSE``,
# ``CHOICE``) when building expressions.
globals().update(database.variables)


Exclude the observations for which ``PURPOSE`` is neither 1 nor 3, as well as the observations for which ``CHOICE`` is 0.

In [4]:
# The comparisons evaluate to 0/1 indicators, so ``*`` acts as a logical AND
# and ``+`` as a logical OR once the sum is tested against 0.
purpose_not_1_nor_3 = (PURPOSE != 1) * (PURPOSE != 3)
choice_is_zero = (CHOICE == 0)

# An observation is dropped as soon as either condition holds.
exclude = (purpose_not_1_nor_3 + choice_is_zero) > 0
database.remove(exclude)

# Model specification

Parameters to be estimated.

In [5]:
def _free_parameter(name):
    """Return a ``Beta`` called ``name`` with the settings shared by all parameters.

    Each parameter starts at 0.0, has neither a lower nor an upper bound
    (``None``), and is given status 0.
    """
    return Beta(name, 0.0, None, None, 0)


# Alternative specific constants.
ASC_CAR = _free_parameter('ASC_CAR')
ASC_SM = _free_parameter('ASC_SM')

# Coefficients of the explanatory variables.
BETA_CAR_COST = _free_parameter('BETA_CAR_COST')
BETA_HE = _free_parameter('BETA_HE')
BETA_SM_COST = _free_parameter('BETA_SM_COST')
BETA_TIME = _free_parameter('BETA_TIME')
BETA_TRAIN_COST = _free_parameter('BETA_TRAIN_COST')

Define new variables. In order to speed up the estimation process, the new variables are stored in new columns of the database, using the expression ``DefineVariable``. This avoids recalculating them at each iteration of the algorithm.

In [6]:
def _define(name, expression):
    """Register ``expression`` in ``database`` under ``name``.

    Thin wrapper around ``DefineVariable`` that always targets the notebook's
    ``database``; it returns whatever ``DefineVariable`` returns.
    """
    return DefineVariable(name, expression, database)


# Availabilities of car and train, multiplied by the indicator (SP != 0).
CAR_AV_SP = _define('CAR_AV_SP', CAR_AV * (SP != 0))
TRAIN_AV_SP = _define('TRAIN_AV_SP', TRAIN_AV * (SP != 0))

# Costs multiplied by the indicator (GA == 0), i.e. set to zero when GA != 0.
SM_COST = _define('SM_COST', SM_CO * (GA == 0))
TRAIN_COST = _define('TRAIN_COST', TRAIN_CO * (GA == 0))

# Attributes divided by 100. The two cost columns defined just above must
# already exist at this point, since their scaled versions are built from them.
TRAIN_TT_SCALED = _define('TRAIN_TT_SCALED', TRAIN_TT / 100.0)
TRAIN_COST_SCALED = _define('TRAIN_COST_SCALED', TRAIN_COST / 100)
SM_TT_SCALED = _define('SM_TT_SCALED', SM_TT / 100.0)
SM_COST_SCALED = _define('SM_COST_SCALED', SM_COST / 100)
CAR_TT_SCALED = _define('CAR_TT_SCALED', CAR_TT / 100)
CAR_CO_SCALED = _define('CAR_CO_SCALED', CAR_CO / 100)
TRAIN_HE_SCALED = _define('TRAIN_HE_SCALED', TRAIN_HE / 100)
SM_HE_SCALED = _define('SM_HE_SCALED', SM_HE / 100)


Utility functions

In [7]:
# Car: constant, travel time and cost.
V_Car_SP = ASC_CAR + BETA_TIME * CAR_TT_SCALED + BETA_CAR_COST * CAR_CO_SCALED

# Train (SBB): no constant; travel time, cost and headway.
V_SBB_SP = (
    BETA_TIME * TRAIN_TT_SCALED + BETA_TRAIN_COST * TRAIN_COST_SCALED
    + BETA_HE * TRAIN_HE_SCALED
)

# Swissmetro: constant, travel time, cost and headway.
V_SM_SP = (
    ASC_SM + BETA_TIME * SM_TT_SCALED
    + BETA_SM_COST * SM_COST_SCALED + BETA_HE * SM_HE_SCALED
)

# Utilities and availability conditions, keyed by alternative number:
# 3 is car, 1 is train and 2 is Swissmetro.
V = {
    3: V_Car_SP,
    1: V_SBB_SP,
    2: V_SM_SP,
}
av = {
    3: CAR_AV_SP,
    1: TRAIN_AV_SP,
    2: SM_AV,
}


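The choice probability for each observation is given by a logit model. Note that the specification below does not yet include an agent effect; once one is added to capture the serial correlation, this probability becomes conditional on the agent effect.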

In [8]:
# Log of the logit choice probability, built from the utilities ``V``, the
# availability conditions ``av`` and the observed ``CHOICE``.
# NOTE(review): none of the utilities in ``V`` contains an individual-specific
# term, so this expression does not capture serial correlation.
logprob = models.loglogit(V, av, CHOICE)

In [9]:
# Build the estimation object from the database and the log-probability
# expression.
biogeme = bio.BIOGEME(database, logprob)
# Label of the model; presumably used by Biogeme to name its outputs — TODO confirm.
biogeme.modelName = 'logit'
# Run the estimation. The returned object is queried in the following cells
# for the general statistics and the parameter estimates.
results = biogeme.estimate()


In [10]:
# printGeneralStatistics() hands the summary back instead of writing it out
# itself, hence the explicit print.
general_statistics = results.printGeneralStatistics()
print(general_statistics)

Number of estimated parameters:	7
Sample size:	6768
Excluded observations:	3960
Init log likelihood:	-6964.663
Final log likelihood:	-5068.559
Likelihood ratio test for the init. model:	3792.209
Rho-square for the init. model:	0.272
Rho-square-bar for the init. model:	0.271
Akaike Information Criterion:	10151.12
Bayesian Information Criterion:	10198.86
Final gradient norm:	1.2900E-02
Nbr of threads:	16



In [11]:
# Table of estimates: value, standard error, t-test and p-value for each
# parameter, together with their robust counterparts. The bare name on the
# last line lets the notebook display the table.
estimated_parameters = results.getEstimatedParameters()
estimated_parameters

Unnamed: 0,Value,Std err,t-test,p-value,Rob. Std err,Rob. t-test,Rob. p-value
ASC_CAR,-0.97121,0.114883,-8.453894,0.0,0.134449,-7.223614,5.062617e-13
ASC_SM,-0.44407,0.080233,-5.534775,3.116279e-08,0.102341,-4.339139,1.43042e-05
BETA_CAR_COST,-0.948548,0.090329,-10.501002,0.0,0.115599,-8.205529,2.220446e-16
BETA_HE,-0.542112,0.100386,-5.400258,6.654498e-08,0.101233,-5.355091,8.551318e-08
BETA_SM_COST,-1.089252,0.052616,-20.702053,0.0,0.070339,-15.485737,0.0
BETA_TIME,-1.112112,0.060944,-18.247961,0.0,0.120101,-9.259816,0.0
BETA_TRAIN_COST,-2.932868,0.11253,-26.06291,0.0,0.169339,-17.319523,0.0
