In [1]:
import pandas as pd
import biogeme.database as db
import biogeme.biogeme as bio
import biogeme.models as models
from biogeme.expressions import Beta

The objective of this series of exercises is to reproduce the results presented in the video. The starting point is the logit model presented below. 

1. Load the results of the model where the travel time coefficient is normally distributed within the population.

2. Load the results of the model where the travel time coefficient is log normally distributed within the population.

3. Load the results of the model with two latent classes: one where the travel time coefficient is constrained to be zero, and one where the travel time coefficient is estimated.

4. Load the results of the latent class model with the class membership model.
  
5. Estimate the same latent class model as above, where the travel time coefficient is normally distributed within the class. 

6. Compare the results.

**Tip:**<div class="alert alert-block alert-info">The estimation of mixture models may take a significant amount of time. It is therefore recommended to start investigating the models with a low number of draws, keeping in mind that the results will not be correct. Once the code is running properly, set the number of draws to 1000, say, and get a cup of coffee while the notebook is running to obtain more accurate results.</div>

# Data

In [2]:
# Address of the Swissmetro data file, assembled from its three URL fragments.
url_data = ''.join(
    (
        'https://courses.edx.org/',
        'asset-v1:EPFLx+ChoiceModels2x+3T2021+type@asset+block@',
        'swissmetro.dat',
    )
)

In [3]:
# Read the tab-separated data file into a DataFrame, then wrap that frame in
# a Biogeme database labelled 'swissmetro'.
# Note: `pandas` below is the DataFrame itself; the pandas module is
# imported under the alias `pd`.
pandas = pd.read_csv(url_data, sep='\t')
database = db.Database('swissmetro', pandas)

The following statement allows you to use the names of the variables as Python variables.

In [4]:
# Copy every entry of `database.variables` into the notebook's global
# namespace, so that the cells below can refer to the variables by bare name
# (PURPOSE, CHOICE, TRAIN_TT, ...).
# NOTE(review): presumably `database.variables` maps each column name to a
# Biogeme expression; confirm against the Biogeme Database documentation.
globals().update(database.variables)

We exclude the observations where `PURPOSE` is neither 1 nor 3, as well as those where `CHOICE` is 0.

In [5]:
# An observation is flagged for removal when PURPOSE is neither 1 nor 3,
# or when CHOICE is 0. The product and the sum combine the two comparison
# results (assumed to evaluate to 0/1) as a logical AND and OR respectively.
purpose_not_1_nor_3 = (PURPOSE != 1) * (PURPOSE != 3)
choice_is_zero = CHOICE == 0
exclude = (purpose_not_1_nor_3 + choice_is_zero) > 0

# Drop the flagged observations from the database.
database.remove(exclude)

# Parameters

In [6]:
# Coefficients of the model. All five are created with the same arguments
# after the name: 0, None, None, 0.
# NOTE(review): in Biogeme's Beta these are presumably the starting value,
# the lower and upper bounds, and the fixed/estimated flag; confirm against
# the installed Biogeme version.
ASC_CAR, ASC_TRAIN, B_TIME, B_COST, B_FR = (
    Beta(name, 0, None, None, 0)
    for name in ('ASC_CAR', 'ASC_TRAIN', 'B_TIME', 'B_COST', 'B_FR')
)

# Variables

In [7]:
# Train and Swissmetro costs: the raw cost is kept only when GA == 0,
# and is zero otherwise.
TRAIN_COST = TRAIN_CO * (GA == 0)
SM_COST = SM_CO * (GA == 0)

# Train and car availabilities, set to zero when SP == 0.
TRAIN_AV_SP = TRAIN_AV * (SP != 0)
CAR_AV_SP = CAR_AV * (SP != 0)

# Rescaled attributes: travel times and costs are divided by 100 ...
TRAIN_TT_SCALED = TRAIN_TT / 100
SM_TT_SCALED = SM_TT / 100
CAR_TT_SCALED = CAR_TT / 100
TRAIN_COST_SCALED = TRAIN_COST / 100
SM_COST_SCALED = SM_COST / 100
CAR_CO_SCALED = CAR_CO / 100

# ... and the HE attributes by 1000.
TRAIN_HE_SCALED = TRAIN_HE / 1000
SM_HE_SCALED = SM_HE / 1000

# Indicator of INCOME <= 1.
LOW_INC = INCOME <= 1

# Availability conditions

In [8]:
# Availability condition of each alternative, keyed by the same numeric
# identifiers (1, 2, 3) as the utility dictionary.
av = {}
av[1] = TRAIN_AV_SP
av[2] = SM_AV
av[3] = CAR_AV_SP

# Logit model

## Utility functions

In [9]:
# Utility of alternative 1: constant, time, cost and HE terms,
# all using the train attributes.
V1 = (
    ASC_TRAIN
    + B_TIME * TRAIN_TT_SCALED
    + B_COST * TRAIN_COST_SCALED
    + B_FR * TRAIN_HE_SCALED
)

# Utility of alternative 2: no constant, Swissmetro attributes.
V2 = (
    B_TIME * SM_TT_SCALED
    + B_COST * SM_COST_SCALED
    + B_FR * SM_HE_SCALED
)

# Utility of alternative 3: constant, car time and cost only.
V3 = (
    ASC_CAR
    + B_TIME * CAR_TT_SCALED
    + B_COST * CAR_CO_SCALED
)

# Map each alternative identifier to its utility function.
V = {1: V1, 2: V2, 3: V3}

## Model

In [10]:
# Contribution of one observation to the log likelihood: the log of the
# logit model built from the utilities V and the availabilities av,
# for the alternative recorded in CHOICE.
logprob = models.loglogit(V, av, CHOICE)

## Estimation

In [11]:
# Create the Biogeme object from the database and the log-probability
# expression, name the model '01logit', and run the estimation.
# NOTE(review): the recorded output reports identical initial and final log
# likelihoods (-5315.386). Presumably the estimation started from previously
# saved parameter values rather than from the zeros given in the Beta
# definitions; confirm by re-running in a clean working directory.
biogeme = bio.BIOGEME(database, logprob)
biogeme.modelName = '01logit'
results_logit = biogeme.estimate()

## Results

In [12]:
# Store the general statistics of the estimation in `stats_logit`, then
# print the formatted summary returned by printGeneralStatistics().
stats_logit = results_logit.getGeneralStatistics()
print(results_logit.printGeneralStatistics())

Number of estimated parameters:	5
Sample size:	6768
Excluded observations:	3960
Init log likelihood:	-5315.386
Final log likelihood:	-5315.386
Likelihood ratio test for the init. model:	-0
Rho-square for the init. model:	0
Rho-square-bar for the init. model:	-0.000941
Akaike Information Criterion:	10640.77
Bayesian Information Criterion:	10674.87
Final gradient norm:	8.1247E-03
Nbr of threads:	16



In [13]:
# Retrieve the table of estimated parameters. Leaving the name as the last
# expression of the cell makes the notebook display it.
param_logit = results_logit.getEstimatedParameters()
param_logit

Unnamed: 0,Value,Std err,t-test,p-value,Rob. Std err,Rob. t-test,Rob. p-value
ASC_CAR,-0.261838,0.047307,-5.534875,3.114498e-08,0.061496,-4.257798,2.064506e-05
ASC_TRAIN,-0.451015,0.069678,-6.472835,9.618062e-11,0.09324,-4.837114,1.31738e-06
B_COST,-1.084663,0.051826,-20.929115,0.0,0.068235,-15.895902,0.0
B_FR,-5.35324,0.963865,-5.553932,2.793141e-08,0.983023,-5.44569,5.160495e-08
B_TIME,-1.276782,0.056938,-22.424014,0.0,0.104436,-12.225485,0.0
