In [1]:
import pandas as pd
import pickle
from urllib.request import urlopen
import biogeme.database as db
import biogeme.biogeme as bio
import biogeme.models as models
import biogeme.results as res
from biogeme.expressions import Beta, log, bioDraws, MonteCarlo

The objective of this exercise is to calculate individual-level parameters of a mixture model. Consider the model presented below, where the coefficient of travel time is randomly distributed across the population (here, normally distributed).

1. Calculate the expected value of the distributed coefficient for each observation in the sample.
2. For each alternative $i$, plot the distribution of the expected value for all observations where $i$ is chosen.

The estimation of the model may take a significant amount of time. Therefore, this notebook can also be executed using previously estimated results: they are read from a file if the variable `numberOfDraws` below is set to `None`.

In [2]:
# Number of Monte-Carlo draws passed to the estimation cell below.
# Set it to an integer (e.g. numberOfDraws = 10000) to re-estimate the model;
# leave it as None to skip estimation and load previously estimated results.
numberOfDraws = None

The following cells define the base URL of the course files and a function that loads previously estimated results from a pickle file available online.

In [3]:
# Common prefix of every course asset downloaded by this notebook
# (data file and pickled estimation results).
url_root = ''.join(
    [
        'https://courses.edx.org/',
        'asset-v1:EPFLx+ChoiceModels2x+3T2021+type@asset+block@',
    ]
)

In [4]:
def get_results_from_url(file):
    """Download a pickled result file and wrap it in a ``bioResults`` object.

    :param file: name of the pickle file; it is appended to ``url_root``
        to form the full URL.
    :return: ``res.bioResults`` built from the unpickled content.
    """
    # NOTE(security): unpickling can execute arbitrary code, so this must
    # only be used with a trusted ``url_root``.
    with urlopen(f'{url_root}{file}') as response:
        raw_results = pickle.load(response)
    return res.bioResults(raw_results)

# Read the data

In [5]:
# Download the data file (read with pandas' default tab separator) and wrap it
# in a Biogeme database.
df = pd.read_table(f'{url_root}swissmetro.dat')
database = db.Database('swissmetro', df)
# Publish every database variable as a module-level name, so that the cells
# below can write PURPOSE, CHOICE, ... directly in expressions.
globals().update(database.variables)
# Rows to drop: PURPOSE is neither 1 nor 3, or CHOICE is 0.
# The product of the two comparisons acts as a logical AND, the sum as a
# logical OR, and `> 0` turns the result back into an indicator.
# NOTE(review): CHOICE == 0 presumably codes an unknown choice -- confirm
# against the data description.
exclude = ((PURPOSE != 1) * (PURPOSE != 3) + (CHOICE == 0)) > 0
database.remove(exclude)

# List of parameters to be estimated

In [6]:
# Alternative-specific constants, all starting at 0.
# ASC_SM is the only parameter created with a last argument of 1, and it does
# not appear in the estimated-parameter table shown after estimation --
# presumably that flag holds it fixed (reference alternative); confirm against
# the Biogeme `Beta` documentation.
ASC_CAR = Beta('ASC_CAR', 0, None, None, 0)
ASC_TRAIN = Beta('ASC_TRAIN', 0, None, None, 0)
ASC_SM = Beta('ASC_SM', 0, None, None, 1)
# Cost coefficient (shared by the three utility functions).
B_COST = Beta('B_COST', 0, None, None, 0)
# Random travel-time coefficient: B_TIME plus B_TIME_S times a draw declared
# as 'NORMAL', i.e. B_TIME acts as the location and B_TIME_S as the scale of
# the distributed coefficient. B_TIME_S starts at 1 rather than 0.
B_TIME = Beta('B_TIME', 0, None, None, 0)
B_TIME_S = Beta('B_TIME_S', 1, None, None, 0)
B_TIME_RND = B_TIME + B_TIME_S * bioDraws('B_TIME_RND', 'NORMAL')

# Definition of variables

In [7]:
# Costs are kept only when GA == 0 and set to zero otherwise.
# NOTE(review): GA presumably flags holders of a travel pass -- confirm
# against the data description.
SM_COST = SM_CO * (GA == 0)
TRAIN_COST = TRAIN_CO * (GA == 0)
# Availability indicators, forced to zero when SP == 0.
CAR_AV_SP = CAR_AV * (SP != 0)
TRAIN_AV_SP = TRAIN_AV * (SP != 0)
# Travel times and costs are divided by 100 before entering the utilities.
TRAIN_TT_SCALED = TRAIN_TT / 100
TRAIN_COST_SCALED = TRAIN_COST / 100
SM_TT_SCALED = SM_TT / 100
SM_COST_SCALED = SM_COST / 100
CAR_TT_SCALED = CAR_TT / 100
CAR_CO_SCALED = CAR_CO / 100
# The variables below are divided by 1000 (the *_HE ones) or are indicator
# expressions; none of them is used by the utility functions defined in the
# next cell.
TRAIN_HE_SCALED = TRAIN_HE / 1000
SM_HE_SCALED = SM_HE / 1000
LOW_INC = INCOME <= 1
BUSINESS = (PURPOSE == 3)

# Utility functions and availability condition

In [8]:
# Utility of each alternative: an alternative-specific constant (none for
# alternative 2), the random travel-time coefficient times the scaled travel
# time, and the cost coefficient times the scaled cost.
V1 = ASC_TRAIN + B_TIME_RND * TRAIN_TT_SCALED + B_COST * TRAIN_COST_SCALED
V2 = B_TIME_RND * SM_TT_SCALED + B_COST * SM_COST_SCALED
V3 = ASC_CAR + B_TIME_RND * CAR_TT_SCALED + B_COST * CAR_CO_SCALED

# Utilities indexed by alternative id: 1 = train, 2 = SM, 3 = car.
V = {1: V1, 2: V2, 3: V3}

# Availability indicators, indexed by the same alternative ids.
av = {1: TRAIN_AV_SP, 2: SM_AV, 3: CAR_AV_SP}

# Estimation

If the variable `numberOfDraws` is `None`, the results are read from the file. If not, estimation is performed. It may take a while. 

In [9]:
# Model name: used both for the downloaded pickle file and for the files
# written by Biogeme when the model is estimated locally.
name = 'swissmetroRandomCoefficients'
if numberOfDraws is None:
    # Skip the slow simulation-based estimation and load the previously
    # estimated results published with the course material.
    # NOTE(review): those results should come from the same specification as
    # the `else` branch below -- confirm.
    results = get_results_from_url(f'{name}.pickle')
else:
    # Logit probability conditional on the draw of the random coefficient.
    # The availability dictionary `av` must be passed here: with `None`,
    # every alternative is treated as available to every respondent, and the
    # availability conditions defined above would be silently ignored.
    prob = models.logit(V, av, CHOICE)
    # Integrate the conditional probability over the draws, then take the log
    # to obtain the contribution of each observation to the log likelihood.
    logprob = log(MonteCarlo(prob))
    biogeme = bio.BIOGEME(database, logprob, numberOfDraws=numberOfDraws)
    biogeme.modelName = name
    results = biogeme.estimate()
    print(f'Results saved in file {results.data.pickleFileName}')

In [10]:
# Display the estimated parameters with their (robust) standard errors,
# t-tests and p-values.
results.getEstimatedParameters()

Unnamed: 0,Value,Std err,t-test,p-value,Rob. Std err,Rob. t-test,Rob. p-value
ASC_CAR,-0.984734,0.034848,-28.258202,0.0,0.033608,-29.300813,0.0
ASC_TRAIN,-1.505986,0.046929,-32.090459,0.0,0.046043,-32.708387,0.0
B_COST,-0.662681,0.042402,-15.628515,0.0,0.048266,-13.729648,0.0
B_TIME,-0.140265,0.035929,-3.903931,9.5e-05,0.032522,-4.312954,1.6e-05
B_TIME_S,0.099318,0.077782,1.276874,0.201647,0.063169,1.572266,0.115889
