In [1]:
import pandas as pd

import biogeme.biogeme_logging as blog
from biogeme.biogeme import BIOGEME
from biogeme.expressions import Beta, Variable
import biogeme.database as db
from biogeme.bayesian_estimation import BayesianResults, get_pandas_estimated_parameters
from biogeme.models import lognested
from biogeme.nests import NestsForNestedLogit, OneNestForNestedLogit


# Create a screen logger at INFO level so that Biogeme's progress messages
# show up in the cell outputs of this notebook.
logger = blog.get_screen_logger(level=blog.INFO)

  from tqdm.autonotebook import tqdm


In [2]:
# Load the Swissmetro data set (tab-separated text file) into a DataFrame and
# wrap it in a Biogeme database.
df = pd.read_table("http://transp-or.epfl.ch/data/swissmetro.dat", sep='\t')

database = db.Database('swissmetro', df)

# One Biogeme variable per column of the data file. Not all of them are used
# by the model specified in this section; they are kept so that any column can
# be referenced by name in a specification.
GROUP = Variable('GROUP')
SURVEY = Variable('SURVEY')
SP = Variable('SP')
ID = Variable('ID')
PURPOSE = Variable('PURPOSE')
FIRST = Variable('FIRST')
TICKET = Variable('TICKET')
WHO = Variable('WHO')
LUGGAGE = Variable('LUGGAGE')
AGE = Variable('AGE')
MALE = Variable('MALE')
INCOME = Variable('INCOME')
GA = Variable('GA')
ORIGIN = Variable('ORIGIN')
DEST = Variable('DEST')
TRAIN_AV = Variable('TRAIN_AV')
CAR_AV = Variable('CAR_AV')
SM_AV = Variable('SM_AV')
TRAIN_TT = Variable('TRAIN_TT')
TRAIN_CO = Variable('TRAIN_CO')
TRAIN_HE = Variable('TRAIN_HE')
SM_TT = Variable('SM_TT')
SM_CO = Variable('SM_CO')
SM_HE = Variable('SM_HE')
SM_SEATS = Variable('SM_SEATS')
CAR_TT = Variable('CAR_TT')
CAR_CO = Variable('CAR_CO')
CHOICE = Variable('CHOICE')

# Exclusion criterion: drop rows whose PURPOSE is neither 1 nor 3, and rows
# with CHOICE == 0. It is written twice on purpose:
#   * `exclude` is the Biogeme expression handed to `database.remove`;
#   * `excluded_rows` is the same condition as a pandas boolean mask, computed
#     once and used only to report how many rows are affected.
exclude = ((PURPOSE != 1) * (PURPOSE != 3) + (CHOICE == 0)) > 0
excluded_rows = ((df.PURPOSE != 1) & (df.PURPOSE != 3)) | (df.CHOICE == 0)
print(f"Removing {excluded_rows.sum()} rows from the database based on the exclusion criteria.")
database.remove(exclude)

# Costs are set to zero when GA != 0 (multiplication by the GA == 0 indicator).
SM_COST = database.define_variable('SM_COST', SM_CO * (GA == 0))
TRAIN_COST = database.define_variable('TRAIN_COST', TRAIN_CO * (GA == 0))

# Car and train availabilities are switched off when SP == 0.
CAR_AV_SP = database.define_variable('CAR_AV_SP', CAR_AV * (SP != 0))
TRAIN_AV_SP = database.define_variable('TRAIN_AV_SP', TRAIN_AV * (SP != 0))

# Times and costs are divided by 100 before entering the utility functions.
TRAIN_TT_SCALED = database.define_variable('TRAIN_TT_SCALED', TRAIN_TT / 100)
TRAIN_COST_SCALED = database.define_variable('TRAIN_COST_SCALED', TRAIN_COST / 100)
SM_TT_SCALED = database.define_variable('SM_TT_SCALED', SM_TT / 100)
SM_COST_SCALED = database.define_variable('SM_COST_SCALED', SM_COST / 100)
CAR_TT_SCALED = database.define_variable('CAR_TT_SCALED', CAR_TT / 100)
CAR_CO_SCALED = database.define_variable('CAR_CO_SCALED', CAR_CO / 100)

# Declare the data as panel data keyed on the ID column.
# NOTE(review): the model below passes `lognested(...)` straight to BIOGEME
# without any panel-specific expression — confirm this declaration is needed.
database.panel('ID')

Removing 3960 rows from the database based on the exclusion criteria.


In [3]:
# Alternative-specific constants. The last argument of asc_sm is 1 instead of
# 0: it is not estimated (it is absent from the estimated-parameter table),
# which makes Swissmetro the reference alternative.
asc_car = Beta('asc_car', 0, None, None, 0)
asc_train = Beta('asc_train', 0, None, None, 0)
asc_sm = Beta('asc_sm', 0, None, None, 1)

# Time and cost coefficients, shared by the three alternatives.
# Presumably the 3rd/4th arguments are (lower bound, upper bound), i.e. both
# coefficients are constrained to be non-positive — confirm against Beta's API.
b_time = Beta('b_time', 0, None, 0, 0)
b_cost = Beta('b_cost', 0, None, 0, 0)

# Nest parameter: starts at 1, presumably bounded to [1, 3] — TODO confirm.
nest_parameter = Beta('nest_parameter', 1, 1, 3, 0)


def _linear_utility(asc, travel_time, travel_cost):
    """Return asc + b_time * travel_time + b_cost * travel_cost."""
    return asc + b_time * travel_time + b_cost * travel_cost


v_train = _linear_utility(asc_train, TRAIN_TT_SCALED, TRAIN_COST_SCALED)
v_swissmetro = _linear_utility(asc_sm, SM_TT_SCALED, SM_COST_SCALED)
v_car = _linear_utility(asc_car, CAR_TT_SCALED, CAR_CO_SCALED)

# Utility and availability of each alternative, keyed by its numeric id
# (1: train, 2: Swissmetro, 3: car).
v = {
    1: v_train,
    2: v_swissmetro,
    3: v_car,
}
av = {
    1: TRAIN_AV_SP,
    2: SM_AV,
    3: CAR_AV_SP,
}

In [4]:
# Single nest grouping alternatives 1 and 2 (train and Swissmetro in `v`).
# Alternative 3 (car) is not listed in any nest.
# NOTE(review): the nest is named 'existing' but contains Swissmetro rather
# than car — confirm that [1, 2] is the intended membership.
existing = OneNestForNestedLogit(
    nest_param=nest_parameter,
    list_of_alternatives=[1, 2],
    name='existing',
)

# The choice set is the list of alternative ids used as keys of `v`.
nests = NestsForNestedLogit(
    choice_set=list(v.keys()),
    tuple_of_nests=(existing,),
)

The following elements do not appear in any nest and are assumed each to be alone in a separate nest: {3}. If it is not the intention, check the assignment of alternatives to nests. 


In [5]:
# Nested logit log-probability of the observed CHOICE, built from the
# utilities `v`, the availabilities `av` and the nest structure `nests`.
log_probability = lognested(v, av, nests, CHOICE)

# Estimation object. The model name shows up in the names of the files
# reported in the estimation log (b09_nested.html, b09_nested.nc).
the_biogeme = BIOGEME(database, log_probability)
the_biogeme.model_name = 'b09_nested'

Biogeme parameters read from biogeme.toml. 


In [6]:
# Run the Bayesian estimation. This is the expensive cell: the recorded run
# took close to three minutes (NUTS, 4 chains of 2000 draws) and wrote
# b09_nested.html and b09_nested.nc.
# NOTE(review): no random seed is set in the visible cells, so posterior draws
# may differ between runs — check whether biogeme.toml fixes one.
results = the_biogeme.bayesian_estimation()

*** Initial values of the parameters are obtained from the file __b09_nested.iter 
Cannot read file __b09_nested.iter. Statement is ignored. 
Starting values for the algorithm: {} 
macOS / Linux (bash/zsh):
  export XLA_FLAGS="--xla_force_host_platform_device_count=<number_of_cores>"

Jupyter (new cell, before `import jax`):
  %env XLA_FLAGS="--xla_force_host_platform_device_count=<number_of_cores>"

Detected CPU devices: 1 | System logical cores: 16
Current XLA_FLAGS: (none set)
Platform: Linux 6.6.87.2-microsoft-standard-WSL2 | Python: 3.12.3
 
Auto sampling: JAX available (devices=1, platforms=cpu) → numpyro/vectorized 
  pm.sample_prior_predictive(
Sampling: [asc_car, asc_train, b_cost, b_time, nest_parameter]
sample: 100%|██████████| 4000/4000 [02:14<00:00, 29.68it/s]


posterior_predictive_loglike finished in 44 ms
waic_res finished in 106 ms
waic finished in 106 ms


See http://arxiv.org/abs/1507.04544 for details


loo_res finished in 748 ms
loo finished in 748 ms


Diagnostics computation took 3.0 seconds (cached). 
File b09_nested.html has been generated. 
Save simulation results on b09_nested.nc 
Saved Bayesian results (posterior + metadata) to b09_nested.nc 


In [7]:
# Print the summary of the run: sampler settings, run time, and the
# log-likelihood, WAIC and LOO statistics.
print(results.short_summary())

Sample size                                              6768
Sampler                                                  NUTS
Number of chains                                         4
Number of draws per chain                                2000
Total number of draws                                    8000
Acceptance rate target                                   0.9
Run time                                                 0:02:49.841878
Posterior predictive log-likelihood (sum of log mean p)  -5313.70
Expected log-likelihood E[log L(Y|θ)]                    -5333.81
Best-draw log-likelihood (posterior upper bound)         -5331.31
WAIC (Widely Applicable Information Criterion)           -5351.00
WAIC Standard Error                                      134.66
Effective number of parameters (p_WAIC)                  37.31
LOO (Leave-One-Out Cross-Validation)                     -5352.27
LOO Standard Error                                       135.23
Effective number of parameters (p_LOO) 

In [8]:
# One row per estimated parameter, with its posterior summary (mean, median,
# mode, HDI bounds) and its convergence diagnostics (R hat, ESS).
pandas_results = get_pandas_estimated_parameters(estimation_results=results)
display(pandas_results)

Unnamed: 0,Name,Value (mean),Value (median),Value (mode),std err.,z-value,p-value,HDI low,HDI high,R hat,ESS (bulk),ESS (tail)
0,asc_train,-0.631844,-0.6347,-0.639709,0.072517,-8.713083,0.0,-0.758867,-0.489264,1.000103,3519.067763,3975.355658
1,asc_car,-0.17167,-0.171064,-0.166446,0.04507,-3.808947,0.0005,-0.261486,-0.09173,0.999946,3814.859329,3995.221592
2,b_time,-1.260955,-1.260555,-1.258069,0.058119,-21.696104,0.0,-1.367367,-1.150115,1.000477,3800.530231,4251.918096
3,b_cost,-1.074471,-1.074233,-1.076399,0.052557,-20.443898,0.0,-1.178217,-0.981657,1.00004,4807.083969,4246.462623
4,nest_parameter,1.062372,1.05111,1.019655,0.049209,21.588985,0.0,1.000012,1.152205,1.000565,3149.824343,3013.358342


In [9]:
# Correlation between alternatives implied by the nest structure, evaluated at
# the parameter values returned by the estimation results.
alternative_labels = {1: 'Train', 2: 'Swissmetro', 3: 'Car'}
estimated_betas = results.get_beta_values()
corr = nests.correlation(
    parameters=estimated_betas,
    alternatives_names=alternative_labels,
)
print(corr)

               Train  Swissmetro  Car
Train       1.000000    0.113973  0.0
Swissmetro  0.113973    1.000000  0.0
Car         0.000000    0.000000  1.0
