# Swissmetro test

In [12]:
import os, sys
sys.path.insert(0, os.path.abspath(".."))

import numpy as np
import pandas as pd
import pycmtensor as cmt
from pycmtensor.expressions import Beta
from pycmtensor.models import MNL
from pycmtensor.statistics import elasticities

In [2]:
# Read the tab-separated Swissmetro file, keep only the rows whose CHOICE is
# non-zero, and shift CHOICE down by one so the first alternative has index 0.
swissmetro = (
    pd.read_csv("../data/swissmetro.dat", sep="\t")
    .loc[lambda frame: frame["CHOICE"] != 0]
    .assign(CHOICE=lambda frame: frame["CHOICE"] - 1)
)

# Wrap the frame in a pycmtensor dataset with CHOICE as the choice column.
db = cmt.Data(df=swissmetro, choice="CHOICE")
# Scale every column except the identifier-like ones listed here.
db.autoscale_data(except_for=["ID", "ORIGIN", "DEST"])
# Split the dataset with split_frac=0.8 (the model statistics further down
# report 8575 training and 2144 validation samples).
db.split_db(split_frac=0.8)

In [3]:
# Model parameters. Every Beta is built from (name, 0.0, None, None, flag).
# The meaning of the positional arguments is not visible in this notebook --
# presumably (name, initial value, lower bound, upper bound, status); confirm
# against the pycmtensor Beta signature.
b_cost = Beta("b_cost", 0.0, None, None, 0)
b_time = Beta("b_time", 0.0, None, None, 0)
asc_train = Beta("asc_train", 0.0, None, None, 0)
asc_car = Beta("asc_car", 0.0, None, None, 0)
# asc_sm is the only Beta created with a trailing 1. The results table reports
# it at 0.0 with no standard error, so the flag appears to hold it fixed as the
# reference constant -- TODO confirm.
asc_sm = Beta("asc_sm", 0.0, None, None, 1)

# Linear utilities, one per alternative: the cost and time coefficients are
# shared across alternatives, and each alternative adds its own constant.
U_1 = b_cost * db["TRAIN_CO"] + b_time * db["TRAIN_TT"] + asc_train
U_2 = b_cost * db["SM_CO"] + b_time * db["SM_TT"] + asc_sm
U_3 = b_cost * db["CAR_CO"] + b_time * db["CAR_TT"] + asc_car

# Collect the utilities and the availability columns. Both lists use the same
# TRAIN / SM / CAR ordering, so position i in U pairs with position i in AV.
U = [U_1, U_2, U_3]  # utility
AV = [db["TRAIN_AV"], db["SM_AV"], db["CAR_AV"]]  # availability

# NOTE(review): locals() hands this cell's whole namespace to MNL, presumably
# so it can discover the Beta objects defined above. Renaming or reordering
# those variables may change what the model sees -- verify before refactoring.
mymodel = MNL(db, locals(), U, AV, name="MNL")
# Set the "max_steps" hyperparameter to 100 before training.
mymodel.config.set_hyperparameter("max_steps", 100)

Model training

In [4]:
# Fit the model on db. The log printed below reports the sample count (n),
# elapsed time (t) and final log likelihood (LL) of the run.
mymodel.train(db)

[2022-09-16 12:37:46] INFO: Start (n=8575)
[2022-09-16 12:37:55] INFO: End (t=00:00:08, VE=41.045%, LL=-7493.118838579969, S=43)


Print results and statistics

Beta parameters

In [6]:
# Per-parameter table: estimate, standard error, t-test and p-value, followed
# by the robust versions of the same three statistics.
print(mymodel.results.beta_statistics())

              value   std err     t-test   p-value rob. std err  rob. t-test  \
asc_car   -0.942329   0.04768 -19.763727       0.0     0.076445   -12.326843   
asc_sm          0.0         -          -         -            -            -   
asc_train -1.878122  0.050834 -36.946285       0.0     0.016206  -115.890019   
b_cost     0.020012  0.019996   1.000844  0.316902     0.009011     2.220941   
b_time    -0.531807  0.054323  -9.789711       0.0     0.000386 -1378.755748   

          rob. p-value  
asc_car            0.0  
asc_sm               -  
asc_train          0.0  
b_cost        0.026355  
b_time             0.0  


Model statistics

In [7]:
# Overall fit summary: sample counts, initial and final log likelihood,
# accuracy, likelihood ratio test, rho-square, AIC/BIC and final gradient norm.
print(mymodel.results.model_statistics())

                                          value
Number of training samples used          8575.0
Number of validation samples used        2144.0
Init. log likelihood               -8870.789689
Final log likelihood               -7493.118839
Accuracy                                 58.96%
Likelihood ratio test                 2755.3417
Rho square                             0.155304
Rho square bar                         0.154741
Akaike Information Criterion       14996.237677
Bayesian Information Criterion     15031.520709
Final gradient norm                    0.015869


Correlation matrix

In [8]:
# Pairwise correlation matrix of the parameters; the printed matrix covers
# b_cost, b_time, asc_train and asc_car (asc_sm does not appear in it).
print(mymodel.results.model_correlation_matrix())

             b_cost    b_time  asc_train   asc_car
b_cost     1.000000  0.220764   0.238790 -0.014013
b_time     0.220764  1.000000   0.756603  0.814204
asc_train  0.238790  0.756603   1.000000  0.686436
asc_car   -0.014013  0.814204   0.686436  1.000000


Benchmark

In [9]:
# Run metadata: seed, model build time, model train time and iterations/sec.
print(mymodel.results.benchmark())

                       value
Seed                     900
Model build time    00:00:06
Model train time    00:00:08
iterations per sec   406.6/s


Predictions

In [10]:
# Predictions on db: first the raw output with return_choices=False (one row
# per observation, three columns in the printed result), then the default
# predict() output summarised as distinct predicted labels and their counts.
choice_probabilities = mymodel.predict(db, return_choices=False)
print(choice_probabilities)

predicted_choices = mymodel.predict(db)
print(np.unique(predicted_choices, return_counts=True))

[[0.11709831 0.5815749  0.30132679]
 [0.1163768  0.58444756 0.29917564]
 [0.11763801 0.57752885 0.30483315]
 ...
 [0.12554841 0.54182345 0.33262814]
 [0.12458464 0.53767492 0.33774044]
 [0.12704342 0.53968389 0.3332727 ]]
(array([1, 2], dtype=int64), array([10422,   297], dtype=int64))


Elasticities

In [13]:
# Elasticities from pycmtensor.statistics. The positional 0 presumably selects
# the first alternative (U_1, train) and "TRAIN_TT" the variable the elasticity
# is taken with respect to -- TODO confirm against the function signature.
print(elasticities(mymodel, db, 0, "TRAIN_TT"))

[-0.10702093 -0.05893466 -0.08508935 ... -0.04266499 -0.05229369
 -0.0884445 ]
