In [1]:
import pandas as pd
import numpy as np
from IPython.display import display

# Notebook-wide pandas display settings
pd.set_option("display.html.use_mathjax", False)  # render cells as plain HTML, no MathJax
pd.set_option("display.precision", 3)             # three decimals in printed floats
pd.set_option('display.max_columns', None)        # never truncate wide frames

# Folder that holds the survey files (kept as a plain string with trailing slash,
# because paths are built below by string concatenation).
data_dir = "data/"

# Survey file names, relative to ``data_dir``.
# NOTE(review): the last entry has no ".csv" suffix unlike the other two --
# confirm whether that is intentional before reading it.
survey_files = [
    "model_average_SP.csv",
    "model_average_RP.csv",
    "model_average_sim",
]

# Load the first survey file, upper-case its column names and append the two
# availability columns (constant 1 for every row).
df_sp = (
    pd.read_csv(f"{data_dir}{survey_files[0]}")
    .rename(columns=str.upper)
    .assign(ALT1_AV=1, ALT2_AV=1)
)
display(df_sp.head())

Unnamed: 0,ID,CHOICE,TT1,TC1,HW1,CH1,TT2,TC2,HW2,CH2,HH_INC_ABS,CAR_AVAILABILITY,COMMUTE,SHOPPING,BUSINESS,LEISURE,ALT1_AV,ALT2_AV
0,1,2,23,3,15,2,31,2,15,1,90000,1,0,0,0,1,1,1
1,1,2,17,3,60,2,24,2,60,0,90000,1,0,0,0,1,1,1
2,1,1,18,3,15,0,26,2,60,0,90000,1,0,0,0,1,1,1
3,1,2,28,3,15,2,35,2,15,1,90000,1,0,0,0,1,1,1
4,1,2,23,3,60,2,31,2,30,0,90000,1,0,0,0,1,1,1


In [2]:
import pycmtensor as cmt
import pandas as pd
# Read the SP survey, upper-case the headers and add constant availability
# columns for both alternatives.
model_average_sp = (
    pd.read_csv("data/model_average_SP.csv")
    .rename(columns=str.upper)
    .assign(ALT1_AV=1, ALT2_AV=1)
)

# Wrap the frame in a PyCMTensor database, declaring CHOICE as the choice column.
db = cmt.Database("model_average_sp", model_average_sp, choiceVar='CHOICE')

# --- post-processing on the database's own frame ---
db.data.columns = db.data.columns.str.upper()

# Shift the choice labels down by one (the sample rows show values 1 and 2,
# which become 0 and 1).
db.data['CHOICE'] = db.data['CHOICE'] - 1

# Every column from TT1 through HH_INC_ABS (label slice, both ends included)
# is handed to autoscale.
c = db.data.loc[:, 'TT1':'HH_INC_ABS'].columns.to_list()
db.autoscale(variables=c, verbose=True)

scaling TT1 by 100.0
scaling TC1 by 100.0
scaling HW1 by 10.0
scaling CH1 by 1.0
scaling TT2 by 100.0
scaling TC2 by 100.0
scaling HW2 by 10.0
scaling CH2 by 1.0
scaling HH_INC_ABS by 100000.0


In [3]:
from pycmtensor.pycmtensor import PyCMTensorModel, Beta, Weights
from pycmtensor.functions import logit, neg_loglikelihood
from pycmtensor.optimizers import Adam
from pycmtensor.results import Results
import aesara.tensor as aet
import dill as pickle


class ResLogitLayer:
    """Two-matrix sigmoid block with a skip connection.

    The input is projected through ``w_in`` and ``w_out`` (a sigmoid after
    each projection) and the result is added back onto the input.

    Args:
        input: a symbolic expression, or a list/tuple of expressions which is
            concatenated along axis 1 before use.
        w_in: first projection; a ``Weights`` object (called to obtain its
            value) or anything ``aet.dot`` accepts directly.
        w_out: second projection, handled the same way as ``w_in``.

    Attributes:
        input: the (possibly concatenated) input expression.
        weights: ``[w_in, w_out]`` after any ``Weights`` objects were called.
        output: ``sigmoid(sigmoid(input . w_in) . w_out) + input``.
    """

    def __init__(self, input, w_in, w_out):
        # Several expressions are merged into one along axis 1.
        if isinstance(input, (list, tuple)):
            input = aet.concatenate(input, axis=1)

        # Weights wrappers are called to get the underlying value; anything
        # else is used untouched.
        w_in = w_in() if isinstance(w_in, Weights) else w_in
        w_out = w_out() if isinstance(w_out, Weights) else w_out

        hidden = aet.sigmoid(aet.dot(input, w_in))
        correction = aet.sigmoid(aet.dot(hidden, w_out))

        self.input = input
        self.weights = [w_in, w_out]
        # Skip connection: the transformed signal is added to the raw input.
        self.output = correction + input
        

class MNLmodel(PyCMTensorModel):
    """Two-alternative logit model whose utilities pass through a ResLogitLayer.

    Builds the symbolic graph used for training:

    * ``self.y``            -- the CHOICE variable from the database,
    * ``self.p_y_given_x``  -- ``logit`` of the residual layer's output, using
      the availability variables of alternative 1 and alternative 2,
    * ``self.cost``         -- negative log-likelihood of ``p_y_given_x`` vs ``y``,
    * ``self.pred``         -- argmax of ``p_y_given_x`` along axis 1.

    Args:
        db: database object; ``db.inputs()`` must yield named symbolic
            variables for every column referenced in the utilities below.
    """

    def __init__(self, db):
        super().__init__()
        self.name = "myModel"
        self.inputs = db.inputs()  # keep track of inputs

        # Publish every database variable as a module-level global so the
        # utility expressions below can refer to them by column name
        # (TC1, TT1, CHOICE, ALT1_AV, ...).
        for var in self.inputs:
            globals().update({var.name: var})

        # Model parameters.
        # NOTE(review): the last positional argument looks like a status flag
        # where 1 holds the parameter at its initial value (the results table
        # prints no standard error for those) -- confirm against the Beta API.
        b_cost = Beta("b_cost", 0.0, None, None, 0)
        b_time = Beta("b_time", 0.0, None, None, 0)
        b_hw = Beta("b_hw", 0.0, None, None, 0)
        b_ch = Beta("b_ch", 0.0, None, None, 0)
        b_hh_inc1 = Beta("b_hh_inc1", 0.0, None, None, 0)
        b_hh_inc2 = Beta("b_hh_inc2", 0.0, None, None, 1)
        b_carav1 = Beta("b_carav1", 0.0, None, None, 0)
        b_carav2 = Beta("b_carav2", 0.0, None, None, 1)
        b_com = Beta("b_com", 0.0, None, None, 0)
        b_shop = Beta("b_shop", 0.0, None, None, 0)
        b_bis = Beta("b_bis", 0.0, None, None, 0)
        b_lei = Beta("b_lei", 0.0, None, None, 0)
        asc_alt1 = Beta("asc_alt1", 0.0, None, None, 1)
        asc_alt2 = Beta("asc_alt2", 0.0, None, None, 0)

        # Weight matrices for the residual layer: 2 -> 10 -> 2.
        W1 = Weights("ResNet_01a", (2, 10), 0, True)
        W2 = Weights("ResNet_01b", (10, 2), 0, True)

        # Pass model params to self.params. This must run before any further
        # locals (utilities, layer objects) are created, because it receives
        # the whole local namespace.
        self.store_params(locals())

        # Definition of the utility functions
        U_1 = (
            b_cost * TC1 + b_time * TT1 + b_hw * HW1 + b_ch * CH1
            + b_hh_inc1 * HH_INC_ABS + b_carav1 * CAR_AVAILABILITY
            + b_com * COMMUTE + b_shop * SHOPPING + b_bis * BUSINESS + b_lei * LEISURE
            + asc_alt1
        )
        U_2 = (
            b_cost * TC2 + b_time * TT2 + b_hw * HW2 + b_ch * CH2
            + b_hh_inc2 * HH_INC_ABS + b_carav2 * CAR_AVAILABILITY
            + asc_alt2
        )
        U = [U_1, U_2]
        rh = ResLogitLayer(U, W1, W2)

        # definition of the choice output
        self.y = CHOICE

        # Symbolic expression for the choice model. Each alternative is paired
        # with its own availability variable (the second entry previously
        # repeated ALT1_AV by mistake).
        self.p_y_given_x = logit(rh.output, [ALT1_AV, ALT2_AV])

        # declare Regularizers here:
        # L1 regularization cost
        # self.L1 = abs(b_cost()) + abs(b_time())

        # L2 regularization cost
        # self.L2 = b_cost() ** 2 + b_time() ** 2

        # symbolic expression for the cost function
        self.cost = neg_loglikelihood(self.p_y_given_x, self.y)

        # symbolic description of how to compute prediction as class whose
        # probability is maximal
        self.pred = aet.argmax(self.p_y_given_x, axis=1)

# Fit MNLmodel on the database with the Adam optimizer.
model = cmt.train(
    MNLmodel,
    db,
    optimizer=Adam,
    batch_size=128,
    lr_init=0.01,
    max_epoch=800,
)

# Replace the in-memory model with the one stored on disk.
# NOTE(review): "myModel.pkl" is presumably written by cmt.train (the name
# matches MNLmodel.name) -- confirm. Unpickling runs arbitrary code, so only
# load files produced by this notebook.
with open("myModel.pkl", "rb") as f:
    model = pickle.load(f)

# Summary statistics and parameter table, including the neural-net weights.
result = Results(model, db, show_weights=True)

Building model...
dataset: model_average_sp (2790)
batch size: 128
batches per epoch: 21
validation frequency: 21

Training model...


Loglikelihood:  -1284.539175  Score: 0.796
Epoch  800/800: 100%|██████████| 16.8k/16.8k [05:36<00:00, 49.9it/s, Patience=51%]

Optimization complete with accuracy of 79.570%
 with maximum loglikelihood reached @ epoch 792.





Results for model myModel
Number of Beta parameters: 11
Total size of Neural Net weights: 40
Sample size: 2790
Init loglikelihood: -1934.251
Final loglikelihood: -1284.539
Likelihood ratio test: 1299.424
Accuracy: 79.570%
Rho square: 0.336
Rho bar square: 0.310
Akaike Information Criterion: 2671.08
Bayesian Information Criterion: 2973.70
Final gradient norm: 0.071

           Value Std err  t-test p-value Rob. Std err Rob. t-test Rob. p-value
asc_alt1     0.0       -       -       -            -           -            -
asc_alt2   0.102   0.769   0.133   0.894        2.142       0.048        0.962
b_bis       0.12    0.77   0.156   0.876        2.036       0.059        0.953
b_carav1  -0.124   0.051  -2.438   0.015        0.011     -11.445          0.0
b_carav2     0.0       -       -       -            -           -            -
b_ch      -0.642   0.052  -12.38     0.0          0.1       -6.42          0.0
b_com      0.238   0.763   0.313   0.755        2.104       0.113         0.91
