In [1]:
import dill as pickle
import aesara.tensor as aet
import pandas as pd
import pycmtensor as cmt
from pycmtensor.pycmtensor import PyCMTensorModel, Beta, Weights
from pycmtensor.functions import logit, neg_loglikelihood
from pycmtensor.optimizers import Adam
from pycmtensor.results import Results

# Read the tab-separated Swissmetro file into a DataFrame, then wrap that
# frame in a PyCMTensor Database with CHOICE passed as the choice variable.
swissmetro = pd.read_csv(
    "data/swissmetro.dat",
    sep="\t",
)
db = cmt.Database(
    "swissmetro",
    swissmetro,
    choiceVar="CHOICE",
)

In [2]:
# Build the row-exclusion mask. The comparisons are combined arithmetically:
# assuming each comparison evaluates to 0/1, the product behaves like a
# logical AND and the sum like a logical OR, so a row is flagged when PURPOSE
# is neither 1 nor 3, or when CHOICE equals 0.
exclude = (
    (db.variables["PURPOSE"] != 1) * (db.variables["PURPOSE"] != 3)
    + (db.variables["CHOICE"] == 0)
) > 0
db.remove(exclude)

# Shift CHOICE down by one so the first alternative is coded as 0.
# NOTE: this modifies db.data in place, so re-running this cell subtracts 1
# again; restart from the data-loading cell before re-executing.
db.data["CHOICE"] -= 1

# Rescale the six *_CO / *_TT columns; `default=100.` is forwarded unchanged
# to Database.autoscale (presumably the scaling divisor - confirm in the
# PyCMTensor docs).
db.autoscale(
    variables=["TRAIN_CO", "TRAIN_TT", "CAR_CO", "CAR_TT", "SM_CO", "SM_TT"],
    default=100.,
    verbose=False,
)


class ResLogitLayer:
    """Two-step sigmoid block with a skip connection back to its input.

    The input is projected through ``w_in`` and then ``w_out``, with a sigmoid
    applied after each projection; the original input is added to that result.

    Attributes:
        input: the input actually used (concatenated when a list/tuple was given).
        weights: ``[w_in, w_out]`` after any ``Weights`` object has been called.
        output: ``sigmoid(dot(sigmoid(dot(input, w_in)), w_out)) + input``.
    """

    def __init__(self, input, w_in, w_out):
        """Build the layer's symbolic output.

        Args:
            input: a single tensor, or a list/tuple of tensors that is
                concatenated along axis 1.
            w_in: first projection; a ``Weights`` instance is called and its
                return value used, anything else is used as given.
            w_out: second projection, handled the same way as ``w_in``.

        No shape validation is performed here; ``w_out`` must map back to the
        input's shape for the final addition to be valid.
        """
        # Join a sequence of per-alternative terms into one tensor.
        stacked = (
            aet.concatenate(input, axis=1)
            if isinstance(input, (list, tuple))
            else input
        )

        # Unwrap Weights containers by calling them.
        in_proj = w_in() if isinstance(w_in, Weights) else w_in
        out_proj = w_out() if isinstance(w_out, Weights) else w_out

        hidden = aet.sigmoid(aet.dot(stacked, in_proj))
        residual = aet.sigmoid(aet.dot(hidden, out_proj))

        self.input = stacked
        self.weights = [in_proj, out_proj]
        # Skip connection: add the untouched input back onto the transformed signal.
        self.output = residual + stacked


class MNLmodel(PyCMTensorModel):
    """Swissmetro mode-choice model: linear utilities passed through a ResLogit layer.

    Three utilities (train, Swissmetro, car) are built from shared cost/time
    coefficients plus alternative-specific constants, fed through a
    ``ResLogitLayer`` and then through ``logit`` together with the availability
    variables to obtain choice probabilities.

    Attributes set by ``__init__``:
        name: model label, ``"myModel"``.
        inputs: the variables returned by ``db.inputs()``.
        y: the CHOICE variable (the target).
        p_y_given_x: symbolic choice probabilities.
        L1: ``|b_cost| + |b_time|``.
        L2: ``b_cost**2 + b_time**2``.
        cost: negative log-likelihood of ``y`` under ``p_y_given_x``.
        pred: argmax of ``p_y_given_x`` along axis 1.
    """

    def __init__(self, db):
        """Build the symbolic model graph from the database's input variables.

        Args:
            db: database object providing ``inputs()``. Each returned variable
                must expose a ``name`` attribute, and the names referenced
                below (TRAIN_CO, TRAIN_TT, SM_CO, SM_TT, CAR_CO, CAR_TT,
                TRAIN_AV, SM_AV, CAR_AV, CHOICE) must be among them.

        Side effects:
            Every input variable is written into the module's global namespace
            under its own name.
        """
        super().__init__()
        self.name = "myModel"
        self.inputs = db.inputs()  # keep track of inputs

        # Publish each input variable as a module-level global so the utility
        # expressions below can refer to TRAIN_CO, SM_CO, ... by bare name.
        for var in self.inputs:
            globals().update({var.name: var})

        # Model parameters. The positional arguments after the name are passed
        # through to Beta unchanged (presumably initial value, lower bound,
        # upper bound, fixed-flag - confirm against the PyCMTensor docs).
        # asc_sm is the only one declared with a trailing 1.
        b_cost = Beta("b_cost", 0.0, None, None, 0)
        b_time = Beta("b_time", 0.0, None, None, 0)
        asc_train = Beta("asc_train", 0.0, None, None, 0)
        asc_car = Beta("asc_car", 0.0, None, None, 0)
        asc_sm = Beta("asc_sm", 0.0, None, None, 1)

        # Residual-layer weights: 3 -> 10 -> 3, matching the three utilities.
        W1 = Weights("ResNet_01a", (3, 10), 0, True)
        W2 = Weights("ResNet_01b", (10, 3), 0, True)

        # Hand every local defined so far to the base class. The parameters
        # are passed by their local variable names, so do not rename or move
        # the declarations above without checking store_params.
        self.store_params(locals())

        # Definition of the utility functions
        U_1 = b_cost * TRAIN_CO + b_time * TRAIN_TT + asc_train
        U_2 = b_cost * SM_CO + b_time * SM_TT + asc_sm
        U_3 = b_cost * CAR_CO + b_time * CAR_TT + asc_car
        U = [U_1, U_2, U_3]
        rh = ResLogitLayer(U, W1, W2)

        # definition of the choice output
        self.y = CHOICE

        # symbolic expression for the choice model
        self.p_y_given_x = logit(rh.output, [TRAIN_AV, SM_AV, CAR_AV])

        # Regularizers. They are exposed as attributes only; neither term is
        # added to self.cost in this class.
        # L1 regularization cost
        self.L1 = abs(b_cost()) + abs(b_time())

        # L2 regularization cost
        self.L2 = b_cost() ** 2 + b_time() ** 2

        # symbolic expression for the cost function
        self.cost = neg_loglikelihood(self.p_y_given_x, self.y)

        # symbolic description of how to compute prediction as class whose
        # probability is maximal
        self.pred = aet.argmax(self.p_y_given_x, axis=1)

In [3]:
# Fit the model on the prepared database.
model = cmt.train(
    MNLmodel,
    db,
    optimizer=Adam,
    batch_size=256,
    lr_init=0.01,
    max_epoch=4,
)

# Replace the in-memory model with the copy deserialized from myModel.pkl
# (presumably written by cmt.train under the model's name - confirm). The
# object returned by cmt.train above is discarded by this rebinding.
# NOTE(review): `pickle` is dill here; loading runs code embedded in the file,
# so only load a file this notebook itself produced.
with open("myModel.pkl", "rb") as f:
    model = pickle.load(f)

# Build the results object for the reloaded model, with weights shown.
result = Results(model, db, show_weights=True)

Building model...
dataset: swissmetro (6768)
batch size: 256
batches per epoch: 26
validation frequency: 26

Training model...


Loglikelihood:  -5652.154684  Score: 0.611
Epoch    4/4: 100%|██████████| 104/104 [00:03<00:00, 28.4it/s, Patience=2%]

Optimization complete with accuracy of 61.111%
 with maximum loglikelihood reached @ epoch 4.





Results for model my model
Number of Beta parameters: 4
Total size of Neural Net weights: 60
Sample size: 6768
Init loglikelihood: -6969.875
Final loglikelihood: -5652.155
Likelihood ratio test: 2635.441
Accuracy: 61.111%
Rho square: 0.189
Rho bar square: 0.180
Akaike Information Criterion: 11432.31
Bayesian Information Criterion: 11868.79
Final gradient norm: 0.194

              Value   Std err     t-test   p-value Rob. Std err Rob. t-test Rob. p-value
asc_car    0.159757  0.040053   3.988602  0.000066     0.279087    0.572427     0.567033
asc_sm          0.0         -          -         -            -           -            -
asc_train -0.416775  0.051887  -8.032309       0.0     0.564355   -0.738499     0.460211
b_cost     0.019433  0.003255   5.970887       0.0     0.008938    2.174134     0.029695
b_time    -0.522085  0.044527 -11.725024       0.0     0.534529    -0.97672     0.328708 

ResNet_01a (3, 10) init: random
[[ 0.12569721 -0.07885385 -0.72176806 -0.37371185 -0.08264174 

In [4]:
# Print the text form of the Results object built in the training cell.
print(result)

Results for model my model
Number of Beta parameters: 4
Total size of Neural Net weights: 60
Sample size: 6768
Init loglikelihood: -6969.875
Final loglikelihood: -5652.155
Likelihood ratio test: 2635.441
Accuracy: 61.111%
Rho square: 0.189
Rho bar square: 0.180
Akaike Information Criterion: 11432.31
Bayesian Information Criterion: 11868.79
Final gradient norm: 0.194
              Value   Std err     t-test   p-value Rob. Std err Rob. t-test Rob. p-value
asc_car    0.159757  0.040053   3.988602  0.000066     0.279087    0.572427     0.567033
asc_sm          0.0         -          -         -            -           -            -
asc_train -0.416775  0.051887  -8.032309       0.0     0.564355   -0.738499     0.460211
b_cost     0.019433  0.003255   5.970887       0.0     0.008938    2.174134     0.029695
b_time    -0.522085  0.044527 -11.725024       0.0     0.534529    -0.97672     0.328708
