# Swissmetro test

In [2]:
import dill as pickle
import pycmtensor as cmt
from pycmtensor.models import MNLogit
from pycmtensor.optimizers import Adam
from pycmtensor.expressions import Beta, Weights
from pycmtensor.results import Results, Predict

# Set pycmtensor's logger level to WARNING (presumably hides lower-severity
# messages for the rest of the session; confirm against cmt.logger).
cmt.logger.set_level(cmt.logger.WARNING)


In [3]:
import pycmtensor as cmt
import pandas as pd
# Read the tab-separated Swissmetro file and wrap it in a pycmtensor Database
# whose choice column is "CHOICE".
swissmetro = pd.read_csv("data/swissmetro.dat", sep="\t")
db = cmt.Database(name="swissmetro", pandasDatabase=swissmetro, choiceVar="CHOICE")
# Inject every entry of db.variables into the notebook's global namespace; this
# is where the bare names PURPOSE and CHOICE used below come from.
globals().update(db.variables)
# Remove observations whose PURPOSE is neither 1 nor 3, or whose CHOICE is 0.
# NOTE(review): `*` and `+` are used as element-wise AND / OR here, which relies
# on the comparison operators being overloaded by the objects in db.variables;
# confirm against the pycmtensor Database implementation.
exclude = ((PURPOSE != 1) * (PURPOSE != 3) + (CHOICE == 0)) > 0
db.remove(exclude)

# Additional steps to format the database.
# The in-place shift below is not idempotent by itself; re-running is safe only
# because this cell rebuilds `db` from the file first.
db.data["CHOICE"] -= 1  # shift the choice labels down by one
db.choices = sorted(db.data["CHOICE"].unique())  # sorted distinct choice labels after the shift
# Rescale the cost and travel-time columns.
# NOTE(review): the meaning of default=100.0 is not visible here; presumably a
# scaling divisor. Confirm in Database.autoscale.
db.autoscale(
    variables=["TRAIN_CO", "TRAIN_TT", "CAR_CO", "CAR_TT", "SM_CO", "SM_TT"],
    default=100.0,
    verbose=False,
)

In [None]:
# Declare the model parameters.
# NOTE(review): the positional Beta arguments look like (name, initial value,
# lower bound, upper bound, status), following the Biogeme convention; confirm
# against pycmtensor.expressions.Beta.
b_cost = Beta("b_cost", 0.0, None, None, 0)
b_time = Beta("b_time", 0.0, None, None, 0)
b_dist = Beta("b_dist", 0.0, None, None, 0)  # not referenced by any utility below
asc_train = Beta("asc_train", 0.0, None, None, 0)
asc_car = Beta("asc_car", 0.0, None, None, 0)
asc_sm = Beta("asc_sm", 0.0, None, None, 1)  # last argument is 1 here, 0 elsewhere; presumably fixes asc_sm as the reference. Confirm.

# One utility per alternative: shared cost and travel-time coefficients plus an
# alternative-specific constant, built from the TRAIN_*, SM_* and CAR_* columns.
U_1 = b_cost * db["TRAIN_CO"] + b_time * db["TRAIN_TT"] + asc_train
U_2 = b_cost * db["SM_CO"] + b_time * db["SM_TT"] + asc_sm
U_3 = b_cost * db["CAR_CO"] + b_time * db["CAR_TT"] + asc_car

# Specify the utility functions and the availability conditions; both lists use
# the same alternative order (train, Swissmetro, car).
U = [U_1, U_2, U_3]
AV = [db["TRAIN_AV"], db["SM_AV"], db["CAR_AV"]]

mymodel = MNLogit(u=U, av=AV, database=db, name="mymodel")
# At notebook top level locals() is the global namespace, so every name defined
# so far (not only the Beta objects above) is handed to add_params.
mymodel.add_params(locals())

# Set the training configuration: patience, learning-rate bounds and the
# learning-rate scheduler with its step size and mode.
mymodel.config["patience"] = 20000
mymodel.config["base_lr"] = 0.0012
mymodel.config["max_lr"] = 0.002
mymodel.config["learning_scheduler"] = "CyclicLR"
mymodel.config["cyclic_lr_step_size"] = 8
mymodel.config["cyclic_lr_mode"] = "triangular2"

In [None]:
# Fit the model specification with the Adam optimizer in mini-batches of 128.
train_options = dict(
    database=db,
    optimizer=Adam,
    batch_size=128,
    max_epoch=499,
    notebook=True,
)
model = cmt.train(mymodel, **train_options)

# NOTE(review): the object returned by cmt.train is replaced here by whatever is
# stored in "myModel.pkl"; confirm that the training run writes this file,
# otherwise the statistics below describe a stale model.
with open("myModel.pkl", "rb") as pkl_file:
    model = pickle.load(pkl_file)

# Summarise the estimates: parameter statistics, then their correlation matrix.
result = Results(model, db, show_weights=True)
result.print_beta_statistics()
result.print_correlation_matrix()


In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

# Plot the training history recorded by the model tracker as three panels
# (log likelihood, learning rate, score), each against the log-scaled
# iteration index, and save the figure to docs/viz/fig.png.
sns.set_style("ticks")
sns.set_context("notebook", font_scale=1.0, rc={"lines.linewidth": 1.2})
log = model.tracker.get_data()
fig, axs = plt.subplots(1, 3, figsize=(12, 3), squeeze=False)
g1 = sns.lineplot(ax=axs[0, 0], data=log, x=log.index, y="full_ll", color="red")
g2 = sns.lineplot(ax=axs[0, 1], data=log, x=log.index, y="lr", color="red")
g3 = sns.lineplot(
    ax=axs[0, 2], data=log, x=log.index, y="score", color="red"
)  # y = score or full_ll or lr
g1.set(xscale="log", xlabel="iterations", ylabel="log likelihood")
g2.set(xscale="log", xlabel="iterations", ylabel="learning rate")
g3.set(xscale="log", xlabel="iterations", ylabel="accuracy")
# Despine only once the axes exist. Calling sns.despine() before plt.subplots()
# acts on an implicit, empty "current figure" (which then shows up as a stray
# blank figure) and leaves the axes drawn here untouched.
sns.despine(fig=fig)
fig.tight_layout()
fig.savefig("docs/viz/fig.png", format="png", facecolor="white", transparent=False)
plt.show()


In [None]:
import aesara.d3viz as d3v
from aesara import printing

from IPython.display import Image

# Interactive d3 rendering of the log-likelihood graph of `model`.
d3v.d3viz(model.loglikelihood, outfile="docs/viz/MNLogit.html")

# Static rendering of the cost graph of `mymodel`, written to disk and then
# displayed inline as the cell output.
printing.pydotprint(mymodel.cost, outfile="docs/viz/print.png")
Image("docs/viz/print.png", width="80%")


In [4]:
from pycmtensor.results import Predict
from pycmtensor.pycmtensor import PyCMTensorModel
import dill as pickle
import pandas as pd

# class MNLmodel(PyCMTensorModel):
#     def __init__(self, db):
#         super().__init__(db)


# Reload the pickled model from disk.
# SECURITY: pickle/dill deserialisation can execute arbitrary code; only load
# "myModel.pkl" when it was produced by a trusted run of this notebook.
with open("myModel.pkl", "rb") as f:
    model = pickle.load(f)

# Build the predictor once and reuse it for both tables instead of constructing
# Predict(model, db) twice; the cell output is the per-alternative probabilities
# joined column-wise with the choices column.
predictor = Predict(model, db)
pd.concat((predictor.probs(), predictor.choices()), axis=1)


Unnamed: 0,0,1,2,CHOICE
0,0.159529,0.526072,0.314399,1
1,0.170893,0.525581,0.303527,1
2,0.136829,0.530941,0.332230,1
3,0.143593,0.426624,0.429784,2
4,0.119186,0.484416,0.396398,1
...,...,...,...,...
6763,0.159233,0.582936,0.257831,1
6764,0.134582,0.475826,0.389592,1
6765,0.132383,0.484591,0.383026,1
6766,0.109735,0.489355,0.400909,1
