# SwissMetro
> Reproducing this example from [biogeme](https://github.com/michelbierlaire/biogeme/blob/working/examples/notebooks/My%20first%20model.ipynb)

In [None]:
from pathlib import Path
import pandas as pd
from pytorch_mnl.core import *

In [None]:
# Directory, relative to the notebook, that holds the data file.
PATH = Path("data")

In [None]:
# The file is tab-separated. `sep` is passed by keyword because pandas >= 2.0
# accepts only the path positionally and raises TypeError otherwise.
df = pd.read_csv(PATH/'swissmetro.dat', sep='\t')
df.head()

Unnamed: 0,GROUP,SURVEY,SP,ID,PURPOSE,FIRST,TICKET,WHO,LUGGAGE,AGE,...,TRAIN_TT,TRAIN_CO,TRAIN_HE,SM_TT,SM_CO,SM_HE,SM_SEATS,CAR_TT,CAR_CO,CHOICE
0,2,0,1,1,1,0,1,1,0,3,...,112,48,120,63,52,20,0,117,65,2
1,2,0,1,1,1,0,1,1,0,3,...,103,48,30,60,49,10,0,117,84,2
2,2,0,1,1,1,0,1,1,0,3,...,130,48,60,67,58,30,0,117,52,2
3,2,0,1,1,1,0,1,1,0,3,...,103,40,30,63,52,20,0,72,52,2
4,2,0,1,1,1,0,1,1,0,3,...,130,36,60,63,42,20,0,90,84,2


Remove some observations: keep only rows where `PURPOSE` is 1 or 3 and a choice was recorded (`CHOICE != 0`).


In [None]:
# Rows to discard: PURPOSE is neither 1 nor 3, or no choice was recorded (CHOICE == 0).
exclude = df[~df['PURPOSE'].isin([1, 3]) | (df['CHOICE'] == 0)]

In [None]:
# Remove the excluded rows by index label and show how many observations remain.
df = df.drop(index=exclude.index)
df.shape[0]

6768

Model parameters (the Biogeme definitions being reproduced, kept commented out for reference)

In [None]:
# ASC_CAR = Beta('ASC_CAR', 0, None, None, 0)
# ASC_TRAIN = Beta('ASC_TRAIN', 0, None, None, 0)
# ASC_SM = Beta('ASC_SM', 0, None, None, 1)
# B_TIME = Beta('B_TIME', 0, None, None, 0)
# B_COST = Beta('B_COST', 0, None, None, 0)

Definition of new variables

In [None]:
# Boolean masks reused below; multiplying by a mask zeroes the rows where it is False.
no_ga = df['GA'] == 0
in_sp = df['SP'] != 0

# Costs are kept only where GA == 0 and set to 0 elsewhere.
df = df.assign(SM_COST=df['SM_CO'] * no_ga,
               TRAIN_COST=df['TRAIN_CO'] * no_ga)

# Travel times and costs divided by 100; built after the step above because
# TRAIN_COST and SM_COST only exist from that point on.
scaled = {f'{col}_SCALED': df[col] / 100
          for col in ('TRAIN_TT', 'TRAIN_COST', 'SM_TT', 'SM_COST', 'CAR_TT', 'CAR_CO')}

# Availability flags are kept only where SP != 0.
df = df.assign(CAR_AV_SP=df['CAR_AV'] * in_sp,
               TRAIN_AV_SP=df['TRAIN_AV'] * in_sp,
               **scaled)

## Utility Func
> You have to define your model as an `nn.Module`.

In [None]:
# V1 = ASC_TRAIN + \
#      B_TIME * TRAIN_TT_SCALED + \
#      B_COST * TRAIN_COST_SCALED
# V2 = ASC_SM + \
#      B_TIME * SM_TT_SCALED + \
#      B_COST * SM_COST_SCALED
# V3 = ASC_CAR + \
#      B_TIME * CAR_TT_SCALED + \
#      B_COST * CAR_CO_SCALED

$$V = a + b\cdot x $$

In [None]:
import torch
import torch.nn as nn

class Model(nn.Module):
    """Linear utility model ``V = a + x @ b`` over a fixed set of alternatives.

    Every alternative has its own intercept ``a``; the intercept of the first
    alternative is fixed at zero. The coefficients ``b`` are shared by all
    alternatives.

    Args:
        n_choices: number of alternatives.
        n_params: number of attributes per alternative that enter the utility.
    """

    def __init__(self, n_choices=3, n_params=2):
        super().__init__()
        self.available = Availability(n_choices)

        # Shared attribute coefficients, one row per attribute.
        self.b = nn.Parameter(torch.zeros(n_params, 1))
        # Only the free intercepts are stored as a parameter. The fixed zero for
        # the first alternative is prepended in forward(); concatenating it here
        # would yield a plain tensor that is not registered as a parameter.
        self.a = nn.Parameter(torch.zeros(n_choices - 1, 1))

    def forward(self, x, av):
        """Compute the utilities and pass them through the availability layer.

        Args:
            x: tensor of shape ``(batch, n_choices, n_features)``; only the first
                ``n_params`` features of each alternative are used.
            av: availability information forwarded unchanged to ``self.available``.

        Returns:
            Whatever ``self.available(logits, av)`` returns, where ``logits`` has
            shape ``(batch, n_choices)``.
        """
        n_params = self.b.shape[0]
        # Fixed zero intercept for the first alternative, with the dtype and
        # device of the learned intercepts.
        a = torch.cat([self.a.new_zeros(1, 1), self.a])
        # squeeze(-1) drops only the trailing singleton axis, so a batch of
        # size 1 keeps its batch dimension.
        logits = (a + x[..., :n_params] @ self.b).squeeze(-1)
        return self.available(logits, av)

In [None]:
# Three alternatives, two attributes per alternative (the class defaults, spelled out).
model = Model(n_choices=3, n_params=2)

In [None]:
# Smoke test on a random batch of 8: the output should be (batch, n_choices).
# torch.randint's upper bound is exclusive, so (0, 2) is needed to draw both
# 0 and 1 availability flags; (0, 1) always produces zeros.
model(torch.rand(8, 3, 3), torch.randint(0, 2, (8, 3))).shape

torch.Size([8, 3])

In [None]:
# Attribute columns, listed per alternative as a (travel time, cost) pair.
x_cols = [
    'SM_TT_SCALED', 'SM_COST_SCALED',
    'TRAIN_TT_SCALED', 'TRAIN_COST_SCALED',
    'CAR_TT_SCALED', 'CAR_CO_SCALED',
]

# Availability columns, one per alternative, in the same SM / TRAIN / CAR order.
av = ['SM_AV', 'TRAIN_AV_SP', 'CAR_AV_SP']

In [None]:
X, y, avs = prepare_data(df, x_cols=x_cols, target_col='CHOICE', av_cols=av)

# Regroup the flat feature columns as (observation, alternative, attribute);
# with 3 availability columns and 6 feature columns this is (-1, 3, 2).
X = X.reshape(-1, len(av), len(x_cols) // len(av))

In [None]:
# Sanity check: one row per observation, one column per entry of `av`.
avs.shape

torch.Size([6768, 3])

In [None]:
# Sanity check: (observations, alternatives, attributes) after the reshape.
X.shape

torch.Size([6768, 3, 2])

In [None]:
# Wrap the tensors in mini-batch loaders of 64 observations each.
# NOTE(review): pct=None is passed explicitly; what it means for the
# train/validation split is defined in pytorch_mnl — confirm there.
dls = DataLoaders.from_Xy(X, y, avs, pct=None, batch_size=64)

In [None]:
# Pull a single mini-batch (features, targets, availabilities) for inspection.
bx, by, bavs = dls.one_batch()

In [None]:
# Sanity check: (batch_size, alternatives, attributes).
bx.shape

torch.Size([64, 3, 2])

In [None]:
# Targets next to the untrained model's predictions (index of the highest score per row).
by, model(bx, bavs).argmax(dim=1)

(tensor([0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0]),
 tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]))

In [None]:
# Bundle the data loaders and the model into the training helper from pytorch_mnl.
learn = Learner(dls, model)

In [None]:
# Train with an initial learning rate of 0.1.
# NOTE(review): 100 is presumably the number of epochs (the log prints one
# line per epoch) — confirm against Learner.fit.
learn.fit(100, lr=0.1)

epoch =   0, train_loss = 87.617, val_loss = 85.678, accuracy = 0.67
epoch =   1, train_loss = 84.056, val_loss = 84.944, accuracy = 0.68
epoch =   2, train_loss = 83.772, val_loss = 84.667, accuracy = 0.68
epoch =   3, train_loss = 83.638, val_loss = 84.547, accuracy = 0.68
epoch =   4, train_loss = 83.577, val_loss = 84.492, accuracy = 0.68
Epoch     6: reducing learning rate of group 0 to 1.0000e-02.
epoch =   5, train_loss = 83.549, val_loss = 84.465, accuracy = 0.68
epoch =   6, train_loss = 84.417, val_loss = 84.227, accuracy = 0.68
epoch =   7, train_loss = 84.213, val_loss = 84.059, accuracy = 0.68
Epoch     9: reducing learning rate of group 0 to 1.0000e-03.
epoch =   8, train_loss = 84.069, val_loss = 83.942, accuracy = 0.68
epoch =   9, train_loss = 83.945, val_loss = 83.931, accuracy = 0.68
epoch =  10, train_loss = 83.934, val_loss = 83.920, accuracy = 0.68
Epoch    12: reducing learning rate of group 0 to 1.0000e-04.
epoch =  11, train_loss = 83.923, val_loss = 83.910, ac

In [None]:
# Learned intercepts. forward() prepends a fixed zero for the first alternative,
# so these two values belong to the second and third alternatives.
model.a

Parameter containing:
tensor([[-0.8573],
        [-0.0261]], requires_grad=True)

In [None]:
# Learned coefficients shared by all alternatives, one per attribute.
# Presumably ordered (travel time, cost) as in x_cols — assumes prepare_data
# preserves the column order; confirm.
model.b

Parameter containing:
tensor([[-1.2013],
        [-0.8977]], requires_grad=True)