In [1]:
import sys
sys.path.append("models/")

import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
import torch
import torch.nn as nn
from torch.utils.data import DataLoader

from setup import *
from dataloader import SurveyDataset
import mnl

%load_ext autoreload
%autoreload 2

In [2]:
# Version tag of the filtered census-tract file; it is spliced into that
# file's name when the tract filter is read.
data_version = '1571'

In [3]:
# Trip-level table. NOTE(review): data_dir is not defined in any visible cell;
# presumably it comes from `from setup import *` - confirm.
tp = pd.read_csv(data_dir+"trips.csv")
# Number of choice alternatives handed to the MNL model.
n_alts = 4

In [4]:
# Share of all trips falling under each value of the 'mode' column.
print(tp['mode'].value_counts()/len(tp))

2    0.713060
1    0.132001
4    0.111893
3    0.043046
Name: mode, dtype: float64


In [5]:
# 0/1 time-of-day indicators derived from the departure hour.
# Both bounds are exclusive: morning is 6 < dep_hour < 10, afternoon is 15 < dep_hour < 19.
tp['morning'] = (tp['dep_hour'].gt(6) & tp['dep_hour'].lt(10)).astype(int)
tp['afternoon'] = (tp['dep_hour'].gt(15) & tp['dep_hour'].lt(19)).astype(int)

def normalize_features(df, cols):
    """Scale each listed column by its own maximum value, in place.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame holding the columns to scale. It is modified in place.
    cols : iterable of column labels
        Columns of ``df`` to divide by their maximum.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object that was passed in.

    Notes
    -----
    A column whose maximum is zero or missing is left untouched: dividing it
    would turn its entries into NaN (0/0) or inf.
    """
    for c in cols:
        col_max = df[c].max()
        # Nothing sensible to scale by; skip instead of injecting NaN/inf
        # into features that are later fed to the model.
        if pd.isna(col_max) or col_max == 0:
            continue
        df[c] = df[c] / col_max
    return df

In [6]:
# Column of ones; it is the first feature selected for the model input.
# NOTE(review): presumably serves as the intercept term - confirm against mnl.MNL.
tp['const'] = 1

In [7]:
# Census tracts kept for this data version; the file name carries data_version.
ct_filter = pd.read_csv(data_dir+"census_tracts_filtered-"+data_version+".csv")
# Tract ids as a NumPy array; trips are later matched against these via tract_1/tract_2.
unique_ct = ct_filter['geoid'].to_numpy()

In [8]:
# Sanity check: number of tract ids in the filter.
len(unique_ct)

1571

In [9]:
# Boolean mask over the rows of tp: a trip is kept only when BOTH of its tract
# ids (tract_1 and tract_2) occur exactly once in unique_ct.
#
# The membership test is vectorised with np.isin; the previous version scanned
# unique_ct twice per trip inside a Python loop (O(trips x tracts)).
ct_values, ct_counts = np.unique(unique_ct, return_counts=True)
# Preserve the original "exactly one match" rule even if an id were duplicated.
valid_ct = ct_values[ct_counts == 1]
trip_filter = (
    np.isin(tp['tract_1'].to_numpy(), valid_ct)
    & np.isin(tp['tract_2'].to_numpy(), valid_ct)
)

In [10]:
# Explanatory variables, in the column order used for the model input.
feature_cols = [
    'const', 'morning', 'afternoon', 'companion', 'distance',
    'from_home', 'to_home', 'purp_work', 'purp_school', 'purp_errand', 'purp_recreation',
    'ontime_important', '12_18yrs', '18_25yrs', '25_55yrs', '55+yrs',
    'disability', 'educ_col', 'educ_grad',
    'race_white', 'race_black', 'race_asian',
    'male', 'female',
    'emply_park', 'emply_transit', 'emply_veh', 'emply_wfh', 'emply_flex', 'emply_hours',
    'license', 'person_trips', 'person_transit', 'person_freq_transit',
    'hh_inc_0_30', 'hh_inc_30_60', 'hh_inc_60_100', 'hh_inc_100_150', 'hh_inc_150',
    'avg_pr_veh', 'home_own', 'home_house', 'home_condo',
]

# Feature matrix restricted to the trips that passed the tract filter.
x = tp[feature_cols].to_numpy()[trip_filter]

# Mode codes of the same trips, shifted down by one before being used as labels.
y = tp['mode'].astype(int).to_numpy()[trip_filter] - 1

# 80/20 train/test split with a fixed seed.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=42
)

In [11]:
# (number of retained trips, number of features)
x.shape

(79929, 43)

In [12]:
# Wrap the split arrays as tensors: float features, long (integer) labels.
trainset = SurveyDataset(torch.tensor(x_train, dtype=torch.float), torch.tensor(y_train, dtype=torch.long))
# Training mini-batches of 256, reshuffled on every pass.
trainloader = DataLoader(trainset, batch_size=256, shuffle=True)

testset = SurveyDataset(torch.tensor(x_test, dtype=torch.float), torch.tensor(y_test, dtype=torch.long))
# The whole test set is served as one batch, so shuffling cannot change the
# evaluation metrics; keep the order fixed and leave the RNG untouched.
testloader = DataLoader(testset, batch_size=len(testset), shuffle=False)

In [13]:
# Seed torch so any random parameter initialisation and the DataLoader
# shuffling are repeatable (same seed value as the train/test split).
torch.manual_seed(42)

loss_fn = nn.CrossEntropyLoss()

model = mnl.MNL(n_alts=n_alts, n_features=x.shape[-1])
# model = model.to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.002, weight_decay=0)

for epoch in range(300):
    # ---- training pass over all mini-batches ----
    model.train()
    loss_ = 0
    correct = 0
    for x_batch, y_batch in trainloader:
        # Compute prediction and loss
        util = model(x_batch)
        loss = loss_fn(util, y_batch)
        # loss is a batch mean; weight it by the batch size so the epoch
        # figure below is a per-sample mean even when the last batch is short.
        loss_ += loss.item()*len(y_batch)

        pred = torch.argmax(util, dim=1)
        # .item() keeps the running count a plain Python int.
        correct += torch.sum(pred == y_batch).item()

        # Backpropagation
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    # Reported once per epoch (the old `batch % 1 == 0` guard was always true).
    print(f"[epoch: {epoch:>3d}] Train loss: {loss_/len(trainset):.4f} accuracy: {correct/len(trainset):.3f}")

    # ---- evaluation pass on the held-out set ----
    model.eval()
    correct = 0
    loss_ = 0
    # No gradients are needed here; skipping autograd avoids building a graph
    # over the full test batch every epoch.
    with torch.no_grad():
        for x_batch, y_batch in testloader:
            util = model(x_batch)
            loss = loss_fn(util, y_batch)
            loss_ += loss.item()*len(y_batch)
            pred = torch.argmax(util, dim=1)
            correct += torch.sum(pred == y_batch).item()

    print(f"[epoch: {epoch:>3d}] Test loss: {loss_/len(testset):.4f} accuracy: {correct/len(testset):.3f}")


[epoch:   0] Train loss: 0.9441 accuracy: 0.673
[epoch:   0] Test loss: 0.5756 accuracy: 0.789
[epoch:   1] Train loss: 0.5413 accuracy: 0.808
[epoch:   1] Test loss: 0.5104 accuracy: 0.824
[epoch:   2] Train loss: 0.4991 accuracy: 0.825
[epoch:   2] Test loss: 0.4840 accuracy: 0.829
[epoch:   3] Train loss: 0.4802 accuracy: 0.833
[epoch:   3] Test loss: 0.4717 accuracy: 0.833
[epoch:   4] Train loss: 0.4710 accuracy: 0.835
[epoch:   4] Test loss: 0.4656 accuracy: 0.835
[epoch:   5] Train loss: 0.4653 accuracy: 0.838
[epoch:   5] Test loss: 0.4632 accuracy: 0.836
[epoch:   6] Train loss: 0.4622 accuracy: 0.840
[epoch:   6] Test loss: 0.4601 accuracy: 0.839
[epoch:   7] Train loss: 0.4605 accuracy: 0.841
[epoch:   7] Test loss: 0.4594 accuracy: 0.841
[epoch:   8] Train loss: 0.4589 accuracy: 0.842
[epoch:   8] Test loss: 0.4577 accuracy: 0.841
[epoch:   9] Train loss: 0.4579 accuracy: 0.843
[epoch:   9] Test loss: 0.4568 accuracy: 0.842
[epoch:  10] Train loss: 0.4577 accuracy: 0.844
[e

KeyboardInterrupt: 

In [14]:
# Print every model parameter as a (name, tensor) pair.
for name, param in model.named_parameters():
    print((name, param))

('beta.weight', Parameter containing:
tensor([[ 0.6929, -0.0772,  0.2061, -0.2254, -1.0756, -0.0244,  0.0143,  0.2920,
          0.1733, -0.0782,  0.3137, -0.2152, -0.0182,  0.4557,  0.4485,  0.7052,
         -0.4744,  0.4495,  0.6261,  0.0923, -0.1780, -0.1531,  0.6451,  0.4286,
         -0.3748,  0.3883, -0.2609, -0.1072,  0.0201,  0.0325, -0.2460, -0.0481,
          0.1830, -0.2823,  0.3767,  0.2456,  0.3425,  0.3894,  0.4302, -0.8925,
         -0.1659, -0.1228,  0.5170],
        [-0.1174, -0.0136,  0.2340,  0.1317,  0.1220,  0.1956,  0.1850, -0.2740,
         -0.6176,  0.4992,  0.0923, -0.0575, -0.3498, -0.6777, -0.0824, -0.0828,
          0.0055, -0.3137, -0.2230,  0.0249,  0.0121,  0.0798, -0.1605, -0.1104,
          0.5252, -0.6962,  0.0268, -0.1031, -0.1024,  0.0357,  1.0361,  0.0590,
          0.0463, -1.3601, -0.2623,  0.0927, -0.0218, -0.0544, -0.0733,  0.6779,
          0.2806,  0.3052, -0.3107],
        [-0.0145, -0.2400, -0.5547,  0.1484,  0.1102, -0.3981, -0.3256, -0.383