In [1]:
import sys
sys.path.append("models/")

import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
import torch
import torch.nn as nn
from torch.utils.data import DataLoader

from setup import *
from dataloader import SurveyDataset
import mnl

%load_ext autoreload
%autoreload 2

In [24]:
tp = pd.read_csv(data_dir+"trips.csv")
n_alts = 4

In [25]:
print(tp['mode'].value_counts()/len(tp))

2    0.713060
1    0.132001
4    0.111893
3    0.043046
Name: mode, dtype: float64


In [26]:
tp['morning'] = (tp['dep_hour'] > 6) & (tp['dep_hour'] < 10)
tp['afternoon'] = (tp['dep_hour'] > 15) & (tp['dep_hour'] < 19)
tp['morning'] = tp['morning'].astype(int)
tp['afternoon'] = tp['afternoon'].astype(int)

def normalize_features(df, cols):
    for c in cols:
        df[c] = df[c]/df[c].max()
    return df

In [27]:
tp['const'] = 1

In [2]:
ct_filter = pd.read_csv(data_dir+"census_tracts_filtered.csv")
unique_ct = ct_filter['geoid'].to_numpy()

In [3]:
len(unique_ct)

1337

In [None]:
trip_filter = []
for t1, t2 in zip(tp['tract_1'], tp['tract_2']):
    if sum(unique_ct == t1) == 1 and sum(unique_ct == t2) == 1:
        trip_filter.append(True)
    else:
        trip_filter.append(False)
trip_filter = np.array(trip_filter)

In [39]:
x = tp[['const','morning','afternoon','companion', 'distance', 
         'from_home', 'to_home', 'purp_work', 'purp_school', 'purp_errand', 'purp_recreation', 
         'ontime_important', '12_18yrs', '18_25yrs', '25_55yrs', '55+yrs', 'no_age', 
         'disability', 'educ_col', 'educ_grad', 
         'race_white', 'race_black', 'race_asian', 
         'male', 'female', 
         'emply_park', 'emply_transit', 'emply_veh', 'emply_wfh', 'emply_flex', 'emply_hours', 
         'license', 'person_trips', 'person_transit', 'person_freq_transit', 
         'hh_inc_0_30', 'hh_inc_30_60', 'hh_inc_60_100', 'hh_inc_100_150', 'hh_inc_150', 
         'avg_pr_veh', 'home_own', 'home_house', 'home_condo']].to_numpy()[trip_filter]

y = tp['mode'].astype(int).to_numpy()[trip_filter] - 1
x_train, x_test, y_train, y_test = train_test_split(x,y,test_size=0.2, random_state=42)

In [40]:
x.shape

(75248, 44)

In [42]:
trainset = SurveyDataset(torch.tensor(x_train, dtype=torch.float), torch.tensor(y_train, dtype=torch.long))
trainloader = DataLoader(trainset, batch_size=256, shuffle=True)

testset = SurveyDataset(torch.tensor(x_test, dtype=torch.float), torch.tensor(y_test, dtype=torch.long))
testloader = DataLoader(testset, batch_size=len(testset), shuffle=True)

In [43]:
loss_fn = nn.CrossEntropyLoss()

model = mnl.MNL(n_alts=n_alts, n_features=x.shape[-1])
# model = model.to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.02, weight_decay=0)

In [45]:
for epoch in range(30):
    loss_ = 0
    correct = 0
    for batch, (x_batch, y_batch) in enumerate(trainloader):
        # Compute prediction and loss
        util = model(x_batch)
        loss = loss_fn(util, y_batch)
        loss_ += loss.item()
        
        pred = torch.argmax(util, dim=1)
        correct += torch.sum(pred == y_batch)
        
        # Backpropagation
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()


    if batch % 1 == 0:
        print(f"[epoch: {epoch:>3d}] Train loss: {loss_/len(trainset):.4f} accuracy: {correct/len(trainset):.3f}")
    
    
    correct = 0
    for batch, (x_batch, y_batch) in enumerate(testloader):
        util = model(x_batch)
        pred = torch.argmax(util, dim=1)
        correct += torch.sum(pred == y_batch)
    print(f"[epoch: {epoch:>3d}] Test accuracy: {correct/len(testset):.3f}")


[epoch:   0] Train loss: 0.0018 accuracy: 0.841
[epoch:   0] Test accuracy: 0.842
[epoch:   1] Train loss: 0.0019 accuracy: 0.840
[epoch:   1] Test accuracy: 0.845
[epoch:   2] Train loss: 0.0018 accuracy: 0.841
[epoch:   2] Test accuracy: 0.841
[epoch:   3] Train loss: 0.0018 accuracy: 0.841
[epoch:   3] Test accuracy: 0.829
[epoch:   4] Train loss: 0.0019 accuracy: 0.841
[epoch:   4] Test accuracy: 0.843
[epoch:   5] Train loss: 0.0018 accuracy: 0.840
[epoch:   5] Test accuracy: 0.844
[epoch:   6] Train loss: 0.0018 accuracy: 0.841
[epoch:   6] Test accuracy: 0.833
[epoch:   7] Train loss: 0.0018 accuracy: 0.840
[epoch:   7] Test accuracy: 0.833
[epoch:   8] Train loss: 0.0018 accuracy: 0.842
[epoch:   8] Test accuracy: 0.840
[epoch:   9] Train loss: 0.0018 accuracy: 0.841
[epoch:   9] Test accuracy: 0.840
[epoch:  10] Train loss: 0.0018 accuracy: 0.841
[epoch:  10] Test accuracy: 0.842
[epoch:  11] Train loss: 0.0018 accuracy: 0.842
[epoch:  11] Test accuracy: 0.842
[epoch:  12] Tra