In [1]:
# import bus_processing
import bus_prediction
import os
import pandas as pd
from constants import DATA_DIR

  _torch_pytree._register_pytree_node(


In [2]:
def load_bus_data(path=None):
    """Load the bus dataset CSV into a DataFrame.

    Args:
        path: Optional path to a CSV file. When omitted (or ``None``), the
            file ``dataset.csv`` inside ``DATA_DIR`` is read, which matches
            the original no-argument behaviour.

    Returns:
        pandas.DataFrame: The CSV contents as parsed by ``pd.read_csv``.
        No column conversion is performed here.
    """
    if path is None:
        # Default location: the project-wide data directory.
        path = os.path.join(DATA_DIR, "dataset.csv")
    return pd.read_csv(path)

# Read the raw table and parse the "time_bucket" column into timezone-aware
# UTC datetimes, then let the project helper produce the train/test splits.
bus_data = load_bus_data().assign(
    time_bucket=lambda frame: pd.to_datetime(frame["time_bucket"], utc=True)
)
X_train, y_train, X_test, y_test = bus_prediction.prepare_bus_data(bus_data)

## LSTM

### Next-period prediction

In [7]:
# Next-period LSTM: a 4-layer network with hidden size 64 and dropout 0.1.
# input_size is one less than the number of columns in X_train
# (presumably one column is not fed to the network -- TODO confirm which).
lstm_model = bus_prediction.LSTMModule(
    input_size=X_train.shape[1] - 1,
    hidden_size=64,
    num_layers=4,
    dropout=0.1,
)
# Lags of 1, 2, 3 and 3 * 24 steps
# (presumably 3 buckets per hour * 24 h = one day back -- TODO confirm).
lstm_predictor = bus_prediction.LSTMPredictor(
    lstm_model,
    lag_offsets=[1, 2, 3, 3 * 24],
)
lstm_predictor.train(
    X_train,
    y_train,
    epochs=30,
    learning_rate=0.001,
)
mse = lstm_predictor.evaluate(X_test, y_test)
print(f"LSTM Test MSE: {mse:.4f}")

Epoch 1/30, Train Loss: 1.2878, Val Loss: 0.5795
Epoch 2/30, Train Loss: 0.6859, Val Loss: 0.3997
Epoch 3/30, Train Loss: 0.5414, Val Loss: 0.3108
Epoch 4/30, Train Loss: 0.5374, Val Loss: 0.3307
Epoch 5/30, Train Loss: 0.5131, Val Loss: 0.3163
Epoch 6/30, Train Loss: 0.4938, Val Loss: 0.3337
Epoch 7/30, Train Loss: 0.4965, Val Loss: 0.2626
Epoch 8/30, Train Loss: 0.4800, Val Loss: 0.3052
Epoch 9/30, Train Loss: 0.4930, Val Loss: 0.2481
Epoch 10/30, Train Loss: 0.4948, Val Loss: 0.2578
Epoch 11/30, Train Loss: 0.4748, Val Loss: 0.2610
Epoch 12/30, Train Loss: 0.4516, Val Loss: 0.2792
Epoch 13/30, Train Loss: 0.4722, Val Loss: 0.2688
Epoch 14/30, Train Loss: 0.4413, Val Loss: 0.2981
Epoch 15/30, Train Loss: 0.4276, Val Loss: 0.2571
Epoch 16/30, Train Loss: 0.4566, Val Loss: 0.2763
Epoch 17/30, Train Loss: 0.4388, Val Loss: 0.2622
Epoch 18/30, Train Loss: 0.4630, Val Loss: 0.2815
Epoch 19/30, Train Loss: 0.4103, Val Loss: 0.2846
Epoch 20/30, Train Loss: 0.3852, Val Loss: 0.2760
Epoch 21/

### Day-ahead prediction

In [9]:
# Day-ahead LSTM: same architecture as the next-period run, re-created from
# scratch so no weights carry over from the previous cell.
lstm_model = bus_prediction.LSTMModule(
    input_size=X_train.shape[1] - 1,
    hidden_size=64,
    num_layers=4,
    dropout=0.1,
)
# Every lag is at least 3 * 24 steps back; the largest is 3 * 48
# (presumably one and two days respectively -- TODO confirm bucket size).
lstm_predictor = bus_prediction.LSTMPredictor(
    lstm_model,
    lag_offsets=[3 * 24, 3 * 24 + 1, 3 * 24 + 2, 3 * 48],
)
lstm_predictor.train(
    X_train,
    y_train,
    epochs=30,
    learning_rate=0.001,
)
mse = lstm_predictor.evaluate(X_test, y_test)
print(f"LSTM Test MSE: {mse:.4f}")

Epoch 1/30, Train Loss: 1.4978, Val Loss: 0.9029
Epoch 2/30, Train Loss: 1.3759, Val Loss: 0.8536
Epoch 3/30, Train Loss: 1.3883, Val Loss: 0.9450
Epoch 4/30, Train Loss: 1.4460, Val Loss: 0.9284
Epoch 5/30, Train Loss: 1.3753, Val Loss: 0.8359
Epoch 6/30, Train Loss: 1.3807, Val Loss: 0.9443
Epoch 7/30, Train Loss: 1.4470, Val Loss: 0.9395
Epoch 8/30, Train Loss: 1.3649, Val Loss: 0.8144
Epoch 9/30, Train Loss: 1.3253, Val Loss: 0.6970
Epoch 10/30, Train Loss: 1.3091, Val Loss: 0.7834
Epoch 11/30, Train Loss: 1.2760, Val Loss: 0.6293
Epoch 12/30, Train Loss: 1.2618, Val Loss: 0.6073
Epoch 13/30, Train Loss: 1.2579, Val Loss: 0.6186
Epoch 14/30, Train Loss: 1.2531, Val Loss: 0.6011
Epoch 15/30, Train Loss: 1.2458, Val Loss: 0.6145
Epoch 16/30, Train Loss: 1.2237, Val Loss: 0.5992
Epoch 17/30, Train Loss: 1.2093, Val Loss: 0.5896
Epoch 18/30, Train Loss: 1.1907, Val Loss: 0.5948
Epoch 19/30, Train Loss: 1.1900, Val Loss: 0.5836
Epoch 20/30, Train Loss: 1.1755, Val Loss: 0.5755
Epoch 21/

## XGBoost

### Next-period prediction

In [None]:
# Next-period XGBoost: lagged "avg_delay" values at 1, 2, 3 and 24 * 3 steps,
# plus "avg_delay" averages over three ranges
# (presumably trailing windows of one, two and three days -- TODO confirm).
xgboost_predictor = bus_prediction.XGBoostPredictor(
    lagged_features=["avg_delay"],
    lag_offsets=[1, 2, 3, 24 * 3],
    avg_features=["avg_delay"],
    avg_ranges=[24 * 3, 48 * 3, 72 * 3],
)
xgboost_predictor.train(X_train, y_train)
xgboost_mse = xgboost_predictor.evaluate(X_test, y_test)
print(f"XGBoost Test MSE: {xgboost_mse:.4f}")

Validation MSE: 0.1947
Shape of y_pred: (2130,)
Shape of test_y: (2130,)
XGBoost Test MSE: 0.3340


### Day-ahead prediction

In [10]:
# Day-ahead XGBoost: only lagged "avg_delay" values, all at least 24 * 3
# steps back; no averaged features are requested in this configuration.
# NOTE(review): the recorded run evaluates on 2058 rows, versus 2130 in the
# next-period run, so the two test MSEs are not computed on identical rows.
xgboost_predictor = bus_prediction.XGBoostPredictor(
    lagged_features=["avg_delay"],
    lag_offsets=[24 * 3, 24 * 3 + 1, 24 * 3 + 2, 48 * 3],
)
xgboost_predictor.train(X_train, y_train)
xgboost_mse = xgboost_predictor.evaluate(X_test, y_test)
print(f"XGBoost Test MSE: {xgboost_mse:.4f}")

Validation MSE: 0.6128
Shape of y_pred: (2058,)
Shape of test_y: (2058,)
XGBoost Test MSE: 0.6390


## TFT

In [3]:
# TFT baseline (presumably a Temporal Fusion Transformer -- TODO confirm),
# constructed with the predictor's defaults: no hyperparameters are passed.
tft_predictor = bus_prediction.TFTPredictor()
# In the recorded run, training stopped when `max_epochs=10` was reached.
tft_predictor.train(X_train, y_train)

/Users/legendword/python3/lib/python3.12/site-packages/lightning/pytorch/utilities/parsing.py:209: Attribute 'loss' is an instance of `nn.Module` and is already saved during checkpointing. It is recommended to ignore them using `self.save_hyperparameters(ignore=['loss'])`.
/Users/legendword/python3/lib/python3.12/site-packages/lightning/pytorch/utilities/parsing.py:209: Attribute 'logging_metrics' is an instance of `nn.Module` and is already saved during checkpointing. It is recommended to ignore them using `self.save_hyperparameters(ignore=['logging_metrics'])`.
GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
/Users/legendword/python3/lib/python3.12/site-packages/lightning/pytorch/trainer/connectors/logger_connector/logger_connector.py:76: Starting from v1.9.0, `tensorboardX` has been removed as a dependency of the `lightning.pytorch` package, due to potential conflicts with other packages in the ML ecosystem. For this

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

/Users/legendword/python3/lib/python3.12/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:425: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.
/Users/legendword/python3/lib/python3.12/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:425: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.


Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


In [None]:
# Evaluate the trained TFT on the test split. In the recorded run the output
# shows model weights being restored from a saved checkpoint before testing.
# NOTE(review): the result is neither stored nor printed here, unlike the
# LSTM and XGBoost cells -- confirm how the test metric is reported.
tft_predictor.evaluate(X_test, y_test)

Restoring states from the checkpoint path at /Users/legendword/Documents/zwh/Coding/CPSC440/cpsc440-transit-project/src/lightning_logs/version_80/checkpoints/epoch=1-step=344.ckpt
Loaded model weights from the checkpoint at /Users/legendword/Documents/zwh/Coding/CPSC440/cpsc440-transit-project/src/lightning_logs/version_80/checkpoints/epoch=1-step=344.ckpt
/Users/legendword/python3/lib/python3.12/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:425: The 'test_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.


Testing: |          | 0/? [00:00<?, ?it/s]