In [1]:
from google.colab import drive
# Mount point for Google Drive; the project paths defined later are built on top of it.
drive_root = '/content/drive'
# force_remount=True asks Colab to remount even when Drive is already mounted,
# so this cell can be re-run without restarting the runtime.
drive.mount(drive_root, force_remount=True)

Mounted at /content/drive


In [2]:
import sys
import os

# Root of the project inside the mounted Drive; every other path hangs off it.
project_root = os.path.join(drive_root, 'MyDrive/Colab Notebooks/cmpe540/final-project')

# Put the project's src/ folder on the import path.
source_root = os.path.join(project_root, 'src')
sys.path.append(source_root)

# data/ with its raw/ and processed/ sub-folders.
data_folder_path = os.path.join(project_root, 'data')
raw_data_folder_path, processed_data_folder_path = (
    os.path.join(data_folder_path, stage) for stage in ('raw', 'processed')
)
train_data_path = os.path.join(processed_data_folder_path, "train_data.csv")

In [3]:
import torch
from torch.utils.data import DataLoader, Dataset

# Define the dataset
class TicketPriceDataset(Dataset):
    """In-memory dataset that pairs feature rows with their target values.

    Both inputs are converted to ``float32`` tensors once, at construction
    time; indexing afterwards is a plain tensor lookup.

    Args:
        X: Array-like of feature rows (anything ``torch.tensor`` accepts).
        y: Array-like of targets, looked up with the same index as ``X``.
    """

    def __init__(self, X, y):
        features = torch.tensor(X, dtype=torch.float32)
        targets = torch.tensor(y, dtype=torch.float32)
        self.X, self.y = features, targets

    def __len__(self):
        # Number of samples = length of the feature tensor's first axis.
        return len(self.X)

    def __getitem__(self, idx):
        # Return the (features, target) pair stored at position ``idx``.
        sample = (self.X[idx], self.y[idx])
        return sample



import torch
import torch.nn as nn
import torch.nn.functional as F


# Define the model
class TicketPriceModel(nn.Module):
    """LSTM -> Conv1d -> fully-connected regressor producing one value per input row.

    Architecture (layer names and shapes are unchanged, so existing
    ``state_dict`` checkpoints remain loadable):

    * ``lstm``: 3-layer LSTM, 36 input features -> 64 hidden units.
    * ``conv1``..``conv3``: 1-D convolutions over the LSTM output
      (64 -> 32 -> 16 -> 16 channels); kernel 3 with padding 1 keeps the
      sequence length unchanged.
    * ``fc1`` / ``fc2``: dense head mapping the flattened conv output to a
      single value (the ticket price).

    NOTE(review): ``nn.LSTM`` interprets a 2-D tensor as ONE unbatched
    sequence. A ``(batch, 36)`` input is therefore processed as a single
    sequence of ``batch`` time steps, so the prediction for a row depends on
    the rows preceding it in the same batch (and hence on batch size and
    order). This behaviour is deliberately left untouched here because
    changing it would alter what a trained checkpoint computes -- confirm
    whether it is intended.
    """

    def __init__(self):
        super().__init__()
        # 3-layer LSTM block
        self.lstm = nn.LSTM(input_size=36, hidden_size=64, num_layers=3, batch_first=True)

        # 3-layer 1D Conv block
        self.conv1 = nn.Conv1d(in_channels=64, out_channels=32, kernel_size=3, padding=1)
        self.conv2 = nn.Conv1d(in_channels=32, out_channels=16, kernel_size=3, padding=1)
        self.conv3 = nn.Conv1d(in_channels=16, out_channels=16, kernel_size=3, padding=1)

        # Fully connected layers.
        # The width of fc1 is measured by pushing a dummy row through the
        # LSTM/conv stack. torch.randn is kept (rather than zeros) so the
        # amount of RNG state consumed before fc1/fc2 are initialised is the
        # same as before.
        self.fc1_input_size = self._get_fc1_input_size(torch.randn(1, 36))
        self.fc1 = nn.Linear(self.fc1_input_size, 128)
        self.fc2 = nn.Linear(128, 1)  # Output: ticket price

        # Not used by forward() (which calls F.relu); kept so the attribute
        # remains available to any external code that references it.
        self.relu = nn.ReLU()

    def _extract_features(self, x):
        """Run the LSTM and conv stack and return the flattened activations.

        Shared by ``forward`` and ``_get_fc1_input_size`` so the two can never
        drift apart.

        Args:
            x: Tensor of shape ``(N, 36)`` or ``(N, seq_len, 36)``.

        Returns:
            Tensor of shape ``(N, 16 * seq_len)``; for 2-D input this is
            ``(N, 16)``.
        """
        x, _ = self.lstm(x)

        # A 2-D LSTM output (N, 64) gets a length-1 sequence axis: (N, 1, 64).
        if x.dim() == 2:
            x = x.unsqueeze(1)

        # Conv1d expects (batch, channels, length).
        x = x.permute(0, 2, 1)

        for conv in (self.conv1, self.conv2, self.conv3):
            x = F.relu(conv(x))

        # Flatten everything except the leading axis.
        return x.reshape(x.size(0), -1)

    def _get_fc1_input_size(self, x):
        """Calculates the input size for fc1 based on a dummy input."""
        # Only the resulting shape matters, so skip building an autograd graph.
        with torch.no_grad():
            return self._extract_features(x).shape[1]

    def forward(self, x):
        """Predict one value per row of ``x``.

        Args:
            x: Tensor of shape ``(N, 36)`` or ``(N, 1, 36)``. A 3-D input with
                a longer sequence axis yields more than ``fc1_input_size``
                flattened features and is rejected by ``fc1``.

        Returns:
            Tensor of shape ``(N, 1)``.
        """
        x = self._extract_features(x)  # (N, fc1_input_size)

        # Fully-connected head
        x = F.relu(self.fc1(x))
        return self.fc2(x)  # -> (N, 1)


In [4]:
import numpy as np

# Seed NumPy's global RNG. This call does not seed torch's separate generator.
np.random.seed(42)

In [5]:
import pandas as pd

# TODO: enter timestamp of the model to be evaluated
timestamp = '20250113_191030'
# The test split below is read from results/<timestamp>/.
results_dir = os.path.join(project_root, 'results')

# Load the test split stored under that run's folder.
test_path = os.path.join(results_dir, timestamp, "test_data.csv")
test_set = pd.read_csv(test_path)

In [8]:
# One slice of the test set per 'ndo' value of interest (1, 7 and 30).
ndo_1, ndo_7, ndo_30 = (
    test_set[test_set['ndo'] == ndo_value] for ndo_value in (1, 7, 30)
)

In [9]:
import os
import torch
from torch.utils.data import DataLoader
import numpy as np
from sklearn.metrics import mean_squared_error

def evaluate_model(model_path, test_loader, device, tolerance=0.1):
  """Evaluates a saved TicketPriceModel checkpoint on a test loader.

  Args:
    model_path: Path to a ``state_dict`` checkpoint for ``TicketPriceModel``.
    test_loader: Iterable yielding ``(inputs, targets)`` tensor batches.
    device: ``torch.device`` on which inference runs.
    tolerance: Relative error (fraction of the absolute target value) within
      which a prediction counts as correct. Defaults to 0.1, i.e. 10%.

  Returns:
    Tuple ``(rmse, accuracy)``: the root-mean-squared error and the fraction
    of predictions within ``tolerance`` of their target. When the loader
    yields no batches the result is ``(nan, 0.0)``.
  """
  model = TicketPriceModel()
  # weights_only=True limits unpickling to tensors and plain containers,
  # which is all a state_dict holds; it avoids executing arbitrary pickled
  # code and silences torch.load's FutureWarning.
  state_dict = torch.load(model_path, map_location=device, weights_only=True)
  model.load_state_dict(state_dict)
  model.to(device)
  model.eval()  # Set to evaluation mode

  predictions = []
  actuals = []

  with torch.no_grad():
    for inputs, targets in test_loader:
      outputs = model(inputs.to(device))

      # reshape(-1) always yields a 1-D vector, including for a batch of one.
      predictions.append(outputs.reshape(-1).cpu().numpy())
      # Targets are only compared on the host, so they never go to the device.
      actuals.append(targets.reshape(-1).cpu().numpy())

  # An empty loader has nothing to concatenate or score.
  if not predictions:
    return float("nan"), 0.0

  predictions = np.concatenate(predictions)
  actuals = np.concatenate(actuals)

  rmse = np.sqrt(mean_squared_error(actuals, predictions))

  # Prediction accuracy: share of predictions within `tolerance` of the
  # target. The band uses |target| so it stays non-negative for any sign.
  within_tolerance = np.abs(predictions - actuals) <= tolerance * np.abs(actuals)
  accuracy = np.mean(within_tolerance)

  return rmse, accuracy

In [10]:
def evaluation_pipeline(ndo, evaluation_model_path, device, batch_size=2):
  """Evaluates a checkpoint on the rows of ``test_set`` with a given ndo value.

  Reads the module-level ``test_set`` DataFrame, uses ``baseFare`` as the
  target and every other column as a feature, prints the metrics and returns
  them.

  Args:
    ndo: Value of the ``ndo`` column to filter the test set on.
    evaluation_model_path: Path to the model checkpoint to evaluate.
    device: ``torch.device`` on which inference runs.
    batch_size: Batch size of the test loader. Defaults to 2, the value that
      was previously hard-coded.
      NOTE(review): TicketPriceModel.forward passes the 2-D batch straight
      into nn.LSTM, so the batch size may influence the predictions --
      confirm before changing it.

  Returns:
    Tuple ``(rmse, accuracy)`` as returned by ``evaluate_model``.
  """
  # Filter for specific ndo
  ndo_filtered = test_set[test_set['ndo'] == ndo]

  # Split the filtered frame into feature matrix and target vector
  features = ndo_filtered.drop(columns=['baseFare']).values
  targets = ndo_filtered['baseFare'].values

  # Create test loader object; order is kept fixed
  test_loader = DataLoader(
    TicketPriceDataset(features, targets),
    batch_size=batch_size,
    shuffle=False
  )

  rmse, accuracy = evaluate_model(evaluation_model_path, test_loader, device)
  print(f"RMSE: {rmse:.4f}")
  print(f"Accuracy: {accuracy:.4f}")
  return rmse, accuracy

In [12]:
# Locate the checkpoint of the run to evaluate
evaluation_timestamp = timestamp  # TODO: change here if you want to evaluate a different model
evaluation_model_path = os.path.join(results_dir, evaluation_timestamp, 'best_model.pth')

# Run on the GPU when one is available, otherwise on the CPU
if torch.cuda.is_available():
  device = torch.device("cuda")
else:
  device = torch.device("cpu")

# Report the metrics separately for each ndo value of interest
for ndo in (1, 7, 30):
  print(f"Metrics for ndo {ndo}")
  evaluation_pipeline(ndo, evaluation_model_path, device)

Metrics for ndo 1


  model.load_state_dict(torch.load(model_path, map_location=device))


RMSE: 167.5585
Accuracy: 0.0612
Metrics for ndo 7
RMSE: 132.9819
Accuracy: 0.2785
Metrics for ndo 30
RMSE: 122.7148
Accuracy: 0.0972


  model.load_state_dict(torch.load(model_path, map_location=device))
  model.load_state_dict(torch.load(model_path, map_location=device))
