In [2]:
from google.colab import drive
# Directory inside the Colab VM where Google Drive is attached.
# Later cells derive every project path from this value.
drive_root = "/content/drive"

# Attach Drive at that directory; force_remount=True is passed so the mount
# is requested again even when this cell is re-run.
drive.mount(drive_root, force_remount=True)

Mounted at /content/drive


In [3]:
import sys
import os

# Root of the project on the mounted Drive; all other paths are built from it.
project_root = os.path.join(drive_root, 'MyDrive/Colab Notebooks/cmpe540/final-project')

# Put the project's src/ directory on the import path.
# The membership check keeps the cell idempotent: re-running it no longer
# appends a duplicate entry to sys.path each time.
source_root = os.path.join(project_root, 'src')
if source_root not in sys.path:
    sys.path.append(source_root)

# Data directories: data/raw, data/processed, and data/processed/training_data.
data_folder_path = os.path.join(project_root, 'data')
raw_data_folder_path = os.path.join(data_folder_path, 'raw')
processed_data_folder_path = os.path.join(data_folder_path, 'processed')
training_data_path = os.path.join(processed_data_folder_path, "training_data")

In [4]:
import pandas as pd
import numpy as np

# Seed NumPy's global random generator.
np.random.seed(42)

# Build the paths of the two saved arrays inside the training-data folder.
flight_vector_path, targets_path = (
    os.path.join(training_data_path, file_name)
    for file_name in ("flight_vector.npy", "targets_vector.npy")
)

# Read both arrays from disk.
# NOTE(review): allow_pickle=True lets np.load unpickle object arrays, which can
# execute arbitrary code -- acceptable only if these files come from a trusted source.
flight_vectors, targets_vector = (
    np.load(array_path, allow_pickle=True)
    for array_path in (flight_vector_path, targets_path)
)

In [5]:
from sklearn.model_selection import train_test_split

# Number of rows at the end of the dataset that are reserved as the test set.
test_size = 360

# Fail loudly on inputs the slicing below would otherwise mishandle silently:
#  * features and targets of different lengths would be paired incorrectly;
#  * test_size >= len(flight_vectors) makes train_val_size <= 0, and a zero or
#    negative slice bound yields an empty/shifted split instead of an error.
if len(flight_vectors) != len(targets_vector):
    raise ValueError(
        f"flight_vectors has {len(flight_vectors)} rows but targets_vector has "
        f"{len(targets_vector)}; they must be aligned one-to-one."
    )
if not 0 < test_size < len(flight_vectors):
    raise ValueError(
        f"test_size={test_size} must be positive and smaller than the dataset "
        f"size ({len(flight_vectors)})."
    )

# Determine the split index
train_val_size = len(flight_vectors) - test_size

# Split into training/validation and test sets: the last `test_size` rows are
# the test set, everything before them is training + validation.
train_val_vectors = flight_vectors[:train_val_size]
test_vectors = flight_vectors[train_val_size:]

train_val_targets = targets_vector[:train_val_size]
test_targets = targets_vector[train_val_size:]

# Ordered (non-shuffled) split: the leading ~80% of the rows become training
# data and the trailing ~20% validation data. random_state is intentionally not
# passed because it has no effect when shuffle=False.
train_vectors, val_vectors, train_targets, val_targets = train_test_split(
    train_val_vectors,
    train_val_targets,
    test_size=0.2,
    shuffle=False
)

# Print shapes to verify
print("Training set shape:", train_vectors.shape)
print("Validation set shape:", val_vectors.shape)
print("Test set shape:", test_vectors.shape)

# Save the three feature splits and the three matching target splits next to
# the source arrays.
np.save(os.path.join(training_data_path, "train_vectors.npy"), train_vectors)
np.save(os.path.join(training_data_path, "val_vectors.npy"), val_vectors)
np.save(os.path.join(training_data_path, "test_vectors.npy"), test_vectors)

np.save(os.path.join(training_data_path, "train_targets.npy"), train_targets)
np.save(os.path.join(training_data_path, "val_targets.npy"), val_targets)
np.save(os.path.join(training_data_path, "test_targets.npy"), test_targets)

print("Vectors saved!")

Training set shape: (25216, 36)
Validation set shape: (6305, 36)
Test set shape: (360, 36)
Vectors saved!


In [6]:
import torch
from torch.utils.data import TensorDataset, DataLoader

# 1) Read the six saved split arrays (features, then targets) back from disk.
# NOTE(review): allow_pickle=True permits unpickling object arrays, which can run
# arbitrary code -- fine only for files from a trusted source.
(
    train_vectors,
    val_vectors,
    test_vectors,
    train_targets,
    val_targets,
    test_targets,
) = (
    np.load(os.path.join(training_data_path, split_file), allow_pickle=True)
    for split_file in (
        "train_vectors.npy",
        "val_vectors.npy",
        "test_vectors.npy",
        "train_targets.npy",
        "val_targets.npy",
        "test_targets.npy",
    )
)

In [7]:
# Convert the three feature arrays to float dtype (targets are left as loaded).
train_vectors, val_vectors, test_vectors = (
    split.astype(float)
    for split in (train_vectors, val_vectors, test_vectors)
)

In [14]:
from sklearn.ensemble import RandomForestRegressor

# Random-forest regressor: 100 trees, fixed seed, n_jobs=-1 to use all CPU cores.
rf = RandomForestRegressor(n_estimators=100, random_state=42, n_jobs=-1)

# Train on the training split only.
rf.fit(train_vectors, train_targets)

# Predict on the test split (note: this is the test set, not the validation set).
test_predictions = rf.predict(test_vectors)


In [16]:
from sklearn.metrics import root_mean_squared_error

# Root-mean-squared error of the model's predictions on the test set.
test_rmse = root_mean_squared_error(test_targets, test_predictions)

# Backward-compatible alias: this value was previously stored under the
# misleading name `val_mse`, although it is an RMSE computed on the test set.
val_mse = test_rmse

print(f"Test RMSE: {test_rmse:.4f}")


Validation RMSE: 45.0342
