In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import mean_squared_error
from sklearn.neural_network import MLPRegressor
from sklearn.preprocessing import StandardScaler
from itertools import product

In [2]:
# read input file
filepath = "space_mountain_with_holiday_weather_lag_new.csv"
df = pd.read_csv(filepath)

In [3]:
# one hot encode the Day of Week column
df = pd.get_dummies(df, columns=['Day of Week']) 

In [4]:
# ------ SPLITTING DATASET ------
train_years = ['14', '15', '16', '17', '18', '19', '22']
dev_years = ['23']
test_years = ['24', '25']

df['Year'] = df['Date'].apply(lambda x: x.split('/')[-1])  # Extract year as a string
df = df[df['Time of Day'] >= 450]
df['Time_sq'] = df['Time of Day'] ** 2
df['Time_sin'] = np.sin(2 * np.pi * df['Time of Day'] / 1440)
df['Time_cos'] = np.cos(2 * np.pi * df['Time of Day'] / 1440)

train_df = df[df['Year'].isin(train_years)]
dev_df = df[df['Year'].isin(dev_years)]
test_df = df[df['Year'].isin(test_years)]

print("Train size:", train_df.shape)
print("Dev size:", dev_df.shape)
print("Test size:", test_df.shape)

Train size: (30844, 26)
Dev size: (5056, 26)
Test size: (7142, 26)


In [5]:
# getting datasets
def get_X_y(dataframe):
    y = dataframe['Wait Time'].values
    X = dataframe.drop(columns=['Wait Time', 'Date', 'Time', 'Year'])
    return X, y

X_train, y_train = get_X_y(train_df)
X_dev, y_dev = get_X_y(dev_df)
X_test, y_test = get_X_y(test_df)

In [6]:
# normalize features (important for MLP)
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_dev_scaled = scaler.fit_transform(X_dev)
X_test_scaled = scaler.transform(X_test)

In [7]:
hidden_layers = [(32,), (64,), (64, 32), (128,)]
activations = ['relu', 'tanh']
learning_rates = [0.001, 0.01]

param_grid = list(product(hidden_layers, activations, learning_rates))

best_rmse = float('inf')
best_params = None
best_model = None

In [8]:
print("Training MLPs...\n")
for h, act, lr in param_grid:
    model = MLPRegressor(hidden_layer_sizes=h,
                         activation=act,
                         solver='adam',
                         learning_rate_init=lr,
                         max_iter=300,
                         random_state=42)

    model.fit(X_train_scaled, y_train)
    y_dev_pred = model.predict(X_dev_scaled)
    rmse = np.sqrt(mean_squared_error(y_dev, y_dev_pred))

    print(f"hidden_layer_sizes={h}, activation={act}, lr={lr} → Dev RMSE: {rmse:.2f}")

    if rmse < best_rmse:
        best_rmse = rmse
        best_params = (h, act, lr)
        best_model = model

Training MLPs...





hidden_layer_sizes=(32,), activation=relu, lr=0.001 → Dev RMSE: 16.73
hidden_layer_sizes=(32,), activation=relu, lr=0.01 → Dev RMSE: 16.59




hidden_layer_sizes=(32,), activation=tanh, lr=0.001 → Dev RMSE: 16.68
hidden_layer_sizes=(32,), activation=tanh, lr=0.01 → Dev RMSE: 17.32




hidden_layer_sizes=(64,), activation=relu, lr=0.001 → Dev RMSE: 16.83
hidden_layer_sizes=(64,), activation=relu, lr=0.01 → Dev RMSE: 16.81




hidden_layer_sizes=(64,), activation=tanh, lr=0.001 → Dev RMSE: 16.74
hidden_layer_sizes=(64,), activation=tanh, lr=0.01 → Dev RMSE: 17.03




hidden_layer_sizes=(64, 32), activation=relu, lr=0.001 → Dev RMSE: 17.73
hidden_layer_sizes=(64, 32), activation=relu, lr=0.01 → Dev RMSE: 18.20




hidden_layer_sizes=(64, 32), activation=tanh, lr=0.001 → Dev RMSE: 17.67
hidden_layer_sizes=(64, 32), activation=tanh, lr=0.01 → Dev RMSE: 17.68




hidden_layer_sizes=(128,), activation=relu, lr=0.001 → Dev RMSE: 16.79
hidden_layer_sizes=(128,), activation=relu, lr=0.01 → Dev RMSE: 18.11




hidden_layer_sizes=(128,), activation=tanh, lr=0.001 → Dev RMSE: 17.04
hidden_layer_sizes=(128,), activation=tanh, lr=0.01 → Dev RMSE: 17.88




In [9]:
print("\nBest hyperparameters:")
print(f"hidden_layer_sizes={best_params[0]}, activation={best_params[1]}, lr={best_params[2]}")

X_train_dev = np.vstack([X_train_scaled, X_dev_scaled])
y_train_dev = np.concatenate([y_train, y_dev])

final_model = MLPRegressor(hidden_layer_sizes=best_params[0],
                           activation=best_params[1],
                           solver='adam',
                           learning_rate_init=best_params[2],
                           max_iter=300,
                           random_state=42)

final_model.fit(X_train_dev, y_train_dev)
y_test_pred = final_model.predict(X_test_scaled)
test_rmse = np.sqrt(mean_squared_error(y_test, y_test_pred))
print(f"Test RMSE: {test_rmse:.2f}")


Best hyperparameters:
hidden_layer_sizes=(32,), activation=relu, lr=0.01
Test RMSE: 15.35


In [10]:
# # error distribution
# errors =  - y_test
# plt.figure(figsize=(8, 4))
# plt.hist(errors, bins=30, edgecolor='black')
# plt.title("Distribution of Prediction Errors (MLP)")
# plt.xlabel("Prediction Error (Predicted - Actual)")
# plt.ylabel("Frequency")
# plt.grid(True)
# plt.tight_layout()
# plt.show()