In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import mean_squared_error
from sklearn.neural_network import MLPRegressor
from sklearn.preprocessing import StandardScaler
from itertools import product
import csv
from time import sleep

In [2]:
# read input file
filepath = "space_mountain_with_holiday_weather_lag_new.csv"
df = pd.read_csv(filepath)

In [3]:
# one hot encode the Day of Week column
df = pd.get_dummies(df, columns=['Day of Week']) 

In [4]:
# ------ SPLITTING DATASET ------
train_years = ['14', '15', '16', '17', '18', '19', '22']
dev_years = ['23']
test_years = ['24', '25']

df['Year'] = df['Date'].apply(lambda x: x.split('/')[-1])  # Extract year as a string
df = df[df['Time of Day'] >= 450]
df['Time_sq'] = df['Time of Day'] ** 2
df['Time_sin'] = np.sin(2 * np.pi * df['Time of Day'] / 1440)
df['Time_cos'] = np.cos(2 * np.pi * df['Time of Day'] / 1440)

train_df = df[df['Year'].isin(train_years)]
dev_df = df[df['Year'].isin(dev_years)]
test_df = df[df['Year'].isin(test_years)]

print("Train size:", train_df.shape)
print("Dev size:", dev_df.shape)
print("Test size:", test_df.shape)

Train size: (30844, 26)
Dev size: (5056, 26)
Test size: (7142, 26)


In [5]:
# getting datasets
def get_X_y(dataframe):
    y = dataframe['Wait Time'].values
    X = dataframe.drop(columns=['Wait Time', 'Date', 'Time', 'Year'])
    return X, y

X_train, y_train = get_X_y(train_df)
X_dev, y_dev = get_X_y(dev_df)
X_test, y_test = get_X_y(test_df)

In [6]:
# normalize features (important for MLP)
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_dev_scaled = scaler.fit_transform(X_dev)
X_test_scaled = scaler.transform(X_test)

In [7]:
hidden_layers = [
    (32,), (64,), (128,), (256,),
    (64, 32), (128, 64), (128, 128),
    (256, 128), (256, 256)
]

activations = ['relu', 'tanh']

learning_rates = [1e-4, 5e-4, 1e-3, 5e-3, 1e-2]

alphas = [0.0001, 0.001, 0.01, 0.1]

batch_sizes = [32, 64, 128]

param_grid = list(product(hidden_layers, activations, learning_rates, alphas, batch_sizes))

best_rmse = float('inf')
best_params = None
best_model = None

In [8]:
csv_filename = "mlp_grid_results_live.csv"

# Create file and header once
with open(csv_filename, mode='w', newline='') as file:
    writer = csv.writer(file)
    writer.writerow(["hidden_layers", "activation", "learning_rate", "alpha", "batch_size", "dev_rmse", "n_iter"])

# Now iterate and append after each model
for params in param_grid:
    h, act, lr, al, bs = params
    model = MLPRegressor(hidden_layer_sizes=h,
                         activation=act,
                         solver='adam',
                         learning_rate_init=lr,
                         alpha=al,
                         batch_size=bs,
                         max_iter=2000,
                         random_state=42,
                         early_stopping=True,
                         n_iter_no_change=10,
                         validation_fraction=0.1)

    model.fit(X_train_scaled, y_train)
    y_dev_pred = model.predict(X_dev_scaled)
    rmse = np.sqrt(mean_squared_error(y_dev, y_dev_pred))
    n_iter = model.n_iter_

    # Log result immediately
    with open(csv_filename, mode='a', newline='') as file:
        writer = csv.writer(file)
        writer.writerow([h, act, lr, al, bs, rmse, n_iter])

    print(f"→ {params} → RMSE: {rmse:.2f}")


→ ((32,), 'relu', 0.0001, 0.0001, 32) → RMSE: 16.67
→ ((32,), 'relu', 0.0001, 0.0001, 64) → RMSE: 16.68
→ ((32,), 'relu', 0.0001, 0.0001, 128) → RMSE: 16.69
→ ((32,), 'relu', 0.0001, 0.001, 32) → RMSE: 16.67
→ ((32,), 'relu', 0.0001, 0.001, 64) → RMSE: 16.68
→ ((32,), 'relu', 0.0001, 0.001, 128) → RMSE: 16.69
→ ((32,), 'relu', 0.0001, 0.01, 32) → RMSE: 16.67
→ ((32,), 'relu', 0.0001, 0.01, 64) → RMSE: 16.68
→ ((32,), 'relu', 0.0001, 0.01, 128) → RMSE: 16.69
→ ((32,), 'relu', 0.0001, 0.1, 32) → RMSE: 16.67
→ ((32,), 'relu', 0.0001, 0.1, 64) → RMSE: 16.68
→ ((32,), 'relu', 0.0001, 0.1, 128) → RMSE: 16.69
→ ((32,), 'relu', 0.0005, 0.0001, 32) → RMSE: 16.77
→ ((32,), 'relu', 0.0005, 0.0001, 64) → RMSE: 16.73
→ ((32,), 'relu', 0.0005, 0.0001, 128) → RMSE: 16.63
→ ((32,), 'relu', 0.0005, 0.001, 32) → RMSE: 16.77
→ ((32,), 'relu', 0.0005, 0.001, 64) → RMSE: 16.73
→ ((32,), 'relu', 0.0005, 0.001, 128) → RMSE: 16.63
→ ((32,), 'relu', 0.0005, 0.01, 32) → RMSE: 16.77
→ ((32,), 'relu', 0.0005, 0.0

In [9]:
print("\nBest hyperparameters:")
print(f"hidden_layer_sizes={best_params[0]}, activation={best_params[1]}, lr={best_params[2]}")

X_train_dev = np.vstack([X_train_scaled, X_dev_scaled])
y_train_dev = np.concatenate([y_train, y_dev])

final_model = MLPRegressor(hidden_layer_sizes=best_params[0],
                           activation=best_params[1],
                           solver='adam',
                           learning_rate_init=best_params[2],
                           max_iter=300,
                           random_state=42)

final_model.fit(X_train_dev, y_train_dev)
y_test_pred = final_model.predict(X_test_scaled)
test_rmse = np.sqrt(mean_squared_error(y_test, y_test_pred))
print(f"Test RMSE: {test_rmse:.2f}")


Best hyperparameters:


TypeError: 'NoneType' object is not subscriptable

In [None]:
# # error distribution
# errors =  - y_test
# plt.figure(figsize=(8, 4))
# plt.hist(errors, bins=30, edgecolor='black')
# plt.title("Distribution of Prediction Errors (MLP)")
# plt.xlabel("Prediction Error (Predicted - Actual)")
# plt.ylabel("Frequency")
# plt.grid(True)
# plt.tight_layout()
# plt.show()