In [1]:
import pandas as pd
import numpy as np
from sklearn.linear_model import Lasso
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import StandardScaler
from xgboost import XGBRegressor

In [2]:
# Load data
df = pd.read_csv("space_mountain_with_holiday_weather_lag_suite.csv")

# Extract year: the text after the last '/' in Date. The vectorized .str
# accessor replaces a per-row Python lambda; a missing Date yields a NaN year
# (and therefore lands in neither split) instead of raising AttributeError.
df['Year'] = df['Date'].str.split('/').str[-1]

# Keep only rows whose Time of Day is at least 450.
# NOTE(review): units are not visible here — presumably minutes after
# midnight (07:30); confirm against the dataset description.
df = df[df['Time of Day'] >= 450]

# One-hot encode Day of Week; drop_first removes one level so the remaining
# dummies are not perfectly collinear (matters for the linear model).
df = pd.get_dummies(df, columns=['Day of Week'], drop_first=True)

# Interaction / derived features (creation order fixes the column order of X).
df['Lag1_x_Temp'] = df['Lag1_Wait_Time'] * df['Temperature']
df['Holiday_x_Time'] = df['Is_Holiday'].astype(int) * df['Time of Day']
df['Rain'] = (df['Precipitation'] > 0.1).astype(int)
df['Time_sq'] = df['Time of Day'] ** 2

# Train/test split by year suffix. '20' and '21' appear in neither list, so
# rows from those years are excluded from both sets.
train_years = ['14', '15', '16', '17', '18', '19', '22']
test_years = ['23', '24', '25']
train_df = df[df['Year'].isin(train_years)]
test_df = df[df['Year'].isin(test_years)]

# Fail early with a clear message: if Date does not end in a two-digit year
# the masks above match nothing and model fitting would fail much later with
# an unrelated-looking error.
assert not train_df.empty and not test_df.empty, (
    "Year-based split produced an empty train or test set; "
    "check that Date ends in a two-digit year such as '/19'"
)

# Define features and target: everything except the target and the raw
# date/time identifier columns is used as a feature.
drop_cols = ['Wait Time', 'Date', 'Time', 'Year', 'Month']
X_train = train_df.drop(columns=drop_cols)
y_train = train_df['Wait Time']
X_test = test_df.drop(columns=drop_cols)
y_test = test_df['Wait Time']

In [3]:
# Standardize the features for Lasso: the scaler is fitted on the training
# rows only and the same fitted scaler is then applied to the test rows.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Lasso: fit on the standardized training features, predict the test rows.
lasso = Lasso(alpha=1.0, max_iter=10000).fit(X_train_scaled, y_train)
lasso_pred = lasso.predict(X_test_scaled)

# XGBoost: fitted on the unscaled feature frames.
xgb = XGBRegressor(
    n_estimators=100,
    learning_rate=0.1,
    max_depth=6,
    random_state=42,
)
xgb.fit(X_train, y_train)
xgb_pred = xgb.predict(X_test)

In [4]:
# Weight given to the Lasso prediction; XGBoost receives the remaining 1 - alpha.
alpha = 0.5

# Weighted average of the two models' test-set predictions.
ensemble_pred = (1 - alpha) * xgb_pred + alpha * lasso_pred

# Root-mean-squared error of the blend against the test targets.
rmse = np.sqrt(mean_squared_error(y_true=y_test, y_pred=ensemble_pred))
print(f"Ensemble RMSE (alpha={alpha}): {rmse:.2f}")

Ensemble RMSE (alpha=0.5): 16.61


In [None]:
# Sweep the Lasso blend weight over [0, 1] in steps of 0.05 and keep the
# weight that gives the lowest RMSE.
# NOTE(review): the weight is chosen on the same y_test that is used to report
# the score, so best_rmse is an optimistic estimate — consider selecting the
# weight on a held-out validation split instead.
best_alpha = None
best_rmse = float("inf")

print("Alpha Tuning Results:")
# linspace fixes the grid at exactly 21 points ending on 1.0; arange with a
# fractional step can gain or lose the endpoint through float rounding.
# Sweep-local names keep the loop from overwriting the notebook-level alpha,
# ensemble_pred and rmse set by the earlier fixed-weight blend.
for candidate_alpha in np.linspace(0.0, 1.0, 21):
    candidate_pred = candidate_alpha * lasso_pred + (1 - candidate_alpha) * xgb_pred
    candidate_rmse = np.sqrt(mean_squared_error(y_test, candidate_pred))
    print(f"  alpha = {candidate_alpha:.2f} → RMSE = {candidate_rmse:.4f}")

    # Strict '<' keeps the smallest alpha when several weights tie.
    if candidate_rmse < best_rmse:
        best_rmse = candidate_rmse
        best_alpha = candidate_alpha

print(f"\n✅ Best alpha: {best_alpha:.2f} with RMSE = {best_rmse:.4f}")
