In [7]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from xgboost import XGBRegressor

In [8]:
# Load the wait-time dataset. The path is relative, so the CSV must sit in the
# notebook's working directory.
df = pd.read_csv("space_mountain_with_holiday_weather_lag_suite.csv")

# Year token used by the year-based train/test split.
# 'Date' is split on '/' and the last field is taken as the year; it is then
# trimmed and cut to its last two characters so that both 'M/D/YY' and
# 'M/D/YYYY' style dates produce the two-digit strings the split compares
# against. Rows with a missing Date get a NaN Year and match neither split.
df['Year'] = df['Date'].str.split('/').str[-1].str.strip().str[-2:]

# Keep only rows at or after the time-of-day cutoff.
# NOTE(review): 450 presumably means minutes after midnight (07:30) -- confirm
# against the dataset's 'Time of Day' encoding.
MIN_TIME_OF_DAY = 450
df = df[df['Time of Day'] >= MIN_TIME_OF_DAY]

# One-hot encode the weekday; drop_first=True omits the first category so the
# remaining indicator columns are not redundant with each other.
df = pd.get_dummies(df, columns=['Day of Week'], drop_first=True)

# Engineered features, appended in a fixed order after the weekday dummies:
#   Lag1_x_Temp    - lagged wait time multiplied by temperature
#   Holiday_x_Time - time of day, zeroed out on non-holiday rows
#   Rain           - 1 when precipitation exceeds RAIN_THRESHOLD, else 0
#   Time_sq        - squared time of day
# NOTE(review): the unit of 'Precipitation' is not visible here -- confirm that
# 0.1 is the intended cutoff.
RAIN_THRESHOLD = 0.1
df = df.assign(
    Lag1_x_Temp=df['Lag1_Wait_Time'] * df['Temperature'],
    Holiday_x_Time=df['Is_Holiday'].astype(int) * df['Time of Day'],
    Rain=(df['Precipitation'] > RAIN_THRESHOLD).astype(int),
    Time_sq=df['Time of Day'] ** 2,
)

# Year-based hold-out: fit on the earlier years, evaluate on the most recent.
# Values are two-digit strings because they are matched against df['Year'].
# Years '20' and '21' appear in neither list, so those rows are unused.
# NOTE(review): presumably a deliberate exclusion -- confirm.
train_years = ['14', '15', '16', '17', '18', '19', '22']
test_years = ['23', '24', '25']

# Explicit copies so later cells can add or modify columns on either split
# without pandas raising SettingWithCopyWarning.
train_df = df[df['Year'].isin(train_years)].copy()
test_df = df[df['Year'].isin(test_years)].copy()

# Fail fast with a readable message: an empty split would otherwise only
# surface later as an opaque error during fitting or metric computation
# (typically a sign that the 'Year' values do not match the lists above).
if train_df.empty or test_df.empty:
    raise ValueError(
        f"Year-based split produced an empty set "
        f"(train rows: {len(train_df)}, test rows: {len(test_df)}); "
        f"check that df['Year'] holds two-digit year strings."
    )

# Target plus the identifier/date columns that must not be fed to the model;
# every remaining column is used as a feature.
drop_cols = ['Wait Time', 'Date', 'Time', 'Year', 'Month']
X_train = train_df.drop(columns=drop_cols)
y_train = train_df['Wait Time']
X_test = test_df.drop(columns=drop_cols)
y_test = test_df['Wait Time']

In [9]:
# Gradient-boosted tree regressor; the seed is fixed so reruns are repeatable.
xgb_params = dict(
    n_estimators=100,
    learning_rate=0.1,
    max_depth=6,
    random_state=42,
)
model = XGBRegressor(**xgb_params)
model.fit(X_train, y_train)

# Score on the held-out years: RMSE is the square root of the mean squared
# error, so it is expressed in the same unit as the target.
y_pred = model.predict(X_test)
squared_error = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(squared_error)
print(f"XGBoost RMSE with interactions: {rmse:.2f}")

XGBoost RMSE with interactions: 16.82
