In [3]:
import pandas as pd
import numpy as np
from sklearn.linear_model import Lasso
from sklearn.metrics import mean_squared_error

In [4]:
# Load the pre-built dataset; the path is relative to the notebook's working
# directory.
df = pd.read_csv("space_mountain_with_holiday_weather_lag_suite.csv")

# One-hot encode 'Day of Week'. drop_first=True omits the first category, so
# that day becomes the implicit baseline for the remaining dummy columns.
df = pd.get_dummies(df, columns=['Day of Week'], drop_first=True)

# Extract the year as the text after the last '/' in 'Date'. The value stays a
# string (no numeric conversion). The vectorized .str accessor replaces a
# per-row Python lambda and propagates a missing date as NaN instead of
# raising on it.
df['Year'] = df['Date'].str.split('/').str[-1]

# Keep only rows whose 'Time of Day' is at least 450 (after park opens).
# NOTE(review): 450 is presumably minutes after midnight (07:30) -- confirm
# against the dataset's encoding of 'Time of Day'.
df = df[df['Time of Day'] >= 450]

In [None]:
# Predictor columns passed to the model, in the order they are selected.
top_features = [
    'Lag1_Wait_Time',
    'Lag2_Wait_Time',
    'Temperature',
    'Month',
    'Time of Day',
    'Day of Week_Saturday',
    'Lag3_Wait_Time',
    'RollingMean_Lag3',
]

# Year labels (two-character strings, compared against df['Year']) for each
# side of the split. '20' and '21' appear in neither list.
# NOTE(review): presumably excluded on purpose -- confirm.
train_years = ['14', '15', '16', '17', '18', '19', '22']
test_years = ['23', '24', '25']

# Select the rows of each partition by membership of 'Year' in its label list.
train_df, test_df = (
    df.loc[df['Year'].isin(years)] for years in (train_years, test_years)
)

# Feature matrix and 'Wait Time' target for each partition.
X_train, y_train = train_df[top_features], train_df['Wait Time']
X_test, y_test = test_df[top_features], test_df['Wait Time']

In [6]:
# Build the simplified Lasso regressor (alpha=1.0, at most 10000 solver
# iterations) and fit it on the training partition; fit() returns the
# estimator itself, so construction and fitting are chained.
model = Lasso(alpha=1.0, max_iter=10000).fit(X_train, y_train)

# Predict wait times for the test partition.
y_pred = model.predict(X_test)

# Report the root-mean-squared error: the square root of the MSE between the
# observed test targets and the predictions, printed to two decimals.
rmse = np.sqrt(mean_squared_error(y_true=y_test, y_pred=y_pred))
print(f"Simplified Lasso RMSE: {rmse:.2f}")

Simplified Lasso RMSE: 17.13
