In [9]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout

In [10]:
# Load the raw CSV and keep it untouched so the derived frame has a clear lineage.
raw_df = pd.read_csv("space_mountain_with_holiday_weather_lag_suite.csv")

# Build a full timestamp from the separate date and time columns; it is only
# needed to order the rows chronologically.
parsed_date = pd.to_datetime(raw_df['Date'], format='%m/%d/%y')
timestamp = pd.to_datetime(parsed_date.astype(str) + ' ' + raw_df['Time'])

# One chained expression instead of repeated in-place mutation:
#   1. attach the timestamp,
#   2. sort chronologically (stable sort: rows sharing a timestamp keep their
#      file order, so the row order is reproducible),
#   3. one-hot encode Day of Week, dropping the first level,
#   4. drop the columns that were only needed for ordering.
df = (
    raw_df
    .assign(Datetime=timestamp)
    .sort_values('Datetime', kind='stable')
    .pipe(pd.get_dummies, columns=['Day of Week'], drop_first=True)
    .drop(columns=['Date', 'Time', 'Datetime'])
)

# Standardise every remaining column (the 'Wait Time' target included).
# NOTE(review): the scaler is fit on all rows, including rows that later land in
# the chronological test split, so test-set statistics leak into the scaled
# features. Fitting on the training rows only would remove the leak, but the
# evaluation cell re-fits a scaler on the full 'Wait Time' column and would have
# to change in lockstep.
scaler = StandardScaler()
scaled = scaler.fit_transform(df)
# scaled_df gets a fresh 0..n-1 index; rows stay in chronological order.
scaled_df = pd.DataFrame(scaled, columns=df.columns)

In [None]:
SEQUENCE_LENGTH = 12   # number of consecutive past rows in each input window
TRAIN_FRACTION = 0.8   # leading share of the windows used for training

features = scaled_df.drop(columns=['Wait Time']).values
wait_values = scaled_df['Wait Time'].values

# Each sample pairs the SEQUENCE_LENGTH rows *before* row i (features only)
# with the scaled wait time *at* row i, for i = SEQUENCE_LENGTH .. len - 1.
n_samples = len(scaled_df) - SEQUENCE_LENGTH
if n_samples <= 0:
    raise ValueError(
        f"Need more than {SEQUENCE_LENGTH} rows to build sequences, "
        f"got {len(scaled_df)}"
    )

# window_index[k] = [k, k+1, ..., k+SEQUENCE_LENGTH-1]; indexing with it builds
# every window in one vectorised step instead of a Python append loop.
window_index = np.arange(n_samples)[:, None] + np.arange(SEQUENCE_LENGTH)[None, :]
X = features[window_index]                  # (n_samples, SEQUENCE_LENGTH, n_features)
y = wait_values[SEQUENCE_LENGTH:].copy()    # (n_samples,)
assert len(X) == len(y), "every window must have exactly one target"

# Train/test split: no shuffling, so the test windows are the ones that come
# last in row order.
split_index = int(TRAIN_FRACTION * len(X))
X_train, X_test = X[:split_index], X[split_index:]
y_train, y_test = y[:split_index], y[split_index:]

In [12]:
# Seed Python, NumPy and TensorFlow in one call so weight initialisation,
# dropout masks and batch shuffling are repeatable on the same setup.
SEED = 42
tf.keras.utils.set_random_seed(SEED)

# Single-layer LSTM regressor: a window of shape (timesteps, n_features) goes in,
# one scaled wait-time value comes out.
# The input shape is declared with an explicit Input object; passing
# `input_shape=` to the first layer is deprecated and emits a UserWarning.
model = Sequential([
    tf.keras.Input(shape=(X_train.shape[1], X_train.shape[2])),
    LSTM(64, return_sequences=False),  # only the final hidden state is used
    Dropout(0.2),
    Dense(1),
])

model.compile(optimizer='adam', loss='mse')
model.summary()

# Train. Keras takes the validation split from the *end* of the training arrays
# (before shuffling), so validation uses the last 10% of the training windows.
history = model.fit(X_train, y_train, epochs=20, batch_size=32, validation_split=0.1, verbose=1)

  super().__init__(**kwargs)


Epoch 1/20
[1m1085/1085[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 1ms/step - loss: 0.7984 - val_loss: 0.5729
Epoch 2/20
[1m1085/1085[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 1ms/step - loss: 0.7193 - val_loss: 0.5908
Epoch 3/20
[1m1085/1085[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - loss: 0.7009 - val_loss: 0.5606
Epoch 4/20
[1m1085/1085[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - loss: 0.7222 - val_loss: 0.5907
Epoch 5/20
[1m1085/1085[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - loss: 0.6892 - val_loss: 0.5754
Epoch 6/20
[1m1085/1085[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - loss: 0.6778 - val_loss: 0.5797
Epoch 7/20
[1m1085/1085[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - loss: 0.6752 - val_loss: 0.5918
Epoch 8/20
[1m1085/1085[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - loss: 0.6676 - val_loss: 0.5793
Epoch 9/20
[1m1085/1085

In [13]:
# Predictions for the held-out windows, still in standardised units.
y_pred = model.predict(X_test)

# A one-column scaler fit on the original 'Wait Time' values. As long as `df`
# has not changed since the all-column scaler was fit, this reproduces that
# column's mean and standard deviation, so it can undo the target scaling.
wait_scaler = StandardScaler().fit(df[['Wait Time']])

# Map both the ground truth and the predictions back to original units.
# y_test is 1-D, so it is reshaped to a single column first.
y_test_unscaled = wait_scaler.inverse_transform(y_test.reshape(-1, 1))
y_pred_unscaled = wait_scaler.inverse_transform(y_pred)

# Root-mean-squared error in the original wait-time units.
mse_unscaled = mean_squared_error(y_test_unscaled, y_pred_unscaled)
real_rmse = np.sqrt(mse_unscaled)
print(f"Unscaled RMSE (in minutes): {real_rmse:.2f}")


[1m302/302[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 611us/step
Unscaled RMSE (in minutes): 18.44
