In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, Dense, Dropout, Flatten, MaxPooling1D

In [2]:
df = pd.read_csv("space_mountain_with_holiday_weather_lag_suite.csv")

# Parse date
df['Date'] = pd.to_datetime(df['Date'], format='%m/%d/%y')
df['Datetime'] = pd.to_datetime(df['Date'].astype(str) + ' ' + df['Time'])
df.sort_values('Datetime', inplace=True)

# One-hot encode Day of Week
df = pd.get_dummies(df, columns=['Day of Week'], drop_first=True)

# Calculate residual target
df['Residual'] = df['Wait Time'] - df['Lag1_Wait_Time']

# Drop unused columns
df.drop(columns=['Date', 'Time', 'Datetime', 'Wait Time'], inplace=True)

# Store unscaled Lag1 for reconstruction
lag1_unscaled = df['Lag1_Wait_Time'].values.copy().reshape(-1, 1)

# Standardize all features and target
scaler = StandardScaler()
scaled = scaler.fit_transform(df)
scaled_df = pd.DataFrame(scaled, columns=df.columns)

In [3]:
SEQUENCE_LENGTH = 12
X, y, lag1_ref = [], [], []
features = scaled_df.drop(columns=['Residual']).values
target = scaled_df['Residual'].values

for i in range(SEQUENCE_LENGTH, len(df)):
    X.append(features[i-SEQUENCE_LENGTH:i])
    y.append(target[i])
    lag1_ref.append(lag1_unscaled[i])

X = np.array(X)
y = np.array(y)
lag1_ref = np.array(lag1_ref)

In [4]:
split_index = int(0.8 * len(X))
X_train, X_test = X[:split_index], X[split_index:]
y_train, y_test = y[:split_index], y[split_index:]
lag1_test = lag1_ref[split_index:]

model = Sequential()
model.add(Conv1D(filters=64, kernel_size=3, activation='relu', input_shape=(X_train.shape[1], X_train.shape[2])))
model.add(MaxPooling1D(pool_size=2))
model.add(Dropout(0.3))
model.add(Flatten())
model.add(Dense(64, activation='relu'))
model.add(Dense(1))

model.compile(optimizer='adam', loss='mse')
model.summary()

model.fit(X_train, y_train, epochs=20, batch_size=32, validation_split=0.1, verbose=1)

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/20
[1m1085/1085[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 1ms/step - loss: 1.0799 - val_loss: 0.7710
Epoch 2/20
[1m1085/1085[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 955us/step - loss: 0.9788 - val_loss: 0.7561
Epoch 3/20
[1m1085/1085[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 858us/step - loss: 0.9756 - val_loss: 0.7700
Epoch 4/20
[1m1085/1085[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 838us/step - loss: 0.9551 - val_loss: 0.7503
Epoch 5/20
[1m1085/1085[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 880us/step - loss: 0.9412 - val_loss: 0.7543
Epoch 6/20
[1m1085/1085[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 852us/step - loss: 0.9458 - val_loss: 0.7679
Epoch 7/20
[1m1085/1085[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 875us/step - loss: 0.9087 - val_loss: 0.7565
Epoch 8/20
[1m1085/1085[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 845us/step - loss: 0.9452 - val_loss: 0.8075
Epoch 9/20

<keras.src.callbacks.history.History at 0x1787aecc0>

In [None]:
# Predict residuals
resid_pred = model.predict(X_test)

# Reconstruct predicted wait times
predicted_wait = resid_pred.flatten() + lag1_test.flatten()

# Load true unscaled wait times (from previous runs)
true_wait = predicted_wait.copy()  # placeholder (to be replaced with actual)
# Replace above line with loading of actual test 'Wait Time' values if available

# Load original (unscaled) Wait Time column
df_full = pd.read_csv("space_mountain_with_holiday_weather_lag_suite.csv")
df_full['Date'] = pd.to_datetime(df_full['Date'], format='%m/%d/%y')
df_full['Datetime'] = pd.to_datetime(df_full['Date'].astype(str) + ' ' + df_full['Time'])
df_full.sort_values('Datetime', inplace=True)
wait_true = df_full['Wait Time'].values[SEQUENCE_LENGTH:]

# Split into train/test aligned with model split
wait_true_test = wait_true[split_index:]

# Compute RMSE on reconstructed predictions
rmse = np.sqrt(mean_squared_error(wait_true_test, predicted_wait))
print(f"Residual model RMSE (in minutes): {rmse:.2f}")

[1m302/302[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 343us/step


FileNotFoundError: [Errno 2] No such file or directory: 'space_mountain_with_lag_suite.csv'