In [12]:
import pandas as pd
import numpy as np
from sklearn.metrics import mean_absolute_error
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Input, LSTM, Dense, Bidirectional, Conv1D, MaxPooling1D, Flatten, Concatenate
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping
from tensorflow.keras.optimizers import Adam
import tensorflow as tf
import tensorflow.keras as keras

In [2]:
# Feature tables for the train / test splits (read in the same order as before)
X_train, X_test = (
    pd.read_csv(csv_path)
    for csv_path in ("../data/X_train_lstm.csv", "../data/X_test_lstm.csv")
)

# Matching target tables for the same two splits
Y_train, Y_test = (
    pd.read_csv(csv_path)
    for csv_path in ("../data/Y_train_lstm.csv", "../data/Y_test_lstm.csv")
)

### Set random seeds

In [3]:
# One shared seed value, applied to NumPy, TensorFlow and Keras in that order
SEED = 42
for seed_fn in (np.random.seed, tf.random.set_seed, keras.utils.set_random_seed):
    seed_fn(SEED)

### Train LSTM

In [11]:
# Re-seed at the top of the cell so this training run does not depend on which
# cells were executed before it (or in what order)
keras.utils.set_random_seed(42)

# Defining callbacks
# ModelCheckpoint writes the best-val_loss model to disk. restore_best_weights=True
# makes the in-memory model match that checkpoint, so the test metrics below
# describe the best epoch instead of the weights from `patience` epochs later.
checkpoint = ModelCheckpoint("../models/lstm_model.keras", monitor='val_loss', save_best_only=True, verbose=1)
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True, verbose=1)

# Define LSTM model
# Up to 2 layers of LSTM and number of hidden units were hand tuned to determine this as the optimum model
# NOTE(review): X_train is loaded as a 2-D DataFrame while the model declares
# (timesteps, 1) inputs; this appears to rely on Keras adding the trailing axis — confirm.
lstm_model = Sequential([
    Input(shape=(X_train.shape[1], 1)),
    LSTM(units=64, activation='relu', recurrent_dropout=0.2),
    Dense(5)
])

# Use MSE for loss because we want to emphasize the "wrongest" guesses the most. MAE is an interpretable metric
lstm_model.compile(optimizer=Adam(learning_rate=1e-3), loss='mse', metrics=['mae'])

# Train model w/ early stopping
# Batch size is the average number of flights per day
history = lstm_model.fit(X_train, Y_train, epochs=50, batch_size=265, validation_split=0.2, callbacks=[checkpoint, early_stopping])

# Overall test metrics (loss is MSE, metric is MAE averaged over all outputs)
loss, mae = lstm_model.evaluate(X_test, Y_test)
print("Test Mean Absolute Error:", mae)

Y_pred = lstm_model.predict(X_test)

# Per-target MAE: one value per output column instead of a single average
mae_columns = mean_absolute_error(Y_test, Y_pred, multioutput='raw_values')
print("Mean Absolute Error for each column:")
print(mae_columns)

Epoch 1/50
[1m1107/1110[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 10ms/step - loss: 476.4057 - mae: 12.0670
Epoch 1: val_loss improved from inf to 376.68182, saving model to ../models/lstm_model.keras
[1m1110/1110[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 12ms/step - loss: 476.1666 - mae: 12.0632 - val_loss: 376.6818 - val_mae: 10.1995
Epoch 2/50
[1m1109/1110[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 10ms/step - loss: 381.4813 - mae: 10.3305
Epoch 2: val_loss improved from 376.68182 to 373.32498, saving model to ../models/lstm_model.keras
[1m1110/1110[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 11ms/step - loss: 381.4779 - mae: 10.3304 - val_loss: 373.3250 - val_mae: 10.0969
Epoch 3/50
[1m1108/1110[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 11ms/step - loss: 378.4653 - mae: 10.2366
Epoch 3: val_loss improved from 373.32498 to 372.51569, saving model to ../models/lstm_model.keras
[1m1110/1110[0m [32m━━━━━━━━━━━━━━━━━━━━

### Train BiLSTM

In [7]:
# Re-seed at the top of the cell so this training run does not depend on which
# cells were executed before it (or in what order)
keras.utils.set_random_seed(42)

# Defining callbacks
# ModelCheckpoint writes the best-val_loss model to disk. restore_best_weights=True
# makes the in-memory model match that checkpoint, so the test metrics below
# describe the best epoch instead of the weights from `patience` epochs later.
checkpoint = ModelCheckpoint("../models/bilstm_model.keras", monitor='val_loss', save_best_only=True, verbose=1)
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True, verbose=1)

# Define BiLSTM model: a single LSTM layer wrapped in Bidirectional
# Up to 2 layers of LSTM and number of hidden units were hand tuned to determine this as the optimum model
bilstm_model = Sequential([
    Input(shape=(X_train.shape[1], 1)),
    Bidirectional(
        LSTM(units=64, activation='relu', recurrent_dropout=0.2)
    ),
    Dense(5)
])

# Use MSE for loss because we want to emphasize the "wrongest" guesses the most. MAE is an interpretable metric
bilstm_model.compile(optimizer=Adam(learning_rate=1e-3), loss='mse', metrics=['mae'])

# Train model w/ early stopping
# Batch size is the average number of flights per day
history = bilstm_model.fit(X_train, Y_train, epochs=50, batch_size=265, validation_split=0.2, callbacks=[checkpoint, early_stopping])

# Overall test metrics (loss is MSE, metric is MAE averaged over all outputs)
loss, mae = bilstm_model.evaluate(X_test, Y_test)
print("Test Mean Absolute Error:", mae)

Y_pred = bilstm_model.predict(X_test)

# Per-target MAE: one value per output column instead of a single average
mae_columns = mean_absolute_error(Y_test, Y_pred, multioutput='raw_values')
print("Mean Absolute Error for each column:")
print(mae_columns)

Epoch 1/50
[1m1109/1110[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 10ms/step - loss: 994.3939 - mae: 19.4989
Epoch 1: val_loss improved from inf to 380.10315, saving model to ../models/lstm_model.keras
[1m1110/1110[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 12ms/step - loss: 993.5950 - mae: 19.4887 - val_loss: 380.1031 - val_mae: 10.5792
Epoch 2/50
[1m1108/1110[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 10ms/step - loss: 387.6837 - mae: 10.8249
Epoch 2: val_loss improved from 380.10315 to 374.93298, saving model to ../models/lstm_model.keras
[1m1110/1110[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 11ms/step - loss: 387.6754 - mae: 10.8246 - val_loss: 374.9330 - val_mae: 10.2969
Epoch 3/50
[1m1109/1110[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 10ms/step - loss: 382.0375 - mae: 10.5333
Epoch 3: val_loss improved from 374.93298 to 374.36658, saving model to ../models/lstm_model.keras
[1m1110/1110[0m [32m━━━━━━━━━━━━━━━━━━━━

### Train CNN + LSTM Hybrid

In [10]:
# Re-seed at the top of the cell so this training run does not depend on which
# cells were executed before it (or in what order)
keras.utils.set_random_seed(42)

# Defining callbacks
# ModelCheckpoint writes the best-val_loss model to disk. restore_best_weights=True
# makes the in-memory model match that checkpoint, so the test metrics below
# describe the best epoch instead of the weights from `patience` epochs later.
checkpoint = ModelCheckpoint("../models/hybrid_model.keras", monitor='val_loss', save_best_only=True, verbose=1)
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True, verbose=1)

# Define input layer (shared by both branches)
input_layer = Input(shape=(X_train.shape[1], 1))

# CNN branch: Conv1D -> max-pool -> flatten -> 32-unit dense summary
conv_layer = Conv1D(filters=32, kernel_size=3, activation='relu')(input_layer)
maxpool_layer = MaxPooling1D(pool_size=2)(conv_layer)
flatten_layer = Flatten()(maxpool_layer)
dense_cnn = Dense(32, activation='relu')(flatten_layer)

# LSTM branch (unidirectional): reads the same input, followed by a 32-unit dense summary
lstm_layer = LSTM(64, activation='relu')(input_layer)
dense_lstm = Dense(32, activation='relu')(lstm_layer)

# Concatenate CNN and LSTM branch outputs
concatenated = Concatenate()([dense_cnn, dense_lstm])

# Output layer: one linear unit per target
output_layer = Dense(5)(concatenated)

# Create the hybrid model (a single network with two branches, trained jointly)
hybrid_model = Model(inputs=input_layer, outputs=output_layer)

# Same optimizer / loss / metric setup as the LSTM and BiLSTM cells
# (Adam at 1e-3 is also what the 'adam' string shortcut resolves to)
hybrid_model.compile(optimizer=Adam(learning_rate=1e-3), loss='mse', metrics=['mae'])

# NOTE(review): batch_size is 512 here but 265 in the LSTM/BiLSTM cells — confirm
# this is intentional before comparing the models' metrics directly.
history = hybrid_model.fit(
    X_train,
    Y_train,
    epochs=50,
    batch_size=512,
    validation_split=0.2,
    callbacks=[checkpoint, early_stopping]
)

# Overall test metrics (loss is MSE, metric is MAE averaged over all outputs)
loss, mae = hybrid_model.evaluate(X_test, Y_test)
print("Test Mean Absolute Error:", mae)

Y_pred = hybrid_model.predict(X_test)

# Per-target MAE: one value per output column instead of a single average
mae_columns = mean_absolute_error(Y_test, Y_pred, multioutput='raw_values')
print("Mean Absolute Error for each column:")
print(mae_columns)

Epoch 1/50
[1m570/575[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 12ms/step - loss: 928.0755 - mae: 17.2504
Epoch 1: val_loss improved from inf to 405.37177, saving model to ../models/hybrid_model.keras
[1m575/575[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 14ms/step - loss: 923.7798 - mae: 17.2057 - val_loss: 405.3718 - val_mae: 11.0990
Epoch 2/50
[1m570/575[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 12ms/step - loss: 400.2170 - mae: 11.0901
Epoch 2: val_loss improved from 405.37177 to 397.49271, saving model to ../models/hybrid_model.keras
[1m575/575[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 14ms/step - loss: 400.1458 - mae: 11.0878 - val_loss: 397.4927 - val_mae: 10.6789
Epoch 3/50
[1m571/575[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 12ms/step - loss: 385.4915 - mae: 10.5928
Epoch 3: val_loss improved from 397.49271 to 386.89297, saving model to ../models/hybrid_model.keras
[1m575/575[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37