In [None]:
import tensorflow as tf
from tensorflow import keras
import numpy as np
import pandas as pd
import os


# Load the prepared, scaled feature and target tables as NumPy arrays.
def _read_without_date(csv_path):
    """Read a prepared CSV and return it as a DataFrame without "Date"."""
    return pd.read_csv(csv_path).drop(columns="Date")


def _targets_with_nan_for_zero(frame):
    """Return the frame's values as a float array with exact zeros set to NaN.

    An explicit float copy is taken before masking: the array handed back by
    pandas can be a read-only view of the frame's data (Copy-on-Write), and an
    integer-typed column could not hold NaN at all.
    """
    targets = frame.to_numpy(dtype=float, copy=True)
    targets[targets == 0] = np.nan
    return targets


X_train = _read_without_date('../data/prepared/X_train_scaled.csv').to_numpy()
# Targets that are exactly 0 are replaced with NaN so the loss can skip them.
y_train = _targets_with_nan_for_zero(
    _read_without_date('../data/prepared/y_train_scaled.csv'))
X_test = _read_without_date('../data/prepared/X_test_scaled.csv').to_numpy()
y_test = _targets_with_nan_for_zero(
    _read_without_date('../data/prepared/y_test_scaled.csv'))



# Define a custom masked MSE loss function for stocks with non trading days

def masked_mse(y_true, y_pred):
    """Mean squared error computed only over the non-NaN targets.

    NaN entries in ``y_true`` mark values to ignore; they contribute neither
    to the squared-error sum nor to the count used for averaging.

    Args:
        y_true: Target tensor (or array-like); NaN entries are skipped.
        y_pred: Prediction tensor with the same shape as ``y_true``.

    Returns:
        Scalar tensor in ``y_pred``'s dtype: the sum of squared errors over the
        valid entries divided by the number of valid entries, or 0 when every
        entry of ``y_true`` is NaN.
    """
    y_pred = tf.convert_to_tensor(y_pred)
    # Work in the prediction dtype so float64 targets or non-float32
    # predictions do not cause a dtype mismatch.
    y_true = tf.cast(y_true, y_pred.dtype)
    mask = ~tf.math.is_nan(y_true)
    # Zero both sides at masked positions before subtracting, so NaN never
    # enters the arithmetic and masked entries add exactly 0 to the sum.
    y_true = tf.where(mask, y_true, tf.zeros_like(y_true))
    y_pred = tf.where(mask, y_pred, tf.zeros_like(y_pred))
    squared_error_sum = tf.reduce_sum(tf.square(y_true - y_pred))
    valid_count = tf.reduce_sum(tf.cast(mask, y_pred.dtype))
    # A fully masked batch would otherwise evaluate 0 / 0 = NaN; return 0.
    return tf.math.divide_no_nan(squared_error_sum, valid_count)


# Build and compile a small fully connected Keras network.

# Show the array shapes that the input and output sizes below come from.
print(X_train.shape, y_train.shape)

n_features = X_train.shape[1]
n_outputs = y_train.shape[1]

# Explicit input, then two Dense -> BatchNormalization -> Dropout stages.
network_layers = [keras.Input(shape=(n_features,))]
for width in (128, 64):
    network_layers.extend([
        keras.layers.Dense(width, activation='relu'),
        keras.layers.BatchNormalization(),
        keras.layers.Dropout(0.1),
    ])
network_layers.append(keras.layers.Dense(32, activation='relu'))
# Linear output layer with one unit per target column.
network_layers.append(keras.layers.Dense(units=n_outputs))

model = keras.Sequential(network_layers)

# The masked MSE serves as both the training loss and the reported metric.
model.compile(optimizer='adam', loss=masked_mse, metrics=[masked_mse])


# Train the model.
# NOTE(review): the test split is also passed as validation data, so the
# validation numbers printed during training are not an independent estimate.
model.fit(
    X_train,
    y_train,
    epochs=30,         # Number of passes over the training data
    batch_size=32,     # Number of samples per gradient update
    validation_data=(X_test, y_test)  # Monitored after every epoch
)

# Evaluate the model. evaluate() returns the loss followed by the compiled
# metrics; the only metric compiled into this model is masked_mse, so the
# second value is a masked MSE, not an MAE.
test_loss, test_masked_mse = model.evaluate(X_test, y_test)
test_mae = test_masked_mse  # Old (misleading) name kept for later cells
print(f'Test Loss: {test_loss}, Test Masked MSE: {test_masked_mse}')

# Make predictions
predictions = model.predict(X_test)

# Save the model; create the target folder first so saving does not depend on
# it already existing.
os.makedirs('../models', exist_ok=True)
model.save('../models/stock_prediction_model.keras')

# Save predictions, naming one column per model output (Stock1, Stock2, ...)
# instead of assuming exactly four outputs.
predictions_df = pd.DataFrame(
    predictions,
    columns=[f'Stock{i + 1}' for i in range(predictions.shape[1])])

# Create folder if it doesn't exist
os.makedirs('../data/predictions', exist_ok=True)

predictions_df.to_csv('../data/predictions/stock_predictions.csv', index=False)

# Serialize the model architecture to a JSON string. It is only held in
# memory here; this cell does not write it to disk.
model_json = model.to_json()

(6428, 9) (6428, 4)


E0000 00:00:1750087884.592090    4384 cuda_executor.cc:1228] INTERNAL: CUDA Runtime error: Failed call to cudaGetRuntimeVersion: Error loading CUDA libraries. GPU will not be used.: Error loading CUDA libraries. GPU will not be used.
W0000 00:00:1750087884.594503    4384 gpu_device.cc:2341] Cannot dlopen some GPU libraries. Please make sure the missing libraries mentioned above are installed properly if you would like to use GPU. Follow the guide at https://www.tensorflow.org/install/gpu for how to download and setup the required libraries for your platform.
Skipping registering GPU devices...


Epoch 1/30
[1m201/201[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step - loss: 0.0471 - masked_mse: 0.0471 - val_loss: 0.0027 - val_masked_mse: 0.0034
Epoch 2/30
[1m201/201[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - loss: 0.0027 - masked_mse: 0.0027 - val_loss: 0.0021 - val_masked_mse: 0.0027
Epoch 3/30
[1m201/201[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - loss: 0.0023 - masked_mse: 0.0023 - val_loss: 0.0020 - val_masked_mse: 0.0028
Epoch 4/30
[1m201/201[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - loss: 0.0021 - masked_mse: 0.0021 - val_loss: 0.0017 - val_masked_mse: 0.0023
Epoch 5/30
[1m201/201[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - loss: 0.0019 - masked_mse: 0.0019 - val_loss: 0.0016 - val_masked_mse: 0.0021
Epoch 6/30
[1m201/201[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - loss: 0.0019 - masked_mse: 0.0019 - val_loss: 0.0015 - val_masked_mse: 0.0019
Epoc