In [1]:
import tensorflow as tf
from tensorflow import keras
import numpy as np
import pandas as pd
import os


# Load the prepared (already scaled) train/test splits as NumPy arrays.
def _read_values(csv_path):
    """Read a CSV, drop its "Date" column and return the rest as an array."""
    frame = pd.read_csv(csv_path)
    return frame.drop("Date", axis=1).values


X_train = _read_values('../data/prepared/X_train_scaled.csv')
y_train = _read_values('../data/prepared/y_train_scaled.csv')
X_test = _read_values('../data/prepared/X_test_scaled.csv')
y_test = _read_values('../data/prepared/y_test_scaled.csv')

# Overwrite exact zeros in the targets with NaN (in place) so they can be
# told apart from real values later on.
# NOTE(review): presumably zeros mark days without trading — confirm that a
# legitimately scaled target can never be exactly 0.
for targets in (y_train, y_test):
    targets[targets == 0] = np.nan

# Show the first 5 rows of the training features as a quick sanity check.
print(X_train[:5])



# Custom masked MSE loss: targets that are NaN (e.g. non-trading days for a
# stock) are excluded from the error.

def masked_mse(y_true, y_pred):
    """Mean squared error over the non-NaN entries of ``y_true``.

    Args:
        y_true: Target tensor/array; NaN marks entries to ignore.
        y_pred: Prediction tensor, broadcast-compatible with ``y_true``.

    Returns:
        A scalar tensor: the sum of squared errors over valid entries divided
        by the number of valid entries. If there are no valid entries at all,
        the result is 0 instead of NaN.
    """
    y_pred = tf.convert_to_tensor(y_pred)
    # Keep all arithmetic in the prediction dtype instead of assuming float32.
    y_true = tf.cast(y_true, y_pred.dtype)

    mask = tf.math.logical_not(tf.math.is_nan(y_true))
    zero = tf.zeros([], dtype=y_pred.dtype)

    # Zero out BOTH sides at masked positions before subtracting, so no NaN
    # ever enters the computation (or its gradient); the masked entries then
    # contribute exactly 0 to the sum.
    y_true = tf.where(mask, y_true, zero)
    y_pred = tf.where(mask, y_pred, zero)

    squared_error_sum = tf.reduce_sum(tf.square(y_true - y_pred))
    valid_count = tf.reduce_sum(tf.cast(mask, y_pred.dtype))

    # A batch whose targets are all NaN would otherwise yield 0 / 0 = NaN.
    return tf.math.divide_no_nan(squared_error_sum, valid_count)


# Define a simple Keras model: fully connected ReLU layers followed by a
# linear output layer.

n_features = X_train.shape[1]
n_targets = y_train.shape[1]

# One entry per hidden layer, in order:
# (units, add BatchNormalization afterwards?, dropout rate or None).
hidden_specs = [
    (128, True, 0.1),
    (128, True, None),
    (64, True, 0.1),
    (32, False, None),
]

layer_stack = [keras.Input(shape=(n_features,))]  # Explicit Input layer
for units, use_batch_norm, dropout_rate in hidden_specs:
    layer_stack.append(keras.layers.Dense(units, activation='relu'))
    if use_batch_norm:
        layer_stack.append(keras.layers.BatchNormalization())
    if dropout_rate is not None:
        layer_stack.append(keras.layers.Dropout(dropout_rate))

# Output layer with one neuron per target column (one per stock).
layer_stack.append(keras.layers.Dense(units=n_targets))

model = keras.Sequential(layer_stack)

# The same masked MSE serves as both the training loss and the tracked metric.
model.compile(optimizer='adam', loss=masked_mse, metrics=[masked_mse])


# Train the model.
# NOTE(review): the test split is also passed as validation data, so the
# metrics reported on it below are not from data unseen during training
# monitoring — consider carving out a separate validation split.
model.fit(
    X_train,
    y_train,
    epochs=30,         # Number of times to go through the data
    batch_size=32,     # Number of samples per gradient update
    validation_data=(X_test, y_test)  # Optional: to monitor validation loss
)

# Evaluate the model. The only metric compiled into the model is masked_mse,
# so the second value returned by evaluate() is a masked MSE, not an MAE.
test_loss, test_masked_mse = model.evaluate(X_test, y_test)
test_mae = test_masked_mse  # Alias kept for any later code using the old name
print(f'Test Loss: {test_loss}, Test masked MSE: {test_masked_mse}')

# Make predictions
predictions = model.predict(X_test)

# Save the model (create the target folder first if it doesn't exist)
os.makedirs('../models', exist_ok=True)
model.save('../models/stock_prediction_model.keras')

# Save predictions. Column names are generated from the actual number of
# model outputs (Stock1, Stock2, ...) rather than assuming exactly four.
predictions_df = pd.DataFrame(
    predictions,
    columns=[f'Stock{i + 1}' for i in range(predictions.shape[1])])

# Create folder if it doesn't exist
os.makedirs('../data/predictions', exist_ok=True)

predictions_df.to_csv('../data/predictions/stock_predictions.csv', index=False)

# Serialize the model architecture to a JSON string. It is only kept in
# `model_json`; nothing is written to disk here.
model_json = model.to_json()

2025-06-18 14:10:45.020986: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2025-06-18 14:10:45.043784: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2025-06-18 14:10:45.151525: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1750248645.259223   57344 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1750248645.288809   57344 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1750248645.385999   57344 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linkin

[[7.50000000e-01 9.33012702e-01 9.00968868e-01 8.01937736e-01
  0.00000000e+00 4.96059308e-01 3.57910044e-01 6.70720094e-01
  1.43905588e-01 9.49005278e-01 3.66215398e-01 5.46415785e-01
  0.00000000e+00]
 [7.50000000e-01 9.33012702e-01 1.00000000e+00 3.56895868e-01
  1.24455507e-04 4.96407312e-01 3.58004216e-01 6.70310028e-01
  1.44082406e-01 9.48773754e-01 3.66740662e-01 5.46771230e-01
  1.00000000e+00]
 [7.50000000e-01 9.33012702e-01 7.22520934e-01 5.55111512e-17
  2.48911014e-04 4.96755316e-01 3.58098388e-01 6.69899962e-01
  1.44259224e-01 9.48542229e-01 3.67265925e-01 5.47126675e-01
  1.00000000e+00]
 [7.50000000e-01 9.33012702e-01 2.77479066e-01 0.00000000e+00
  3.73366521e-04 4.97103319e-01 3.58192559e-01 6.69489896e-01
  1.44436042e-01 9.48310705e-01 3.67791188e-01 5.47482120e-01
  1.00000000e+00]
 [7.50000000e-01 9.33012702e-01 0.00000000e+00 3.56895868e-01
  4.97822029e-04 4.97451323e-01 3.58286731e-01 6.69079830e-01
  1.44612860e-01 9.48079180e-01 3.68316452e-01 5.47837565e-0

E0000 00:00:1750248649.673357   57344 cuda_executor.cc:1228] INTERNAL: CUDA Runtime error: Failed call to cudaGetRuntimeVersion: Error loading CUDA libraries. GPU will not be used.: Error loading CUDA libraries. GPU will not be used.
W0000 00:00:1750248649.675291   57344 gpu_device.cc:2341] Cannot dlopen some GPU libraries. Please make sure the missing libraries mentioned above are installed properly if you would like to use GPU. Follow the guide at https://www.tensorflow.org/install/gpu for how to download and setup the required libraries for your platform.
Skipping registering GPU devices...


[1m201/201[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 5ms/step - loss: 0.4932 - masked_mse: 0.4932 - val_loss: 0.0624 - val_masked_mse: 0.0709
Epoch 2/30
[1m201/201[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - loss: 0.0598 - masked_mse: 0.0598 - val_loss: 0.0192 - val_masked_mse: 0.0218
Epoch 3/30
[1m201/201[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - loss: 0.0346 - masked_mse: 0.0346 - val_loss: 0.0098 - val_masked_mse: 0.0109
Epoch 4/30
[1m201/201[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - loss: 0.0222 - masked_mse: 0.0222 - val_loss: 0.0071 - val_masked_mse: 0.0077
Epoch 5/30
[1m201/201[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - loss: 0.0160 - masked_mse: 0.0160 - val_loss: 0.0058 - val_masked_mse: 0.0063
Epoch 6/30
[1m201/201[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - loss: 0.0129 - masked_mse: 0.0129 - val_loss: 0.0044 - val_masked_mse: 0.0050
Epoch 7/30
[1m