In [None]:
import tensorflow as tf
from tensorflow import keras
import numpy as np
import pandas as pd
import os


# Load data
# Every prepared CSV has a "Date" column; it is dropped so that only the
# remaining columns are handed to the model as plain NumPy arrays.
X_train = pd.read_csv('../data/prepared/X_train.csv').drop("Date", axis=1).values
X_test = pd.read_csv('../data/prepared/X_test.csv').drop("Date", axis=1).values

# Targets are cast to float explicitly: if every target column happened to be
# parsed as integers, the array would be integer-typed and assigning NaN to it
# below would raise "cannot convert float NaN to integer".
y_train = pd.read_csv('../data/prepared/y_train.csv').drop("Date", axis=1).values.astype(float)
y_test = pd.read_csv('../data/prepared/y_test.csv').drop("Date", axis=1).values

# Replace 0s in y_train with NaN
y_train[y_train == 0] = np.nan


# Define a custom masked MSE loss function for stocks with non trading days

def masked_mse(y_true, y_pred):
    """Mean squared error computed only over the non-NaN entries of ``y_true``.

    Entries of ``y_true`` that are NaN are excluded from both the sum of
    squared errors and the count used for averaging.

    Args:
        y_true: Target values; NaN marks an entry to ignore.
        y_pred: Predicted values, same shape as ``y_true``.

    Returns:
        A scalar tensor: the sum of squared errors over the valid entries
        divided by the number of valid entries, or 0 when no entry is valid.
    """
    y_pred = tf.convert_to_tensor(y_pred)
    # Work in the prediction dtype instead of a hard-coded float32.
    y_true = tf.cast(y_true, y_pred.dtype)

    mask = ~tf.math.is_nan(y_true)

    # Zero out both operands at masked positions *before* subtracting:
    # NaN times a zero mask would still be NaN, so masking afterwards is not
    # enough. With both sides zeroed the masked squared error is exactly 0.
    safe_true = tf.where(mask, y_true, tf.zeros_like(y_true))
    safe_pred = tf.where(mask, y_pred, tf.zeros_like(y_pred))

    squared_error_sum = tf.reduce_sum(tf.square(safe_true - safe_pred))
    valid_count = tf.reduce_sum(tf.cast(mask, y_pred.dtype))

    # divide_no_nan returns 0 when valid_count is 0, so a batch whose targets
    # are all NaN yields a zero loss rather than NaN.
    return tf.math.divide_no_nan(squared_error_sum, valid_count)


# Define a simple Keras model


# Fully connected network: two ReLU hidden layers followed by a linear output.
model = keras.Sequential([
    keras.Input(shape=(X_train.shape[1],)),  # One input per column of X_train
    keras.layers.Dense(64, activation='relu'),
    keras.layers.Dense(32, activation='relu'),
    keras.layers.Dense(4)  # Output layer for 4 stocks (one neuron per stock)
])

# masked_mse is used both as the training loss and as a reported metric.
model.compile(optimizer='adam', loss=masked_mse, metrics=[masked_mse])

# The training targets have their 0s replaced with NaN so that masked_mse
# ignores them. Apply the same masking to a *copy* of the test targets so the
# validation/test losses are measured the same way; y_test itself is left
# untouched for any code that uses it afterwards.
y_test_masked = np.where(y_test == 0, np.nan, y_test)

# Train the model
model.fit(
    X_train,
    y_train,
    epochs=30,         # Number of times to go through the data
    batch_size=32,     # Number of samples per gradient update
    validation_data=(X_test, y_test_masked)  # Optional: to monitor validation loss
)

# Evaluate the model. evaluate() returns the loss followed by the compiled
# metrics, and the only compiled metric is masked_mse (not MAE).
test_loss, test_masked_mse = model.evaluate(X_test, y_test_masked)
test_mae = test_masked_mse  # Old (misleading) name kept for code that still reads it
print(f'Test Loss: {test_loss}, Test Masked MSE: {test_masked_mse}')

# Make predictions
predictions = model.predict(X_test)

# Save the model (create the target folder first so the save cannot fail on a
# missing directory)
os.makedirs('../models', exist_ok=True)
model.save('../models/stock_prediction_model.keras')

# Save predictions, one column per output neuron
predictions_df = pd.DataFrame(predictions, columns=['Stock1', 'Stock2', 'Stock3', 'Stock4'])

# Create folder if it doesn't exist
os.makedirs('../data/predictions', exist_ok=True)

predictions_df.to_csv('../data/predictions/stock_predictions.csv', index=False)

# Serialize the model architecture to a JSON string.
# NOTE(review): the string is only kept in memory here; nothing in this cell
# writes it to disk.
model_json = model.to_json()

Epoch 1/10
[1m201/201[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 5ms/step - loss: 58859744.0000 - masked_mse: 58859776.0000 - val_loss: 49095496.0000 - val_masked_mse: 55106444.0000
Epoch 2/10
[1m201/201[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - loss: 62122780.0000 - masked_mse: 62122588.0000 - val_loss: 46324096.0000 - val_masked_mse: 52141416.0000
Epoch 3/10
[1m201/201[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - loss: 54535020.0000 - masked_mse: 54534772.0000 - val_loss: 39944708.0000 - val_masked_mse: 45260744.0000
Epoch 4/10
[1m201/201[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - loss: 46763132.0000 - masked_mse: 46762976.0000 - val_loss: 32561094.0000 - val_masked_mse: 37139480.0000
Epoch 5/10
[1m201/201[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - loss: 37772480.0000 - masked_mse: 37772372.0000 - val_loss: 27016860.0000 - val_masked_mse: 30841148.0000
Epoch 6/10
[1m201/201[0m [3