# Training LSTM model 
#### Task: Binary classification of stocks as over-performing or under-performing
#### Training period - 1020 days, Validation period - 160 days after training period

**Load modules**

In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint
from tensorflow.keras.metrics import Precision, Recall, AUC
from sklearn.metrics import precision_score, recall_score, f1_score, roc_auc_score, roc_curve, accuracy_score
import matplotlib.pyplot as plt
from sklearn.metrics import matthews_corrcoef
import pickle
import os

2025-02-12 21:41:27.470658: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


**Load training and validation data**

In [2]:
year = 2022
counter = 1
data_dir = "../data"
year_dir = os.path.join(data_dir, str(year))
LSTM_train_val_path = os.path.join(year_dir, f"LSTM_train_val_{counter}.npz")
LSTM_data = np.load(LSTM_train_val_path)
X_train = LSTM_data['X_train']
y_train = LSTM_data['y_train']
X_val = LSTM_data['X_val']
y_val = LSTM_data['y_val']
BATCH_SIZE = 64

In [3]:
gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus:
    try:
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
    except RuntimeError as e:
        print(e)

2025-02-12 21:41:31.584759: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:998] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2025-02-12 21:41:31.630620: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:998] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2025-02-12 21:41:31.630884: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:998] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-

**Model training, monitored by validation accuracy**

In [4]:
def data_generator(X, y):
    for i in range(len(X)):
        yield X[i], y[i]

def create_tf_dataset(X, y, batch_size=BATCH_SIZE, shuffle=True):
    dataset = tf.data.Dataset.from_generator(
        lambda: data_generator(X, y),
        output_signature=(
            tf.TensorSpec(shape=(15, 60), dtype=tf.float32),
            tf.TensorSpec(shape=(), dtype=tf.float32)
        )
    )
    if shuffle:
        dataset = dataset.shuffle(buffer_size=1000)
    dataset = dataset.batch(batch_size).prefetch(tf.data.AUTOTUNE)
    return dataset

train_dataset = create_tf_dataset(X_train, y_train, batch_size=BATCH_SIZE)
val_dataset = create_tf_dataset(X_val, y_val, batch_size=BATCH_SIZE, shuffle=False)

2025-02-12 21:41:32.839172: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:998] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2025-02-12 21:41:32.839473: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:998] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2025-02-12 21:41:32.839621: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:998] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-

In [5]:
model = Sequential([
      LSTM(200, return_sequences=False, activation='tanh', recurrent_activation='sigmoid',
          dropout=0.2, recurrent_dropout=0.2, kernel_initializer='glorot_uniform',
          recurrent_initializer='orthogonal', bias_initializer='zeros',
          input_shape=(15, 60)),
      Dense(30, activation='relu', kernel_initializer='glorot_uniform', bias_initializer='zeros'),
      Dense(1, activation='sigmoid', kernel_initializer='glorot_uniform', bias_initializer='zeros')
  ])
model.compile(
    loss='binary_crossentropy',
    #optimizer='adam',
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-7),
    metrics=[
        'accuracy',
        Precision(name='precision'),
        Recall(name='recall'),
        AUC(name='auc')
    ]
  )
callbacks = [
    EarlyStopping(monitor='val_accuracy', patience=5, restore_best_weights=True),
    ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=3, min_lr=1e-5),
    ModelCheckpoint(filepath=os.path.join(year_dir, f"LSTM_model_{counter}.keras"), 
    monitor='val_accuracy', save_best_only=True, mode='max')
  ]

  super().__init__(**kwargs)


In [6]:
history = model.fit(
    train_dataset,
    validation_data=val_dataset,
    epochs=10,
    callbacks=callbacks
)

Epoch 1/10
   6188/Unknown [1m168s[0m 26ms/step - accuracy: 0.5187 - auc: 0.5274 - loss: 0.6933 - precision: 0.5216 - recall: 0.5256

2025-02-12 21:44:33.169951: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
	 [[{{node IteratorGetNext}}]]
2025-02-12 21:44:33.170015: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
	 [[{{node IteratorGetNext}}]]
	 [[IteratorGetNext/_2]]


[1m6188/6188[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m176s[0m 27ms/step - accuracy: 0.5187 - auc: 0.5274 - loss: 0.6933 - precision: 0.5216 - recall: 0.5256 - val_accuracy: 0.5084 - val_auc: 0.5099 - val_loss: 0.6943 - val_precision: 0.5073 - val_recall: 0.5801 - learning_rate: 0.0010
Epoch 2/10


2025-02-12 21:44:41.052939: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
	 [[{{node IteratorGetNext}}]]
	 [[StatefulPartitionedCall/Shape/_6]]
2025-02-12 21:44:41.052969: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
	 [[{{node IteratorGetNext}}]]
2025-02-12 21:44:41.052991: I tensorflow/core/framework/local_rendezvous.cc:422] Local rendezvous recv item cancelled. Key hash: 4378288024112410952
2025-02-12 21:44:41.052999: I tensorflow/core/framework/local_rendezvous.cc:422] Local rendezvous recv item cancelled. Key hash: 13644471595552837875


[1m6186/6188[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 26ms/step - accuracy: 0.5215 - auc: 0.5316 - loss: 0.6912 - precision: 0.5208 - recall: 0.6191  

2025-02-12 21:47:19.781734: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
	 [[{{node IteratorGetNext}}]]
2025-02-12 21:47:19.781771: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
	 [[{{node IteratorGetNext}}]]
	 [[IteratorGetNext/_2]]


[1m6188/6188[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m166s[0m 27ms/step - accuracy: 0.5215 - auc: 0.5316 - loss: 0.6912 - precision: 0.5208 - recall: 0.6191 - val_accuracy: 0.5042 - val_auc: 0.5027 - val_loss: 0.6937 - val_precision: 0.5050 - val_recall: 0.4269 - learning_rate: 0.0010
Epoch 3/10
[1m   1/6188[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m14:55[0m 145ms/step - accuracy: 0.5781 - auc: 0.5505 - loss: 0.7025 - precision: 0.8421 - recall: 0.4000

2025-02-12 21:47:27.302180: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
	 [[{{node IteratorGetNext}}]]
2025-02-12 21:47:27.302211: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
	 [[{{node IteratorGetNext}}]]
	 [[StatefulPartitionedCall/sequential_1/lstm_1/while/cond/_28/sequential_1/lstm_1/while/Less_1/_11]]
2025-02-12 21:47:27.302229: I tensorflow/core/framework/local_rendezvous.cc:422] Local rendezvous recv item cancelled. Key hash: 13644471595552837875
2025-02-12 21:47:27.302287: I tensorflow/core/framework/local_rendezvous.cc:422] Local rendezvous recv item cancelled. Key hash: 4378288024112410952
2025-02-12 21:47:27.302313: I tensorflow/core/framework/local_rendezvous.cc:422] Local rendezvous recv item cancelled. Key hash: 14327283832689713810


[1m6186/6188[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 27ms/step - accuracy: 0.5249 - auc: 0.5367 - loss: 0.6903 - precision: 0.5250 - recall: 0.5974  

2025-02-12 21:50:13.207524: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
	 [[{{node IteratorGetNext}}]]
2025-02-12 21:50:13.207557: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
	 [[{{node IteratorGetNext}}]]
	 [[IteratorGetNext/_2]]


[1m6188/6188[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m174s[0m 28ms/step - accuracy: 0.5249 - auc: 0.5367 - loss: 0.6903 - precision: 0.5250 - recall: 0.5974 - val_accuracy: 0.4995 - val_auc: 0.4998 - val_loss: 0.6938 - val_precision: 0.4995 - val_recall: 0.5503 - learning_rate: 0.0010
Epoch 4/10
[1m   1/6188[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m16:07[0m 156ms/step - accuracy: 0.4531 - auc: 0.4570 - loss: 0.7026 - precision: 0.5600 - recall: 0.3684

2025-02-12 21:50:21.223842: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
	 [[{{node IteratorGetNext}}]]
2025-02-12 21:50:21.223873: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
	 [[{{node IteratorGetNext}}]]
	 [[IteratorGetNext/_2]]


[1m6186/6188[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 26ms/step - accuracy: 0.5304 - auc: 0.5440 - loss: 0.6890 - precision: 0.5274 - recall: 0.6504  

2025-02-12 21:53:02.100778: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
	 [[{{node IteratorGetNext}}]]
2025-02-12 21:53:02.100809: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
	 [[{{node IteratorGetNext}}]]
	 [[IteratorGetNext/_2]]


[1m6188/6188[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m169s[0m 27ms/step - accuracy: 0.5304 - auc: 0.5440 - loss: 0.6890 - precision: 0.5274 - recall: 0.6504 - val_accuracy: 0.5013 - val_auc: 0.4997 - val_loss: 0.6943 - val_precision: 0.5009 - val_recall: 0.6996 - learning_rate: 0.0010
Epoch 5/10
[1m   1/6188[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m16:02[0m 156ms/step - accuracy: 0.5312 - auc: 0.5516 - loss: 0.6982 - precision: 0.6562 - recall: 0.5250

2025-02-12 21:53:10.619679: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
	 [[{{node IteratorGetNext}}]]
2025-02-12 21:53:10.619710: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
	 [[{{node IteratorGetNext}}]]
	 [[StatefulPartitionedCall/Shape/_6]]
2025-02-12 21:53:10.619730: I tensorflow/core/framework/local_rendezvous.cc:422] Local rendezvous recv item cancelled. Key hash: 13644471595552837875
2025-02-12 21:53:10.619780: I tensorflow/core/framework/local_rendezvous.cc:422] Local rendezvous recv item cancelled. Key hash: 4378288024112410952


[1m6186/6188[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 26ms/step - accuracy: 0.5348 - auc: 0.5504 - loss: 0.6879 - precision: 0.5316 - recall: 0.6415  

2025-02-12 21:55:51.370921: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
	 [[{{node IteratorGetNext}}]]
2025-02-12 21:55:51.370956: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
	 [[{{node IteratorGetNext}}]]
	 [[IteratorGetNext/_2]]


[1m6188/6188[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m169s[0m 27ms/step - accuracy: 0.5348 - auc: 0.5504 - loss: 0.6879 - precision: 0.5316 - recall: 0.6415 - val_accuracy: 0.4996 - val_auc: 0.4983 - val_loss: 0.6945 - val_precision: 0.4997 - val_recall: 0.7214 - learning_rate: 0.0010
Epoch 6/10
[1m   1/6188[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m15:47[0m 153ms/step - accuracy: 0.5312 - auc: 0.6329 - loss: 0.6906 - precision: 0.8077 - recall: 0.4565

2025-02-12 21:55:59.535579: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
	 [[{{node IteratorGetNext}}]]
	 [[IteratorGetNext/_2]]
2025-02-12 21:55:59.535612: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
	 [[{{node IteratorGetNext}}]]


[1m6186/6188[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 27ms/step - accuracy: 0.5380 - auc: 0.5561 - loss: 0.6867 - precision: 0.5346 - recall: 0.6422  

2025-02-12 21:58:44.292297: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
	 [[{{node IteratorGetNext}}]]
2025-02-12 21:58:44.292337: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
	 [[{{node IteratorGetNext}}]]
	 [[IteratorGetNext/_2]]


[1m6188/6188[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m173s[0m 28ms/step - accuracy: 0.5380 - auc: 0.5562 - loss: 0.6867 - precision: 0.5346 - recall: 0.6422 - val_accuracy: 0.5012 - val_auc: 0.5021 - val_loss: 0.6959 - val_precision: 0.5009 - val_recall: 0.6569 - learning_rate: 5.0000e-04


2025-02-12 21:58:52.473879: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
	 [[{{node IteratorGetNext}}]]
2025-02-12 21:58:52.473914: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
	 [[{{node IteratorGetNext}}]]
	 [[IteratorGetNext/_2]]
