# Sequence Model Research

This notebook trains and assesses different sequence models on the generated training data.

The training data is generated from financial time series data, with each sequence labeled with its potential profit under a buy-sell system.

The goal is to create a sequence model that can select favourable stock charts as well as, or better than, a human using traditional technical analysis.

## Import Libraries and Data

In [1]:
import numpy as np
import os
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Input
from tensorflow.keras import backend as K
import matplotlib.pyplot as plt

# Ask TensorFlow which physical GPU devices it can see
devices = tf.config.list_physical_devices('GPU')
print("GPUs available: ", devices)

# Report the result; an empty device list is falsy, so this selects the
# "not using" message when no GPU was found
print("TensorFlow is using the GPU" if devices else "TensorFlow is not using the GPU")

# Data directory; a relative path like this is resolved against the current
# working directory of the kernel
data_dir = 'data'

# Define the file paths
sequences_path = os.path.join(data_dir, 'sequences.npy')
labels_path = os.path.join(data_dir, 'labels.npy')
metadata_path = os.path.join(data_dir, 'metadata.npy')

# Load the data
try:
    data_sequences = np.load(sequences_path)
    data_labels = np.load(labels_path)
    data_metadata = np.load(metadata_path)
except FileNotFoundError as e:
    print(f"Error loading files: {e}")
    # Re-raise instead of continuing: every later cell needs these arrays, so
    # swallowing the error would only surface later as a confusing NameError.
    raise

# The three arrays are indexed with the same indices further down, so they
# must describe the same number of samples.
if not (len(data_sequences) == len(data_labels) == len(data_metadata)):
    raise ValueError(
        "Sample count mismatch: "
        f"sequences={len(data_sequences)}, labels={len(data_labels)}, "
        f"metadata={len(data_metadata)}"
    )

# Inspect the shape of the loaded data
print(f'Sequences shape: {data_sequences.shape}')
print(f'Labels shape: {data_labels.shape}')
print(f'Metadata shape: {data_metadata.shape}')


2024-06-07 14:45:50.550061: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-06-07 14:45:50.600373: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-06-07 14:45:50.600418: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-06-07 14:45:50.602011: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-06-07 14:45:50.612754: I tensorflow/core/platform/cpu_feature_guar

GPUs available:  [PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]
TensorFlow is using the GPU
Sequences shape: (126071, 252, 15)
Labels shape: (126071,)
Metadata shape: (126071, 2)


## Data Preprocessing

In [2]:
# Shuffle the data with a fixed seed so that the same samples land in the
# train/validation/test splits on every fresh kernel run.
SEED = 42
rng = np.random.default_rng(SEED)
indices = rng.permutation(data_sequences.shape[0])

# Apply the same permutation to all three arrays so that sequences, labels
# and metadata stay aligned row by row.
# NOTE(review): if the sequences are overlapping windows cut from the same
# price series, a purely random split can place near-duplicate windows in
# both train and test - confirm whether a time- or ticker-based split is needed.
data_sequences = data_sequences[indices]
data_labels = data_labels[indices]
data_metadata = data_metadata[indices]

# Minimum label value at which a sample is treated as a "buy"
threshold = 0.1  # Example threshold value

# Binarise the labels: 1 where the label reaches the threshold (buy),
# 0 everywhere else (no buy)
binary_labels = np.greater_equal(data_labels, threshold).astype(int)

# Fraction of the dataset assigned to each split
train_size = 0.7  # 70%
validation_size = 0.15  # 15%
test_size = 0.15  # 15%

# Turn the fractions into the two boundary indices between the splits;
# the test split is simply whatever remains after the validation boundary
num_samples = data_sequences.shape[0]
train_end = int(num_samples * train_size)
validation_end = int(num_samples * (train_size + validation_size))

# One slice per split, reused for the inputs, the binary targets and the raw labels
train_slice = slice(None, train_end)
val_slice = slice(train_end, validation_end)
test_slice = slice(validation_end, None)

# Training split
X_train, y_train, profits_train = (
    data_sequences[train_slice],
    binary_labels[train_slice],
    data_labels[train_slice],
)

# Validation split
X_val, y_val, profits_val = (
    data_sequences[val_slice],
    binary_labels[val_slice],
    data_labels[val_slice],
)

# Test split
X_test, y_test, profits_test = (
    data_sequences[test_slice],
    binary_labels[test_slice],
    data_labels[test_slice],
)

# Report the resulting shapes as a sanity check
print(f'Training set shape: {X_train.shape}, {y_train.shape}')
print(f'Validation set shape: {X_val.shape}, {y_val.shape}')
print(f'Test set shape: {X_test.shape}, {y_test.shape}')

Training set shape: (88249, 252, 15), (88249,)
Validation set shape: (18911, 252, 15), (18911,)
Test set shape: (18911, 252, 15), (18911,)


## Model

In [4]:
# Stacked-LSTM binary classifier, declared as a single ordered list of layers.
# The input dimensions are taken from axes 1 and 2 of the training array.
timesteps, n_features = X_train.shape[1], X_train.shape[2]

model = Sequential([
    # Input specification: one (timesteps, n_features) sequence per sample
    Input(shape=(timesteps, n_features)),
    # First recurrent layer returns the full sequence so the next LSTM can consume it
    LSTM(100, return_sequences=True),
    # Second (last) recurrent layer returns only its final output vector
    LSTM(50, return_sequences=False),
    # 25-unit dense layer.
    # NOTE(review): no activation is passed, so this layer is linear - confirm
    # that is intended rather than e.g. activation='relu'.
    Dense(25),
    # Single sigmoid unit producing the buy probability
    Dense(1, activation='sigmoid'),
])

# Adam optimizer with the built-in binary cross-entropy loss; accuracy is tracked as a metric
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Show the layer-by-layer summary
model.summary()

# Train the model; loss and accuracy on the validation split are recorded
# after every epoch and returned in `history`
history = model.fit(X_train, y_train, validation_data=(X_val, y_val), batch_size=32, epochs=20)

# The plotting and evaluation code below used to be wrapped in a bare
# triple-quoted string, so it never ran and the string itself was echoed as
# the cell's output. It is restored here as live code.

# Plot training & validation loss values
plt.figure(figsize=(10, 6))
plt.plot(history.history['loss'], label='Train Loss')
plt.plot(history.history['val_loss'], label='Validation Loss')
plt.title('Model Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()
plt.show()

# Plot training & validation accuracy values
plt.figure(figsize=(10, 6))
plt.plot(history.history['accuracy'], label='Train Accuracy')
plt.plot(history.history['val_accuracy'], label='Validation Accuracy')
plt.title('Model Accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.legend()
plt.show()

# Predicting and evaluating the model
train_predict = model.predict(X_train)
val_predict = model.predict(X_val)
test_predict = model.predict(X_test)

# Convert probabilities to binary predictions.
# The model ends in a single output unit, so predictions come back with a
# trailing axis of length 1 while the label arrays are 1-D. Flatten the
# predictions so the comparisons below are element-wise; comparing (n, 1)
# with (n,) would broadcast to an (n, n) matrix and yield a meaningless
# accuracy (and a very large temporary array).
train_predict = (train_predict > 0.5).astype(int).ravel()
val_predict = (val_predict > 0.5).astype(int).ravel()
test_predict = (test_predict > 0.5).astype(int).ravel()

# Calculate accuracy as the fraction of predictions that match the labels
train_accuracy = np.mean(train_predict == y_train)
val_accuracy = np.mean(val_predict == y_val)
test_accuracy = np.mean(test_predict == y_test)

print(f'Train Accuracy: {train_accuracy * 100:.2f}%')
print(f'Validation Accuracy: {val_accuracy * 100:.2f}%')
print(f'Test Accuracy: {test_accuracy * 100:.2f}%')



KeyboardInterrupt: 