In [11]:
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout, Bidirectional
from tensorflow import keras
import csv
import numpy as np
from collections import deque
from sklearn.model_selection import train_test_split
import pandas as pd
import math
from tensorflow.keras.layers import Input
from sklearn.preprocessing import StandardScaler
from sklearn.utils.class_weight import compute_class_weight
from tensorflow.keras.optimizers import Adam


# Number of trailing CSV rows to load (handed to get_last_lines by build_generators)
size = 50000


# Length of each sliding window, i.e. the time dimension of one model input
timesteps = 25

# Number of windows per batch in the tf.data pipelines
batch_size = 16

# Number of passes over the training dataset requested from model.fit
epochs = 100


def sequence_maker(x_data, y_data, timesteps):
    """Slice a time series into overlapping windows with one label per window.

    Window ``i`` covers rows ``i .. i + timesteps - 1`` of ``x_data`` and is
    paired with ``y_data[i + timesteps - 1]``, i.e. the label of its last row.

    Args:
        x_data: Array-like of shape ``(n_rows, ...)`` holding the features.
        y_data: Sequence of labels, one per row of ``x_data``.
        timesteps: Number of consecutive rows in each window (must be >= 1).

    Returns:
        ``(sequences, labels)`` as numpy arrays. ``sequences`` has shape
        ``(n_windows, timesteps, ...)`` where
        ``n_windows = max(n_rows - timesteps + 1, 0)``; ``labels`` is
        ``y_data[timesteps - 1:]``.

    Raises:
        ValueError: If ``timesteps`` is smaller than 1.
    """
    if timesteps < 1:
        # timesteps == 0 would silently produce empty windows paired with the
        # wrong labels (y_data[-1:]), so reject it up front.
        raise ValueError(f"timesteps must be >= 1, got {timesteps}")

    x_data = np.asarray(x_data)
    labels = np.array(y_data[timesteps - 1:])
    n_windows = len(x_data) - timesteps + 1
    if n_windows <= 0:
        # Too few rows for a single window: keep the trailing dimensions so
        # callers still receive a correctly shaped (0, timesteps, ...) array.
        empty = np.empty((0, timesteps) + x_data.shape[1:], dtype=x_data.dtype)
        return empty, labels

    sequences = np.stack([x_data[start:start + timesteps] for start in range(n_windows)])
    return sequences, labels

def get_column_names(name):
    """Return the header row of a CSV file.

    Args:
        name: Path of the CSV file to read.

    Returns:
        list[str]: The fields of the first row, in file order.

    Raises:
        StopIteration: If the file contains no rows.
    """
    # newline='' leaves newline handling to the csv module, as its
    # documentation asks; with the default text mode, line breaks inside
    # quoted fields are rewritten by universal-newline translation.
    with open(name, newline='') as f:
        return next(csv.reader(f))

def get_last_lines(name, n=5000):
    """Return up to ``n`` CSV rows that precede the final row of a file.

    The file is streamed through a bounded deque that keeps the last
    ``n + 1`` rows; the final row is then dropped, so the very last row of
    the file is never part of the result. When the file holds ``n + 1`` rows
    or fewer, every row except the last is returned, header row included.

    NOTE(review): why the final row is excluded is not visible from this
    function; presumably it is incomplete. Confirm before changing.

    Args:
        name: Path of the CSV file to read.
        n: Maximum number of rows to return.

    Returns:
        numpy.ndarray: The selected rows, with fields as the strings produced
        by ``csv.reader``.
    """
    # newline='' leaves newline handling to the csv module, as its
    # documentation asks; otherwise line breaks inside quoted fields are
    # rewritten by universal-newline translation.
    with open(name, newline='') as f:
        tail = deque(csv.reader(f), maxlen=n + 1)
    return np.array(tail)[:-1]

def build_generators(name, timesteps):
    """Load the feature CSV and build windowed train/test arrays.

    Despite the name, plain numpy arrays are returned, not generators.

    Pipeline:
        1. Read the trailing ``size`` rows (module-level constant) of the CSV.
        2. Drop ``Timestamp`` and cast the remaining columns to float.
        3. Separate the ``Return_Signal`` target and split 80/20 without
           shuffling, so the test partition is the most recent data.
        4. Standardise the indicator columns using statistics fitted on the
           training partition only, then apply them to the test partition.
        5. Turn both partitions into sliding windows with ``sequence_maker``.
        6. Compute balanced class weights from the training labels.

    Args:
        name: Path of the CSV file with features and the ``Return_Signal`` column.
        timesteps: Number of rows in each sliding window.

    Returns:
        tuple: ``(x_train, x_test, y_train, y_test, weights, train_shape)``
        where ``weights`` maps the class ids 0, 1, 2 to their balanced weight
        and ``train_shape`` is the ``(rows, columns)`` shape of the training
        frame before windowing.
    """
    columns = get_column_names(name)
    data = get_last_lines(name, size)
    frame = pd.DataFrame(data, columns=columns)
    frame = frame.drop(columns='Timestamp').astype(float)
    target = frame.pop('Return_Signal')

    xtr, xte, ytr, yte = train_test_split(frame, target, test_size=0.2, shuffle=False)
    # Work on copies so the column assignments below never write into a view
    # of `frame`.
    xtr, xte = xtr.copy(), xte.copy()

    # Fit the scaler on the training partition only: fitting on the whole
    # frame would leak test-set mean/variance into the training inputs.
    features_to_scale = ["SMA_10", "SMA_50", "RSI_14", "Middle_Band", "Upper_Band", "Lower_Band", "TOD"]
    scaler = StandardScaler()
    xtr[features_to_scale] = scaler.fit_transform(xtr[features_to_scale])
    xte[features_to_scale] = scaler.transform(xte[features_to_scale])

    train_shape = xtr.shape
    x_train, y_train = sequence_maker(xtr.to_numpy(), ytr.to_numpy(), timesteps)
    x_test, y_test = sequence_maker(xte.to_numpy(), yte.to_numpy(), timesteps)

    # Weights are derived from the windowed training labels so they match
    # exactly what the model is trained on.
    w = compute_class_weight('balanced', classes=np.unique([0.0, 1.0, 2.0]), y=y_train)
    weights = {0: w[0], 1: w[1], 2: w[2]}
    return x_train, x_test, y_train, y_test, weights, train_shape


x_train, x_test, y_train, y_test, weights, shape = build_generators("drive/MyDrive/KC/XDGUSD_15_with_features.csv", timesteps)
print(weights)

# Shuffle individual windows *before* batching. Shuffling after .batch() only
# reorders whole batches, and each batch would then always hold the same
# consecutive (heavily overlapping) windows. The buffer spans the full
# training set so every epoch sees a uniformly shuffled order.
train_dataset = (
    tf.data.Dataset.from_tensor_slices((x_train, y_train))
    .shuffle(len(x_train))
    .batch(batch_size)
    .prefetch(tf.data.AUTOTUNE)
)
# Evaluation data keeps its chronological order.
test_dataset = (
    tf.data.Dataset.from_tensor_slices((x_test, y_test))
    .batch(batch_size)
    .prefetch(tf.data.AUTOTUNE)
)

{0: 0.6542931028838914, 1: 0.8589231232005501, 2: 3.2532552083333335}


In [2]:
class ClassSpecificAccuracy(tf.keras.metrics.Metric):
    """Share of samples whose true label is ``target_class`` that were predicted as it.

    Computed as ``correct / total`` where ``total`` counts samples with
    ``y_true == target_class`` and ``correct`` counts those of them whose
    arg-max prediction is also ``target_class`` (i.e. per-class recall).
    ``sample_weight`` is accepted for API compatibility but not used.

    Args:
        target_class: Integer class id to track.
        name: Name prefix; the metric is registered as ``f"{name}_{target_class}"``.
        **kwargs: Forwarded to ``tf.keras.metrics.Metric``.
    """

    def __init__(self, target_class=2, name="class_accuracy", **kwargs):
        super().__init__(name=f"{name}_{target_class}", **kwargs)
        # Remember the un-suffixed name so get_config() round-trips without
        # appending the class id a second time.
        self._name_prefix = name
        self.target_class = target_class
        self.correct = self.add_weight(name="correct", initializer="zeros")
        self.total = self.add_weight(name="total", initializer="zeros")

    def update_state(self, y_true, y_pred, sample_weight=None):
        """Accumulate counts for one batch of sparse labels and class scores."""
        # Flatten both sides: labels may arrive as (batch,) or (batch, 1), and
        # comparing (batch, 1) with (batch,) would broadcast to (batch, batch).
        y_true = tf.reshape(tf.cast(y_true, tf.int64), [-1])
        y_pred_labels = tf.reshape(tf.argmax(y_pred, axis=-1, output_type=tf.int64), [-1])

        mask = tf.equal(y_true, self.target_class)
        hits = tf.logical_and(mask, tf.equal(y_pred_labels, y_true))

        self.correct.assign_add(tf.reduce_sum(tf.cast(hits, tf.float32)))
        self.total.assign_add(tf.reduce_sum(tf.cast(mask, tf.float32)))

    def result(self):
        """Return the accumulated ratio; epsilon keeps it finite when no sample matched."""
        return self.correct / (self.total + tf.keras.backend.epsilon())

    def reset_state(self):
        """Zero the accumulators (the current Keras reset hook)."""
        self.correct.assign(0.0)
        self.total.assign(0.0)

    def reset_states(self):
        """Deprecated spelling of ``reset_state``, kept for existing callers."""
        self.reset_state()

    def get_config(self):
        """Return a config from which the metric can be re-created."""
        config = super().get_config()
        config.update({"name": self._name_prefix, "target_class": self.target_class})
        return config

In [3]:
class ClassSpecificPrecision(tf.keras.metrics.Metric):
    """Share of ``target_class`` predictions whose true label is ``target_class``.

    Computed as ``correct / total_predicted`` where ``total_predicted`` counts
    samples whose arg-max prediction is ``target_class`` and ``correct`` counts
    those of them whose true label matches. ``sample_weight`` is accepted for
    API compatibility but not used.

    Args:
        target_class: Integer class id to track.
        name: Name prefix; the metric is registered as ``f"{name}_{target_class}"``.
        **kwargs: Forwarded to ``tf.keras.metrics.Metric``.
    """

    def __init__(self, target_class=2, name="class_precision", **kwargs):
        super().__init__(name=f"{name}_{target_class}", **kwargs)
        # Remember the un-suffixed name so get_config() round-trips without
        # appending the class id a second time.
        self._name_prefix = name
        self.target_class = target_class
        self.correct = self.add_weight(name="correct", initializer="zeros")
        self.total_predicted = self.add_weight(name="total_predicted", initializer="zeros")

    def update_state(self, y_true, y_pred, sample_weight=None):
        """Accumulate counts for one batch of sparse labels and class scores."""
        # Flatten both sides: labels may arrive as (batch,) or (batch, 1), and
        # comparing (batch, 1) with (batch,) would broadcast to (batch, batch).
        y_true = tf.reshape(tf.cast(y_true, tf.int64), [-1])
        y_pred_labels = tf.reshape(tf.argmax(y_pred, axis=-1, output_type=tf.int64), [-1])

        # Positions where the model predicted the target class
        mask = tf.equal(y_pred_labels, self.target_class)
        # ...and, among those, positions where the true label agrees
        hits = tf.logical_and(mask, tf.equal(y_pred_labels, y_true))

        self.correct.assign_add(tf.reduce_sum(tf.cast(hits, tf.float32)))
        self.total_predicted.assign_add(tf.reduce_sum(tf.cast(mask, tf.float32)))

    def result(self):
        """Return the accumulated ratio; epsilon keeps it finite when the class was never predicted."""
        return self.correct / (self.total_predicted + tf.keras.backend.epsilon())

    def reset_state(self):
        """Zero the accumulators (the current Keras reset hook)."""
        self.correct.assign(0.0)
        self.total_predicted.assign(0.0)

    def reset_states(self):
        """Deprecated spelling of ``reset_state``, kept for existing callers."""
        self.reset_state()

    def get_config(self):
        """Return a config from which the metric can be re-created."""
        config = super().get_config()
        config.update({"name": self._name_prefix, "target_class": self.target_class})
        return config

In [4]:
# Pick a distribution strategy: TPU when one can be resolved, otherwise the
# default (CPU/GPU) strategy.
try:
    tpu = tf.distribute.cluster_resolver.TPUClusterResolver()  # Detect TPU
    tf.config.experimental_connect_to_cluster(tpu)
    tf.tpu.experimental.initialize_tpu_system(tpu)
    strategy = tf.distribute.TPUStrategy(tpu)  # Use TPU Strategy
    print("Running on TPU")
except ValueError:
    # A ValueError from the resolver is treated as "no TPU available".
    strategy = tf.distribute.get_strategy()  # Fallback to CPU/GPU
    print("Running on CPU/GPU")

# Report the chosen strategy on both paths, not only on the fallback.
print("Running on", strategy)

Running on CPU/GPU
Running on <tensorflow.python.distribute.distribute_lib._DefaultDistributionStrategy object at 0x7800d2b8b790>


In [12]:
# Three stacked bidirectional LSTMs over (timesteps, n_features) windows,
# followed by a dense head that outputs probabilities for the 3 classes.
layer_list = [
    Input(shape=(timesteps, shape[1])),
    Bidirectional(layers.LSTM(128, return_sequences=True)),
    Bidirectional(layers.LSTM(128, return_sequences=True)),
    Bidirectional(layers.LSTM(128, return_sequences=False)),
    layers.Dense(100, activation='relu'),
    layers.Dense(3, activation='softmax')
]

# Stop once validation loss has not improved for 10 epochs and roll back to
# the best weights seen, instead of always keeping whatever the final epoch
# produced. `epochs` therefore acts as an upper bound.
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=10,
    restore_best_weights=True,
)

with strategy.scope():
    model = models.Sequential(layer_list)
    optim = tf.keras.optimizers.RMSprop(learning_rate=0.0005)
    model.compile(
        optimizer=optim,
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy', ClassSpecificAccuracy(target_class=2), ClassSpecificPrecision(target_class=2)],
    )
    # NOTE: test_dataset doubles as the validation set here, so the "test"
    # numbers printed below are not from data unseen during model selection.
    model.fit(
        train_dataset,
        class_weight=weights,
        epochs=epochs,
        validation_data=test_dataset,
        callbacks=[early_stopping],
    )

# evaluate() returns the loss followed by the metrics in compile() order.
test_loss, test_acc, class_2_acc, class_2_prec = model.evaluate(test_dataset)
print('Test accuracy:', test_acc)
print('Class 2 accuracy:', class_2_acc)
print('Class 2 precision:', class_2_prec)

Epoch 1/100
[1m2499/2499[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m43s[0m 16ms/step - accuracy: 0.4028 - class_accuracy_2: 0.3541 - class_precision_2: 0.1392 - loss: 0.9832 - val_accuracy: 0.1742 - val_class_accuracy_2: 0.9306 - val_class_precision_2: 0.1305 - val_loss: 1.2465
Epoch 2/100
[1m2499/2499[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m39s[0m 15ms/step - accuracy: 0.3870 - class_accuracy_2: 0.4461 - class_precision_2: 0.1416 - loss: 0.9639 - val_accuracy: 0.2341 - val_class_accuracy_2: 0.8000 - val_class_precision_2: 0.1452 - val_loss: 1.2233
Epoch 3/100
[1m2499/2499[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m39s[0m 15ms/step - accuracy: 0.3788 - class_accuracy_2: 0.4803 - class_precision_2: 0.1467 - loss: 0.9634 - val_accuracy: 0.3162 - val_class_accuracy_2: 0.5017 - val_class_precision_2: 0.1637 - val_loss: 1.0445
Epoch 4/100
[1m2499/2499[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m42s[0m 16ms/step - accuracy: 0.3929 - class_accuracy_2: 0.4812 - class_

In [6]:
# Class probabilities for every window in the test dataset
y_pred = model.predict(test_dataset)
# Index of the highest-probability class in each row
y_pred_classes = y_pred.argmax(axis=1)

# Tally how many windows were assigned to each predicted class
unique, counts = np.unique(y_pred_classes, return_counts=True)
class_distribution = {label: count for label, count in zip(unique, counts)}
print("Predicted Class Distribution:", class_distribution)

[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 4ms/step
Predicted Class Distribution: {1: 9993}


In [7]:
# Peek at the first 20 probability rows
print(y_pred[:20])
# Rows in which class 2 holds the (possibly tied) highest probability
class_2_rows = [probs for probs in y_pred if probs[2] == max(probs)]
print(class_2_rows)

[[0.35450786 0.35747004 0.28802204]
 [0.35450786 0.35747004 0.28802204]
 [0.35450786 0.35747004 0.28802204]
 [0.35450786 0.35747004 0.28802204]
 [0.35450786 0.35747004 0.28802204]
 [0.35450786 0.35747004 0.28802204]
 [0.35450786 0.35747004 0.28802204]
 [0.35450786 0.35747004 0.28802204]
 [0.35450786 0.35747004 0.28802204]
 [0.35450786 0.35747004 0.28802204]
 [0.35450786 0.35747004 0.28802204]
 [0.35450786 0.35747004 0.28802204]
 [0.35450786 0.35747004 0.28802204]
 [0.35450786 0.35747004 0.28802204]
 [0.35450786 0.35747004 0.28802204]
 [0.35450786 0.35747004 0.28802204]
 [0.35450786 0.35747004 0.28802204]
 [0.35450786 0.35747004 0.28802204]
 [0.35450786 0.35747004 0.28802204]
 [0.35450786 0.35747004 0.28802204]]
[]


In [9]:
from google.colab import runtime

# Final cell: ask Colab to unassign the runtime backing this notebook.
# NOTE(review): presumably this releases the VM so it stops consuming quota
# after a long training run; anything held only in memory would be lost.
# Confirm against the Colab docs.
runtime.unassign()