In [1]:
import numpy as np
import pandas as pd
# import pandas_ta as ta
from datetime import datetime as dt

from plotly import graph_objects as go

In [None]:
# Load the XAUUSD hourly bars from CSV, using the "datetime" column as the index.
# NOTE(review): no parse_dates is passed, so the index is presumably kept as
# plain strings rather than timestamps — confirm this is intended.
xau = pd.read_csv("../data/xauusd_1h.csv", index_col="datetime")
# DataFrame.size is the total number of cells (rows x columns), not the row count.
xau.size

In [3]:
def pivot_high(highs:pd.Series, window:int=14):
    """Mark pivot highs in a series of bar highs.

    A bar at position ``i`` is a pivot high when its value equals the maximum
    of the symmetric neighbourhood ``[i - window, i + window]`` (both ends
    inclusive). Bars closer than ``window`` to either end of the series are
    never marked because their neighbourhood is incomplete.

    Parameters
    ----------
    highs : pd.Series
        High prices, in bar order.
    window : int, default 14
        Number of bars required on each side of a pivot.

    Returns
    -------
    np.ndarray
        Float array with the same length as ``highs``; holds the high price at
        pivot positions and 0.0 everywhere else. When the maximum occurs more
        than once inside a neighbourhood, every tying bar is marked.
    """
    n = highs.size
    ph = np.zeros(shape=(n, ), dtype=float)

    for i in range(window, n - window):
        # The slice end is exclusive, so "+ 1" is needed to include bar
        # i + window and keep the look-back and look-ahead sides equal.
        if highs.iloc[i] == highs.iloc[i - window:i + window + 1].max():
            ph[i] = highs.iloc[i]
    return ph

def pivot_low(lows:pd.Series, window:int=14):
    """Mark pivot lows in a series of bar lows.

    A bar at position ``i`` is a pivot low when its value equals the minimum
    of the symmetric neighbourhood ``[i - window, i + window]`` (both ends
    inclusive). Bars closer than ``window`` to either end of the series are
    never marked because their neighbourhood is incomplete.

    Parameters
    ----------
    lows : pd.Series
        Low prices, in bar order.
    window : int, default 14
        Number of bars required on each side of a pivot.

    Returns
    -------
    np.ndarray
        Float array with the same length as ``lows``; holds the low price at
        pivot positions and 0.0 everywhere else. When the minimum occurs more
        than once inside a neighbourhood, every tying bar is marked.
    """
    n = lows.size
    pl = np.zeros(shape=(n, ), dtype=float)

    for i in range(window, n - window):
        # The slice end is exclusive, so "+ 1" is needed to include bar
        # i + window and keep the look-back and look-ahead sides equal.
        if lows.iloc[i] == lows.iloc[i - window:i + window + 1].min():
            pl[i] = lows.iloc[i]
    return pl

In [4]:
# Add the pivot markers to the frame in place: 'ph' / 'pl' hold the pivot price
# at pivot bars and 0 elsewhere, using 5 bars on each side.
xau['ph'] = pivot_high(xau['high'], window=5)
xau['pl'] = pivot_low(xau['low'], window=5)

In [None]:
# Number of detected pivots: a non-zero entry marks a pivot bar.
for pivot_col in ('ph', 'pl'):
    print(xau[pivot_col].ne(0).sum())

In [9]:
def plot_data(df, title="XAUUSD - H1"):
    """Show a candlestick chart of ``df`` with its pivot highs/lows overlaid.

    Parameters
    ----------
    df : pd.DataFrame
        Must contain the columns 'open', 'high', 'low', 'close', 'ph' and 'pl'.
        In 'ph' / 'pl' a value of 0 means "no pivot at this bar"; any other
        value is plotted as a marker at that price.
    title : str, default "XAUUSD - H1"
        Figure title.

    Returns
    -------
    None
        The figure is displayed via ``fig.show()``.
    """
    # Keep only the bars that carry a pivot (non-zero marker).
    pivot_lows = df.loc[df['pl'] != 0, 'pl']
    pivot_highs = df.loc[df['ph'] != 0, 'ph']

    fig = go.Figure(
        data=[go.Candlestick(x=df.index,
                            open=df['open'],
                            high=df['high'],
                            low=df['low'],
                            close=df['close']),
              # Marker colour is set through `marker.color`; `fillcolor` only
              # affects area fills and leaves markers at the default colour.
              go.Scatter(x=pivot_lows.index,
                         y=pivot_lows,
                         mode='markers',
                         marker=dict(color='magenta'),
                         name="Pivot Low"),
              go.Scatter(x=pivot_highs.index,
                         y=pivot_highs,
                         mode='markers',
                         marker=dict(color='green'),
                         name="Pivot High")]
    )

    fig.update_layout(title=title,
                    xaxis_title="Date Time",
                    yaxis_title="Price",
                    # Categorical x axis: bars are evenly spaced by position.
                    xaxis=dict(type="category"),
                    width=1200,
                    height=800,
                    )

    fig.show()

In [None]:
# `ph_ind` was not defined anywhere in the notebook, so this cell failed on a
# fresh kernel. It is taken here to be the integer positions of the pivot highs
# (non-zero entries of 'ph') — NOTE(review): confirm this matches the intent.
ph_ind = np.flatnonzero(xau['ph'].to_numpy() != 0)

# Plot roughly 100 bars around the sixth pivot high. The start is clamped at 0
# so a pivot near the beginning cannot turn into a negative (wrapping) index.
pivot_pos = ph_ind[5]
plot_data(xau.iloc[max(pivot_pos - 50, 0):pivot_pos + 50])

# Machine Learning on the OHLC

## Data Preprocessing

In [None]:
# List the columns currently in the frame (includes 'ph'/'pl' if the pivot cell was run).
xau.columns

In [None]:
# Feature columns fed to the model; this order fixes the column indices used
# later (0=open, 1=high, 2=low, 3=close).
columns = ['open', 'high', 'low', 'close']

# Price-only view of the data (all four OHLC columns, despite the "hlc" name).
xau_hlc = xau[columns]

xau_hlc.shape

In [5]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler

seq_length = 20     # 20 bars as a sequence to go into the ML algorithm
# Scale each OHLC column independently to [0, 1].
# NOTE(review): the scaler is fit on every row, before the train/val/test split
# made further down, so the min/max of the validation and test rows influence
# the scaling of the training data. Consider fitting on the training rows only.
scaler = MinMaxScaler()
data_scaled = scaler.fit_transform(xau_hlc)

In [6]:
# Build supervised samples: each input is `seq_length` consecutive bars and the
# target is the bar that immediately follows that window.
X, y = [], []
for i in range(seq_length, data_scaled.shape[0]):
    X.append(data_scaled[i-seq_length:i])    # bars i-seq_length .. i-1
    # Target is bar i, the very next bar. Using i+1 here skipped one bar, which
    # contradicts the later plots that draw the target right after the window.
    # We predict high, low and close only, since the open is already obtained.
    y.append(data_scaled[i, 1:4])

X, y = np.array(X), np.array(y)

In [None]:
# Sanity check: X is (samples, seq_length, features) and y is (samples, 3 targets).
X.shape, y.shape

In [69]:
## Train Test Split
# Train:        70%
# Cross Val:    15%
# Test:         15%
#
# The samples are overlapping windows of one time series, so neighbouring
# samples share most of their bars. A shuffled split would scatter these
# near-duplicates across train/val/test and inflate the validation and test
# scores. shuffle=False keeps the split chronological: oldest 70% for training,
# then 15% for validation, and the most recent 15% for testing.

X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.3, shuffle=False)
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, shuffle=False)

In [70]:
import torch

# New tensors are created on the CPU by default; batches are moved to `device`
# inside the training/evaluation loops.
torch.set_default_device('cpu')
# Use the GPU when one is available, otherwise fall back to the CPU so the
# notebook still runs on machines without CUDA.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Convert to PyTorch tensors.
# as_tensor accepts both NumPy arrays and tensors, so re-running this cell
# after the names already hold tensors does not warn or re-copy.
X_train, y_train = torch.as_tensor(X_train, dtype=torch.float32), torch.as_tensor(y_train, dtype=torch.float32)
X_val, y_val = torch.as_tensor(X_val, dtype=torch.float32), torch.as_tensor(y_val, dtype=torch.float32)
X_test, y_test = torch.as_tensor(X_test, dtype=torch.float32), torch.as_tensor(y_test, dtype=torch.float32)


In [None]:
# Sanity check of the training tensors: shapes and the device they live on.
X_train.shape, y_train.shape, X_train.device

In [72]:
## Creating the dataloader class for batch processing

from torch.utils.data import Dataset, DataLoader

# Dataset and DataLoader
class TimeSeriesDataset(Dataset):
    """Pairs each input sequence with its target so a DataLoader can batch them.

    ``inputs`` and ``targets`` are indexed in parallel: item ``k`` of the
    dataset is ``(inputs[k], targets[k])``.
    """

    def __init__(self, inputs, targets):
        # Stored as given; no copy or validation is performed.
        self.inputs, self.targets = inputs, targets

    def __len__(self):
        """Number of samples, taken from the length of ``inputs``."""
        n_samples = len(self.inputs)
        return n_samples

    def __getitem__(self, idx):
        """Return the ``(input, target)`` pair stored at position ``idx``."""
        sample = self.inputs[idx]
        label = self.targets[idx]
        return sample, label


In [73]:
## Load the data

# Wrap each split so it can be consumed by a DataLoader.
train_dataset = TimeSeriesDataset(X_train, y_train)
val_dataset = TimeSeriesDataset(X_val, y_val)
test_dataset = TimeSeriesDataset(X_test, y_test)


batch_size = 32
# Only the training batches are shuffled; validation keeps the dataset order.
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size)

## Defining the LSTM model

In [74]:
from torch import nn

class LSTMModel(nn.Module):
    """Stacked LSTM followed by a linear head.

    The input sequence is run through the LSTM, and the final hidden state of
    the top LSTM layer is mapped to ``output_size`` values.
    """

    def __init__(self, input_size, hidden_size, num_layers, output_size):
        super().__init__()
        # batch_first=True: batched inputs are laid out as (batch, seq, feature).
        self.lstm = nn.LSTM(
            input_size,
            hidden_size,
            num_layers,
            batch_first=True,
        )
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        """Return the linear head applied to the top layer's final hidden state."""
        _, (final_hidden, _) = self.lstm(x)
        # final_hidden stacks one final state per layer; index -1 is the top layer.
        top_layer_state = final_hidden[-1]
        return self.fc(top_layer_state)

## Train the Model

In [78]:
# Seed PyTorch's global RNG so weight initialisation (and any later draws from
# that RNG) are repeatable from one run of the notebook to the next.
torch.manual_seed(42)

input_size = len(columns)   # one feature per OHLC column
hidden_size = 64
num_layers = 4
output_size = 3             # high, low, close of the predicted bar

model = LSTMModel(input_size, hidden_size, num_layers, output_size).to(device)
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)



In [None]:
# Training Loop
# Each epoch makes one optimisation pass over train_loader, then one
# gradient-free pass over val_loader. The per-epoch average losses are
# collected so they can be plotted afterwards.
epochs = 20

train_losses = []
val_losses = []

for epoch in range(epochs):
    model.train()
    train_loss = 0
    for X_batch, y_batch in train_loader:
        optimizer.zero_grad()  # clear gradients left over from the previous step
        predictions = model(X_batch.to(device))
        loss = criterion(predictions, y_batch.to(device))
        loss.backward()
        optimizer.step()
        train_loss += loss.item()  # accumulate this batch's loss value
    train_loss /= len(train_loader)  # average over the number of batches
    train_losses.append(train_loss)
    
    # Validation: evaluation mode and no gradient tracking
    model.eval()
    val_loss = 0
    with torch.no_grad():
        for X_batch, y_batch in val_loader:
            predictions = model(X_batch.to(device))
            loss = criterion(predictions, y_batch.to(device))
            val_loss += loss.item()
    val_loss /= len(val_loader)  # average over the number of batches
    val_losses.append(val_loss)

    print(f"Epoch {epoch+1}/{epochs}, Train Loss: {train_loss:.8f}, Val Loss: {val_loss:.8f}")


In [None]:
length = len(val_losses)

import matplotlib.pyplot as plt

# Learning curves: per-epoch average training and validation loss.
# Epochs are numbered from 1 to match the log printed during training.
epoch_numbers = range(1, length + 1)
plt.plot(epoch_numbers, train_losses, 'go-', label="train")
plt.plot(epoch_numbers, val_losses, 'bo-', label="cross val")
plt.title("Training vs. validation loss")
plt.xlabel("Epoch")
plt.ylabel("MSE loss (scaled units)")
plt.legend(loc=0)
plt.grid(alpha=0.5)
plt.show()

## Evaluate the model using the test data

In [None]:
# Evaluation
# Average test loss, computed the same way as the train/validation losses.
test_loader = DataLoader(test_dataset, batch_size=batch_size)

model.eval()
test_loss = 0
with torch.no_grad():
    for X_batch, y_batch in test_loader:
        predictions = model(X_batch.to(device))
        loss = criterion(predictions, y_batch.to(device))
        test_loss += loss.item()
# Each `loss` is already the mean over its batch, so the sum is averaged over
# the number of batches. Dividing by the number of samples would under-report
# the loss by roughly a factor of the batch size.
test_loss /= len(test_loader)
print(f"Test Loss: {test_loss:.8f}")

In [85]:
def inverse_transform_pred(pred:np.ndarray, columns:list, fitted_scaler=None):
    """Convert scaled (high, low, close) values back to price units.

    The scaler was fit on all of ``columns``, so the three predicted values are
    placed into columns 1..3 of a zero-filled array with ``len(columns)``
    columns, inverse-transformed, and then extracted again.

    Parameters
    ----------
    pred : np.ndarray
        Scaled values, shape ``(n, 3)``. A single 1-D sample of shape ``(3,)``
        is treated as one row.
    columns : list
        Column names the scaler was fit on; only the length is used.
    fitted_scaler : object, optional
        Object with an ``inverse_transform`` method. Defaults to the
        notebook-level ``scaler``.

    Returns
    -------
    np.ndarray
        Array of shape ``(n, 3)`` holding high, low and close in original units.
    """
    if fitted_scaler is None:
        fitted_scaler = scaler  # fall back to the scaler fitted earlier in the notebook

    # Force a 2-D (rows, 3) layout. A bare 1-D sample would otherwise be
    # broadcast across three identical rows by the assignment below.
    pred = np.atleast_2d(np.asarray(pred, dtype=float))

    placeholder = np.zeros((pred.shape[0], len(columns)))

    # Fill in the predicted values into the appropriate columns
    placeholder[:, 1:4] = pred

    # Inverse transform the scaled data
    predicted_actual = fitted_scaler.inverse_transform(placeholder)

    # Extract the actual 'High', 'Low', 'Close' columns
    return predicted_actual[:, 1:4]

In [None]:
# Predict the first five test samples without tracking gradients, then move the
# result back to the CPU as a NumPy array. Both printed arrays are in the
# scaler's scaled units, not prices.
with torch.no_grad():
    sample_predictions = model(X_test[:5].to(device)).cpu().numpy()
print("Sample Predictions:\n", sample_predictions)
print("True values:\n", y_test[:5].numpy())

In [None]:
# Map the scaled sample predictions back to price units (columns: high, low, close).
pred = inverse_transform_pred(sample_predictions, columns)
pred

In [94]:
i = 15  # index of the test sample to inspect

sample_X = X_test[i]
with torch.no_grad():
    # Add an explicit batch dimension so the model sees (1, seq_length, features)
    # and returns a (1, 3) prediction instead of relying on unbatched input.
    sample_y = model(sample_X.unsqueeze(0).to(device)).cpu().numpy()
sample_X = sample_X.numpy()
# Keep the target 2-D as well, as a single (high, low, close) row.
actual_y = y_test[i].numpy().reshape(1, -1)

# Back to price units. After the reshape, sample_y / actual_y are column
# vectors ordered high, low, close.
sample_X = scaler.inverse_transform(sample_X)
sample_y = inverse_transform_pred(sample_y, columns).reshape(-1, 1)
actual_y = inverse_transform_pred(actual_y, columns).reshape(-1, 1)

In [None]:
from matplotlib import pyplot as plt

# Close prices of the input window, with the predicted and the true close of
# the target bar drawn one step to the right of the window.
length = len(sample_X)

close_ax = plt.gca()
bar_positions = np.arange(length)
close_ax.plot(bar_positions, sample_X[:, -1], 'bo-', label='X data')
close_ax.plot(length, sample_y[-1], 'go', label="Y pred")
close_ax.plot(length, actual_y[-1], 'ro', label="Y true")
close_ax.grid(alpha=.5)
close_ax.set_ylabel("Close Price")
close_ax.legend(loc=0)
plt.show()


In [None]:
length = len(sample_X)

# The target candle's open is not predicted; it is approximated by the close
# of the last bar in the input window, for the predicted and the true candle.
last_close = sample_X[-1, 3]

# sample_y / actual_y are column vectors ordered high, low, close; pull out
# plain scalars so each candlestick field receives a single number.
pred_high, pred_low, pred_close = sample_y[:3, 0]
true_high, true_low, true_close = actual_y[:3, 0]

fig = go.Figure(data=[
    go.Candlestick(x=np.arange(length),
                   open=sample_X[:, 0],
                   high=sample_X[:, 1],
                   low=sample_X[:, 2],
                   close=sample_X[:, 3],
                   name="X data"),
    go.Candlestick(x=(length, ),
                   open=(last_close, ),       # Open of new candle is the close of last candle :)
                   high=(pred_high, ),
                   low=(pred_low, ),
                   close=(pred_close, ),
                   name="Y pred"),
    # Drawn one slot to the right of the prediction so the two candles can be
    # compared side by side.
    go.Candlestick(x=(length+1, ),
                   # Same open as the predicted candle. This used to be
                   # actual_y[-1, 0], which is the true close, so the candle
                   # was drawn with open == close.
                   open=(last_close, ),
                   high=(true_high, ),
                   low=(true_low, ),
                   close=(true_close, ),
                   name="Y true")
])

fig.update_layout(
    width=1200,
    height=800,
    title="Sample prediction vs. true value",
    xaxis_title="i_th bar",
    yaxis_title="Price"
)