In [1]:
# import necessary libraries
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
import torch
import torch.nn as nn

import pandas as pd
import numpy as np
from ta.volatility import BollingerBands
from ta.trend import MACD
from ta.momentum import RSIIndicator

from torch.utils.data import Dataset
from torch.utils.data import DataLoader

import matplotlib.pyplot as plt
from matplotlib.pyplot import figure
from sklearn.metrics import mean_squared_error
from torch.utils.data import TensorDataset, DataLoader

In [3]:
df = pd.read_csv('xauusd_M15.csv')

# Calculate the moving averages
df['MA_daily'] = df['close'].rolling(window=50).mean()
df['MA_weekly'] = df['close'].rolling(window=200).mean()

# Calculate the Relative Strength Index (RSI)
rsi_indicator = RSIIndicator(df['close'])
df['RSI'] = rsi_indicator.rsi()

# Calculate Bollinger Bands
bollinger = BollingerBands(df['close'])
df['BB_High'] = bollinger.bollinger_hband()
df['BB_Low'] = bollinger.bollinger_lband()

# Calculate MACD
macd_indicator = MACD(df['close'])
df['MACD'] = macd_indicator.macd()

# Remove nas
data = df.dropna()
data

Unnamed: 0,time,open,high,low,close,tick_volume,MA_daily,MA_weekly,RSI,BB_High,BB_Low,MACD
199,2017-01-20 11:45:00,1199.69,1200.93,1199.69,1200.88,5231,1205.0736,1170.06375,37.676300,1208.416330,1196.929670,-1.652759
200,2017-01-20 12:00:00,1200.88,1201.98,1200.30,1201.88,5876,1205.0126,1170.36195,44.871360,1207.376055,1197.289945,-1.463715
201,2017-01-20 12:15:00,1201.88,1202.46,1201.65,1202.22,4237,1204.9654,1170.65990,47.107203,1206.365793,1197.722207,-1.271800
202,2017-01-20 12:30:00,1202.22,1203.08,1201.96,1201.96,4667,1204.9214,1170.95300,45.584686,1205.654391,1197.989609,-1.127686
203,2017-01-20 12:45:00,1201.97,1201.98,1200.33,1200.41,5189,1204.8548,1171.23695,37.751297,1204.910847,1198.177153,-1.125573
...,...,...,...,...,...,...,...,...,...,...,...,...
149812,2023-06-09 19:30:00,1960.87,1961.09,1960.25,1960.65,650,1962.9322,1956.24090,43.152519,1963.540560,1960.265440,-0.602261
149813,2023-06-09 19:45:00,1960.65,1960.68,1959.68,1960.17,1032,1962.8418,1956.24880,41.397734,1963.391175,1960.090825,-0.668991
149814,2023-06-09 20:00:00,1960.18,1960.64,1960.09,1960.24,282,1962.7570,1956.26135,41.769619,1963.160226,1960.001774,-0.708065
149815,2023-06-09 20:15:00,1960.24,1960.93,1960.23,1960.85,156,1962.7108,1956.27480,45.042555,1962.979569,1959.983431,-0.681947


In [6]:
# device agnostic code.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
device

device(type='cuda')

In [7]:
# Define the sequence length
sequence_length = 50 

# Normalization
scaler = MinMaxScaler()
data_normalized = scaler.fit_transform(data.values[:, 1:]) 

# Convert pandas DataFrame to Tensor
data_normalized = torch.FloatTensor(data_normalized).to(device)

# Creating sequences
def create_inout_sequences(input_data, tw):
    inout_seq = []
    L = len(input_data)
    for i in range(L-tw):
        train_seq = input_data[i:i+tw]
        train_label = input_data[i+tw:i+tw+1, data.columns.get_loc('close') - 1]  # Adjusted for 0-indexing
        inout_seq.append((train_seq ,train_label))
    return inout_seq

train_size = int(len(data_normalized) * 0.7)
train_data = data_normalized[:train_size]
test_data = data_normalized[train_size:]

train_sequence = create_inout_sequences(train_data, sequence_length)
test_sequence = create_inout_sequences(test_data, sequence_length)



In [8]:
# Creating dataloaders
batch_size = 64  # Feel free to adjust this

train_dataloader = DataLoader(train_sequence, batch_size=batch_size, shuffle=False)
test_dataloader = DataLoader(test_sequence, batch_size=batch_size, shuffle=False)


In [23]:
# Create LSTM model
class LSTM(nn.Module):
    def __init__(self, input_size=11, hidden_layer_size=100, output_size=1):
        super().__init__()
        self.hidden_layer_size = hidden_layer_size
        self.lstm = nn.LSTM(input_size, hidden_layer_size, batch_first=True)
        self.linear = nn.Linear(hidden_layer_size, output_size)
        self.hidden_cell = (torch.zeros(1,1,self.hidden_layer_size).to(device),
                            torch.zeros(1,1,self.hidden_layer_size).to(device))
    def init_hidden(self, batch_size):
        
        return (torch.zeros(1, batch_size, self.hidden_layer_size).to(device),
            torch.zeros(1, batch_size, self.hidden_layer_size).to(device))

    def forward(self, input_seq):

        lstm_out, self.hidden_cell = self.lstm(input_seq.view(len(input_seq), 1, -1), self.hidden_cell)
        predictions = self.linear(lstm_out.view(len(input_seq), -1))
        return predictions.view(input_seq.size(0), -1)



# Instantiate the model, define the loss and the optimizer
model = LSTM(input_size = 11).to(device)
loss_function = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

In [14]:
# device agnostic code.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
device

device(type='cuda')

In [24]:
# Training loop
epochs = 150

for i in range(epochs):
    for seq, labels in train_dataloader:
        optimizer.zero_grad()
        model.hidden_cell = model.init_hidden(seq.size(0))  # initialize with batch size

        y_pred = model(seq.to(device))

        single_loss = loss_function(y_pred, labels.to(device))
        single_loss.backward()
        optimizer.step()

    if i%25 == 1:
        print(f'epoch: {i:3} loss: {single_loss.item():10.8f}')

print(f'epoch: {i:3} loss: {single_loss.item():10.10f}')

RuntimeError: input.size(-1) must be equal to input_size. Expected 11, got 550