In [6]:
import torch
import numpy as np
import matplotlib.pyplot as plt
import torch.nn as nn
import torch.optim as optim
from sklearn.preprocessing import StandardScaler
import pandas as pd
import tkinter as tk
from tkinter import filedialog
import pandas_ta as ta
from sklearn.model_selection import TimeSeriesSplit
from torch.nn.utils.rnn import pad_sequence
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
import seaborn as sns

from collections import Counter

# Initialize the Tkinter root window
root = tk.Tk()
root.wm_attributes('-topmost', 1)
root.withdraw()

# Normalize data and assign movement direction values
def prep(dataset):
    scaler = StandardScaler()

    dataset['RSI (14D)'] = ta.rsi(dataset['Close'], length=14)
    dataset['20 Day CCI'] = ta.cci(high=dataset['High'], low=dataset['Low'], 
                                   close=dataset['Close'], length=20)
    dataset['Williams %R'] = ta.willr(high=dataset['High'], low=dataset['Low'], 
                                      close=dataset['Close'], length=14)
    dataset['EMA (5D)'] = dataset['Close'].ewm(span=5, adjust=False).mean()

    features = ['Open', 'High', 'Low', 'Close', 'Volume', 'RSI (14D)', 
                '20 Day CCI', 'Williams %R', 'Mortgage_rate', 'Unemp_rate',
                'disposable_income', 'Personal_consumption_expenditure', 
                'personal_savings']
    
    dataset[features] = dataset[features].astype(float)
    dataset[features] = scaler.fit_transform(dataset[features])

    dataset['MA10'] = dataset['Close'].rolling(window=10).mean()
    dataset['MA50'] = dataset['Close'].rolling(window=50).mean()
    
    dataset['Target'] = np.where(dataset['Close'].shift(-1) > dataset['Close'], 1, 0)
    prepared_data = dataset.dropna()

    return prepared_data, scaler

# Define LSTM Model class
class LSTM_Model(nn.Module):
    def __init__(self, input_layer, hidden_layer, output_layer):
        super(LSTM_Model, self).__init__()
        self.hidden_layer = hidden_layer
        self.lstm1 = nn.LSTM(input_layer, hidden_layer, batch_first=True, bidirectional=False)
        self.relu1 = nn.ReLU()
        self.lstm2 = nn.LSTM(hidden_layer, hidden_layer, batch_first=True, bidirectional=False)
        self.relu2 = nn.ReLU()
        self.linear_layer = nn.Linear(hidden_layer, output_layer)
        self.hidden_cell1 = (torch.zeros(1,1,self.hidden_layer),
                            torch.zeros(1,1,self.hidden_layer))
        self.hidden_cell2 = (torch.zeros(1,1,self.hidden_layer),
                            torch.zeros(1,1,self.hidden_layer))

    def forward(self, input_tensor):
        self.hidden_cell1 = (torch.zeros(1, input_tensor.size(0), self.hidden_layer), 
                            torch.zeros(1, input_tensor.size(0), self.hidden_layer))
        self.hidden_cell2 = (torch.zeros(1, input_tensor.size(0), self.hidden_layer), 
                            torch.zeros(1, input_tensor.size(0), self.hidden_layer))
        
        out, self.hidden_cell1 = self.lstm1(input_tensor, self.hidden_cell1)
        out = self.relu1(out)

        out, self.hidden_cell2 = self.lstm2(out, self.hidden_cell2)
        out = self.relu2(out)

        lstm_out_last = out[:, -1, :]

        output = self.linear_layer(lstm_out_last)

        return output

# create sequences for input data and corresponding labels
def create_sequence(input_data, sequence_length):
  sequences = []
  for i in range(len(input_data) - sequence_length):
    sequence = input_data[i : i + sequence_length, :-1]
    label = input_data[i + sequence_length, -1]
    sequences.append((sequence, label))
  return sequences

# train the model with data provided
def trainer(model, train_data, loss_func, opt, epochs):
  for epoch in range(epochs):
    for sequence, labels, in train_data:
      opt.zero_grad()
      model.hidden_cell = (torch.zeros(1, 1, model.hidden_layer),
                           torch.zeros(1, 1, model.hidden_layer))
      
      sequence = torch.tensor(sequence).float()
      labels = torch.tensor(labels).float().view(-1, 1)

      # Initialize the hidden state at the start of each sequence
      model.hidden_cell = (torch.zeros(1, sequence.size(0), 
                                       model.hidden_layer),
                            torch.zeros(1, sequence.size(0), 
                                        model.hidden_layer))

      y = model(sequence)
      loss = loss_func(y, labels)
      loss.backward()
      opt.step()

    # print progress as the model runs
    if epoch % 25 == 1:
      print(f'Epoch {epoch} loss: {loss.item()}')

# make predictions using trained model
def predictor(model, test_data):
    model.eval()
    with torch.no_grad():
        predictions = []
        for sequence, _ in test_data:
            sequence = torch.tensor(sequence).float()

            # Initialize the hidden state at the start of each sequence
            model.hidden_cell = (torch.zeros(1, sequence.size(0), 
                                             model.hidden_layer),
                                 torch.zeros(1, sequence.size(0), 
                                             model.hidden_layer))
            
            y = model(sequence)
            predictions.append(torch.round(torch.sigmoid(y)).item())
    return predictions

# Load and prepare data
file_path = filedialog.askopenfilename(parent=root, title="Select A File")
ticker = pd.read_csv(file_path)
ticker, scaler = prep(ticker)

# Create sequences
sequence_length = 10
sequences = create_sequence(ticker[['Open', 'High', 'Low', 'Close', 'Volume', 
                                    'RSI (14D)', '20 Day CCI', 'Williams %R', 'Mortgage_rate', 
                                    'Unemp_rate','disposable_income', 'Personal_consumption_expenditure', 
                                    'personal_savings', 'MA10', 'MA50', 'Target']].values, sequence_length)

# split test/train and create dataloader
train_size = int(len(sequences) * 0.8) # set train size
train_sequences = sequences[ : train_size]
test_sequences = sequences[train_size : ]

train_data = torch.utils.data.DataLoader(train_sequences, shuffle=True, batch_size=1)
test_data = torch.utils.data.DataLoader(test_sequences, shuffle=True, batch_size=1)

# Initialize model
model = LSTM_Model(input_layer=15, hidden_layer=50, output_layer=1)
loss_func = nn.BCEWithLogitsLoss()
opt = optim.Adam(model.parameters(), lr=0.001) # weight_decay=0.01

# Train
epochs = 150
trainer(model, train_data, loss_func, opt, epochs)

# run model and predict values
test_labels = [label for _, label in test_sequences]
predictions = predictor(model, test_data)

# calcluate statistics
accuracy = accuracy_score(test_labels, predictions)
cm = confusion_matrix(test_labels, predictions)

print(f'Confusion Matrix:\n{cm}')
print(f'Accuracy: {accuracy}') 






     Unnamed: 0      Open      High       Low     Close     Adj Close  \
49   1991-07-01 -0.793459 -0.794274 -0.792834 -0.793512    481.309998   
50   1991-08-01 -0.785622 -0.786959 -0.784756 -0.786118    504.149994   
51   1991-10-01 -0.777571 -0.779061 -0.776878 -0.778232    528.510010   
52   1991-11-01 -0.772706 -0.774313 -0.772308 -0.774211    540.929993   
53   1992-04-01 -0.754432 -0.755420 -0.753462 -0.754411    602.090027   
..          ...       ...       ...       ...       ...           ...   
279  2021-07-01  3.743657  3.726129  3.762053  3.752103  14522.379880   
280  2021-09-01  4.007581  3.998264  4.043490  4.006884  15309.379880   
281  2021-10-01  3.744058  3.749699  3.724410  3.766452  14566.700200   
282  2021-11-01  4.082774  4.068859  4.098527  4.099648  15595.919920   
283  2021-12-01  4.151082  4.138946  4.024510  3.988972  15254.049810   

       Volume      CPI  Mortgage_rate  Unemp_rate  disposable_income  \
49  -1.135636  136.200       1.192941    0.476384  

  sequence = torch.tensor(sequence).float()
  labels = torch.tensor(labels).float().view(-1, 1)


Epoch 1 loss: 0.9443248510360718
Epoch 26 loss: 0.35145068168640137
Epoch 51 loss: 0.08487936109304428
Epoch 76 loss: 0.2745114862918854
Epoch 101 loss: 0.8494176864624023
Epoch 126 loss: 0.044519905000925064
Confusion Matrix:
[[ 6  7]
 [18 14]]
Accuracy: 0.4444444444444444


  sequence = torch.tensor(sequence).float()
