In [120]:
import torch
import numpy as np
import matplotlib.pyplot as plt
import torch.nn as nn
import torch.optim as optim
from sklearn.preprocessing import StandardScaler
import pandas as pd
import tkinter as tk
from tkinter import filedialog
import pandas_ta as ta
from sklearn.model_selection import TimeSeriesSplit
from torch.nn.utils.rnn import pad_sequence
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
import seaborn as sns

root = tk.Tk()
root.wm_attributes('-topmost', 1)
root.withdraw()

# model, data prep, and model run

# normalize data and assign movement direction values
def prep(dataset):
    scaler = StandardScaler()

    dataset['RSI (14D)'] = ta.rsi(dataset['Close'], length=14)
    dataset['20 Day CCI'] = ta.cci(high=dataset['High'], low=dataset['Low'], 
                                    close=dataset['Close'], length=20)
    dataset['Williams %R'] = ta.willr(high=dataset['High'], low=dataset['Low'], 
                                        close=dataset['Close'], length=14)
    dataset['EMA (5D)'] = dataset['Close'].ewm(span=5, adjust=False).mean()

    features = ['Open', 'High', 'Low', 'Close', 'Volume', 'RSI (14D)', 
                '20 Day CCI', 'Williams %R', 'Mortgage_rate', 'Unemp_rate',
                'disposable_income', 'Personal_consumption_expenditure', 
                'personal_savings']
    
    dataset[features] = dataset[features].astype(float)
    dataset[features] = scaler.fit_transform(dataset[features])

    dataset['MA50'] = dataset['Close'].rolling(window=50).mean()

    dataset['Target'] = np.where(dataset['Close'].shift(-1) > dataset['Close'], 1, 0)
    prepared_data = dataset.dropna().tail(503)

    return prepared_data, scaler

# create LSTM model class
class LSTM_Model(nn.Module):
    def __init__(self, input_layer, hidden_layer, output_layer):
        super(LSTM_Model, self).__init__()
        self.hidden_layer = hidden_layer
        self.lstm = nn.LSTM(input_layer, hidden_layer, batch_first=True)
        self.linear_layer = nn.Linear(hidden_layer, output_layer)
        self.hidden_cell = (torch.zeros(1, 1, self.hidden_layer),
                            torch.zeros(1, 1, self.hidden_layer))

    # Define the forward pass of the LSTM_Model
    def forward(self, input_tensor):
        self.hidden_cell = (torch.zeros(1, input_tensor.size(0), self.hidden_layer), 
                            torch.zeros(1, input_tensor.size(0), self.hidden_layer))
        
        # Pass the input through the LSTM layer
        out, self.hidden_cell = self.lstm(input_tensor, self.hidden_cell)
        
        # Get the output of the last time step
        lstm_out_last = out[:, -1, :]
        
        # Pass the output through the linear layer
        linear_out = self.linear_layer(lstm_out_last)
        
        return linear_out

# create sequences for input data and corresponding labels
def create_sequence(input_data, sequence_length):
  sequences = []
  for i in range(len(input_data) - sequence_length):
    sequence = input_data[i : i + sequence_length, :-1]
    label = input_data[i + sequence_length, -1]
    sequences.append((sequence, label))
  return sequences

# train the model with data provided
def trainer(model, train_data, loss_func, opt, epochs):
  for epoch in range(epochs):
    for sequence, labels, in train_data:
      opt.zero_grad()
      model.hidden_cell = (torch.zeros(1, 1, model.hidden_layer),
                           torch.zeros(1, 1, model.hidden_layer))
      
      sequence = torch.tensor(sequence).float()
      labels = torch.tensor(labels).float().view(-1, 1)

      # Initialize the hidden state at the start of each sequence
      model.hidden_cell = (torch.zeros(1, sequence.size(0), 
                                       model.hidden_layer),
                            torch.zeros(1, sequence.size(0), 
                                        model.hidden_layer))

      y = model(sequence)
      loss = loss_func(y, labels)
      loss.backward()
      opt.step()

    # print progress as the model runs
    if epoch % 25 == 1:
      print(f'Epoch {epoch} loss: {loss.item():.4f}')

# make predictions using trained model
def predictor(model, test_data):
    model.eval()
    with torch.no_grad():
        predictions = []
        for sequence, _ in test_data:
            sequence = torch.tensor(sequence).float()

            # Initialize the hidden state at the start of each sequence
            model.hidden_cell = (torch.zeros(1, sequence.size(0), 
                                             model.hidden_layer),
                                 torch.zeros(1, sequence.size(0), 
                                             model.hidden_layer))
            
            y = model(sequence)

            batch_predictions = torch.round(torch.sigmoid(y))

            predictions.extend(batch_predictions.squeeze().tolist())

    return predictions

# load and prep data

# get dataset
file_path = filedialog.askopenfilename(parent=root, title="Select A File")
ticker = pd.read_csv(file_path)
ticker, scaler = prep(ticker)

# create sequences
sequence_length = 10
sequences = create_sequence(ticker[['Close', 'RSI (14D)', 
                '20 Day CCI', 'Williams %R', 'Mortgage_rate',
                'disposable_income', 'Personal_consumption_expenditure', 
                'personal_savings', 'MA50', 'Target']].values, sequence_length)

# cross-validation
tscv = TimeSeriesSplit(n_splits=5)
accuracies = []
precisions = []
recalls = []
f1s = []

for fold, (train_index, test_index) in enumerate(tscv.split(sequences)):
    print(f'Fold {fold+1}')
    
    train_sequences = [sequences[i] for i in train_index]
    test_sequences = [sequences[i] for i in test_index]
    
    train_data = torch.utils.data.DataLoader(train_sequences, shuffle=True, batch_size=8)
    test_data = torch.utils.data.DataLoader(test_sequences, shuffle=True, batch_size=8)
    
    # initialize model
    model = LSTM_Model(input_layer=9, hidden_layer=50, output_layer=1)
    loss_func = nn.BCEWithLogitsLoss()
    opt = optim.Adam(model.parameters(), lr=0.001, weight_decay=0.001)
    
    # train model
    epochs = 150
    trainer(model, train_data, loss_func, opt, epochs)
    
    # make predictions
    test_labels = [label for _, label in test_sequences]
    predictions = predictor(model, test_data)
    
    # calculate statistics
    accuracy = accuracy_score(test_labels, predictions)
    precision = precision_score(test_labels, predictions)
    recall = recall_score(test_labels, predictions)
    f1 = f1_score(test_labels, predictions)
    
    accuracies.append(accuracy)
    precisions.append(precision)
    recalls.append(recall)
    f1s.append(f1)
    
    print(f'Accuracy: {accuracy:.4f}')
    print(f'Precision: {precision:.4f}')
    print(f'Recall: {recall:.4f}')
    print(f'F1: {f1:.4f}')

# average scores across all folds
avg_accuracy = np.mean(accuracies)
avg_precision = np.mean(precisions)
avg_recall = np.mean(recalls)
avg_f1 = np.mean(f1s)

print(f'\nCross-Validation Results:')
print(f'Average Accuracy: {avg_accuracy:.4f}')
print(f'Average Precision: {avg_precision:.4f}')
print(f'Average Recall: {avg_recall:.4f}')
print(f'Average F1: {avg_f1:.4f}')


Fold 1
Epoch 1 loss: 0.7360


  sequence = torch.tensor(sequence).float()
  labels = torch.tensor(labels).float().view(-1, 1)


Epoch 26 loss: 0.7470
Epoch 51 loss: 0.8644
Epoch 76 loss: 0.6799
Epoch 101 loss: 0.6924
Epoch 126 loss: 0.3917


  sequence = torch.tensor(sequence).float()
  sequence = torch.tensor(sequence).float()
  labels = torch.tensor(labels).float().view(-1, 1)


Accuracy: 0.5976
Precision: 0.6667
Recall: 0.8000
F1: 0.7273
Fold 2
Epoch 1 loss: 0.5720
Epoch 26 loss: 0.7235
Epoch 51 loss: 0.6793
Epoch 76 loss: 0.6911
Epoch 101 loss: 0.7969
Epoch 126 loss: 0.3670


  sequence = torch.tensor(sequence).float()
  sequence = torch.tensor(sequence).float()
  labels = torch.tensor(labels).float().view(-1, 1)


Accuracy: 0.5000
Precision: 0.5217
Recall: 0.8182
F1: 0.6372
Fold 3
Epoch 1 loss: 0.6668
Epoch 26 loss: 0.6550
Epoch 51 loss: 0.7543
Epoch 76 loss: 0.5700
Epoch 101 loss: 0.6965
Epoch 126 loss: 0.6770


  sequence = torch.tensor(sequence).float()
  sequence = torch.tensor(sequence).float()
  labels = torch.tensor(labels).float().view(-1, 1)


Accuracy: 0.4146
Precision: 0.4889
Recall: 0.4681
F1: 0.4783
Fold 4
Epoch 1 loss: 0.7871
Epoch 26 loss: 0.4415
Epoch 51 loss: 0.9950
Epoch 76 loss: 0.5003
Epoch 101 loss: 0.4426
Epoch 126 loss: 0.4223


  sequence = torch.tensor(sequence).float()
  sequence = torch.tensor(sequence).float()
  labels = torch.tensor(labels).float().view(-1, 1)


Accuracy: 0.5854
Precision: 0.7778
Recall: 0.4286
F1: 0.5526
Fold 5
Epoch 1 loss: 0.6553
Epoch 26 loss: 0.6450
Epoch 51 loss: 0.5254
Epoch 76 loss: 0.5874
Epoch 101 loss: 0.7384
Epoch 126 loss: 0.5452
Accuracy: 0.5000
Precision: 0.5342
Recall: 0.8478
F1: 0.6555

Cross-Validation Results:
Average Accuracy: 0.5195
Average Precision: 0.5979
Average Recall: 0.6725
Average F1: 0.6102


  sequence = torch.tensor(sequence).float()


In [121]:
fall = (ticker.Target == 0).sum()
rise = (ticker.Target == 1).sum()

print(f'Number of days rising: {rise}')
print(f'Number of days falling: {fall}')

print(f'Rise % is: {(rise / (rise + fall)) * 100:.2f}%')
print(f'Fall % is: {(fall / (rise + fall)) * 100:.2f}%')

Number of days rising: 291
Number of days falling: 212
Rise % is: 57.85%
Fall % is: 42.15%
