In [2]:
import torch
import numpy as np
import matplotlib.pyplot as plt
import torch.nn as nn
import torch.optim as optim
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score
import pandas as pd
import tkinter as tk
from tkinter import filedialog
import pandas_ta as ta
from sklearn.model_selection import TimeSeriesSplit
from torch.nn.utils.rnn import pad_sequence

# Initialize the Tkinter root window
root = tk.Tk()
root.wm_attributes('-topmost', 1)
root.withdraw()

# Normalize data and assign movement direction values
def prep(dataset):
    scaler = StandardScaler()

    dataset['RSI (14D)'] = ta.rsi(dataset['Close'], length=14)
    dataset['20 Day CCI'] = ta.cci(high=dataset['High'], low=dataset['Low'], 
                                   close=dataset['Close'], length=20)
    dataset['Williams %R'] = ta.willr(high=dataset['High'], low=dataset['Low'], 
                                      close=dataset['Close'], length=14)
    dataset['EMA (5D)'] = dataset['Close'].ewm(span=5, adjust=False).mean()

    features = ['Open', 'High', 'Low', 'Close', 'Volume', 'RSI (14D)', 
                '20 Day CCI', 'Williams %R', 'Mortgage_rate', 'Unemp_rate',
                'disposable_income', 'Personal_consumption_expenditure', 'personal_savings']
    
    dataset[features] = dataset[features].astype(float)
    dataset[features] = scaler.fit_transform(dataset[features])

    dataset['MA10'] = dataset['Close'].rolling(window=10).mean()
    dataset['MA50'] = dataset['Close'].rolling(window=50).mean()
    
    dataset['Target'] = np.where(dataset['Close'].shift(-1) > dataset['Close'], 1, 0)
    prepared_data = dataset.dropna().tail(503)

    return prepared_data, scaler

# Define LSTM Model class
class LSTM_Model(nn.Module):
    def __init__(self, input_layer, hidden_layer, output_layer):
        super(LSTM_Model, self).__init__()
        self.hidden_layer = hidden_layer
        #self.dropout_rate = dropout_rate
        self.lstm = nn.LSTM(input_layer, hidden_layer, batch_first=True, bidirectional=False)
        self.linear_layer = nn.Linear(hidden_layer, output_layer)
        self.hidden_cell = (torch.zeros(1,1,self.hidden_layer).cuda(),
                            torch.zeros(1,1,self.hidden_layer).cuda())

    def forward(self, input_tensor):
        self.hidden_cell = (torch.zeros(1, input_tensor.size(0), self.hidden_layer).cuda(), 
                            torch.zeros(1, input_tensor.size(0), self.hidden_layer).cuda())
        out, self.hidden_cell = self.lstm(input_tensor, self.hidden_cell)
        lstm_out_last = out[:, -1, :]
        predicted_values = self.linear_layer(lstm_out_last)
        return predicted_values

# Create sequences for input data and corresponding labels
def create_sequence(input_data, sequence_length):
    sequences = []
    labels = []
    for i in range(len(input_data) - sequence_length):
        sequence = input_data[i : i + sequence_length, :-1]
        label = input_data[i + sequence_length, -1]
        sequences.append(sequence)
        labels.append(label)

    sequences = np.array(sequences)
    labels = np.array(labels)

    return sequences, labels

# Convert sequences and labels to PyTorch tensors
def convert_to_tensor(sequences, labels):
    # Convert sequences and labels to tensors
    sequences = [torch.tensor(seq, dtype=torch.float32) for seq in sequences]
    labels = torch.tensor(labels, dtype=torch.float32).view(-1, 1)
    
    # Pad sequences
    sequences = pad_sequence(sequences, batch_first=True)
    
    return sequences, labels

# Train the model with data provided
def trainer(model, train_data, val_data, loss_func, opt, epochs, device='cuda'):
    train_errors, val_errors = [], []

    for epoch in range(epochs):
        model.train()
        epoch_loss = 0
        for sequence, labels in train_data:
            opt.zero_grad()
            model.hidden_cell = (torch.zeros(1, 1, model.hidden_layer).to(device),
                                 torch.zeros(1, 1, model.hidden_layer).to(device))
            
            sequence = sequence.to(device)
            labels = labels.to(device)

            y = model(sequence)
            loss = loss_func(y, labels)
            loss.backward()
            opt.step()

            epoch_loss += loss.item()

        # Compute average loss for the epoch
        train_errors.append(epoch_loss / len(train_data))

        # Validate the model
        val_accuracy = evaluate_model(model, val_data, device)
        val_errors.append(1 - val_accuracy)  # Convert accuracy to error

        # Print progress
        if epoch % 25 == 1:
            print(f'Epoch {epoch}, Training Loss: {epoch_loss / len(train_data)}, Validation Error: {1 - val_accuracy}')
    
    return train_errors, val_errors

def evaluate_model(model, data_loader, device='cuda'):
    model.eval()
    predictions, labels = [], []
    with torch.no_grad():
        for sequence, label in data_loader:
            sequence = sequence.float().to(device)
            output = model(sequence)
            pred = torch.round(torch.sigmoid(output)).cpu().numpy()
            predictions.extend(pred.flatten())
            labels.extend(label.numpy())
    return accuracy_score(labels, predictions)

# Time Series Cross-Validation
def time_series_cv(model_class, sequences, labels, n_splits, epochs, device='cuda'):
    tscv = TimeSeriesSplit(n_splits=n_splits)
    accuracies = []

    for train_index, test_index in tscv.split(sequences):
        train_sequences, test_sequences = [sequences[i] for i in train_index], [sequences[i] for i in test_index]
        train_labels, test_labels = [labels[i] for i in train_index], [labels[i] for i in test_index]
        
        # Convert to tensors
        train_sequences, train_labels = convert_to_tensor(train_sequences, train_labels)
        test_sequences, test_labels = convert_to_tensor(test_sequences, test_labels)

        # Create dataLoaders
        train_data = torch.utils.data.DataLoader(list(zip(train_sequences, train_labels)), shuffle=True, batch_size=1)
        test_data = torch.utils.data.DataLoader(list(zip(test_sequences, test_labels)), shuffle=False, batch_size=1)
        
        # Initialize model
        model = model_class(input_layer=10, hidden_layer=150, output_layer=1).to(device)
        opt = optim.Adam(model.parameters(), lr=0.001, weight_decay=0.01)
        loss_func = nn.BCEWithLogitsLoss()

        # Train and evaluate
        train_errors, val_errors = trainer(model, train_data, test_data, loss_func, opt, epochs, device)
        test_accuracy = evaluate_model(model, test_data, device)
        accuracies.append(test_accuracy)
    
    return accuracies

# Load and prepare data
file_path = filedialog.askopenfilename(parent=root, title="Select A File")
ticker = pd.read_csv(file_path)
ticker, scaler = prep(ticker)

ticker

Unnamed: 0.1,Unnamed: 0,Open,High,Low,Close,Adj Close,Volume,CPI,Mortgage_rate,Unemp_rate,disposable_income,Personal_consumption_expenditure,personal_savings,RSI (14D),20 Day CCI,Williams %R,EMA (5D),MA10,MA50,Target
49,1991-07-01,-0.793459,-0.794274,-0.792834,-0.793512,481.309998,-1.135636,136.200,1.192941,0.476384,-1.159748,-1.111277,0.346438,-0.393576,0.125310,0.638341,463.315369,-0.809164,-0.823858,1
50,1991-08-01,-0.785622,-0.786959,-0.784756,-0.786118,504.149994,-1.103749,136.600,1.059833,0.534575,-1.154240,-1.110459,0.472577,-0.191907,0.497262,0.775676,476.926911,-0.806501,-0.822199,1
51,1991-10-01,-0.777571,-0.779061,-0.776878,-0.778232,528.510010,-1.101594,137.200,0.903402,0.592766,-1.144141,-1.108638,0.693320,0.010196,0.829999,0.772793,494.121277,-0.804351,-0.820580,1
52,1991-11-01,-0.772706,-0.774313,-0.772308,-0.774211,540.929993,-1.065890,137.800,0.845896,0.592766,-1.140367,-1.101353,0.598716,0.109872,0.866166,0.736029,509.724182,-0.800951,-0.818937,1
53,1992-04-01,-0.754432,-0.755420,-0.753462,-0.754411,602.090027,-1.086983,139.400,0.902396,0.825528,-1.074197,-1.067593,0.882529,0.537577,1.474526,0.775468,540.512797,-0.792942,-0.816853,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
279,2021-07-01,3.743657,3.726129,3.762053,3.752103,14522.379880,2.446944,272.184,-1.504201,-0.338284,1.852898,2.016264,1.071737,1.419937,0.430587,0.773295,13577.078971,3.066925,1.513104,1
280,2021-09-01,4.007581,3.998264,4.043490,4.006884,15309.379880,2.331010,274.214,-1.491332,-0.745619,1.761464,2.088586,0.314903,1.559019,0.470339,0.748838,14154.512607,3.253312,1.579819,0
281,2021-10-01,3.744058,3.749699,3.724410,3.766452,14566.700200,2.697769,276.590,-1.423974,-0.803809,1.763539,2.146814,0.125695,0.942353,0.102715,0.433729,14291.908471,3.396147,1.641224,1
282,2021-11-01,4.082774,4.068859,4.098527,4.099648,15595.919920,3.168282,278.524,-1.423974,-1.036572,1.762995,2.168300,0.157230,1.168795,0.277095,0.777112,14726.578954,3.514513,1.709745,0
