In [3]:
# Import necessary libraries
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
import torch
import torch.nn as nn
import itertools
import random

# Define your LSTM model class
class LSTMModel(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, learning_rate, window_size):
        super(LSTMModel, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.window_size = window_size
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, 1)
        self.learning_rate = learning_rate

    def forward(self, x):
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)
        c0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)

        out, _ = self.lstm(x, (h0, c0))

        out = self.fc(out[:, -1, :])
        return out


# Define a function to split the data
def split_data_with_window(x_in, y_in, split_window_size):
    x_out1_list, y_out1_list, x_out2_list, y_out2_list = [], [], [], []

    for i in range(0, len(x_in) - split_window_size, split_window_size + 1):
        x_out1_out2 = x_in.iloc[i:i + split_window_size + 1]
        y_out1_out2 = y_in.iloc[i:i + split_window_size + 1]

        x_out1 = x_out1_out2.iloc[:-1]
        y_out1 = y_out1_out2.iloc[:-1]

        x_out2 = x_out1_out2.iloc[-1:]
        y_out2 = y_out1_out2.iloc[-1:]

        x_out1_list.append(x_out1)
        y_out1_list.append(y_out1)
        x_out2_list.append(x_out2)
        y_out2_list.append(y_out2)

    x_out1 = pd.concat(x_out1_list)
    y_out1 = pd.concat(y_out1_list)
    x_out2 = pd.concat(x_out2_list)
    y_out2 = pd.concat(y_out2_list)

    return x_out1, y_out1, x_out2, y_out2


# Read the CSV file
data = pd.read_csv("../data/data/aapl_raw_data.csv")
data = data.drop("date", axis=1)
data = data.fillna(0)  # Filling null values with zero
data = data.astype('float32')

# Keep data until 31.07.2023
data = data.iloc[:10747]

# Set random seeds for reproducibility
seed = 42
torch.manual_seed(seed)
torch.cuda.manual_seed(seed) if torch.cuda.is_available() else None
np.random.seed(seed)
random.seed(seed)

# Ensuring deterministic behavior in cuDNN
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

# Define the hyperparameters to search over
input_sizes = [7]
hidden_sizes = [4]
num_layers_list = [1]
learning_rates = [0.0003]
window_sizes = [5]

num_epochs = 1500
patience = 10  # Number of epochs to wait for improvement

# Combine hyperparameters into a list of tuples
hyperparameter_combinations = list(itertools.product(input_sizes, hidden_sizes, num_layers_list, learning_rates, window_sizes))

# Loop through each column to use it as the target variable
for target_column in data.columns:
    print(f"Training model with target variable: {target_column}")
    print()

    # Set the target column as y_data and the rest as x_data
    y_data = data[target_column]
    x_data = data.drop(columns=[target_column])

    # Split Data to train and temp
    split_window_size = 3
    x_train, y_train, x_temp, y_temp = split_data_with_window(x_data, y_data, split_window_size)

    # Split temp into val and test
    split_window_size = 1
    x_val, y_val, x_test, y_test = split_data_with_window(x_temp, y_temp, split_window_size)

    # Normalize the data
    scaler = MinMaxScaler()
    x_train_normalized = scaler.fit_transform(x_train)
    x_val_normalized = scaler.transform(x_val)
    x_test_normalized = scaler.transform(x_test)

    # Convert to PyTorch tensors
    x_train_tensor = torch.tensor(x_train_normalized, dtype=torch.float32)
    y_train_tensor = torch.tensor(y_train.values, dtype=torch.float32).view(-1, 1)

    x_val_tensor = torch.tensor(x_val_normalized, dtype=torch.float32)
    y_val_tensor = torch.tensor(y_val.values, dtype=torch.float32).view(-1, 1)

    x_test_tensor = torch.tensor(x_test_normalized, dtype=torch.float32)
    y_test_tensor = torch.tensor(y_test.values, dtype=torch.float32).view(-1, 1)

    # Walk-forward validation training with sliding window for each hyperparameter combination
    for hyperparams in hyperparameter_combinations:
        input_size, hidden_size, num_layers, learning_rate, window_size = hyperparams

        # Print hyperparameters
        print(f"Hyperparameters: input_size={input_size}, hidden_size={hidden_size}, num_layers={num_layers}, learning_rate={learning_rate}, window_size={window_size}")

        # Initialize the model
        model = LSTMModel(input_size, hidden_size, num_layers, learning_rate, window_size)

        # Define the loss function and optimizer
        criterion = nn.MSELoss()
        optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

        best_val_loss = float('inf')
        counter = 0

        # Train the model
        for epoch in range(num_epochs):
            model.train()
            running_loss = 0.0

            for i in range(len(x_train_tensor)):
                window_end = min(i + window_size, len(x_train_tensor))
                inputs = x_train_tensor[i:window_end].unsqueeze(0)
                labels = y_train_tensor[window_end - 1]

                optimizer.zero_grad()
                outputs = model(inputs)
                loss = criterion(outputs, labels)
                loss.backward()
                optimizer.step()

                running_loss += loss.item()

            model.eval()
            val_loss = 0.0

            with torch.no_grad():
                for i in range(len(x_val_tensor)):
                    window_end = min(i + window_size, len(x_val_tensor))
                    inputs = x_val_tensor[i:window_end].unsqueeze(0)
                    labels = y_val_tensor[window_end - 1]

                    outputs = model(inputs)
                    val_loss += criterion(outputs, labels)

            # Early stopping based on validation loss
            if val_loss < best_val_loss:
                best_val_loss = val_loss
                counter = 0
            else:
                counter += 1
                if counter >= patience:
                    print(f'Early stopping at epoch {epoch}')
                    break

            print(f'Epoch [{epoch + 1}/{num_epochs}], Training Loss: {running_loss / len(x_train_tensor)}, Validation Loss: {val_loss / len(x_val_tensor)}')

        # Calculate test loss after training is complete
        test_loss = 0.0
        with torch.no_grad():
            for i in range(len(x_test_tensor)):
                window_end = min(i + window_size, len(x_test_tensor))
                inputs = x_test_tensor[i:window_end].unsqueeze(0)
                labels = y_test_tensor[window_end - 1]

                outputs = model(inputs)
                test_loss += criterion(outputs, labels)

        print(f'Final Test Loss: {test_loss / len(x_test_tensor)}')

        for _ in range(4):
            print()

print()

Training model with target variable: open

Hyperparameters: input_size=7, hidden_size=4, num_layers=1, learning_rate=0.0003, window_size=5


  return F.mse_loss(input, target, reduction=self.reduction)


Epoch [1/1500], Training Loss: 27801.95051053062, Validation Loss: 27333.595703125
Epoch [2/1500], Training Loss: 26070.488700325834, Validation Loss: 25755.142578125
Epoch [3/1500], Training Loss: 24662.73662761667, Validation Loss: 24430.470703125
Epoch [4/1500], Training Loss: 23505.198705265935, Validation Loss: 23341.79296875
Epoch [5/1500], Training Loss: 22545.683825416607, Validation Loss: 22402.91015625
Epoch [6/1500], Training Loss: 21548.952729497578, Validation Loss: 21434.84375
Epoch [7/1500], Training Loss: 20618.004249594615, Validation Loss: 20512.015625
Epoch [8/1500], Training Loss: 19748.32208769504, Validation Loss: 19656.845703125
Epoch [9/1500], Training Loss: 18944.06635768439, Validation Loss: 18862.791015625
Epoch [10/1500], Training Loss: 18192.59623123343, Validation Loss: 18121.794921875
Epoch [11/1500], Training Loss: 17488.50295575279, Validation Loss: 17427.73046875
Epoch [12/1500], Training Loss: 16826.384332044327, Validation Loss: 16777.42578125
Epoch 

  return F.mse_loss(input, target, reduction=self.reduction)


Epoch [1/1500], Training Loss: 28281.659234074155, Validation Loss: 27806.3359375
Epoch [2/1500], Training Loss: 26533.99723871147, Validation Loss: 26215.443359375
Epoch [3/1500], Training Loss: 25113.788917967962, Validation Loss: 24877.890625
Epoch [4/1500], Training Loss: 23943.234187347804, Validation Loss: 23775.986328125
Epoch [5/1500], Training Loss: 22936.79370956132, Validation Loss: 22783.927734375
Epoch [6/1500], Training Loss: 21892.648935449328, Validation Loss: 21768.2734375
Epoch [7/1500], Training Loss: 20945.055619894592, Validation Loss: 20830.08984375
Epoch [8/1500], Training Loss: 20071.324757329847, Validation Loss: 19967.435546875
Epoch [9/1500], Training Loss: 19256.12814365151, Validation Loss: 19163.798828125
Epoch [10/1500], Training Loss: 18494.212879692055, Validation Loss: 18413.2890625
Epoch [11/1500], Training Loss: 17779.80272702703, Validation Loss: 17711.71875
Epoch [12/1500], Training Loss: 17108.456250595966, Validation Loss: 17054.474609375
Epoch [

  return F.mse_loss(input, target, reduction=self.reduction)


Epoch [1/1500], Training Loss: 27127.337133915556, Validation Loss: 26666.6796875
Epoch [2/1500], Training Loss: 25431.52050930531, Validation Loss: 25123.58984375
Epoch [3/1500], Training Loss: 24059.307048660845, Validation Loss: 23833.494140625
Epoch [4/1500], Training Loss: 22935.944802061855, Validation Loss: 22777.708984375
Epoch [5/1500], Training Loss: 21977.99566765365, Validation Loss: 21833.84375
Epoch [6/1500], Training Loss: 21001.929491792416, Validation Loss: 20887.74609375
Epoch [7/1500], Training Loss: 20084.7975334564, Validation Loss: 19982.294921875
Epoch [8/1500], Training Loss: 19238.10773718441, Validation Loss: 19144.90625
Epoch [9/1500], Training Loss: 18451.63924005573, Validation Loss: 18368.87109375
Epoch [10/1500], Training Loss: 17717.03197906525, Validation Loss: 17645.03515625
Epoch [11/1500], Training Loss: 17028.20943644479, Validation Loss: 16968.404296875
Epoch [12/1500], Training Loss: 16380.780460223341, Validation Loss: 16334.9111328125
Epoch [13/

  return F.mse_loss(input, target, reduction=self.reduction)


Epoch [1/1500], Training Loss: 27660.41091913312, Validation Loss: 27200.44140625
Epoch [2/1500], Training Loss: 25948.54856126681, Validation Loss: 25640.982421875
Epoch [3/1500], Training Loss: 24560.14757714108, Validation Loss: 24334.330078125
Epoch [4/1500], Training Loss: 23420.33017742317, Validation Loss: 23261.955078125
Epoch [5/1500], Training Loss: 22486.369191083235, Validation Loss: 22362.85546875
Epoch [6/1500], Training Loss: 21464.82079566286, Validation Loss: 21366.92578125
Epoch [7/1500], Training Loss: 20526.33781095909, Validation Loss: 20427.46875
Epoch [8/1500], Training Loss: 19667.860242012714, Validation Loss: 19577.041015625
Epoch [9/1500], Training Loss: 18866.27071641596, Validation Loss: 18784.291015625
Epoch [10/1500], Training Loss: 18115.629528030822, Validation Loss: 18046.47265625
Epoch [11/1500], Training Loss: 17411.965850229015, Validation Loss: 17362.724609375
Epoch [12/1500], Training Loss: 16751.3747150233, Validation Loss: 16721.66015625
Epoch [

  return F.mse_loss(input, target, reduction=self.reduction)


Epoch [1/1500], Training Loss: 1081681903064891.8, Validation Loss: 1084111821209600.0
Epoch [2/1500], Training Loss: 1081681628218269.5, Validation Loss: 1084111619883008.0
Epoch [3/1500], Training Loss: 1081681329434942.2, Validation Loss: 1084111418556416.0
Epoch [4/1500], Training Loss: 1081681058511754.9, Validation Loss: 1084111015903232.0
Epoch [5/1500], Training Loss: 1081680750945815.5, Validation Loss: 1084110814576640.0
Epoch [6/1500], Training Loss: 1081680475964818.8, Validation Loss: 1084110613250048.0
Epoch [7/1500], Training Loss: 1081680169934761.4, Validation Loss: 1084110411923456.0
Epoch [8/1500], Training Loss: 1081679912018406.1, Validation Loss: 1084110076379136.0
Epoch [9/1500], Training Loss: 1081679623515738.6, Validation Loss: 1084109539508224.0
Epoch [10/1500], Training Loss: 1081679371779612.6, Validation Loss: 1084109136855040.0
Epoch [11/1500], Training Loss: 1081679074928910.8, Validation Loss: 1084108935528448.0
Epoch [12/1500], Training Loss: 108167880

  return F.mse_loss(input, target, reduction=self.reduction)


Epoch [1/1500], Training Loss: 1477.0827132002764, Validation Loss: 1601.29443359375
Epoch [2/1500], Training Loss: 1375.9381709412535, Validation Loss: 1556.0877685546875
Epoch [3/1500], Training Loss: 1227.1264117063918, Validation Loss: 1549.349365234375
Epoch [4/1500], Training Loss: 1176.0651796448917, Validation Loss: 1585.2747802734375
Epoch [5/1500], Training Loss: 1108.0236272536438, Validation Loss: 1603.22509765625
Epoch [6/1500], Training Loss: 1129.3897837722923, Validation Loss: 1469.6185302734375
Epoch [7/1500], Training Loss: 975.8385512424835, Validation Loss: 1558.1126708984375
Epoch [8/1500], Training Loss: 840.1363723915558, Validation Loss: 2129.787841796875
Epoch [9/1500], Training Loss: 779.0273987774591, Validation Loss: 2287.599853515625
Epoch [10/1500], Training Loss: 719.0314337880462, Validation Loss: 2354.13916015625
Epoch [11/1500], Training Loss: 674.8785216691358, Validation Loss: 2372.530029296875
Epoch [12/1500], Training Loss: 638.2442117021714, Valid

  return F.mse_loss(input, target, reduction=self.reduction)


Epoch [1/1500], Training Loss: 7.71623811945319, Validation Loss: 8.313543319702148
Epoch [2/1500], Training Loss: 7.693008788534319, Validation Loss: 8.313610076904297
Epoch [3/1500], Training Loss: 7.691792592723835, Validation Loss: 8.313133239746094
Epoch [4/1500], Training Loss: 7.6907254041559145, Validation Loss: 8.312504768371582
Epoch [5/1500], Training Loss: 7.689439067895689, Validation Loss: 8.311439514160156
Epoch [6/1500], Training Loss: 7.687862586589357, Validation Loss: 8.310015678405762
Epoch [7/1500], Training Loss: 7.686076526201698, Validation Loss: 8.308466911315918
Epoch [8/1500], Training Loss: 7.683973968357831, Validation Loss: 8.306602478027344
Epoch [9/1500], Training Loss: 7.681275566763107, Validation Loss: 8.304149627685547
Epoch [10/1500], Training Loss: 7.677921568677332, Validation Loss: 8.301270484924316
Epoch [11/1500], Training Loss: 7.674313079771361, Validation Loss: 8.298552513122559
Epoch [12/1500], Training Loss: 7.670632182302673, Validation L

  return F.mse_loss(input, target, reduction=self.reduction)


Epoch [1/1500], Training Loss: 1.7908883906995037e+17, Validation Loss: 1.781535426310308e+17
Epoch [2/1500], Training Loss: 1.7908883174111565e+17, Validation Loss: 1.781535082712924e+17
Epoch [3/1500], Training Loss: 1.7908882816462256e+17, Validation Loss: 1.7815349109142323e+17
Epoch [4/1500], Training Loss: 1.7908881441549414e+17, Validation Loss: 1.7815349109142323e+17
Epoch [5/1500], Training Loss: 1.790888077089176e+17, Validation Loss: 1.7815349109142323e+17
Epoch [6/1500], Training Loss: 1.7908880426669635e+17, Validation Loss: 1.7815345673168486e+17
Epoch [7/1500], Training Loss: 1.7908879354706304e+17, Validation Loss: 1.7815345673168486e+17
Epoch [8/1500], Training Loss: 1.7908878871883514e+17, Validation Loss: 1.7815345673168486e+17
Epoch [9/1500], Training Loss: 1.7908878380299632e+17, Validation Loss: 1.7815345673168486e+17
Epoch [10/1500], Training Loss: 1.7908876871913194e+17, Validation Loss: 1.7815343955181568e+17
Epoch [11/1500], Training Loss: 1.790887641009086e+1

In [1]:
# Import necessary libraries
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
import torch
import torch.nn as nn

# Define your LSTM model class
class LSTMModel(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, learning_rate, window_size):
        super(LSTMModel, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.window_size = window_size
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, 1)
        self.learning_rate = learning_rate

    def forward(self, x):
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)
        c0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)

        out, _ = self.lstm(x, (h0, c0))

        out = self.fc(out[:, -1, :])
        return out


# Define a function to split the data
def split_data_with_window(x_in, y_in, split_window_size):
    x_out1_list, y_out1_list, x_out2_list, y_out2_list = [], [], [], []

    for i in range(0, len(x_in) - split_window_size, split_window_size + 1):
        x_out1_out2 = x_in.iloc[i:i + split_window_size + 1]
        y_out1_out2 = y_in.iloc[i:i + split_window_size + 1]

        x_out1 = x_out1_out2.iloc[:-1]
        y_out1 = y_out1_out2.iloc[:-1]

        x_out2 = x_out1_out2.iloc[-1:]
        y_out2 = y_out1_out2.iloc[-1:]

        x_out1_list.append(x_out1)
        y_out1_list.append(y_out1)
        x_out2_list.append(x_out2)
        y_out2_list.append(y_out2)

    x_out1 = pd.concat(x_out1_list)
    y_out1 = pd.concat(y_out1_list)
    x_out2 = pd.concat(x_out2_list)
    y_out2 = pd.concat(y_out2_list)

    return x_out1, y_out1, x_out2, y_out2


# Read the CSV file
data = pd.read_csv("../data/data/aapl_raw_data.csv")
data = data.drop("date", axis=1)
data = data.fillna(0)  # Filling null values with zero
data = data.astype('float32')

# Keep data until 31.07.2023
data = data.iloc[:10747]

# Loop through each column to use it as the target variable
for target_column in data.columns:
    print(f"Training model with target variable: {target_column}")

    # Set the target column as y_data and the rest as x_data
    y_data = data[target_column]
    x_data = data.drop(columns=[target_column])

    # Split Data to train and temp
    split_window_size = 3
    x_train, y_train, x_temp, y_temp = split_data_with_window(x_data, y_data, split_window_size)

    # Split temp into val and test
    split_window_size = 1
    x_val, y_val, x_test, y_test = split_data_with_window(x_temp, y_temp, split_window_size)

    # Normalize the data
    scaler = MinMaxScaler()
    x_train_normalized = scaler.fit_transform(x_train)
    x_val_normalized = scaler.transform(x_val)
    x_test_normalized = scaler.transform(x_test)

    # Convert to PyTorch tensors
    x_train_tensor = torch.tensor(x_train_normalized, dtype=torch.float32)
    y_train_tensor = torch.tensor(y_train.values, dtype=torch.float32).view(-1, 1)

    x_val_tensor = torch.tensor(x_val_normalized, dtype=torch.float32)
    y_val_tensor = torch.tensor(y_val.values, dtype=torch.float32).view(-1, 1)

    x_test_tensor = torch.tensor(x_test_normalized, dtype=torch.float32)
    y_test_tensor = torch.tensor(y_test.values, dtype=torch.float32).view(-1, 1)

    # At this point, you can train your LSTM model with the `x_train_tensor` and `y_train_tensor`.
    # And you can evaluate it using `x_val_tensor`, `y_val_tensor`, etc.


Training model with target variable: open
Training model with target variable: high
Training model with target variable: low
Training model with target variable: close
Training model with target variable: volume
Training model with target variable: adjusted_close
Training model with target variable: change_percent
Training model with target variable: avg_vol_20d
