Train Loss: 2.0018, Val Loss: 3.0510, Test Loss: 4.1544
Hyperparameters: input_size=7, hidden_size=8, num_layers=2, learning_rate=0.0002, window_size=1
training set is used after every iteration

In [27]:
import pandas as pd
from sklearn.model_selection import train_test_split
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import cm
from sklearn import metrics
import numpy as npw
from sklearn.preprocessing import StandardScaler

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from sklearn.preprocessing import MinMaxScaler


# Read the CSV file
data = pd.read_csv("../data/data/aapl_raw_data.csv")

data = data.drop("date", axis=1)

data.isnull().sum()
data=data.fillna(0)  # Filling null values with zero
data.isnull().sum()

data = data.astype('float32')


# Keep data until 31.08.2023
data = data.iloc[:10731]

#print(data['open'].dtype)
#print(data.shape)

# Assuming 'data' is a pandas DataFrame
x_data = data[['open', 'high', 'low', 'volume', 'adjusted_close', 'change_percent', 'avg_vol_20d']]
y_data = data["close"]

# Now x_data and y_data are pandas DataFrames/Series, respectively

x_data.tail(1)




# size of the window for data preparation
split_window_size = 20

# Initialize lists to store training and temporary sets
x_train_list, y_train_list, x_temp_list, y_temp_list = [], [], [], []

# Iterate through the data with the specified window size
for i in range(0, len(x_data) - split_window_size, split_window_size + 1):
    x_train_temp = x_data.iloc[i:i+split_window_size+1]
    y_train_temp = y_data.iloc[i:i+split_window_size+1]

    # Separate the last row for the temporary set
    # [ :-1]: all elements except the last one
    # [-1:]:  selects only the last element

    x_train = x_train_temp.iloc[:-1]
    y_train = y_train_temp.iloc[:-1]

    x_temp = x_train_temp.iloc[-1:]
    y_temp = y_train_temp.iloc[-1:]

    x_train_list.append(x_train)
    y_train_list.append(y_train)
    x_temp_list.append(x_temp)
    y_temp_list.append(y_temp)

# Concatenate the lists into pandas DataFrames
x_train = pd.concat(x_train_list)
y_train = pd.concat(y_train_list)
x_temp = pd.concat(x_temp_list)
y_temp = pd.concat(y_temp_list)

# print(y_train.head(50))
x_temp_train, x_temp_val, y_temp_train, y_temp_val = train_test_split(x_temp, y_temp, test_size=0.2, random_state=42)


# Split x_temp and y_temp into validation and test sets
x_val, x_test, y_val, y_test = train_test_split(
    x_temp, y_temp, test_size=0.5, random_state=42)


"""
# Print the last 5 rows of x_data
print("Last 5 rows of x_data:")
print(x_data.tail(5))

# Print the last 5 rows of x_train
print("\nLast 25 rows of x_train:")
print(x_train.tail(25))

print("\nLast 3 rows of y_train:")
print(y_temp.tail(3))
"""




scaler = MinMaxScaler()

x_train_normalized = scaler.fit_transform(x_train)
x_val_normalized = scaler.transform(x_val)
x_test_normalized = scaler.transform(x_test)

# Convert the data to PyTorch tensors
x_train_tensor = torch.tensor(x_train_normalized, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train.values, dtype=torch.float32).view(-1, 1)

x_val_tensor = torch.tensor(x_val_normalized, dtype=torch.float32)
y_val_tensor = torch.tensor(y_val.values, dtype=torch.float32).view(-1, 1)

x_test_tensor = torch.tensor(x_test_normalized, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test.values, dtype=torch.float32).view(-1, 1)




"""
# x_train_tensor inverse

x_test_original = scaler.inverse_transform(x_train_tensor.numpy())
print("\nFirst row of x_test_original:")
print(x_test_original[0])

print("\nFirst row of x_train:")
print(x_train.head(1))



print("\nLast row of x_test_original:")
print(x_test_original[-1])

print("\nLast row of x_train:")
print(x_train.tail(1))
"""



'\n# x_train_tensor inverse\n\nx_test_original = scaler.inverse_transform(x_train_tensor.numpy())\nprint("\nFirst row of x_test_original:")\nprint(x_test_original[0])\n\nprint("\nFirst row of x_train:")\nprint(x_train.head(1))\n\n\n\nprint("\nLast row of x_test_original:")\nprint(x_test_original[-1])\n\nprint("\nLast row of x_train:")\nprint(x_train.tail(1))\n'

In [28]:
class LSTMModel(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, learning_rate, window_size):
        super(LSTMModel, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.window_size = window_size
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, 1)
        self.learning_rate = learning_rate

    def forward(self, x):
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)
        c0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)

        out, _ = self.lstm(x, (h0, c0))

        out = self.fc(out[:, -1, :])
        return out

In [29]:
import torch
import torch.nn as nn
import itertools

# Define the hyperparameters to search over
input_sizes = [7]
hidden_sizes = [8]
num_layers_list = [2]
learning_rates = [0.0002]
window_sizes = [1]

num_epochs = 500
patience = 20  # Number of epochs to wait for improvement

# Combine hyperparameters into a list of tuples
hyperparameter_combinations = list(itertools.product(input_sizes, hidden_sizes, num_layers_list, learning_rates, window_sizes))

# Walk-forward validation training with sliding window for each hyperparameter combination
for hyperparams in hyperparameter_combinations:
    input_size, hidden_size, num_layers, learning_rate, window_size = hyperparams

    # Print hyperparameters
    print(f"Hyperparameters: input_size={input_size}, hidden_size={hidden_size}, num_layers={num_layers}, learning_rate={learning_rate}, window_size={window_size}")

    # Initialize the model
    model = LSTMModel(input_size, hidden_size, num_layers, learning_rate, window_size)

    # Define the loss function and optimizer
    criterion = nn.MSELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

    best_test_loss = float('inf')
    counter = 0

    # Train the model
    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0

        for i in range(len(x_train_tensor)):
            window_end = min(i + window_size, len(x_train_tensor))
            inputs = x_train_tensor[i:window_end].unsqueeze(0)
            labels = y_train_tensor[window_end - 1]

            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()

        model.eval()
        val_loss = 0.0

        with torch.no_grad():
            for i in range(len(x_val_tensor)):
                window_end = min(i + window_size, len(x_val_tensor))
                inputs = x_val_tensor[i:window_end].unsqueeze(0)
                labels = y_val_tensor[window_end - 1]

                outputs = model(inputs)
                val_loss += criterion(outputs, labels)

        # Calculate test loss
        test_loss = 0.0
        with torch.no_grad():
            for i in range(len(x_test_tensor)):
                window_end = min(i + window_size, len(x_test_tensor))
                inputs = x_test_tensor[i:window_end].unsqueeze(0)
                labels = y_test_tensor[window_end - 1]

                outputs = model(inputs)
                test_loss += criterion(outputs, labels)

        # Early stopping based on validation loss
        if val_loss < best_test_loss:
            best_test_loss = val_loss
            counter = 0
        else:
            counter += 1
            if counter >= patience:
                print(f'Early stopping at epoch {epoch}, validation loss: {best_test_loss}, test loss at early stopping: {test_loss}')
                break

        print(f'Epoch [{epoch + 1}/{num_epochs}], Training Loss: {running_loss / len(x_train_tensor)}, Validation Loss: {val_loss / len(x_val_tensor)}, Test Loss: {test_loss / len(x_test_tensor)}')

    # Print the final test loss after completing training for each hyperparameter combination
    print(f'Final Test Loss: {test_loss / len(x_test_tensor)}')


Hyperparameters: input_size=7, hidden_size=8, num_layers=2, learning_rate=0.0002, window_size=1


  return F.mse_loss(input, target, reduction=self.reduction)


Epoch [1/500], Training Loss: 27107.32438824564, Validation Loss: 28256.951171875, Test Loss: 25254.98828125
Epoch [2/500], Training Loss: 25115.585912200026, Validation Loss: 26405.896484375, Test Loss: 23520.52734375
Epoch [3/500], Training Loss: 23611.505554012343, Validation Loss: 24964.21875, Test Loss: 22179.646484375
Epoch [4/500], Training Loss: 22387.90740230075, Validation Loss: 23790.21875, Test Loss: 21095.2109375
Epoch [5/500], Training Loss: 21161.465193682758, Validation Loss: 22524.9140625, Test Loss: 19897.6328125
Epoch [6/500], Training Loss: 20064.927739588464, Validation Loss: 21377.58984375, Test Loss: 18838.0625
Epoch [7/500], Training Loss: 19060.80516629821, Validation Loss: 20370.33984375, Test Loss: 17908.4375
Epoch [8/500], Training Loss: 18143.09917406111, Validation Loss: 19420.521484375, Test Loss: 17034.103515625
Epoch [9/500], Training Loss: 17297.910440589956, Validation Loss: 18553.01171875, Test Loss: 16237.9541015625
Epoch [10/500], Training Loss: 16

In [30]:
ä

NameError: name 'ä' is not defined

In [None]:
import torch
import torch.nn as nn
import itertools

# Define the hyperparameters to search over
input_sizes = [7]
hidden_sizes = [8]
num_layers_list = [2]
learning_rates = [0.0006, 0.0004]
window_sizes = [1]

num_epochs = 500
patience = 20  # Number of epochs to wait for improvement

# Combine hyperparameters into a list of tuples
hyperparameter_combinations = list(itertools.product(input_sizes, hidden_sizes, num_layers_list, learning_rates, window_sizes))

# Walk-forward validation training with sliding window for each hyperparameter combination
for hyperparams in hyperparameter_combinations:
    input_size, hidden_size, num_layers, learning_rate, window_size = hyperparams

    # Print hyperparameters
    print(f"Hyperparameters: input_size={input_size}, hidden_size={hidden_size}, num_layers={num_layers}, learning_rate={learning_rate}, window_size={window_size}")

    # Initialize the model
    model = LSTMModel(input_size, hidden_size, num_layers, learning_rate, window_size)

    # Define the loss function and optimizer
    criterion = nn.MSELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

    best_test_loss = float('inf')
    counter = 0

    # Train the model
    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0

        for i in range(len(x_train_tensor)):
            window_end = min(i + window_size, len(x_train_tensor))
            inputs = x_train_tensor[i:window_end].unsqueeze(0)
            labels = y_train_tensor[window_end - 1]

            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()

        model.eval()
        val_loss = 0.0

        with torch.no_grad():
            for i in range(len(x_val_tensor)):
                window_end = min(i + window_size, len(x_val_tensor))
                inputs = x_val_tensor[i:window_end].unsqueeze(0)
                labels = y_val_tensor[window_end - 1]

                outputs = model(inputs)
                val_loss += criterion(outputs, labels)

        # Calculate test loss
        test_loss = 0.0
        with torch.no_grad():
            for i in range(len(x_test_tensor)):
                window_end = min(i + window_size, len(x_test_tensor))
                inputs = x_test_tensor[i:window_end].unsqueeze(0)
                labels = y_test_tensor[window_end - 1]

                outputs = model(inputs)
                test_loss += criterion(outputs, labels)

        # Early stopping based on validation loss
        if val_loss < best_test_loss:
            best_test_loss = val_loss
            counter = 0
        else:
            counter += 1
            if counter >= patience:
                print(f'Early stopping at epoch {epoch}, validation loss: {best_test_loss}, test loss at early stopping: {test_loss}')
                break

        print(f'Epoch [{epoch + 1}/{num_epochs}], Training Loss: {running_loss / len(x_train_tensor)}, Validation Loss: {val_loss / len(x_val_tensor)}, Test Loss: {test_loss / len(x_test_tensor)}')

    # Print the final test loss after completing training for each hyperparameter combination
    print(f'Final Test Loss: {test_loss / len(x_test_tensor)}')


Hyperparameters: input_size=7, hidden_size=8, num_layers=2, learning_rate=0.0006, window_size=1


  return F.mse_loss(input, target, reduction=self.reduction)


Epoch [1/500], Training Loss: 23400.067186187778, Validation Loss: 23787.876953125, Test Loss: 21094.40234375
Epoch [2/500], Training Loss: 20244.6513738632, Validation Loss: 21352.197265625, Test Loss: 18887.7421875
Epoch [3/500], Training Loss: 17393.558002350288, Validation Loss: 18985.783203125, Test Loss: 16705.267578125
Epoch [4/500], Training Loss: 15195.608644208347, Validation Loss: 16387.623046875, Test Loss: 14279.40625
Epoch [5/500], Training Loss: 13456.616412303509, Validation Loss: 14389.6611328125, Test Loss: 12469.0830078125
Epoch [6/500], Training Loss: 12021.760525339072, Validation Loss: 12937.287109375, Test Loss: 11169.9033203125
Epoch [7/500], Training Loss: 10833.14004425371, Validation Loss: 11980.4892578125, Test Loss: 10338.5068359375
Epoch [8/500], Training Loss: 9841.269588847867, Validation Loss: 11135.1064453125, Test Loss: 9608.9443359375
Epoch [9/500], Training Loss: 9020.20066013425, Validation Loss: 10250.5615234375, Test Loss: 8827.7421875
Epoch [10/

In [None]:
import torch
import torch.nn as nn
import itertools

# Define the hyperparameters to search over
input_sizes = [7]
hidden_sizes = [8]
num_layers_list = [2]
learning_rates = [0.0012, 0.0008]
window_sizes = [1]

num_epochs = 500
patience = 20  # Number of epochs to wait for improvement

# Combine hyperparameters into a list of tuples
hyperparameter_combinations = list(itertools.product(input_sizes, hidden_sizes, num_layers_list, learning_rates, window_sizes))

# Walk-forward validation training with sliding window for each hyperparameter combination
for hyperparams in hyperparameter_combinations:
    input_size, hidden_size, num_layers, learning_rate, window_size = hyperparams

    # Print hyperparameters
    print(f"Hyperparameters: input_size={input_size}, hidden_size={hidden_size}, num_layers={num_layers}, learning_rate={learning_rate}, window_size={window_size}")

    # Initialize the model
    model = LSTMModel(input_size, hidden_size, num_layers, learning_rate, window_size)

    # Define the loss function and optimizer
    criterion = nn.MSELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

    best_test_loss = float('inf')
    counter = 0

    # Train the model
    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0

        for i in range(len(x_train_tensor)):
            window_end = min(i + window_size, len(x_train_tensor))
            inputs = x_train_tensor[i:window_end].unsqueeze(0)
            labels = y_train_tensor[window_end - 1]

            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()

        model.eval()
        val_loss = 0.0

        with torch.no_grad():
            for i in range(len(x_val_tensor)):
                window_end = min(i + window_size, len(x_val_tensor))
                inputs = x_val_tensor[i:window_end].unsqueeze(0)
                labels = y_val_tensor[window_end - 1]

                outputs = model(inputs)
                val_loss += criterion(outputs, labels)

        # Calculate test loss
        test_loss = 0.0
        with torch.no_grad():
            for i in range(len(x_test_tensor)):
                window_end = min(i + window_size, len(x_test_tensor))
                inputs = x_test_tensor[i:window_end].unsqueeze(0)
                labels = y_test_tensor[window_end - 1]

                outputs = model(inputs)
                test_loss += criterion(outputs, labels)

        # Early stopping based on validation loss
        if val_loss < best_test_loss:
            best_test_loss = val_loss
            counter = 0
        else:
            counter += 1
            if counter >= patience:
                print(f'Early stopping at epoch {epoch}, validation loss: {best_test_loss}, test loss at early stopping: {test_loss}')
                break

        print(f'Epoch [{epoch + 1}/{num_epochs}], Training Loss: {running_loss / len(x_train_tensor)}, Validation Loss: {val_loss / len(x_val_tensor)}, Test Loss: {test_loss / len(x_test_tensor)}')

    # Print the final test loss after completing training for each hyperparameter combination
    print(f'Final Test Loss: {test_loss / len(x_test_tensor)}')


Hyperparameters: input_size=7, hidden_size=8, num_layers=2, learning_rate=0.0012, window_size=1


  return F.mse_loss(input, target, reduction=self.reduction)


Epoch [1/500], Training Loss: 19900.45519139746, Validation Loss: 20405.23046875, Test Loss: 18058.982421875
Epoch [2/500], Training Loss: 15509.017286085409, Validation Loss: 18517.537109375, Test Loss: 16548.25390625
Epoch [3/500], Training Loss: 12145.154373642414, Validation Loss: 13132.23828125, Test Loss: 11415.3798828125
Epoch [4/500], Training Loss: 9944.878583889671, Validation Loss: 10766.58984375, Test Loss: 9289.70703125
Epoch [5/500], Training Loss: 8256.98115338049, Validation Loss: 11346.1689453125, Test Loss: 9878.734375
Epoch [6/500], Training Loss: 7015.786163869513, Validation Loss: 10157.4833984375, Test Loss: 8842.259765625
Epoch [7/500], Training Loss: 5757.158187709201, Validation Loss: 7460.89990234375, Test Loss: 6469.998046875
Epoch [8/500], Training Loss: 4810.848219185448, Validation Loss: 5872.1328125, Test Loss: 4964.98388671875
Epoch [9/500], Training Loss: 4039.0613734527287, Validation Loss: 4892.9765625, Test Loss: 4101.79638671875
Epoch [10/500], Trai

In [None]:
ä

NameError: name 'ä' is not defined

In [None]:
import torch
import torch.nn as nn
import itertools

# Define the hyperparameters to search over
input_sizes = [7]
hidden_sizes = [8]
num_layers_list = [2]
learning_rates = [0.0011, 0.001, 0.0009]
window_sizes = [1]

num_epochs = 500
patience = 20  # Number of epochs to wait for improvement

# Combine hyperparameters into a list of tuples
hyperparameter_combinations = list(itertools.product(input_sizes, hidden_sizes, num_layers_list, learning_rates, window_sizes))

# Walk-forward validation training with sliding window for each hyperparameter combination
for hyperparams in hyperparameter_combinations:
    input_size, hidden_size, num_layers, learning_rate, window_size = hyperparams

    # Print hyperparameters
    print(f"Hyperparameters: input_size={input_size}, hidden_size={hidden_size}, num_layers={num_layers}, learning_rate={learning_rate}, window_size={window_size}")

    # Initialize the model
    model = LSTMModel(input_size, hidden_size, num_layers, learning_rate, window_size)

    # Define the loss function and optimizer
    criterion = nn.MSELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

    best_test_loss = float('inf')
    counter = 0

    # Train the model
    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0

        for i in range(len(x_train_tensor)):
            window_end = min(i + window_size, len(x_train_tensor))
            inputs = x_train_tensor[i:window_end].unsqueeze(0)
            labels = y_train_tensor[window_end - 1]

            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()

        model.eval()
        val_loss = 0.0

        with torch.no_grad():
            for i in range(len(x_val_tensor)):
                window_end = min(i + window_size, len(x_val_tensor))
                inputs = x_val_tensor[i:window_end].unsqueeze(0)
                labels = y_val_tensor[window_end - 1]

                outputs = model(inputs)
                val_loss += criterion(outputs, labels)

        # Calculate test loss
        test_loss = 0.0
        with torch.no_grad():
            for i in range(len(x_test_tensor)):
                window_end = min(i + window_size, len(x_test_tensor))
                inputs = x_test_tensor[i:window_end].unsqueeze(0)
                labels = y_test_tensor[window_end - 1]

                outputs = model(inputs)
                test_loss += criterion(outputs, labels)

        # Early stopping based on test loss
        if test_loss < best_test_loss:
            best_test_loss = test_loss
            counter = 0
        else:
            counter += 1
            if counter >= patience:
                print(f'Early stopping at epoch {epoch}, test loss: {best_test_loss}')
                break

        print(f'Epoch [{epoch + 1}/{num_epochs}], Training Loss: {running_loss / len(x_train_tensor)}, Validation Loss: {val_loss / len(x_val_tensor)}, Test Loss: {test_loss / len(x_test_tensor)}')

    # If early stopping occurs, move to the next hyperparameter combination
    else:
        continue  # Move to the next iteration of the hyperparameter loop

    # Print the final test loss after completing training for each hyperparameter combination
    print(f'Final Test Loss: {best_test_loss}')


Hyperparameters: input_size=7, hidden_size=8, num_layers=2, learning_rate=0.0011, window_size=1


  return F.mse_loss(input, target, reduction=self.reduction)


Epoch [1/500], Training Loss: 20374.349270097126, Validation Loss: 20789.828125, Test Loss: 18392.9296875
Epoch [2/500], Training Loss: 16022.835676761357, Validation Loss: 18612.896484375, Test Loss: 16568.322265625
Epoch [3/500], Training Loss: 12731.088056067203, Validation Loss: 13649.501953125, Test Loss: 11865.953125
Epoch [4/500], Training Loss: 10515.325082226664, Validation Loss: 11325.3330078125, Test Loss: 9776.712890625
Epoch [5/500], Training Loss: 8906.803613678194, Validation Loss: 9761.9990234375, Test Loss: 8377.490234375
Epoch [6/500], Training Loss: 7554.342636131226, Validation Loss: 11537.259765625, Test Loss: 10005.46875
Epoch [7/500], Training Loss: 6474.952864637629, Validation Loss: 9368.8681640625, Test Loss: 8108.6064453125
Epoch [8/500], Training Loss: 5365.121101685171, Validation Loss: 7061.2685546875, Test Loss: 6047.544921875
Epoch [9/500], Training Loss: 22526.0639765272, Validation Loss: 24056.880859375, Test Loss: 21341.6875
Epoch [10/500], Training L