In [5]:
import pandas as pd
from sklearn.model_selection import train_test_split
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import cm
from sklearn import metrics
import numpy as npw
from sklearn.preprocessing import StandardScaler

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from sklearn.preprocessing import MinMaxScaler


# Read the CSV file
data = pd.read_csv("../data/data/aapl_raw_data.csv")

data = data.drop("date", axis=1)

data.isnull().sum()
data=data.fillna(0)  # Filling null values with zero
data.isnull().sum()

data = data.astype('float32')


# Keep data until 31.08.2023
data = data.iloc[:10731]

#print(data['open'].dtype)
#print(data.shape)

# Assuming 'data' is a pandas DataFrame
x_data = data[['open', 'high', 'low', 'volume', 'adjusted_close', 'change_percent', 'avg_vol_20d']]
y_data = data["close"]

# Now x_data and y_data are pandas DataFrames/Series, respectively

x_data.tail(1)




# Set the window size for training
train_window_size = 20

# Initialize lists to store training and temporary sets
x_train_list, y_train_list, x_temp_list, y_temp_list = [], [], [], []

# Iterate through the data with the specified window size
for i in range(0, len(x_data) - train_window_size, train_window_size + 1):
    x_train_temp = x_data.iloc[i:i+train_window_size+1]
    y_train_temp = y_data.iloc[i:i+train_window_size+1]

    # Separate the last row for the temporary set
    x_train = x_train_temp.iloc[:-1]
    y_train = y_train_temp.iloc[:-1]

    x_temp = x_train_temp.iloc[-1:]
    y_temp = y_train_temp.iloc[-1:]

    x_train_list.append(x_train)
    y_train_list.append(y_train)
    x_temp_list.append(x_temp)
    y_temp_list.append(y_temp)

# Concatenate the lists into pandas DataFrames
x_train = pd.concat(x_train_list)
y_train = pd.concat(y_train_list)
x_temp = pd.concat(x_temp_list)
y_temp = pd.concat(y_temp_list)

# print(y_train.head(50))
x_temp_train, x_temp_val, y_temp_train, y_temp_val = train_test_split(x_temp, y_temp, test_size=0.2, random_state=42)


# Split x_temp and y_temp into validation and test sets
x_val, x_test, y_val, y_test = train_test_split(
    x_temp, y_temp, test_size=0.5, random_state=42)







scaler = MinMaxScaler()

x_train_normalized = scaler.fit_transform(x_train)
x_val_normalized = scaler.transform(x_val)
x_test_normalized = scaler.transform(x_test)

# Convert the data to PyTorch tensors
x_train_tensor = torch.tensor(x_train_normalized, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train.values, dtype=torch.float32).view(-1, 1)

x_val_tensor = torch.tensor(x_val_normalized, dtype=torch.float32)
y_val_tensor = torch.tensor(y_val.values, dtype=torch.float32).view(-1, 1)

x_test_tensor = torch.tensor(x_test_normalized, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test.values, dtype=torch.float32).view(-1, 1)






In [6]:
"""
learning_rate = 0.001
window_size = 50
test_window_size = 50
hidden_dim = 64
n_layers = 4
batch_evaluation_frequency = 10
epochs = len(x_train) - window_size
batch_size = 1
input_size = x_train.shape[1]  # Input size based on your dataset
output_size = 1  # Output size (for regression task)

"""

class LSTMModel(nn.Module):
    def __init__(self, input_size, hidden_dim, n_layers, output_size):
        super(LSTMModel, self).__init__()

        self.hidden_dim = hidden_dim
        self.n_layers = n_layers

        self.lstm = nn.LSTM(input_size, hidden_dim, n_layers, batch_first=True)
        self.fc = nn.Linear(hidden_dim, output_size)

    def forward(self, x, hidden):
        batch_size = x.size(0)
        out, hidden = self.lstm(x, hidden)
        out = self.fc(out[:, -1, :])
        return out, hidden

    def init_hidden(self, batch_size):
        weight = next(self.parameters()).data
        hidden = (weight.new(self.n_layers, batch_size, self.hidden_dim).zero_(),
                  weight.new(self.n_layers, batch_size, self.hidden_dim).zero_())
        return hidden


In [7]:
import torch
import torch.optim as optim
import torch.nn as nn
from itertools import product

# Assuming x_train_scaled, y_train_scaled, x_val_scaled, y_val_scaled, x_test_scaled, y_test_scaled,
# input_size, output_size, test_window_size, scaler are available


    # Print hyperparameters only once
    print(f"Hyperparameters: {params}")

    input_size = 7
    output_size = 1

    model = LSTMModel(input_size, params['hidden_dim'], params['n_layers'], output_size)
    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=params['learning_rate'])

    # Training using Walk-Forward Validation
    for i in range(params['window_size'], len(x_train)):
        optimizer.zero_grad()

        start_idx = i - params['window_size']
        end_idx = i
        x_window = torch.tensor(x_train_tensor[start_idx:end_idx], dtype=torch.float32)
        y_window = torch.tensor(y_train_tensor[start_idx:end_idx], dtype=torch.float32)
        x_window = x_window.view(1, params['window_size'], input_size)

        hidden = model.init_hidden(1)
        outputs, hidden = model(x_window, hidden)
        loss = criterion(outputs, y_window)

        loss.backward()
        optimizer.step()

        if i % params['batch_evaluation_frequency'] == 0:
            with torch.no_grad():
                x_val_window = torch.tensor(x_val_tensor[:params['window_size']], dtype=torch.float32)
                y_val_window = torch.tensor(y_val_tensor[:params['window_size']], dtype=torch.float32)
                x_val_window = x_val_window.view(1, params['window_size'], input_size)

                hidden = model.init_hidden(1)
                val_outputs, _ = model(x_val_window, hidden)
                val_loss = criterion(val_outputs, y_val_window)

                if i % 1000 == 0:  # Print every 1000 iterations
                    print(f"Iteration {i}, Train Loss: {loss.item()}, Validation Loss: {val_loss.item()}")

    # Test set evaluation
    with torch.no_grad():
        x_test_window = torch.tensor(x_test_tensor[:params['window_size']], dtype=torch.float32)
        y_test_window = torch.tensor(y_test_tensor[:params['window_size']], dtype=torch.float32)
        x_test_window = x_test_window.view(1, params['window_size'], input_size)

        hidden = model.init_hidden(1)
        test_outputs, _ = model(x_test_window, hidden)
        test_loss = criterion(test_outputs, y_test_window)

    print(f"Test Loss for parameters {params}: {test_loss.item()}")

    # Update the best_params if the current model performs better on the test set
    if best_params is None:
        best_params = params.copy()
        best_params['test_loss'] = test_loss.item()
    elif test_loss < best_params['test_loss']:
        best_params = params.copy()
        best_params['test_loss'] = test_loss.item()

print(f"Best Parameters: {best_params}")


Hyperparameters: {'learning_rate': 1e-05, 'window_size': 5, 'hidden_dim': 256, 'n_layers': 8, 'batch_evaluation_frequency': 5}


  x_window = torch.tensor(x_train_tensor[start_idx:end_idx], dtype=torch.float32)
  y_window = torch.tensor(y_train_tensor[start_idx:end_idx], dtype=torch.float32)
  return F.mse_loss(input, target, reduction=self.reduction)
  x_val_window = torch.tensor(x_val_tensor[:params['window_size']], dtype=torch.float32)
  y_val_window = torch.tensor(y_val_tensor[:params['window_size']], dtype=torch.float32)


Iteration 1000, Train Loss: 332.5583801269531, Validation Loss: 9417.529296875
Iteration 2000, Train Loss: 427.4424743652344, Validation Loss: 8965.142578125
Iteration 3000, Train Loss: 1652.799072265625, Validation Loss: 8539.595703125
Iteration 4000, Train Loss: 2.128542423248291, Validation Loss: 8366.58203125
Iteration 5000, Train Loss: 19.744075775146484, Validation Loss: 8042.1376953125
Iteration 6000, Train Loss: 1954.186279296875, Validation Loss: 7925.7958984375
Iteration 7000, Train Loss: 31323.359375, Validation Loss: 7336.06640625
Iteration 8000, Train Loss: 257966.65625, Validation Loss: 6757.73046875
Iteration 9000, Train Loss: 24523.64453125, Validation Loss: 6639.24755859375
Iteration 10000, Train Loss: 16394.1015625, Validation Loss: 6427.556640625


  x_test_window = torch.tensor(x_test_tensor[:params['window_size']], dtype=torch.float32)
  y_test_window = torch.tensor(y_test_tensor[:params['window_size']], dtype=torch.float32)


Test Loss for parameters {'learning_rate': 1e-05, 'window_size': 5, 'hidden_dim': 256, 'n_layers': 8, 'batch_evaluation_frequency': 5}: 1881.4996337890625
Hyperparameters: {'learning_rate': 1e-05, 'window_size': 5, 'hidden_dim': 256, 'n_layers': 8, 'batch_evaluation_frequency': 10}
Iteration 1000, Train Loss: 313.94427490234375, Validation Loss: 9337.021484375
Iteration 2000, Train Loss: 406.4568786621094, Validation Loss: 8888.2822265625
Iteration 3000, Train Loss: 1611.148193359375, Validation Loss: 8465.4970703125
Iteration 4000, Train Loss: 3.6939234733581543, Validation Loss: 8297.640625
Iteration 5000, Train Loss: 24.149866104125977, Validation Loss: 7977.3095703125
Iteration 6000, Train Loss: 1915.3638916015625, Validation Loss: 7866.20068359375
Iteration 7000, Train Loss: 31164.994140625, Validation Loss: 7279.61474609375
Iteration 8000, Train Loss: 257508.375, Validation Loss: 6705.1650390625
Iteration 9000, Train Loss: 24383.345703125, Validation Loss: 6587.91015625
Iteration