In [55]:
import pandas as pd
from sklearn.model_selection import train_test_split
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import cm
from sklearn import metrics
import numpy as npw
from sklearn.preprocessing import StandardScaler

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from sklearn.preprocessing import MinMaxScaler


# Load the raw AAPL table; the "date" column is discarded right away.
data = pd.read_csv("../data/data/aapl_raw_data.csv").drop("date", axis=1)

# Missing entries become 0 first, then every column is stored as float32.
data = data.fillna(0).astype('float32')

# Keep only the first 10731 rows (data until 31.08.2023).
data = data.iloc[:10731]

# Columns fed to the model as inputs; "close" is the prediction target.
feature_columns = [
    'open',
    'high',
    'low',
    'volume',
    'adjusted_close',
    'change_percent',
    'avg_vol_20d',
]
x_data = data[feature_columns]
y_data = data["close"]

# Display the last retained feature row as a quick check of the cutoff.
x_data.tail(1)

Unnamed: 0,open,high,low,volume,adjusted_close,change_percent,avg_vol_20d
10730,191.410004,192.669998,190.240005,46778000.0,190.422897,-0.59,54388236.0


In [56]:
# Number of consecutive rows per chunk that go to the training set
train_window_size = 20

# The data is walked in back-to-back chunks of train_window_size + 1 rows.
# The range stops early enough that every chunk is complete; rows left over
# after the last full chunk are not used.
chunk_starts = range(0, len(x_data) - train_window_size, train_window_size + 1)

# First train_window_size rows of each chunk -> training pieces
x_train_list = [x_data.iloc[start:start + train_window_size] for start in chunk_starts]
y_train_list = [y_data.iloc[start:start + train_window_size] for start in chunk_starts]

# Final row of each chunk -> held-out ("temporary") pieces
x_temp_list = [
    x_data.iloc[start + train_window_size:start + train_window_size + 1]
    for start in chunk_starts
]
y_temp_list = [
    y_data.iloc[start + train_window_size:start + train_window_size + 1]
    for start in chunk_starts
]

# Stitch the per-chunk pieces back together
x_train = pd.concat(x_train_list)
y_train = pd.concat(y_train_list)
x_temp = pd.concat(x_temp_list)
y_temp = pd.concat(y_temp_list)

# 80/20 split of the held-out rows.
# NOTE(review): none of these four names is read in the cells visible here.
x_temp_train, x_temp_val, y_temp_train, y_temp_val = train_test_split(
    x_temp, y_temp, test_size=0.2, random_state=42)

# Halve the held-out rows into the validation and test sets
x_val, x_test, y_val, y_test = train_test_split(
    x_temp, y_temp, test_size=0.5, random_state=42)


In [57]:
def _feature_tensor(array):
    """Return `array` as a float32 torch tensor."""
    return torch.tensor(array, dtype=torch.float32)


def _target_column(series):
    """Return the values of `series` as a float32 tensor with one column."""
    return torch.tensor(series.values, dtype=torch.float32).reshape(-1, 1)


# The scaler is fitted on the training features only; the validation and
# test features are transformed with that same fitted scaler.
scaler = MinMaxScaler()
x_train_normalized = scaler.fit_transform(x_train)
x_val_normalized = scaler.transform(x_val)
x_test_normalized = scaler.transform(x_test)

# Scaled features as PyTorch tensors
x_train_tensor = _feature_tensor(x_train_normalized)
x_val_tensor = _feature_tensor(x_val_normalized)
x_test_tensor = _feature_tensor(x_test_normalized)

# Targets (left unscaled) as single-column PyTorch tensors
y_train_tensor = _target_column(y_train)
y_val_tensor = _target_column(y_val)
y_test_tensor = _target_column(y_test)

In [58]:
# --- Network dimensions ---
input_size = len(x_train.columns)  # one model input per feature column of x_train
hidden_size = 64                   # size of the LSTM hidden state
num_layers = 4                     # number of stacked LSTM layers
output_size = 1                    # a single predicted value

# --- Sequence and optimisation settings ---
window_size = 50       # number of consecutive rows in each sequence given to the model
batch_size = 64        # passed to the model constructor
learning_rate = 0.001  # learning rate handed to the optimizer
epochs = 70            # number of passes of the training loop


In [59]:
class LSTNet(nn.Module):
    """Stacked LSTM whose last time-step output is mapped through a linear layer.

    Args:
        input_size: Number of features per time step.
        hidden_size: Size of the LSTM hidden state.
        output_size: Number of values produced per sequence.
        num_layers: Number of stacked LSTM layers.
        batch_size: Stored on the instance; not used by ``forward``.
        window_size: Number of time steps in every input sequence.
        learning_rate: Stored on the instance; not used by ``forward``.
        epochs: Stored on the instance; not used by ``forward``.
    """

    def __init__(self, input_size, hidden_size, output_size, num_layers=1, batch_size=1, window_size=1, learning_rate=0.001, epochs=10):
        super().__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.num_layers = num_layers
        self.batch_size = batch_size
        self.window_size = window_size
        self.learning_rate = learning_rate
        self.epochs = epochs

        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        """Run the network on one or more sequences.

        Args:
            x: Tensor that can be reshaped to
                ``(n_sequences, window_size, input_size)``. Both an already
                batched 3-D tensor and a flat
                ``(n_sequences * window_size, input_size)`` tensor are accepted.

        Returns:
            Tensor of shape ``(n_sequences, output_size)``.
        """
        # Reshape BEFORE sizing the initial states: for a flat input,
        # x.size(0) is n_sequences * window_size, which would not match the
        # batch dimension the LSTM actually receives.
        # reshape (rather than view) also accepts non-contiguous inputs.
        x = x.reshape(-1, self.window_size, self.input_size)

        # Fresh zero states for every call, created with the input's dtype and
        # device. They carry no autograd history, so gradients never flow
        # back into a previous call.
        state_shape = (self.num_layers, x.size(0), self.hidden_size)
        h0 = x.new_zeros(state_shape)
        c0 = x.new_zeros(state_shape)

        out, _ = self.lstm(x, (h0, c0))

        # Only the output at the last time step feeds the linear layer
        return self.fc(out[:, -1, :])


In [60]:
def _sliding_windows(features, targets, window):
    """Yield one (inputs, target) pair per position of a sliding window.

    Args:
        features: Tensor indexed by row along dimension 0.
        targets: Tensor with one target row per feature row.
        window: Number of consecutive rows per window.

    Yields:
        ``(features[start:start + window].unsqueeze(0),
        targets[start + window - 1].unsqueeze(0))`` for every ``start`` at
        which a full window fits. Nothing is yielded when ``features`` has
        fewer than ``window`` rows.
    """
    for start in range(0, len(features) - window + 1):
        stop = start + window
        yield features[start:stop].unsqueeze(0), targets[stop - 1].unsqueeze(0)


# Initialize the LSTNet model
model = LSTNet(input_size, hidden_size, output_size, num_layers, batch_size, window_size, learning_rate, epochs)

# Define the loss function and optimizer
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Lists to store training/validation loss for each epoch
train_loss_list, val_loss_list = [], []

# Training loop
for epoch in range(epochs):
    model.train()  # Set the model to training mode
    train_loss_epoch = 0.0
    train_steps = 0

    # One optimizer step per window, moving forward one row at a time
    for x_batch, y_batch in _sliding_windows(x_train_tensor, y_train_tensor, window_size):
        optimizer.zero_grad()

        outputs = model(x_batch)
        loss = criterion(outputs, y_batch)

        loss.backward()
        optimizer.step()

        train_loss_epoch += loss.item()
        train_steps += 1

    # Average over the windows actually processed. Dividing by the number of
    # rows would understate the mean, because only
    # len(x_train_tensor) - window_size + 1 windows exist. nan marks the case
    # where no window fits at all.
    avg_train_loss = train_loss_epoch / train_steps if train_steps else float('nan')
    train_loss_list.append(avg_train_loss)

    # Validation
    model.eval()  # Set the model to evaluation mode
    with torch.no_grad():
        val_loss_epoch = 0.0
        val_steps = 0
        for x_val_batch, y_val_batch in _sliding_windows(x_val_tensor, y_val_tensor, window_size):
            val_outputs = model(x_val_batch)
            val_loss = criterion(val_outputs, y_val_batch)

            val_loss_epoch += val_loss.item()
            val_steps += 1

        # Same averaging rule as for the training loss
        avg_val_loss = val_loss_epoch / val_steps if val_steps else float('nan')
        val_loss_list.append(avg_val_loss)

    # Print training and validation loss for each epoch
    print(f'Epoch [{epoch + 1}/{epochs}], Train Loss: {avg_train_loss:.4f}, Val Loss: {avg_val_loss:.4f}')

# Plot training and validation loss
plt.plot(train_loss_list, label='Training Loss')
plt.plot(val_loss_list, label='Validation Loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()
plt.show()


KeyboardInterrupt: 