In [41]:
import pandas as pd
from sklearn.model_selection import train_test_split
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import cm
from sklearn import metrics
import numpy as npw
from sklearn.preprocessing import StandardScaler

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from sklearn.preprocessing import MinMaxScaler


# Read the CSV file
data = pd.read_csv("../data/data/aapl_raw_data.csv")

data = data.drop("date", axis=1)

data.isnull().sum()
data=data.fillna(0)  # Filling null values with zero
data.isnull().sum()

data = data.astype('float32')


# Keep data until 31.08.2023
data = data.iloc[:10731]

#print(data['open'].dtype)
#print(data.shape)

# Assuming 'data' is a pandas DataFrame
x_data = data[['open', 'high', 'low', 'volume', 'adjusted_close', 'change_percent', 'avg_vol_20d']]
y_data = data["close"]

# Now x_data and y_data are pandas DataFrames/Series, respectively

x_data.tail(1)




# Set the window size for training
train_window_size = 20

# Initialize lists to store training and temporary sets
x_train_list, y_train_list, x_temp_list, y_temp_list = [], [], [], []

# Iterate through the data with the specified window size
for i in range(0, len(x_data) - train_window_size, train_window_size + 1):
    x_train_temp = x_data.iloc[i:i+train_window_size+1]
    y_train_temp = y_data.iloc[i:i+train_window_size+1]

    # Separate the last row for the temporary set
    x_train = x_train_temp.iloc[:-1]
    y_train = y_train_temp.iloc[:-1]

    x_temp = x_train_temp.iloc[-1:]
    y_temp = y_train_temp.iloc[-1:]

    x_train_list.append(x_train)
    y_train_list.append(y_train)
    x_temp_list.append(x_temp)
    y_temp_list.append(y_temp)

# Concatenate the lists into pandas DataFrames
x_train = pd.concat(x_train_list)
y_train = pd.concat(y_train_list)
x_temp = pd.concat(x_temp_list)
y_temp = pd.concat(y_temp_list)

# print(y_train.head(50))
x_temp_train, x_temp_val, y_temp_train, y_temp_val = train_test_split(x_temp, y_temp, test_size=0.2, random_state=42)


# Split x_temp and y_temp into validation and test sets
x_val, x_test, y_val, y_test = train_test_split(
    x_temp, y_temp, test_size=0.5, random_state=42)


"""
# Print the last 5 rows of x_data
print("Last 5 rows of x_data:")
print(x_data.tail(5))

# Print the last 5 rows of x_train
print("\nLast 25 rows of x_train:")
print(x_train.tail(25))

print("\nLast 3 rows of y_train:")
print(y_temp.tail(3))
"""




scaler = MinMaxScaler()

x_train_normalized = scaler.fit_transform(x_train)
x_val_normalized = scaler.transform(x_val)
x_test_normalized = scaler.transform(x_test)

# Convert the data to PyTorch tensors
x_train_tensor = torch.tensor(x_train_normalized, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train.values, dtype=torch.float32).view(-1, 1)

x_val_tensor = torch.tensor(x_val_normalized, dtype=torch.float32)
y_val_tensor = torch.tensor(y_val.values, dtype=torch.float32).view(-1, 1)

x_test_tensor = torch.tensor(x_test_normalized, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test.values, dtype=torch.float32).view(-1, 1)




"""
# x_train_tensor inverse

x_test_original = scaler.inverse_transform(x_train_tensor.numpy())
print("\nFirst row of x_test_original:")
print(x_test_original[0])

print("\nFirst row of x_train:")
print(x_train.head(1))



print("\nLast row of x_test_original:")
print(x_test_original[-1])

print("\nLast row of x_train:")
print(x_train.tail(1))
"""



'\n# x_train_tensor inverse\n\nx_test_original = scaler.inverse_transform(x_train_tensor.numpy())\nprint("\nFirst row of x_test_original:")\nprint(x_test_original[0])\n\nprint("\nFirst row of x_train:")\nprint(x_train.head(1))\n\n\n\nprint("\nLast row of x_test_original:")\nprint(x_test_original[-1])\n\nprint("\nLast row of x_train:")\nprint(x_train.tail(1))\n'

In [44]:
import torch
import torch.nn as nn
import torch.optim as optim

# Define the LSTM model
class LSTMModel(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, output_size):
        super(LSTMModel, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x, hx, cx):
        out, (hn, cn) = self.lstm(x, (hx, cx))
        out = self.fc(out[:, -1, :])  # Taking the output from the last time step
        return out, (hn, cn)

# Initialize model and other hyperparameters
input_size = 7  # Number of features
hidden_size = 64  # Number of hidden units
output_size = 1  # Number of output units
num_layers = 16  # Number of LSTM layers
learning_rate = 0.0001
window_size = 10
stride = 1
num_epochs = 1
print_interval = 1000

# Initialize model, criterion, and optimizer
model = LSTMModel(input_size, hidden_size, num_layers, output_size)
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Initial hidden and cell states
hx = torch.zeros(num_layers, 1, hidden_size)  # Assuming batch size is 1 for simplicity
cx = torch.zeros(num_layers, 1, hidden_size)

# Training loop
for epoch in range(num_epochs):
    model.train()
    train_loss = 0.0

    # Iterate over the training data with a sliding window
    for i in range(0, len(x_train_tensor) - window_size + 1, stride):
        # Extract a segment of input and target sequences based on the window
        x_window = x_train_tensor[i:i+window_size].unsqueeze(0)  # Add batch dimension
        y_window = y_train_tensor[i:i+window_size]

        optimizer.zero_grad()
        outputs, (hn, cn) = model(x_window, hx, cx)  # Include initial hidden states
        loss = criterion(outputs.squeeze(0), y_window)  # Squeeze to match target dimensions
        loss.backward()
        optimizer.step()

        train_loss += loss.item()

        # Print the loss every print_interval iterations
        if (i + 1) % print_interval == 0:
            avg_loss = train_loss / min(print_interval, len(x_train_tensor) - window_size + 1 - i)
            print(f'Epoch [{epoch+1}/{num_epochs}], Iteration [{i+1}/{len(x_train_tensor) - window_size + 1}], Loss: {avg_loss:.6f}')
            train_loss = 0.0  # Reset the loss

    # Average the loss over all windows
    train_loss /= len(x_train_tensor) - window_size + 1
    train_losses.append(train_loss)

    # Validation and printing omitted for brevity


Epoch [1/3], Iteration [1000/10211], Loss: 501.533543
Epoch [1/3], Iteration [2000/10211], Loss: 729.443687
Epoch [1/3], Iteration [3000/10211], Loss: 823.493972
Epoch [1/3], Iteration [4000/10211], Loss: 109.409308
Epoch [1/3], Iteration [5000/10211], Loss: 1203.029473
Epoch [1/3], Iteration [6000/10211], Loss: 261.584988
Epoch [1/3], Iteration [7000/10211], Loss: 9343.494938
Epoch [1/3], Iteration [8000/10211], Loss: 159777.455990
Epoch [1/3], Iteration [9000/10211], Loss: 16581.715630
Epoch [1/3], Iteration [10000/10211], Loss: 115690.540478
Epoch [2/3], Iteration [1000/10211], Loss: 1225.690309
Epoch [2/3], Iteration [2000/10211], Loss: 681.830103
Epoch [2/3], Iteration [3000/10211], Loss: 177.644553
Epoch [2/3], Iteration [4000/10211], Loss: 671.220928
Epoch [2/3], Iteration [5000/10211], Loss: 874.370415
Epoch [2/3], Iteration [6000/10211], Loss: 198.423201
Epoch [2/3], Iteration [7000/10211], Loss: 7207.519338
Epoch [2/3], Iteration [8000/10211], Loss: 146655.841758
Epoch [2/3],

In [None]:
# Define the LSTM model
class LSTMModel(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, output_size):
        super(LSTMModel, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x, hx, cx):
        out, (hn, cn) = self.lstm(x, (hx, cx))
        out = self.fc(out[:, -1, :])  # Taking the output from the last time step
        return out, (hn, cn)

In [None]:
input_size = 7  # Number of features
hidden_size = 64  # Number of hidden units
output_size = 1  # Number of output units
num_layers = 128  # Number of LSTM layers

learning_rate = 0.0001  # Change this to your desired learning rate

for epoch in range(num_epochs):
    model.train()
    train_loss = 0.0

    # Iterate over the training data with a sliding window
    for i in range(0, len(x_train_tensor) - window_size + 1, stride):
        # Extract a segment of input and target sequences based on the window
        x_window = x_train_tensor[i:i+window_size]
        y_window = y_train_tensor[i:i+window_size]

        optimizer.zero_grad()
        outputs = model(x_window.unsqueeze(0))  # Add batch dimension
        loss = criterion(outputs.squeeze(0), y_window)  # Squeeze to match target dimensions
        loss.backward()
        optimizer.step()

        train_loss += loss.item()

        # Print the loss every 100 iterations
        if (i + 1) % print_interval == 0:
            avg_loss = train_loss / min(print_interval, len(x_train_tensor) - window_size + 1 - i)
            print(f'Epoch [{epoch+1}/{num_epochs}], Iteration [{i+1}/{len(x_train_tensor) - window_size + 1}], Loss: {avg_loss:.6f}')
            train_loss = 0.0  # Reset the loss

    # Average the loss over all windows
    train_loss /= len(x_train_tensor) - window_size + 1
    train_losses.append(train_loss)

    # Validation and printing omitted for brevity


Epoch [1/1], Iteration [1000/10211], Loss: 23.289828
Epoch [1/1], Iteration [2000/10211], Loss: 7.852781
Epoch [1/1], Iteration [3000/10211], Loss: 6.227853
Epoch [1/1], Iteration [4000/10211], Loss: 4.554815
Epoch [1/1], Iteration [5000/10211], Loss: 24.954045
Epoch [1/1], Iteration [6000/10211], Loss: 7.500409
Epoch [1/1], Iteration [7000/10211], Loss: 34.939914
Epoch [1/1], Iteration [8000/10211], Loss: 624.957047
Epoch [1/1], Iteration [9000/10211], Loss: 1074.647610
Epoch [1/1], Iteration [10000/10211], Loss: 2639.636837
