In [7]:
import pandas as pd
from sklearn.model_selection import train_test_split
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import cm
from sklearn import metrics
import numpy as npw
from sklearn.preprocessing import StandardScaler

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from sklearn.preprocessing import MinMaxScaler


# Read the CSV file
data = pd.read_csv("../data/data/aapl_raw_data.csv")

data = data.drop("date", axis=1)

data.isnull().sum()
data=data.fillna(0)  # Filling null values with zero
data.isnull().sum()

data = data.astype('float32')


# Keep data until 31.08.2023
data = data.iloc[:10731]

#print(data['open'].dtype)
#print(data.shape)

# Assuming 'data' is a pandas DataFrame
x_data = data[['open', 'high', 'low', 'volume', 'adjusted_close', 'change_percent', 'avg_vol_20d']]
y_data = data["close"]

# Now x_data and y_data are pandas DataFrames/Series, respectively

x_data.tail(1)




# Set the window size for training
train_window_size = 20

# Initialize lists to store training and temporary sets
x_train_list, y_train_list, x_temp_list, y_temp_list = [], [], [], []

# Iterate through the data with the specified window size
for i in range(0, len(x_data) - train_window_size, train_window_size + 1):
    x_train_temp = x_data.iloc[i:i+train_window_size+1]
    y_train_temp = y_data.iloc[i:i+train_window_size+1]

    # Separate the last row for the temporary set
    x_train = x_train_temp.iloc[:-1]
    y_train = y_train_temp.iloc[:-1]

    x_temp = x_train_temp.iloc[-1:]
    y_temp = y_train_temp.iloc[-1:]

    x_train_list.append(x_train)
    y_train_list.append(y_train)
    x_temp_list.append(x_temp)
    y_temp_list.append(y_temp)

# Concatenate the lists into pandas DataFrames
x_train = pd.concat(x_train_list)
y_train = pd.concat(y_train_list)
x_temp = pd.concat(x_temp_list)
y_temp = pd.concat(y_temp_list)

# print(y_train.head(50))
x_temp_train, x_temp_val, y_temp_train, y_temp_val = train_test_split(x_temp, y_temp, test_size=0.2, random_state=42)


# Split x_temp and y_temp into validation and test sets
x_val, x_test, y_val, y_test = train_test_split(
    x_temp, y_temp, test_size=0.5, random_state=42)


"""
# Print the last 5 rows of x_data
print("Last 5 rows of x_data:")
print(x_data.tail(5))

# Print the last 5 rows of x_train
print("\nLast 25 rows of x_train:")
print(x_train.tail(25))

print("\nLast 3 rows of y_train:")
print(y_temp.tail(3))
"""




scaler = MinMaxScaler()

x_train_normalized = scaler.fit_transform(x_train)
x_val_normalized = scaler.transform(x_val)
x_test_normalized = scaler.transform(x_test)

# Convert the data to PyTorch tensors
x_train_tensor = torch.tensor(x_train_normalized, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train.values, dtype=torch.float32).view(-1, 1)

x_val_tensor = torch.tensor(x_val_normalized, dtype=torch.float32)
y_val_tensor = torch.tensor(y_val.values, dtype=torch.float32).view(-1, 1)

x_test_tensor = torch.tensor(x_test_normalized, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test.values, dtype=torch.float32).view(-1, 1)




"""
# x_train_tensor inverse

x_test_original = scaler.inverse_transform(x_train_tensor.numpy())
print("\nFirst row of x_test_original:")
print(x_test_original[0])

print("\nFirst row of x_train:")
print(x_train.head(1))



print("\nLast row of x_test_original:")
print(x_test_original[-1])

print("\nLast row of x_train:")
print(x_train.tail(1))
"""



'\n# x_train_tensor inverse\n\nx_test_original = scaler.inverse_transform(x_train_tensor.numpy())\nprint("\nFirst row of x_test_original:")\nprint(x_test_original[0])\n\nprint("\nFirst row of x_train:")\nprint(x_train.head(1))\n\n\n\nprint("\nLast row of x_test_original:")\nprint(x_test_original[-1])\n\nprint("\nLast row of x_train:")\nprint(x_train.tail(1))\n'

(num_layers, batch_size, hidden_size)

In [8]:
print(x_train_tensor.size())
print(x_val_tensor.size())
print(x_test_tensor.size())

print(y_train_tensor.shape)
print(y_val_tensor.shape)
print(y_test_tensor.shape)
print(y_train_tensor.dtype)
print(y_val_tensor.dtype)
print(y_test_tensor.dtype)
print(y_train.shape)



torch.Size([10220, 7])
torch.Size([255, 7])
torch.Size([256, 7])
torch.Size([10220, 1])
torch.Size([255, 1])
torch.Size([256, 1])
torch.float32
torch.float32
torch.float32
(10220,)


In [10]:
import torch
import torch.nn as nn

class LSTMModel(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers=1, learning_rate=0.001, window_size=1):
        super(LSTMModel, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.window_size = window_size
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, 1)
        self.learning_rate = learning_rate

    def forward(self, x):
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)
        c0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)

        out, _ = self.lstm(x, (h0, c0))

        out = self.fc(out[:, -1, :])
        return out

# Initialize the model with specified parameters

num_epochs = 40

input_size = 7  # Number of features
hidden_size = 64  # Number of LSTM units
num_layers = 1  # Number of LSTM layers
learning_rate = 0.002  # Learning rate
window_size = 1  # Window size
model = LSTMModel(input_size, hidden_size, num_layers, learning_rate, window_size)

# Define the loss function and optimizer
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

# Define the number of epochs and other hyperparameters


# Walk-forward validation training
for epoch in range(num_epochs):
    model.train()  # Set the model to training mode
    running_loss = 0.0

    # Train the model using x_train_tensor and y_train_tensor
    for i in range(len(x_train_tensor)):
        inputs = x_train_tensor[i].unsqueeze(0).unsqueeze(0)  # Add two extra dimensions for batch and sequence length
        labels = y_train_tensor[i]

        # Forward pass
        outputs = model(inputs)
        loss = criterion(outputs, labels)

        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    # Validate the model after each epoch using x_val_tensor and y_val_tensor
    model.eval()  # Set the model to evaluation mode
    val_loss = 0.0

    with torch.no_grad():
        for i in range(len(x_val_tensor)):
            inputs = x_val_tensor[i].unsqueeze(0).unsqueeze(0)
            labels = y_val_tensor[i]

            outputs = model(inputs)
            val_loss += criterion(outputs, labels)

    print(f'Epoch [{epoch + 1}/{num_epochs}], Training Loss: {running_loss / len(x_train_tensor)}, Validation Loss: {val_loss / len(x_val_tensor)}')

# After training, you can test the model similarly using x_test_tensor and y_test_tensor


Epoch [1/40], Training Loss: 3500.7135788455685, Validation Loss: 28938.677734375
Epoch [2/40], Training Loss: 704.0937008114194, Validation Loss: 10732.970703125
Epoch [3/40], Training Loss: 193.5464496340352, Validation Loss: 3901.673583984375
Epoch [4/40], Training Loss: 69.77921395364974, Validation Loss: 1730.45166015625
Epoch [5/40], Training Loss: 38.714995886982585, Validation Loss: 523.294677734375
Epoch [6/40], Training Loss: 24.947766534454505, Validation Loss: 280.80755615234375
Epoch [7/40], Training Loss: 18.148202322890132, Validation Loss: 168.93051147460938
Epoch [8/40], Training Loss: 13.533390395611642, Validation Loss: 110.64844512939453
Epoch [9/40], Training Loss: 10.760460673573643, Validation Loss: 78.00726318359375
Epoch [10/40], Training Loss: 9.135634566030348, Validation Loss: 66.69690704345703
Epoch [11/40], Training Loss: 8.120729917929394, Validation Loss: 65.50001525878906
Epoch [12/40], Training Loss: 7.448811996288567, Validation Loss: 67.7917938232421

In [None]:
import torch
import torch.nn as nn

class LSTMModel(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers=1, learning_rate=0.001, window_size=1):
        super(LSTMModel, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.window_size = window_size
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, 1)
        self.learning_rate = learning_rate

    def forward(self, x):
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)
        c0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)

        out, _ = self.lstm(x, (h0, c0))

        out = self.fc(out[:, -1, :])
        return out

# Initialize the model with specified parameters


model = LSTMModel(input_size, hidden_size, num_layers, learning_rate, window_size)


input_size = 7  # Number of features
hidden_size = 64  # Number of LSTM units
num_layers = 1  # Number of LSTM layers
learning_rate = 0.002  # Learning rate
window_size = 1  # Window size

num_epochs = 50


# Define the loss function and optimizer
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

# Define the number of epochs and other hyperparameters


# Walk-forward validation training
for epoch in range(num_epochs):
    model.train()  # Set the model to training mode
    running_loss = 0.0

    # Train the model using x_train_tensor and y_train_tensor
    for i in range(len(x_train_tensor)):
        inputs = x_train_tensor[i].unsqueeze(0).unsqueeze(0)  # Add two extra dimensions for batch and sequence length
        labels = y_train_tensor[i]

        # Forward pass
        outputs = model(inputs)
        loss = criterion(outputs, labels)

        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    # Validate the model after each epoch using x_val_tensor and y_val_tensor
    model.eval()  # Set the model to evaluation mode
    val_loss = 0.0

    with torch.no_grad():
        for i in range(len(x_val_tensor)):
            inputs = x_val_tensor[i].unsqueeze(0).unsqueeze(0)
            labels = y_val_tensor[i]

            outputs = model(inputs)
            val_loss += criterion(outputs, labels)

    print(f'Epoch [{epoch + 1}/{num_epochs}], Training Loss: {running_loss / len(x_train_tensor)}, Validation Loss: {val_loss / len(x_val_tensor)}')

# After training, you can test the model similarly using x_test_tensor and y_test_tensor


NameError: name 'input_size' is not defined

In [None]:
ä

NameError: name 'ä' is not defined

In [None]:
import torch
import torch.nn as nn

class LSTMModel(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers=1):
        super(LSTMModel, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, 1)

    def forward(self, x):
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)
        c0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)

        out, _ = self.lstm(x, (h0, c0))

        out = self.fc(out[:, -1, :])
        return out

# Initialize the model
input_size = 7  # Number of features
hidden_size = 64  # Number of LSTM units
num_layers = 1  # Number of LSTM layers
model = LSTMModel(input_size, hidden_size, num_layers)

# Define the loss function and optimizer
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# Define the number of epochs and other hyperparameters
num_epochs = 20

# Walk-forward validation training
for epoch in range(num_epochs):
    model.train()  # Set the model to training mode
    running_loss = 0.0

    # Train the model using x_train_tensor and y_train_tensor
    for i in range(len(x_train_tensor)):
        inputs = x_train_tensor[i].unsqueeze(0).unsqueeze(0)  # Add two extra dimensions for batch and sequence length
        labels = y_train_tensor[i]

        # Forward pass
        outputs = model(inputs)
        loss = criterion(outputs, labels)

        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    # Validate the model after each epoch using x_val_tensor and y_val_tensor
    model.eval()  # Set the model to evaluation mode
    val_loss = 0.0

    with torch.no_grad():
        for i in range(len(x_val_tensor)):
            inputs = x_val_tensor[i].unsqueeze(0).unsqueeze(0)
            labels = y_val_tensor[i]

            outputs = model(inputs)
            val_loss += criterion(outputs, labels)

    print(f'Epoch [{epoch + 1}/{num_epochs}], Training Loss: {running_loss / len(x_train_tensor)}, Validation Loss: {val_loss / len(x_val_tensor)}')

# After training, you can test the model similarly using x_test_tensor and y_test_tensor


  return F.mse_loss(input, target, reduction=self.reduction)


Epoch [1/20], Training Loss: 7866.308846315957, Validation Loss: 43718.625
Epoch [2/20], Training Loss: 3021.7532946506185, Validation Loss: 22354.06640625
Epoch [3/20], Training Loss: 1157.344041200229, Validation Loss: 11424.998046875
Epoch [4/20], Training Loss: 539.100078871751, Validation Loss: 3254.304931640625
Epoch [5/20], Training Loss: 266.58026361648194, Validation Loss: 1525.247314453125
Epoch [6/20], Training Loss: 141.06373034616854, Validation Loss: 1941.3118896484375
Epoch [7/20], Training Loss: 78.17904310129695, Validation Loss: 971.2310180664062
Epoch [8/20], Training Loss: 47.265845416082264, Validation Loss: 406.8347473144531
Epoch [9/20], Training Loss: 33.74324130602129, Validation Loss: 200.84503173828125
Epoch [10/20], Training Loss: 25.14998716014659, Validation Loss: 115.7063980102539
Epoch [11/20], Training Loss: 19.457609586389943, Validation Loss: 73.5960693359375
Epoch [12/20], Training Loss: 15.93100107560381, Validation Loss: 48.07429885864258
Epoch [13