In [1]:
import torch
import peewee
import pandas as pd
import torch.optim as optim
import torch.nn as nn

In [2]:
import sys

# Put the parent directory on the import path so the `src.*` imports used by
# this notebook can resolve (presumably `src` lives one level up -- confirm).
# The membership check keeps the cell idempotent: re-running it does not stack
# duplicate "../" entries onto sys.path.
if "../" not in sys.path:
    sys.path.append("../")

In [3]:
from src.model.data import create_dataloaders
from src.model.main import LSTM

In [4]:
# Run on the GPU when PyTorch reports CUDA as available; otherwise use the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

device  # echo the selected device as the cell output

'cuda'

#### Prepare Data

In [5]:
# Open the SQLite dataset file through peewee.
db = peewee.SqliteDatabase(database="../data/dataset.sqlite3")

# Connection object from peewee; it is passed to pandas by the query cell.
conn = db.connection()

# Table listing reported by the database; the query cell reads from tables[0].
tables = db.get_tables()

In [6]:
stock = "AAPL"

# Bind the symbol as a query parameter instead of splicing it into the SQL
# text, so a value containing quotes cannot break or alter the statement.
# Table names cannot be bound as parameters, so the first table reported by
# the database is still interpolated into the FROM clause.
stock_df = pd.read_sql(
    f"SELECT * FROM {tables[0]} WHERE symbol = ?",
    conn,
    params=(stock,),
)
stock_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 338142 entries, 0 to 338141
Data columns (total 7 columns):
 #   Column     Non-Null Count   Dtype  
---  ------     --------------   -----  
 0   symbol     338142 non-null  object 
 1   timestamp  338142 non-null  object 
 2   open       338142 non-null  float64
 3   high       338142 non-null  float64
 4   low        338142 non-null  float64
 5   close      338142 non-null  float64
 6   volume     338142 non-null  float64
dtypes: float64(5), object(2)
memory usage: 18.1+ MB


#### Hyperparameters

In [7]:
# --- Model dimensions ---
# Each timestep carries the five OHLCV values, and the model predicts OHLCV too.
input_size = output_size = 5
hidden_size, num_layers = 50, 2

# --- Data windowing and split (all three are passed to create_dataloaders) ---
seq_length = 10
batch_size = 32
test_size = 0.2

# --- Optimisation ---
num_epochs = 10
learning_rate = 1e-3

In [8]:
# Build the train/test loaders from the selected symbol's rows, using the
# sequence length, batch size and test size set in the hyperparameter cell.
train_dataloader, test_dataloader = create_dataloaders(
    stock_df,
    seq_length,
    batch_size,
    test_size,
)

DatetimeIndex(['2016-01-01 00:00:00+00:00', '2016-01-01 00:10:00+00:00',
               '2016-01-01 00:15:00+00:00', '2016-01-01 00:35:00+00:00',
               '2016-01-01 00:45:00+00:00', '2016-01-01 00:50:00+00:00',
               '2016-01-01 00:55:00+00:00', '2016-01-04 09:00:00+00:00',
               '2016-01-04 09:05:00+00:00', '2016-01-04 09:10:00+00:00',
               ...
               '2023-11-21 15:50:00+00:00', '2023-11-21 15:55:00+00:00',
               '2023-11-21 16:00:00+00:00', '2023-11-21 16:05:00+00:00',
               '2023-11-21 16:10:00+00:00', '2023-11-21 16:15:00+00:00',
               '2023-11-21 16:20:00+00:00', '2023-11-21 16:25:00+00:00',
               '2023-11-21 16:30:00+00:00', '2023-11-21 16:35:00+00:00'],
              dtype='datetime64[ns, UTC]', name='timestamp', length=338142, freq=None)


In [9]:
# Instantiate the project's LSTM with the configured sizes; the selected
# device string is handed to the constructor as its last argument.
model = LSTM(
    input_size,
    hidden_size,
    num_layers,
    output_size,
    device,
)

In [10]:
# Training setup: follow the device recorded on the model, score predictions
# with mean-squared error, and update the weights with Adam.
# NOTE(review): the saved output of this cell is `KeyError: 50497`, so the
# recorded run did not finish and no traceback is preserved. Presumably it was
# raised while batches were being drawn from `train_dataloader` -- confirm
# against the dataset code in src.model.data.
device = model.device
criterion = nn.MSELoss()
optimizer = optim.Adam(params=model.parameters(), lr=learning_rate)

for epoch in range(num_epochs):
    model.train()
    total_loss = 0  # running sum of the per-batch losses for this epoch

    for inputs, targets in train_dataloader:
        # Cast both tensors to float and move them onto the training device.
        inputs = inputs.float().to(device)
        targets = targets.float().to(device)

        # One optimisation step: clear gradients, forward, loss, backward, update.
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()

        total_loss = total_loss + loss.item()

    # Mean of the batch losses (one term per batch yielded by the loader).
    average_loss = total_loss / len(train_dataloader)
    print(f"Epoch [{epoch + 1}/{num_epochs}], Loss: {average_loss:.4f}")

print("Training finished.")

KeyError: 50497