In [2]:
import tqdm
import pandas as pd
import numpy as np
from tqdm import tqdm, notebook
#tqdm_notebook().pandas()
#notebook.tqdm().pandas()
from sklearn.preprocessing import MinMaxScaler
import torch
import torch.nn as nn
import pytorch_lightning as pl
from torch import optim
from pytorch_lightning.callbacks import ModelCheckpoint, EarlyStopping
from pytorch_lightning.loggers import TensorBoardLogger
from torch.utils.data import DataLoader

In [None]:
# Build the model's feature table in one vectorized pass instead of looping
# over df.iterrows(), which creates a Series object for every single row.
# pd.to_datetime is a no-op for datetime64 columns and also accepts an
# object column of Timestamps.
dates = pd.to_datetime(df["date"])

# Every column is passed as a plain NumPy array so the result gets a fresh
# 0..n-1 index, exactly like a frame built from a list of row records.
features_df = pd.DataFrame(
    {
        "day_of_week": dates.dt.dayofweek.to_numpy(),    # Monday == 0
        "day_of_month": dates.dt.day.to_numpy(),
        # ISO week number; isocalendar() yields a nullable UInt32 column, so
        # convert it to a plain integer array.
        "week_of_year": dates.dt.isocalendar().week.to_numpy(dtype="int64"),
        "month": dates.dt.month.to_numpy(),
        "open": df["open"].to_numpy(),
        "high": df["high"].to_numpy(),
        "low": df["low"].to_numpy(),
        "close_change": df["close_change"].to_numpy(),
        "close": df["close"].to_numpy(),
    }
)

In [None]:
# Chronological 90/10 split: the first 90% of rows train the model, the
# remaining rows are held out.
train_size = int(0.9 * len(features_df))

train_df = features_df.iloc[:train_size]
test_df = features_df.iloc[train_size:]
print((len(train_df), len(test_df), len(features_df)))

In [None]:
# Fit the scaler on the training rows only. Fitting on the full feature
# table would leak the held-out rows' min/max into preprocessing and make
# the evaluation optimistic. As a consequence, scaled held-out values may
# fall slightly outside the (-1, 1) range.
# Slicing features_df (rather than reading train_df) keeps this cell
# idempotent even after train_df has been replaced by its scaled version.
scaler = MinMaxScaler(feature_range=(-1, 1))
scaler = scaler.fit(features_df.iloc[:train_size])

In [None]:
# Replace both splits with their scaled versions. scaler.transform returns a
# bare array, so each result is re-wrapped with the split's own index and
# column labels.
train_df, test_df = (
    pd.DataFrame(
        scaler.transform(split),
        index=split.index,
        columns=split.columns)
    for split in (train_df, test_df)
)

In [None]:
def create_sequences(input_data: pd.DataFrame, target_column, sequence_length):
  """Cut a frame into sliding windows, each paired with the next row's target.

  Window ``i`` holds rows ``i .. i + sequence_length - 1`` (by position) and
  its label is the ``target_column`` value of row ``i + sequence_length``.

  Args:
    input_data: Frame whose rows are consecutive time steps.
    target_column: Name of the column to predict.
    sequence_length: Number of rows in every window; must be at least 1.

  Returns:
    A list of ``(window_frame, label)`` tuples. The list is empty when the
    frame has no more than ``sequence_length`` rows.

  Raises:
    ValueError: If ``sequence_length`` is smaller than 1.
    KeyError: If ``target_column`` is not a column of ``input_data``.
  """
  if sequence_length < 1:
    raise ValueError(f"sequence_length must be >= 1, got {sequence_length}")

  # Look the target column up once rather than materialising a whole row
  # Series for every window just to read a single value from it.
  targets = input_data[target_column]
  n_windows = len(input_data) - sequence_length

  sequences = []
  for start in tqdm(range(n_windows)):
    stop = start + sequence_length
    # .iloc makes the positional intent explicit; plain [] slicing switches
    # to label-based lookup for some index types.
    sequences.append((input_data.iloc[start:stop], targets.iloc[stop]))
  return sequences

In [None]:
# Number of consecutive rows fed to the model for each prediction.
SEQUENCE_LENGTH = 120

# Window both splits the same way, predicting the 'close' column.
train_sequences, test_sequences = (
    create_sequences(split, 'close', SEQUENCE_LENGTH)
    for split in (train_df, test_df)
)

In [None]:
class BTCDataset(torch.utils.data.Dataset):
  """Map-style dataset over pre-built ``(sequence, label)`` pairs.

  Each pair holds a DataFrame window and a scalar label, as produced by
  ``create_sequences``. Items are converted to float32 tensors on access.
  """

  def __init__(self, sequences):
    """Store the list of ``(DataFrame, label)`` tuples without copying it."""
    self.sequences = sequences

  def __len__(self):
    """Return the number of stored pairs."""
    return len(self.sequences)

  def __getitem__(self, idx):
    """Return pair ``idx`` as ``dict(sequence=<2-D tensor>, label=<0-D tensor>)``."""
    sequence, label = self.sequences[idx]

    # Use torch.tensor with an explicit dtype instead of the legacy
    # torch.Tensor(...) constructor, so the float32 conversion is visible.
    return dict(
        sequence=torch.tensor(sequence.to_numpy(), dtype=torch.float32),
        label=torch.tensor(label, dtype=torch.float32)
    )

In [None]:
class BTCPriceDataModule(pl.LightningDataModule):
  """Lightning data module that serves the windowed price sequences.

  ``batch_size`` applies to the training loader only; the validation and
  test loaders always yield one sample per batch. No loader shuffles.
  """

  def __init__(self, train_sequences, test_sequences, batch_size=8):
    super().__init__()
    self.batch_size = batch_size
    self.train_sequences = train_sequences
    self.test_sequences = test_sequences

  def setup(self, stage=None):
    """Wrap both sequence lists in ``BTCDataset`` objects; ``stage`` is ignored."""
    self.train_dataset = BTCDataset(self.train_sequences)
    self.test_dataset = BTCDataset(self.test_sequences)

  def _held_out_loader(self):
    """Build the single-sample, in-order loader over ``test_dataset``."""
    return DataLoader(
        self.test_dataset,
        batch_size=1,
        shuffle=False,
        num_workers=1)

  def train_dataloader(self):
    """Return an in-order loader over the training windows."""
    loader_options = dict(
        batch_size=self.batch_size,
        shuffle=False,
        num_workers=2)
    return DataLoader(self.train_dataset, **loader_options)

  def val_dataloader(self):
    """Return the validation loader.

    NOTE(review): this reads the same ``test_dataset`` as ``test_dataloader``,
    so validation metrics are computed on the held-out test windows.
    """
    return self._held_out_loader()

  def test_dataloader(self):
    """Return the test loader over the held-out windows."""
    return self._held_out_loader()

In [None]:
# Training hyper-parameters: N_EPOCHS caps the trainer, BATCH_SIZE sizes the
# training batches.
N_EPOCHS = 8
BATCH_SIZE = 64

data_module = BTCPriceDataModule(
    train_sequences=train_sequences,
    test_sequences=test_sequences,
    batch_size=BATCH_SIZE,
)
# Build the datasets right away so they can be inspected before training.
data_module.setup()

In [None]:
# Stand-alone dataset over the training windows, independent of data_module.
train_dataset = BTCDataset(sequences=train_sequences)

In [None]:
class PricePredictModel(nn.Module):
  """Stacked LSTM encoder followed by a linear head with a single output.

  Args:
    n_features: Number of input features per time step.
    n_hidden: Size of the LSTM hidden state.
    n_layers: Number of stacked LSTM layers.
    dropout: Dropout probability applied between stacked LSTM layers. It is
      forced to 0 when ``n_layers`` is 1, because there is no inter-layer
      connection to drop and PyTorch warns about the unused setting.
  """

  def __init__(self, n_features, n_hidden=128, n_layers=2, dropout=0.2):
    super().__init__()
    self.n_hidden = n_hidden
    self.lstm = nn.LSTM(
        input_size=n_features,
        hidden_size=n_hidden,
        batch_first=True,
        num_layers=n_layers,
        dropout=dropout if n_layers > 1 else 0.0)
    self.regressor = nn.Linear(n_hidden, 1)

  def forward(self, x):
    """Map a ``(batch, seq_len, n_features)`` tensor to ``(batch, 1)`` predictions."""
    # Re-compact the LSTM weights in memory; this matters for cuDNN after the
    # module has been moved or replicated.
    self.lstm.flatten_parameters()
    _, (hidden, _) = self.lstm(x)
    # hidden is (n_layers, batch, n_hidden); keep the top layer's final state.
    out = hidden[-1]
    return self.regressor(out)

In [None]:
class BTCPricePredictor(pl.LightningModule):
  """Lightning wrapper that trains ``PricePredictModel`` with an MSE loss."""

  def __init__(self, n_features: int):
    super().__init__()
    self.model = PricePredictModel(n_features)
    self.criterion = nn.MSELoss()

  def forward(self, x, labels=None):
    """Return ``(loss, output)``; ``loss`` is the integer 0 when no labels are given."""
    prediction = self.model(x)
    if labels is None:
      return 0, prediction
    # Labels arrive as (batch,); add a trailing axis to match the (batch, 1)
    # model output.
    return self.criterion(prediction, labels.unsqueeze(dim=1)), prediction

  def _run_step(self, batch, metric_name):
    """Compute the loss for one batch and log it under ``metric_name``."""
    loss, _ = self(batch['sequence'], batch['label'])
    self.log(metric_name, loss, prog_bar=True, logger=True)
    return loss

  def training_step(self, batch, batch_idx):
    return self._run_step(batch, 'train_loss')

  def validation_step(self, batch, batch_idx):
    return self._run_step(batch, 'val_loss')

  def test_step(self, batch, batch_idx):
    return self._run_step(batch, 'test_loss')

  def configure_optimizers(self):
    """Optimise every parameter with AdamW at a fixed learning rate."""
    return optim.AdamW(self.parameters(), lr=0.0001)

In [None]:
# One model input per feature column of the scaled training frame.
model = BTCPricePredictor(n_features=len(train_df.columns))

In [None]:
# Keep only the single best checkpoint, ranked by the lowest val_loss.
checkpoint_callback = ModelCheckpoint(
    dirpath='checkpoints',
    filename='best-checkpoint',
    save_top_k=1,
    verbose=True,
    monitor='val_loss',
    mode='min',
)

# Stop training early once val_loss stops improving (patience=2).
early_stopping_callback = EarlyStopping(monitor='val_loss', patience=2)

# Metrics are written under lightning_logs/btc-price for TensorBoard.
logger = TensorBoardLogger('lightning_logs', name='btc-price')

trainer = pl.Trainer(
    logger=logger,
    callbacks=[checkpoint_callback, early_stopping_callback],
    max_epochs=N_EPOCHS,
    # Let Lightning choose the hardware backend.
    accelerator="auto",
)

In [None]:
# Run the training loop; the data module supplies the train and validation loaders.
trainer.fit(model, datamodule=data_module)