# Data

In [None]:
 #!wget http://your_domain/klines_5m_BTC_USDT.parquet

In [None]:
import pandas as pd
import seaborn as sns
import numpy as np
import matplotlib.pyplot as plt

In [None]:
# Load the BTC/USDT kline table from a parquet file in the working directory.
# NOTE(review): later cells resample on the index and read the 'open', 'close',
# 'low' and 'high' columns, so the file presumably has a datetime index — confirm.
df = pd.read_parquet('klines_5m_BTC_USDT.parquet')
df  # bare expression so the frame is shown as the cell output

In [None]:
# Downsample to 30-minute candles. Each output row keeps the first open,
# the last close, the minimum low and the maximum high of its window.
df = df.resample('30min').agg(
    dict(open='first', close='last', low='min', high='max')
)
df

In [None]:
# Display the 'open' column; shifting by 0 periods moves nothing, so the values are unchanged.
df['open'].shift(0)

In [None]:
# Candle body (close - open) divided by the mean of that candle's open and close,
# i.e. a symmetric relative change. The histogram's x-axis is limited to +/- 0.02.
data = (df['close'] - df['open']) / ((df['close'] + df['open']) / 2)
sns.histplot(data=data, kde=True).set(xlim=(-0.02, 0.02))

In [None]:
# Sign of the close-to-close change: +1.0 for a rise, -1.0 for a fall and 0.0
# otherwise (a NaN change, such as the first row's, fails both comparisons).
delta_price = df['close'].diff()
direction = (delta_price > 0).astype(float) - (delta_price < 0).astype(float)
direction

In [None]:
# Scatter the first 250 direction labels against their index values.
plt.scatter(x=direction.index[:250], y=direction.iloc[:250])
plt.show()

In [None]:
# Close-to-close change divided by the current close. NaN entries (at least the
# first row, which has no predecessor) are back-filled from the next valid value.
price = df['close'].diff().div(df['close']).bfill()

# PyTorch

## Dataset

In [None]:
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

In [None]:
SEQ_LEN = 512
split_date = '2024-01-01'

# Inputs and labels must share one index, otherwise the masks below would not line up.
assert price.index.equals(direction.index)

# Rows strictly before the split date form the training set; the rest are validation.
train_mask = price.index < split_date
val_mask = price.index >= split_date

price_train, dir_train = price.loc[train_mask], direction.loc[train_mask]
price_val, dir_val = price.loc[val_mask], direction.loc[val_mask]


In [None]:
class PriceToDirectionDataset(Dataset):
    """Sliding-window dataset pairing a window of inputs with the next label.

    Sample ``i`` is ``(prices[i : i + seq_len], directions[i + seq_len])``:
    the input is ``seq_len`` consecutive values and the target is the label
    at the position immediately after the window.
    """

    def __init__(self, price_series, direction_series, seq_len=512):
        """
        Args:
            price_series: Object exposing ``.values`` (e.g. a pandas Series)
                holding the input values; stored as float32.
            direction_series: Object exposing ``.values`` holding one label
                per input value; stored as float32.
            seq_len (int): Number of consecutive values in each input window.

        Raises:
            ValueError: If the two series do not have the same length.
        """
        self.prices = price_series.values.astype(np.float32)
        self.directions = direction_series.values.astype(np.float32)
        self.seq_len = seq_len

        # Explicit check instead of `assert`, which is stripped under `python -O`.
        if len(self.prices) != len(self.directions):
            raise ValueError(
                f"price and direction series differ in length: "
                f"{len(self.prices)} != {len(self.directions)}"
            )

    def __len__(self):
        """Return the number of complete (window, next label) pairs, never below 0."""
        # A series shorter than seq_len yields no samples; a negative value
        # here would make len() raise instead of reporting an empty dataset.
        return max(0, len(self.prices) - self.seq_len)

    def __getitem__(self, idx):
        """Return sample ``idx`` as ``(x, y)``.

        Args:
            idx (int): Sample position; negative values count from the end.

        Returns:
            tuple: ``x`` of shape ``(seq_len, 1)`` and ``y`` of shape ``(1,)``.

        Raises:
            IndexError: If ``idx`` is outside the dataset.
        """
        size = len(self)
        if idx < 0:
            # Without this, a negative index would slice a wrong (often empty) window.
            idx += size
        if not 0 <= idx < size:
            raise IndexError(f"index out of range for dataset of size {size}")

        x = self.prices[idx:idx + self.seq_len]
        y = self.directions[idx + self.seq_len]
        # unsqueeze adds the trailing feature dimension expected by the model.
        return torch.tensor(x).unsqueeze(-1), torch.tensor([y])


In [None]:
# Wrap each split in a sliding-window dataset of SEQ_LEN steps.
train_dataset = PriceToDirectionDataset(
    price_series=price_train, direction_series=dir_train, seq_len=SEQ_LEN
)
val_dataset = PriceToDirectionDataset(
    price_series=price_val, direction_series=dir_val, seq_len=SEQ_LEN
)

# Only the training loader shuffles; validation keeps the dataset order.
train_loader = DataLoader(dataset=train_dataset, batch_size=64, shuffle=True)
val_loader = DataLoader(dataset=val_dataset, batch_size=64)


## Model

In [None]:
import torch
import torch.nn as nn

class Forecast(nn.Module):
    """
    Recurrent (GRU) classifier that scores two classes at every time step.

    The input sequence is fed through a GRU and each hidden state is projected
    by a linear layer to two raw scores (logits), ordered [up, down].

    No softmax is applied here; it is left to the caller
    (e.g. a loss such as nn.CrossEntropyLoss).
    """

    def __init__(self, input_dim=1, hidden_dim=16, num_layers=1):
        """
        Args:
            input_dim (int): Feature count of each time step.
            hidden_dim (int): Width of the GRU hidden state.
            num_layers (int): How many GRU layers are stacked.
        """
        super().__init__()
        self.gru = nn.GRU(
            input_size=input_dim,
            hidden_size=hidden_dim,
            num_layers=num_layers,
            batch_first=True,
        )
        # Two output units: one logit per class, [up, down].
        self.fc = nn.Linear(in_features=hidden_dim, out_features=2)

    def forward(self, x):
        """
        Compute per-step class logits.

        Args:
            x (Tensor): Batch-first input, shape (batch, sequence_length, input_dim).

        Returns:
            Tensor: Logits of shape (batch, sequence_length, 2), one pair of
                    class scores for every time step.
        """
        hidden_states, _ = self.gru(x)  # (batch, seq_len, hidden_dim)
        return self.fc(hidden_states)   # (batch, seq_len, 2)


Generate some noise and run it through the model to confirm that the implementation is correct.

In [None]:
# Sanity check: push random noise through an untrained model and inspect the output shape.
x = torch.randn(2, 512, 1)  # batch of 2, seq len 512
model = Forecast()
y = model(x)
# Print the shape rather than the full tensor; the final linear layer emits two logits per step.
print(y.shape)  # should be torch.Size([2, 512, 2])

## Training

In [None]:
from tqdm import tqdm

class Trainer:
    """Training and validation loop for the two-class (up/down) sequence model.

    Only the logits of the last time step are used for the loss. Losses are
    recorded per training step and per validation checkpoint so they can be
    plotted afterwards.
    """

    def __init__(self, model, train_loader, val_loader, lr=1e-3):
        """
        Trainer for binary classification (up/down) based on GRU outputs.

        Model output: logits of shape (batch, time, 2)
        Target: float labels → mapped to class index:
            y >= 0 → 0 (up)
            y <  0 → 1 (down)

        Args:
            model: Module returning logits of shape (batch, time, 2).
            train_loader: Iterable of (x, y) training batches.
            val_loader: Iterable of (x, y) validation batches.
            lr (float): Learning rate for the Adam optimizer.
        """
        self.model = model.to(device)
        self.train_loader = train_loader
        self.val_loader = val_loader
        self.opt = torch.optim.Adam(self.model.parameters(), lr=lr)
        self.loss_fn = nn.CrossEntropyLoss()

        self.train_loss_steps = []
        self.val_loss_checkpoints = []
        # Number of completed train steps at the moment of each validation
        # checkpoint; gives plot_losses the true x position of every val point.
        self.val_checkpoint_steps = []

    @staticmethod
    def _to_class_index(y):
        """Map float labels to class indices: y >= 0 → 0 (up), y < 0 → 1 (down)."""
        # reshape(-1) instead of squeeze(): squeeze() would collapse a batch of
        # size 1 to a 0-d tensor, which no longer matches (batch, 2) logits.
        return (y.reshape(-1) < 0).long()

    def train(self, epochs=10):
        """Train for ``epochs`` passes over the training loader.

        Validation runs roughly ten times per epoch; every training-step loss
        and every validation loss is appended to the history lists.

        Args:
            epochs (int): Number of passes over the training loader.
        """
        for epoch in range(epochs):
            self.model.train()
            pbar = tqdm(self.train_loader, desc=f"Epoch {epoch+1}", leave=False)
            val_check_interval = max(1, len(self.train_loader) // 10)

            for i, (x, y) in enumerate(pbar):
                x, y = x.to(device), y.to(device)

                # Fail before the forward pass so a NaN input never reaches the optimizer.
                assert not torch.isnan(x).any(), "NaN found in input batch"

                y_class = self._to_class_index(y)

                logits = self.model(x)[:, -1, :]  # last time step, shape: (batch, 2)
                loss = self.loss_fn(logits, y_class)

                self.opt.zero_grad()
                loss.backward()
                self.opt.step()

                # Read the scalar once; every .item() call forces a device sync.
                step_loss = loss.item()
                self.train_loss_steps.append(step_loss)

                if (i + 1) % val_check_interval == 0:
                    val_loss = self.evaluate()
                    # evaluate() switches to eval mode; switch back so the rest
                    # of the epoch trains in train mode.
                    self.model.train()
                    self.val_loss_checkpoints.append(val_loss)
                    self.val_checkpoint_steps.append(len(self.train_loss_steps))
                    pbar.set_postfix(train_loss=step_loss, val_loss=val_loss)
                else:
                    pbar.set_postfix(train_loss=step_loss)

    def evaluate(self):
        """Return the mean of the per-batch losses over the validation loader.

        The model is put in eval mode and gradients are disabled.

        Returns:
            float: Mean batch loss, or NaN when the loader yields no batches.
        """
        self.model.eval()
        val_losses = []

        with torch.no_grad():
            for x, y in self.val_loader:
                x, y = x.to(device), y.to(device)
                y_class = self._to_class_index(y)
                logits = self.model(x)[:, -1, :]
                loss = self.loss_fn(logits, y_class)
                val_losses.append(loss.item())

        if not val_losses:
            # Nothing to average; avoid a ZeroDivisionError.
            return float("nan")
        return sum(val_losses) / len(val_losses)

    def plot_losses(self, smooth_window=1024):
        """
        Plots training loss (smoothed) and validation loss.

        Args:
            smooth_window (int): Window size for moving average on train loss.
        """
        plt.figure(figsize=(10, 4))

        n_steps = len(self.train_loss_steps)

        # Smooth train loss using moving average
        if n_steps >= smooth_window:
            kernel = np.ones(smooth_window) / smooth_window
            smoothed_train = np.convolve(self.train_loss_steps, kernel, mode='valid')
            # 'valid' drops smooth_window - 1 points; place each average at the
            # step where its window ends so it lines up with the val points.
            smoothed_x = np.arange(smooth_window - 1, n_steps)
            plt.plot(smoothed_x, smoothed_train, label=f"Train Loss (smoothed, {smooth_window})")
        else:
            plt.plot(self.train_loss_steps, label="Train Loss")

        # Plot val loss checkpoints at the train step where each was measured
        plt.plot(self.val_checkpoint_steps, self.val_loss_checkpoints, label="Val Loss")

        plt.title("Training & Validation Loss")
        plt.xlabel("Train Steps")
        plt.ylabel("Loss")
        plt.legend()
        plt.grid(True)
        plt.tight_layout()
        plt.show()

In [None]:
trainer = Trainer(model, train_loader, val_loader, lr=1e-3)
trainer.train(epochs=10)

# Access the loss history recorded by the trainer: one training loss per
# optimizer step and one validation loss per checkpoint.
train_loss = trainer.train_loss_steps
val_loss = trainer.val_loss_checkpoints


In [None]:
# One separate figure per recorded history: raw per-step training losses first,
# then the validation checkpoints.
for history in (trainer.train_loss_steps, trainer.val_loss_checkpoints):
    plt.plot(history)
    plt.show()

In [None]:
# plot_losses is a bound method of Trainer at this point, so the trainer is
# passed implicitly; an explicit argument would be taken as smooth_window.
trainer.plot_losses()

In [None]:
def plot_losses(self, smooth_window=64*4):
    """
    Draw the training-loss curve (moving average when enough steps exist)
    together with the validation-loss checkpoints in one figure.

    Args:
        self: Object providing `train_loss_steps` and `val_loss_checkpoints`.
        smooth_window (int): Length of the moving-average window applied to
            the training loss.
    """
    plt.figure(figsize=(10, 4))

    step_losses = self.train_loss_steps
    n_steps = len(step_losses)

    if n_steps < smooth_window:
        # Too few steps for one full window: show the raw losses.
        plt.plot(step_losses, label="Train Loss")
    else:
        box = np.ones(smooth_window) / smooth_window
        plt.plot(
            np.convolve(step_losses, box, mode='valid'),
            label=f"Train Loss (smoothed, {smooth_window})",
        )

    # Validation checkpoints are spread evenly over the train-step axis.
    checkpoints = self.val_loss_checkpoints
    plt.plot(np.linspace(0, n_steps, len(checkpoints)), checkpoints, label="Val Loss")

    plt.title("Training & Validation Loss")
    plt.xlabel("Train Steps")
    plt.ylabel("Loss")
    plt.legend()
    plt.grid(True)
    plt.tight_layout()
    plt.show()


# Stored on the instance as a plain function, so no implicit `self` is bound:
# after this line it must be called as trainer.plot_losses(trainer).
trainer.plot_losses = plot_losses