In [10]:
import pandas as pd
import numpy as np
import torch.optim as optim
from torch.cuda.amp import GradScaler, autocast
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
import torch
from torch.utils.data import DataLoader, Dataset



In [26]:
print("PyTorch Version:", torch.__version__)
print("CUDA Available:", torch.cuda.is_available())
print("GPU Name:", torch.cuda.get_device_name(0) if torch.cuda.is_available() else "No GPU")


PyTorch Version: 2.5.1
CUDA Available: False
GPU Name: No GPU


In [11]:
def hyperbolic_discounting_loss(preds, targets, confs, beta=0.5):
    """
    Calculate loss with hyperbolic discounting and confidence weighting.
    Args:
        preds: Predicted prices (batch_size, 4)
        targets: True prices (batch_size, 4)
        confs: Predicted confidence values (batch_size, 4)
        beta: Hyperbolic discount factor (smaller = more discounting for distant horizons)
    """
    time_horizons = torch.tensor([1, 5, 21, 252], device=preds.device)  # Trading days
    discount_weights = 1 / (1 + beta * time_horizons)
    mse_loss = (preds - targets).pow(2)
    weighted_loss = discount_weights * mse_loss / (confs + 1e-6)  # Confidence adjustment
    confidence_penalty = torch.log(confs + 1e-6).mean()  # Encourage confident predictions
    return weighted_loss.mean() + 0.01 * confidence_penalty  # Regularize confidence


In [18]:
# read data
input_data = pd.read_csv('../data/training_raw.csv')

# Define horizons for target predictions (trading days)
horizons = {"1_day": 1, "1_week": 5, "1_month": 21, "1_year": 252}

# Step 1: Create Target Columns
def create_targets(df, target_column, horizons):
    """
    Generate shifted target columns for specified horizons.
    """
    for horizon, shift in horizons.items():
        df[f"Target_{horizon}"] = df[target_column].shift(-shift)
    return df

# Step 2: Normalize Features
def normalize_features(df, exclude_columns):
    """
    Normalize input features using Min-Max scaling, excluding specified columns.
    """
    scaler = MinMaxScaler()
    input_features = df.drop(columns=exclude_columns)
    scaled_features = scaler.fit_transform(input_features)
    
    # Reconstruct normalized DataFrame
    normalized_df = pd.DataFrame(scaled_features, columns=input_features.columns, index=input_features.index)
    for column in exclude_columns:
        normalized_df[column] = df[column]  # Add back excluded columns (e.g., targets)
    
    return normalized_df, scaler

# Step 3: Create Sequences for Transformer Input
def create_sequences(data, target_columns, seq_length):
    """
    Create sequences for transformer input with specified sequence length.
    """
    X, y = [], []
    data_values = data.drop(columns=target_columns).values
    target_values = data[target_columns].values

    for i in range(len(data) - seq_length):
        X.append(data_values[i:i + seq_length])
        y.append(target_values[i + seq_length])
    
    return torch.tensor(X, dtype=torch.float32), torch.tensor(y, dtype=torch.float32)

# Step 4: Custom Dataset for PyTorch
class StockDataset(Dataset):
    def __init__(self, X, y):
        self.X = X
        self.y = y

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        return self.X[idx], self.y[idx]

# Full Data Preparation Pipeline
def prepare_data_pipeline(df, target_column, sequence_length=30):
    """
    Prepare data for transformer input: normalize, create targets, and generate sequences.
    """
    # Create targets
    df = create_targets(df, target_column, horizons)

    # Normalize features
    exclude_columns = [f"Target_{horizon}" for horizon in horizons.keys()]
    df, scaler = normalize_features(df, exclude_columns)

    # Drop NaN values
    df.dropna(inplace=True)

    # Split data into training, validation, and test sets
    train_df, test_df = train_test_split(df, test_size=0.2, shuffle=False)
    train_df, val_df = train_test_split(train_df, test_size=0.2, shuffle=False)

    # Create sequences
    target_columns = exclude_columns
    train_X, train_y = create_sequences(train_df, target_columns, sequence_length)
    val_X, val_y = create_sequences(val_df, target_columns, sequence_length)
    test_X, test_y = create_sequences(test_df, target_columns, sequence_length)

    # Create DataLoaders
    batch_size = 32
    train_loader = DataLoader(StockDataset(train_X, train_y), batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(StockDataset(val_X, val_y), batch_size=batch_size, shuffle=False)
    test_loader = DataLoader(StockDataset(test_X, test_y), batch_size=batch_size, shuffle=False)

    return train_loader, val_loader, test_loader, scaler

# Example Usage
# Assuming combined_df is the enriched DataFrame
target_column = "COST_Close"
sequence_length = 256 # 21 = 1 month, 252 = 1 year

# Prepare the data
input_data = input_data.drop(columns = ['Date'])  # Reset index to avoid misalignment
train_loader, val_loader, test_loader, scaler = prepare_data_pipeline(input_data, target_column, sequence_length)

# Display the shapes of the datasets
print(f"Train Loader: {len(train_loader.dataset)} samples")     
print(f"Validation Loader: {len(val_loader.dataset)} samples")
print(f"Test Loader: {len(test_loader.dataset)} samples")


  return torch.tensor(X, dtype=torch.float32), torch.tensor(y, dtype=torch.float32)


Train Loader: 2059 samples
Validation Loader: 323 samples
Test Loader: 468 samples


In [19]:
def train_model(model, train_loader, val_loader, epochs, device):
    model = model.to(device)
    optimizer = optim.Adam(model.parameters(), lr=1e-4)
    scaler = GradScaler()  # For mixed precision

    for epoch in range(epochs):
        model.train()
        train_loss = 0
        for X, y in train_loader:
            X, y = X.to(device), y.to(device)
            optimizer.zero_grad()

            with autocast():  # Enable mixed precision
                price_preds, conf_preds = model(X)
                loss = hyperbolic_discounting_loss(price_preds, y, conf_preds)

            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()
            train_loss += loss.item()

        print(f"Epoch {epoch + 1}/{epochs}, Train Loss: {train_loss / len(train_loader):.4f}")

        # Validation loop
        model.eval()
        val_loss = 0
        with torch.no_grad():
            for X, y in val_loader:
                X, y = X.to(device), y.to(device)
                price_preds, conf_preds = model(X)
                loss = hyperbolic_discounting_loss(price_preds, y, conf_preds)
                val_loss += loss.item()

        print(f"Epoch {epoch + 1}/{epochs}, Val Loss: {val_loss / len(val_loader):.4f}")


In [21]:
# Model parameters
embed_dim = 128
num_heads = 8
num_layers = 4

# get input dimensions from first batch
batch_X, _ = next(iter(train_loader))
input_dim = batch_X.shape[-1]
print(f"Input dimension: {input_dim}")

device = "cuda" if torch.cuda.is_available() else "cpu"



Input dimension: 247


In [None]:
# enable automatic mixed precision
from torch import nn    


In [None]:
from pathlib import Path
import sys

# Add the src directory to the system path if not already present
src_path = Path("../src").resolve()
if str(src_path) not in sys.path:
    sys.path.append(str(src_path))

from model_def import StockPriceTransformer

In [25]:
model = StockPriceTransformer(input_dim, embed_dim, num_heads, num_layers)

# Train the model
train_model(model, train_loader, val_loader, epochs=100, device=device)


  scaler = GradScaler()  # For mixed precision
  with autocast():  # Enable mixed precision


IndexError: index 0 is out of range