In [2]:
import pandas as pd
import os
import numpy as np

In [3]:
data = []

In [4]:
market = 'sp500' # [forbes2000, nasdaq, nyse, sp500]
year = '2017'

In [7]:
stock = pd.read_csv(f'dataset/sp500_{year}.csv')
static = pd.read_csv(f"dataset/sp500_{year}_static.csv", index_col=0)
stock['Date'] = pd.to_datetime(stock['Date'])
data = stock

In [8]:
data = data[~data.isna().any(axis=1)]

In [10]:
from sklearn.preprocessing import StandardScaler

scalers = {}
for ticker in data['Stock Name'].unique():
    scaler = StandardScaler()
    ticker_data = data[data['Stock Name'] == ticker][['Open', 'High', 'Low', 'Close', 'Adjusted Close']]
    scaled_data = scaler.fit_transform(ticker_data)
    data.loc[data['Stock Name'] == ticker, ['Open', 'High', 'Low', 'Close', 'Adjusted Close']] = scaled_data
    scalers[ticker] = scaler 

In [11]:
from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()
data['ticker'] = le.fit_transform(data['Stock Name'])

In [12]:
def create_sequences(data, target_col, input_length, output_length):
    sequences = []
    targets = []

    data = data.reset_index().sort_values(by=['ticker', 'Date'])


    feature_columns = ['Open', 'High', 'Low', 'Close', 'Adjusted Close']
    all_features = data[feature_columns].values  # Convert to NumPy array
    all_targets = data[target_col].values

    tickers = data['ticker'].unique()
    ticker_indices = data.groupby('ticker').apply(lambda x: x.index.to_numpy())

    for indices in ticker_indices:
        ticker_features = all_features[indices]
        ticker_targets = all_targets[indices]

        for i in range(len(indices) - input_length - output_length + 1):
            input_seq = ticker_features[i:i + input_length]
            target_seq = ticker_targets[i + input_length:i + input_length + output_length]

            sequences.append(input_seq)
            targets.append(target_seq)

    return np.array(sequences), np.array(targets)


In [13]:
input, targets = create_sequences(data, 'Adjusted Close', 90, 7)

  ticker_indices = data.groupby('ticker').apply(lambda x: x.index.to_numpy())


In [14]:
input

array([[[-1.44115541, -1.42996061, -1.43533341, -1.42574887,
         -1.41769825],
        [-1.41169717, -1.41160391, -1.40310422, -1.40776239,
         -1.40078189],
        [-1.40816221, -1.42063659, -1.41683147, -1.42427458,
         -1.41631213],
        ...,
        [-1.13213852, -1.1345057 , -1.12050142, -1.11968229,
         -1.12592429],
        [-1.11947147, -1.13304885, -1.12676819, -1.13737396,
         -1.14260489],
        [-1.14068132, -1.14936581, -1.135124  , -1.13855343,
         -1.14371684]],

       [[-1.41169717, -1.41160391, -1.40310422, -1.40776239,
         -1.40078189],
        [-1.40816221, -1.42063659, -1.41683147, -1.42427458,
         -1.41631213],
        [-1.42053462, -1.39149903, -1.41086306, -1.38151958,
         -1.37610025],
        ...,
        [-1.11947147, -1.13304885, -1.12676819, -1.13737396,
         -1.14260489],
        [-1.14068132, -1.14936581, -1.135124  , -1.13855343,
         -1.14371684],
        [-1.13979761, -1.14878305, -1.1303493 , 

In [15]:
import torch


In [16]:
input = torch.tensor(input)
target = torch.tensor(targets)

In [17]:
input.shape
# target.shape

torch.Size([566047, 90, 5])

In [106]:
# input = input.unsqueeze(-1)


In [19]:
from torch.utils.data import Dataset

class StockDataset(Dataset):
    def __init__(self, input, target, ):
        self.input = input
        self.target = target
    def __len__(self):
        return len(self.input)
    
    def __getitem__(self, idx):
        return self.input[idx], self.target[idx]

In [20]:
from torch.utils.data import random_split
from torch.utils.data import DataLoader

dataset = StockDataset(input, target)

In [24]:
dataset[0][1].shape

torch.Size([7])

In [25]:
import torch
import torch.nn as nn
import math


class PositionalEncoding(nn.Module):
    """Positional Encoding for time-series data."""
    def __init__(self, d_model, max_len=5000):
        super(PositionalEncoding, self).__init__()
        pe = torch.zeros(max_len, d_model)
        position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
        div_term = torch.exp(
            torch.arange(0, d_model, 2).float() * (-math.log(10000.0) / d_model)
        )
        pe[:, 0::2] = torch.sin(position * div_term)
        pe[:, 1::2] = torch.cos(position * div_term)
        pe = pe.unsqueeze(0)  # [1, max_len, d_model]
        self.register_buffer("pe", pe)

    def forward(self, x):

        return x + self.pe[:, : x.size(1), :]


class DecompositionLayer(nn.Module):
    """Decomposes time series into trend and seasonal components."""
    def __init__(self, kernel_size):
        super(DecompositionLayer, self).__init__()
        self.moving_avg = nn.AvgPool1d(kernel_size=kernel_size, stride=1, padding=(kernel_size - 1) // 2)

    def forward(self, x):
        # x: [batch_size, seq_len, feature_dim] or [batch_size, seq_len, feature_dim, 1]
        if len(x.shape) == 4:
            # Handle [batch_size, seq_len, feature_dim, 1]
            x = x.squeeze(-1)  # [batch_size, seq_len, feature_dim]

        # Apply moving average on dimension representing time
        # For AvgPool1d: input should be [batch, channels, sequence]
        # Here: batch = batch_size, channels = feature_dim, sequence = seq_len
        trend = self.moving_avg(x.permute(0, 2, 1)).permute(0, 2, 1)  # [batch_size, seq_len, feature_dim]
        seasonal = x - trend
        return seasonal, trend


class EncoderLayer(nn.Module):
    """Single Encoder Layer."""
    def __init__(self, d_model, n_heads, ff_dim, dropout=0.1):
        super(EncoderLayer, self).__init__()
        self.attn = nn.MultiheadAttention(embed_dim=d_model, num_heads=n_heads, dropout=dropout)
        self.ffn = nn.Sequential(
            nn.Linear(d_model, ff_dim),
            nn.ReLU(),
            nn.Linear(ff_dim, d_model)
        )
        self.norm1 = nn.LayerNorm(d_model)
        self.norm2 = nn.LayerNorm(d_model)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        # x: [seq_len, batch_size, d_model]
        attn_out, _ = self.attn(x, x, x)  # self-attention
        x = self.norm1(x + self.dropout(attn_out))
        ffn_out = self.ffn(x)
        x = self.norm2(x + self.dropout(ffn_out))
        return x  # [seq_len, batch_size, d_model]


class Encoder(nn.Module):
    """Autoformer Encoder."""
    def __init__(self, d_model, n_heads, ff_dim, num_layers, dropout=0.1):
        super(Encoder, self).__init__()
        self.layers = nn.ModuleList([EncoderLayer(d_model, n_heads, ff_dim, dropout) for _ in range(num_layers)])

    def forward(self, x):
        # x: [seq_len, batch_size, d_model]
        for layer in self.layers:
            x = layer(x)
        return x  # [seq_len, batch_size, d_model]


class DecoderLayer(nn.Module):
    """Single Decoder Layer."""
    def __init__(self, d_model, n_heads, ff_dim, dropout=0.1):
        super(DecoderLayer, self).__init__()
        self.self_attn = nn.MultiheadAttention(embed_dim=d_model, num_heads=n_heads, dropout=dropout)
        self.cross_attn = nn.MultiheadAttention(embed_dim=d_model, num_heads=n_heads, dropout=dropout)
        self.ffn = nn.Sequential(
            nn.Linear(d_model, ff_dim),
            nn.ReLU(),
            nn.Linear(ff_dim, d_model)
        )
        self.norm1 = nn.LayerNorm(d_model)
        self.norm2 = nn.LayerNorm(d_model)
        self.norm3 = nn.LayerNorm(d_model)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x, enc_output):
        # x, enc_output: [seq_len, batch_size, d_model]
        # Self-attention
        self_attn_out, _ = self.self_attn(x, x, x)
        x = self.norm1(x + self.dropout(self_attn_out))

        # Cross-attention
        cross_attn_out, _ = self.cross_attn(x, enc_output, enc_output)
        x = self.norm2(x + self.dropout(cross_attn_out))

        # Feed-forward
        ffn_out = self.ffn(x)
        x = self.norm3(x + self.dropout(ffn_out))

        return x  # [seq_len, batch_size, d_model]


class Decoder(nn.Module):
    """Autoformer Decoder."""
    def __init__(self, d_model, n_heads, ff_dim, num_layers, dropout=0.1):
        super(Decoder, self).__init__()
        self.layers = nn.ModuleList([DecoderLayer(d_model, n_heads, ff_dim, dropout) for _ in range(num_layers)])

    def forward(self, x, enc_output):
        # x: [seq_len, batch_size, d_model]
        # enc_output: [seq_len, batch_size, d_model]
        for layer in self.layers:
            x = layer(x, enc_output)
        return x  # [seq_len, batch_size, d_model]


class Autoformer(nn.Module):
    def __init__(self, input_dim, d_model, n_heads, ff_dim, num_layers, kernel_size, target_len, dropout=0.1):
        super(Autoformer, self).__init__()
        self.input_projection = nn.Linear(input_dim, d_model)
        self.positional_encoding = PositionalEncoding(d_model)
        self.decomposition = DecompositionLayer(kernel_size)
        self.encoder = Encoder(d_model, n_heads, ff_dim, num_layers, dropout)
        self.decoder = Decoder(d_model, n_heads, ff_dim, num_layers, dropout)
        self.output_projection = nn.Linear(d_model, 1)

    def prepare_decoder_input(self, target):
        shifted_target = torch.zeros_like(target)
        shifted_target[:, 1:] = target[:, :-1]
        return shifted_target

    def forward(self, x, target):
        
        seasonal, trend = self.decomposition(x)  
        enc_input = self.input_projection(seasonal)

        enc_input = self.positional_encoding(enc_input)  
        enc_input = enc_input.permute(1, 0, 2)

        enc_output = self.encoder(enc_input) 

        dec_input = self.prepare_decoder_input(target)

        # dec_input = self.input_projection(dec_input)
        dec_input = self.positional_encoding(dec_input)
        dec_input = dec_input.permute(1, 0, 2)

        dec_output = self.decoder(dec_input, enc_output)
        dec_output = dec_output.permute(1, 0, 2)

        output = self.output_projection(dec_output).squeeze(-1) 
        return output


In [26]:
from torch.utils.data import random_split
from torch.utils.data import DataLoader

dataset = StockDataset(input, target)

In [111]:
import torch
from torch.utils.data import DataLoader, Dataset, random_split
import torch.optim as optim
from tqdm import tqdm

def train_autoformer(
    model, train_loader, val_loader, num_epochs, device, learning_rate=0.0005, patience=5
):
    model.to(device)
    criterion = torch.nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode="min", factor=0.5, patience=2, verbose=True)
    best_val_loss = float("inf")
    epochs_no_improve = 0

    for epoch in range(num_epochs):
        print(f"Epoch {epoch + 1}/{num_epochs}")
        model.train()
        train_loss = 0.0

        train_progress = tqdm(train_loader, desc="Training", leave=False)
        for inputs, targets in train_progress:
            inputs, targets = inputs.float().to(device), targets.float().to(device)
            
            optimizer.zero_grad()
            outputs = model(inputs, targets.unsqueeze(-1))
            loss = criterion(outputs, targets)
            loss.backward()

            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
            optimizer.step()
            train_loss += loss.item()

        train_loss /= len(train_loader)
        print(f"Train Loss: {train_loss:.4f}")
        model.eval()
        val_loss = 0.0
        with torch.no_grad():
            val_progress = tqdm(val_loader, desc="Validating", leave=False)
            for inputs, targets in val_progress:
                inputs, targets = inputs.float().to(device), targets.float().to(device)
                outputs = model(inputs, targets.unsqueeze(-1))
                loss = criterion(outputs, targets)
                val_loss += loss.item()

        val_loss /= len(val_loader)
        print(f"Validation Loss: {val_loss:.4f}")
        scheduler.step(val_loss)

        if val_loss < best_val_loss:
            best_val_loss = val_loss
            torch.save(model.state_dict(), './best_long.pth')
            print(f"Model saved with validation loss: {val_loss:.4f}")
            epochs_no_improve = 0
        else:
            epochs_no_improve += 1
            if epochs_no_improve >= patience:
                print("Early stopping due to no improvement.")
                break


In [31]:
import torch
from torch.utils.data import DataLoader, Dataset, Subset
import torch.optim as optim
from tqdm import tqdm

if __name__ == "__main__":
    input_length = 90  # Length of input sequence
    output_length = 7  # Forecast horizon
    batch_size = 32
    num_epochs = 20
    d_model = 128
    n_heads = 8
    ff_dim = 128
    num_layers = 2
    kernel_size = 5
    dropout = 0.01
    device = "cuda" if torch.cuda.is_available() else "cpu"

    # Let's say we want:
    #  - 70% training
    #  - 15% validation
    #  - 15% testing
    total_len = len(dataset)
    train_end = int(total_len * 0.7)
    val_end = int(total_len * 0.85)  # 70% train + 15% val = 85% total

    train_dataset = Subset(dataset, range(0, train_end))
    val_dataset = Subset(dataset, range(train_end, val_end))
    test_dataset = Subset(dataset, range(val_end, total_len))

    # Create DataLoaders
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

    # Initialize Autoformer Model
    model = Autoformer(
        input_dim=5,
        d_model=d_model,
        n_heads=n_heads,
        ff_dim=ff_dim,
        num_layers=num_layers,
        kernel_size=kernel_size,
        target_len=output_length,
        dropout=dropout,
    )
    model.load_state_dict(torch.load('best_long.pth'))
    model.to(device)

    for inputs, targets in train_loader:
        inputs, targets = inputs.float().to(device), targets.float().to(device)
        print(inputs.size())
        print(targets.unsqueeze(-1).size())
        outputs = model(inputs, targets.unsqueeze(-1))
        print(outputs.size())
        break




torch.Size([32, 90, 5])
torch.Size([32, 7, 1])
torch.Size([32, 7])


  model.load_state_dict(torch.load('best_long.pth'))


In [114]:

    # After training, evaluate on the test set:
    model.eval()
    test_loss = 0.0
    criterion = torch.nn.MSELoss()
    with torch.no_grad():
        for inputs, targets in tqdm(test_loader, desc="Testing"):
            inputs, targets = inputs.float().to(device), targets.float().to(device)
            outputs = model(inputs, targets.unsqueeze(-1))
            loss = criterion(outputs, targets)
            test_loss += loss.item()

    test_loss /= len(test_loader)
    print(f"Test Loss: {test_loss:.4f}")


Testing: 100%|██████████| 2654/2654 [00:07<00:00, 351.87it/s]

Test Loss: 0.0014



