In [245]:
# pip uninstall torch torchvision torchaudio -y


In [246]:
# pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118

In [314]:
import torch

print(f"PyTorch version: {torch.__version__}")
print(f"CUDA available: {torch.cuda.is_available()}")

if torch.cuda.is_available():
    print(f"Device name: {torch.cuda.get_device_name(0)}")
else:
    print("CUDA is not available.")

PyTorch version: 2.5.0+cu118
CUDA available: True
Device name: NVIDIA GeForce RTX 2060


In [316]:
# !pip install yfinance

In [318]:
import math
import numpy as np
import pandas as pd

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader

import yfinance as yf

In [320]:
class TimeSeriesTransformer(nn.Module):
    def __init__(self, input_dim, d_model, nhead, num_layers, dim_feedforward, dropout=0.1):
        super(TimeSeriesTransformer, self).__init__()
        self.model_type = 'Transformer'
        self.d_model = d_model

        # Set batch_first=True here
        self.positional_encoding = PositionalEncoding(d_model, dropout, batch_first=True)
        encoder_layers = nn.TransformerEncoderLayer(d_model, nhead, dim_feedforward, dropout, batch_first=True)
        self.transformer_encoder = nn.TransformerEncoder(encoder_layers, num_layers)

        self.input_projection = nn.Linear(input_dim, d_model)
        self.decoder = nn.Linear(d_model, 1)

    def forward(self, src):
        # Now src shape should be [batch_size, sequence_length, input_dim]
        src = self.input_projection(src) * math.sqrt(self.d_model)
        src = self.positional_encoding(src)
        output = self.transformer_encoder(src)
        output = output.mean(dim=1)  # Global average pooling over the sequence length
        output = self.decoder(output)
        return output

class PositionalEncoding(nn.Module):
    def __init__(self, d_model, dropout=0.1, max_len=5000, batch_first=False):
        super(PositionalEncoding, self).__init__()
        self.dropout = nn.Dropout(p=dropout)
        self.batch_first = batch_first
        
        pe = torch.zeros(max_len, d_model)
        position = torch.arange(0, max_len, dtype=torch.float32).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2).to(torch.float32) * (-math.log(10000.0) / d_model))
        pe[:, 0::2] = torch.sin(position * div_term)
        if d_model % 2 == 1:
            pe[:, 1::2] = torch.cos(position * div_term[:-1])
        else:
            pe[:, 1::2] = torch.cos(position * div_term)
        pe = pe.unsqueeze(0)  # Shape: [1, max_len, d_model]
        self.register_buffer('pe', pe)

    def forward(self, x):
        # x shape: [batch_size, sequence_length, d_model]
        if self.batch_first:
            x = x + self.pe[:, :x.size(1)]
        else:
            x = x + self.pe[:x.size(0)]
        return self.dropout(x)


In [322]:
def calculate_bollinger_bands(data, window=10, num_of_std=2):
    """Calculate Bollinger Bands"""
    rolling_mean = data.rolling(window=window).mean()
    rolling_std = data.rolling(window=window).std()
    upper_band = rolling_mean + (rolling_std * num_of_std)
    lower_band = rolling_mean - (rolling_std * num_of_std)
    return upper_band, lower_band

def calculate_rsi(data, window=10):
    """Calculate Relative Strength Index"""
    delta = data.diff()
    gain = delta.clip(lower=0)
    loss = -delta.clip(upper=0)
    avg_gain = gain.rolling(window=window, min_periods=1).mean()
    avg_loss = loss.rolling(window=window, min_periods=1).mean()
    rs = avg_gain / avg_loss
    rsi = 100 - (100 / (1 + rs))
    return rsi

def calculate_roc(data, periods=10):
    """Calculate Rate of Change."""
    roc = ((data - data.shift(periods)) / data.shift(periods)) * 100
    return roc

In [509]:
tickers = ['META', 'AAPL', 'MSFT', 'AMZN', 'GOOG']

# Download historical data for each ticker
ticker_data_frames = []
stats = {}

for ticker in tickers:
    print(f"Processing {ticker}...")
    # Download historical data
    data = yf.download(ticker, period="5y", interval="1d")
    
    if data.empty:
        print(f"No data found for {ticker}. Skipping.")
        continue  # Skip to the next ticker if no data is found

    # Use 'Open' and 'Close' prices, converting to Series
    open_price = data['Open'].squeeze()
    close_price = data['Close'].squeeze()
    high = data['High'].squeeze()
    low = data['Low'].squeeze()
    volume = data['Volume'].squeeze()

    # Calculate additional indicators if needed
    daily_return = close_price.pct_change()
    moving_avg_10 = close_price.rolling(window=10).mean()
    moving_avg_30 = close_price.rolling(window=30).mean()
    bollinger_upper, bollinger_lower = calculate_bollinger_bands(close_price)
    rsi = calculate_rsi(close_price)
    roc = calculate_roc(close_price)

    # Create a dictionary of variables
    variables = {
        ticker + '_open': open_price,
        ticker + '_close': close_price,
        ticker + '_high': high,
        ticker + '_low': low,
        ticker + '_volume': volume,
        ticker + '_daily_return': daily_return,
        ticker + '_ma10': moving_avg_10,
        ticker + '_ma30': moving_avg_30,
        ticker + '_bollinger_upper': bollinger_upper,
        ticker + '_bollinger_lower': bollinger_lower,
        ticker + '_rsi': rsi,
        ticker + '_roc': roc,
    }

    # Verify that all variables are Series
    for name, var in variables.items():
        print(f"{name}: type={type(var)}, length={len(var)}")

    # Create DataFrame from the variables dictionary
    ticker_df = pd.DataFrame(variables)

    # Handle missing values
    ticker_df.dropna(inplace=True)

    if ticker_df.empty:
        print(f"No data left for {ticker} after dropping NaNs.")
        continue

    # Calculate mean and std for normalization
    MEAN = ticker_df.mean()
    STD = ticker_df.std()

    # Store stats for each feature
    for column in MEAN.index:
        stats[f"{column}_mean"] = MEAN[column]
        stats[f"{column}_std"] = STD[column]

    # Normalize the data
    ticker_df = (ticker_df - MEAN) / STD

    ticker_data_frames.append(ticker_df)
    print(f"Finished processing {ticker}.")


[*********************100%***********************]  1 of 1 completed

Processing META...
META_open: type=<class 'pandas.core.series.Series'>, length=1258
META_close: type=<class 'pandas.core.series.Series'>, length=1258
META_high: type=<class 'pandas.core.series.Series'>, length=1258
META_low: type=<class 'pandas.core.series.Series'>, length=1258
META_volume: type=<class 'pandas.core.series.Series'>, length=1258
META_daily_return: type=<class 'pandas.core.series.Series'>, length=1258
META_ma10: type=<class 'pandas.core.series.Series'>, length=1258
META_ma30: type=<class 'pandas.core.series.Series'>, length=1258
META_bollinger_upper: type=<class 'pandas.core.series.Series'>, length=1258
META_bollinger_lower: type=<class 'pandas.core.series.Series'>, length=1258
META_rsi: type=<class 'pandas.core.series.Series'>, length=1258
META_roc: type=<class 'pandas.core.series.Series'>, length=1258
Finished processing META.
Processing AAPL...



[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed


AAPL_open: type=<class 'pandas.core.series.Series'>, length=1258
AAPL_close: type=<class 'pandas.core.series.Series'>, length=1258
AAPL_high: type=<class 'pandas.core.series.Series'>, length=1258
AAPL_low: type=<class 'pandas.core.series.Series'>, length=1258
AAPL_volume: type=<class 'pandas.core.series.Series'>, length=1258
AAPL_daily_return: type=<class 'pandas.core.series.Series'>, length=1258
AAPL_ma10: type=<class 'pandas.core.series.Series'>, length=1258
AAPL_ma30: type=<class 'pandas.core.series.Series'>, length=1258
AAPL_bollinger_upper: type=<class 'pandas.core.series.Series'>, length=1258
AAPL_bollinger_lower: type=<class 'pandas.core.series.Series'>, length=1258
AAPL_rsi: type=<class 'pandas.core.series.Series'>, length=1258
AAPL_roc: type=<class 'pandas.core.series.Series'>, length=1258
Finished processing AAPL.
Processing MSFT...
MSFT_open: type=<class 'pandas.core.series.Series'>, length=1258
MSFT_close: type=<class 'pandas.core.series.Series'>, length=1258
MSFT_high: typ

[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed


AMZN_open: type=<class 'pandas.core.series.Series'>, length=1258
AMZN_close: type=<class 'pandas.core.series.Series'>, length=1258
AMZN_high: type=<class 'pandas.core.series.Series'>, length=1258
AMZN_low: type=<class 'pandas.core.series.Series'>, length=1258
AMZN_volume: type=<class 'pandas.core.series.Series'>, length=1258
AMZN_daily_return: type=<class 'pandas.core.series.Series'>, length=1258
AMZN_ma10: type=<class 'pandas.core.series.Series'>, length=1258
AMZN_ma30: type=<class 'pandas.core.series.Series'>, length=1258
AMZN_bollinger_upper: type=<class 'pandas.core.series.Series'>, length=1258
AMZN_bollinger_lower: type=<class 'pandas.core.series.Series'>, length=1258
AMZN_rsi: type=<class 'pandas.core.series.Series'>, length=1258
AMZN_roc: type=<class 'pandas.core.series.Series'>, length=1258
Finished processing AMZN.
Processing GOOG...
GOOG_open: type=<class 'pandas.core.series.Series'>, length=1258
GOOG_close: type=<class 'pandas.core.series.Series'>, length=1258
GOOG_high: typ

In [511]:
# Combine all ticker data into a single DataFrame
df = pd.concat(ticker_data_frames, axis=1)

# Handle any remaining missing values
df.dropna(inplace=True)


In [513]:
SEQUENCE_LEN = 30  # Use the past 30 days to predict the next day's closing price

def create_sequences(data, labels, sequence_length=SEQUENCE_LEN):
    sequences = []
    lab = []
    data_size = len(data)

    for i in range(data_size - sequence_length):
        sequences.append(data[i:i + sequence_length])
        lab.append(labels[i + sequence_length])  # Predict the next day's value

    return np.array(sequences), np.array(lab)


In [515]:
sequences_list = []
labels_list = []

for ticker in tickers:
    # Extract features for the ticker
    features = df.filter(like=ticker).values

    # The label is the next day's closing price (unscaled)
    close_price = df[ticker + '_close'].values
    mean_close = stats[ticker + '_close_mean']
    std_close = stats[ticker + '_close_std']

    # Unscale the closing price for labels
    # unscaled_close = close_price * std_close + mean_close <- was causing issues due to inconsistent normalisation

    # Create sequences and labels
    ticker_sequences, ticker_labels = create_sequences(features, close_price)

    sequences_list.append(ticker_sequences)
    labels_list.append(ticker_labels)

# Combine data from all tickers
all_sequences = np.concatenate(sequences_list)
all_labels = np.concatenate(labels_list)

# Reshape sequences to [num_samples, seq_len, num_features]
# For PyTorch, we need to transpose to [seq_len, num_samples, num_features]
# all_sequences = all_sequences.transpose(1, 0, 2)


In [517]:
print(f"all_sequences.shape: {all_sequences.shape}")
print(f"all_labels.shape: {all_labels.shape}")


all_sequences.shape: (5995, 30, 12)
all_labels.shape: (5995,)


In [519]:
# split data

In [521]:
class TimeSeriesDataset(Dataset):
    def __init__(self, sequences, labels):
        self.sequences = sequences.astype(np.float32)  # [num_samples, sequence_length, num_features]
        self.labels = labels.astype(np.float32)        # [num_samples]

    def __len__(self):
        return self.sequences.shape[0]  # num_samples

    def __getitem__(self, idx):
        sequence = self.sequences[idx]  # Shape: [sequence_length, num_features]
        label = self.labels[idx]
        return sequence, label


# Split data into training, validation, and test sets
print(all_sequences.shape)
dataset_size = all_sequences.shape[0]
train_size = int(0.8 * dataset_size)
val_size = int(0.1 * dataset_size)
test_size = dataset_size - train_size - val_size
print(f"Train size: {train_size}, Validation size: {val_size}, Test size: {test_size}")

indices = np.random.permutation(dataset_size)
train_indices = indices[:train_size]
val_indices = indices[train_size:train_size+val_size]
test_indices = indices[train_size+val_size:]
print(f"Train indices: {len(train_indices)}, Validation indices: {len(val_indices)}, Test indices: {len(test_indices)}")

train_dataset = TimeSeriesDataset(all_sequences[train_indices, :, :], all_labels[train_indices])
val_dataset = TimeSeriesDataset(all_sequences[val_indices, :, :], all_labels[val_indices])
test_dataset = TimeSeriesDataset(all_sequences[test_indices, :, :], all_labels[test_indices])

# Create DataLoaders
BATCH_SIZE = 64

train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE)


(5995, 30, 12)
Train size: 4796, Validation size: 599, Test size: 600
Train indices: 4796, Validation indices: 599, Test indices: 600


In [560]:
class EarlyStopping:
    def __init__(self, patience=10, verbose=False, delta=0, path='checkpoint.pth'):
        """
        Args:
            patience (int): How long to wait after last improvement.
            verbose (bool): If True, prints messages when an improvement is made.
            delta (float): Minimum change to qualify as an improvement.
            path (str): Path to save the model checkpoint.
        """
        self.patience = patience
        self.verbose = verbose
        self.counter = 0
        self.best_score = None
        self.early_stop = False
        self.val_loss_min = np.Inf
        self.delta = delta
        self.path = path

    def __call__(self, val_loss, model):
        # print(val_loss)
        score = val_loss  # We want to minimize validation loss
        # print(f'\nself.best_score {self.best_score} \ncur score {score} \nand delta {self.delta}')

        if self.best_score is None:
            self.best_score = score
            self.save_checkpoint(val_loss, model)

        elif score < self.best_score + self.delta:
            self.counter += 1
            if self.verbose:
                print(f'EarlyStopping counter: {self.counter} out of {self.patience}, current best is {self.best_score}')

            if self.counter >= self.patience:
                self.early_stop = True

        else:
            self.best_score = score
            self.save_checkpoint(val_loss, model)
            self.counter = 0

    def save_checkpoint(self, val_loss, model):
        """Saves model when directional accuracy increases."""
        if self.verbose:
            print(f'Validation accuracy increased ({self.val_loss_min:.6f} --> {val_loss:.6f}).  Saving model...')
        torch.save(model.state_dict(), self.path)
        self.val_loss_min = val_loss


In [592]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
input_dim = all_sequences.shape[2]  # Number of features
d_model = 64
nhead = 4
num_layers = 6
dim_feedforward = 128
dropout = 0.1

model = TimeSeriesTransformer(input_dim, d_model, nhead, num_layers, dim_feedforward, dropout)
model = model.to(device)

criterion = nn.L1Loss()  

In [600]:
from torch.optim.lr_scheduler import ReduceLROnPlateau

optimizer = optim.Adam(model.parameters(), lr=0.002)
scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=0.9, patience=20, verbose=True)



In [602]:
EPOCHS = 10000

# Initialize EarlyStopping object
early_stopping = EarlyStopping(patience=100, verbose=True, path='best_model.pth')

for epoch in range(EPOCHS):
    # Training 
    model.train()
    train_loss = 0.0
    for sequences, labels in train_loader:
        sequences = sequences.to(device)  # [batch_size, sequence_length, num_features]
        labels = labels.to(device)        # [batch_size]

        optimizer.zero_grad()
        outputs = model(sequences)        # [batch_size, 1]
        outputs = outputs.squeeze()       # [batch_size]

        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        train_loss += loss.item() * labels.size(0)

    avg_train_loss = train_loss / len(train_loader.dataset)

    # Validation 
    model.eval()
    val_loss = 0.0
    val_directional_accuracy = 0.0
    with torch.no_grad():
        for sequences, labels in val_loader:
            sequences = sequences.to(device)
            labels = labels.to(device)

            outputs = model(sequences)
            outputs = outputs.squeeze()

            loss = criterion(outputs, labels)
            val_loss += loss.item() * labels.size(0)

            # Compute directional accuracy
            prev_close = sequences[:, -1, 0]
            true_diff = labels - prev_close
            pred_diff = outputs - prev_close
            correct_direction = torch.eq(torch.sign(true_diff), torch.sign(pred_diff)).float()
            val_directional_accuracy += correct_direction.sum().item()

    avg_val_loss = val_loss / len(val_loader.dataset)
    avg_val_accuracy = val_directional_accuracy / len(val_loader.dataset)

    print(f"Epoch {epoch+1}/{EPOCHS}, "
          f"Train Loss: {avg_train_loss:.4f}, "
          f"Val Loss: {avg_val_loss:.4f}, "
          f"Val Directional Accuracy: {avg_val_accuracy:.4f} \n")

    # Early stopping
    early_stopping(avg_val_accuracy, model)

    if early_stopping.early_stop:
        print("Early stopping triggered. Training stopped.")
        break

    model.load_state_dict(torch.load('best_model.pth'))

Epoch 1/10000, Train Loss: 0.1232, Val Loss: 0.0851, Val Directional Accuracy: 0.6144 

Validation accuracy increased (inf --> 0.614357).  Saving model...


  model.load_state_dict(torch.load('best_model.pth'))


Epoch 2/10000, Train Loss: 0.0826, Val Loss: 0.0703, Val Directional Accuracy: 0.6544 

Validation accuracy increased (0.614357 --> 0.654424).  Saving model...
Epoch 3/10000, Train Loss: 0.0838, Val Loss: 0.0634, Val Directional Accuracy: 0.6444 

EarlyStopping counter: 1 out of 100, current best is 0.654424040066778
Epoch 4/10000, Train Loss: 0.0890, Val Loss: 0.0688, Val Directional Accuracy: 0.6694 

Validation accuracy increased (0.654424 --> 0.669449).  Saving model...
Epoch 5/10000, Train Loss: 0.0833, Val Loss: 0.0692, Val Directional Accuracy: 0.6511 

EarlyStopping counter: 1 out of 100, current best is 0.669449081803005
Epoch 6/10000, Train Loss: 0.0895, Val Loss: 0.0899, Val Directional Accuracy: 0.6144 

EarlyStopping counter: 2 out of 100, current best is 0.669449081803005
Epoch 7/10000, Train Loss: 0.0839, Val Loss: 0.0761, Val Directional Accuracy: 0.6611 

EarlyStopping counter: 3 out of 100, current best is 0.669449081803005
Epoch 8/10000, Train Loss: 0.0891, Val Loss:

In [604]:
model.load_state_dict(torch.load('best_model.pth', weights_only=True))

model.eval()
test_loss = 0.0
test_directional_accuracy = 0.0
with torch.no_grad():
    for sequences, labels in test_loader:
        sequences = sequences.to(device)
        labels = labels.to(device)

        outputs = model(sequences)
        outputs = outputs.squeeze()

        loss = criterion(outputs, labels)
        test_loss += loss.item() * labels.size(0)

        # Compute directional accuracy
        prev_close = sequences[:, -1, 0]
        true_diff = labels - prev_close
        pred_diff = outputs - prev_close
        correct_direction = torch.eq(torch.sign(true_diff), torch.sign(pred_diff)).float()
        test_directional_accuracy += correct_direction.sum().item()

avg_test_loss = test_loss / len(test_loader.dataset)
avg_test_accuracy = test_directional_accuracy / len(test_loader.dataset)

print(f"Test Loss: {avg_test_loss:.4f}, Test Directional Accuracy: {avg_test_accuracy:.4f}")


Test Loss: 0.0662, Test Directional Accuracy: 0.6600
