In [1]:
import numpy as np
import pandas as pd
import plotly.graph_objects as go
import plotly.express as px
from plotly.subplots import make_subplots
import matplotlib.pyplot as plt

# Custom libraries
from Components.TrainModel import DataModule, TEMPUS
from Components.TickerData import TickerData
from Components.BackTesting import BackTesting

# Torch ML libraries
import torch
import torch.nn as nn
from torch.optim import AdamW

# Default to the CPU and switch to the GPU only when CUDA is actually usable.
device = "cpu"
if torch.cuda.is_available():
    device = "cuda"
    # Opt in to TF32 kernels for both cuDNN and plain CUDA matmuls.
    torch.backends.cudnn.allow_tf32 = True
    torch.backends.cuda.matmul.allow_tf32 = True

In [None]:
#TODO: Feature importance with SHAP values and plot
#TODO: Hyperparameter tuning
#TODO: Buy signals become: prediction > current price by some delta (~5%). The reverse (a decrease by some delta) is a sell signal. A sensitivity analysis should be conducted to compare delta levels
#TODO: Use quantstats for an HTML tearsheet
#TODO: Market-regime detector with a hidden Markov model
#TODO: Add an Echo State Network (ESN) layer to the model
#TODO: Randomly sample 50 tickers, run the backtest for all of them, and plot. Take the average Sharpe ratio and other metrics

In [2]:
# Scrape every HTML table from the Nasdaq-100 Wikipedia article, take the table
# at position 4, keep only its column at position 1 (presumably the ticker
# symbols - verify if the page layout changes) and flatten it to a 1-D array.
tickers = (
    pd.read_html("https://en.wikipedia.org/wiki/Nasdaq-100")[4]
    .iloc[:, [1]]
    .to_numpy()
    .flatten()
)

In [3]:
#tickers = ['IONQ','QBTS','RGTI']
training_dfs, stocks_dfs = [], []
for ticker in tickers:
    # process_all() hands back a (training frame, raw stock frame) pair for one symbol
    training_data, raw_stock_data = TickerData(ticker, years=10, prediction_window=5).process_all()
    training_dfs += [training_data]
    stocks_dfs += [raw_stock_data]

# Stack the per-ticker frames, keeping each frame's own index
stock_data = pd.concat(stocks_dfs, ignore_index=False)
training_data = pd.concat(training_dfs, ignore_index=False)
training_data

Error while processing the data for CCEP
Error while merging data for CCEP; error: "['State'] not in index"


Unnamed: 0_level_0,Ticker,ema_20,ema_50,ema_100,stoch_rsi,macd,State,Close,shifted_prices
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2023-07-26 00:00:00-04:00,ADBE,510.347534,501.721580,498.307934,0.722119,8.426182,0,514.549988,530.299988
2023-07-27 00:00:00-04:00,ADBE,510.692528,502.201909,498.618074,0.573108,7.649928,0,513.969971,523.760010
2023-07-28 00:00:00-04:00,ADBE,512.423715,503.247716,499.217122,0.366788,8.143179,0,528.869995,526.880005
2023-07-31 00:00:00-04:00,ADBE,515.637646,504.930942,500.146881,0.364328,9.816886,0,546.169983,529.729980
2023-08-01 00:00:00-04:00,ADBE,518.824534,506.663061,501.116249,0.372044,11.250053,0,549.099976,520.599976
...,...,...,...,...,...,...,...,...,...
2025-03-17 00:00:00-04:00,ZS,198.903952,198.439884,195.948238,1.000000,-0.727792,0,202.649994,209.869995
2025-03-18 00:00:00-04:00,ZS,199.159766,198.563418,196.059956,0.913036,-0.414348,0,201.589996,215.729996
2025-03-19 00:00:00-04:00,ZS,199.615978,198.774656,196.216195,1.000000,0.024213,0,203.949997,211.550003
2025-03-20 00:00:00-04:00,ZS,199.990647,198.961925,196.361419,0.903473,0.335630,0,203.550003,209.449997


In [5]:
# Cache both frames on disk (index column included) so later sessions can reload them
for frame, csv_path in [
    (training_data, "Data/NASDAQ_100_TrainingData_v2.1.csv"),
    (stock_data, "Data/NASDAQ_100_StockData_v2.1.csv"),
]:
    frame.to_csv(csv_path, index=True)

In [2]:
# Reload the cached training frame and promote its 'Date' column to the index
training_data = pd.read_csv("Data/NASDAQ_100_TrainingData_v2.1.csv").set_index("Date")

In [3]:
# Reload the cached raw stock frame and promote its 'Date' column to the index
stock_data = pd.read_csv("Data/NASDAQ_100_StockData_v2.1.csv").set_index("Date")

In [6]:
# Hyperparameters for this training run.
# NOTE(review): the tuned configuration quoted below lists window_size=10 and a
# 'grad_clip_norm' key, while the dict uses window_size=50 and 'clip_size' -
# confirm which values/keys TEMPUS is expected to read.

#Best config: {'lr': 4.390449033248878e-05, 'hidden_size': 256, 'num_layers': 1, 'dropout': 0.3477694988633191, 'weight_decay': 0.0001801390872725824, 'batch_size': 16, 'window_size': 10, 'grad_clip_norm': 0.8393802881451728}

config = {
    "lr": 4.390449033248878e-05,
    "weight_decay": 0.0001801390872725824,
    "hidden_size": 256,
    "num_layers": 1,
    "dropout": 0.3477694988633191,
    "batch_size": 16,
    "window_size": 50,
    "clip_size": 0.8393802881451728,
    "epochs": 20,
    "device": "cuda" if torch.cuda.is_available() else "cpu"
}

# Build the loaders, then read the feature count straight off the data module
data_module = DataModule(
    training_data,
    batch_size=config["batch_size"],
    window_size=config["window_size"],
)
config["input_size"] = data_module.num_features

# Loss function
criterion = nn.MSELoss()
# Model and its optimizer
model = TEMPUS(config)
optimizer = AdamW(
    model.parameters(),
    weight_decay=config["weight_decay"],
    lr=config["lr"],
)
# Fit on the train loader, evaluating against the test loader
history = model.train_model(
    data_module.train_loader,
    data_module.test_loader,
    criterion,
    optimizer,
    config["epochs"],
)

Training Epochs:   0%|          | 0/20 [00:00<?, ?it/s]


Best MAPE: 8.29%


In [7]:
def export_model_to_torchscript(model, save_path, data_loader, device):
    """
    Trace a PyTorch model with one sample batch and save it as TorchScript.

    The model is switched to evaluation mode and moved to `device`; it is left
    in that state afterwards. Any missing parent directory of `save_path` is
    created before saving.

    Parameters:
        model (nn.Module): The trained PyTorch model to export.
        save_path (str): Destination file for the TorchScript model (including extension, e.g., 'TEMPUS_v3.pt').
        data_loader (DataLoader): Iterable yielding (inputs, targets) batches; the first batch's inputs are the tracing example.
        device (str): Device on which the model operates, e.g., 'cpu' or 'cuda'.

    Returns:
        The `save_path` that was written, or None when the export failed
        (the error is printed rather than raised).
    """
    import os  # function-scope import keeps this cell self-contained

    try:
        # Set model to evaluation mode
        model.eval()

        # Fetch a sample input tensor from the loader
        example_inputs, _ = next(iter(data_loader))
        example_inputs = example_inputs.to(device)

        # Saving into a directory that does not exist yet would fail, so create it
        target_dir = os.path.dirname(save_path)
        if target_dir:
            os.makedirs(target_dir, exist_ok=True)

        # Export model to TorchScript using tracing; no gradients are needed for this
        with torch.no_grad():
            scripted_model = torch.jit.trace(model.to(device), example_inputs)

        # Save the TorchScript model
        torch.jit.save(scripted_model, save_path)

        print(f"Model successfully exported and saved to {save_path}")
        return save_path

    except Exception as e:
        # Deliberately best-effort: report the failure and signal it with None
        print(f"Error exporting model to TorchScript: {str(e)}")
        return None

# Trace the trained TEMPUS model on a batch from the test loader and write it to disk
script_path = export_model_to_torchscript(
    model,
    "Models/Tempus_v2.pt",
    data_module.test_loader,
    config["device"],
)


  if seq_len % factor != 0:
  if features == lstm_features.size(2):  # If dimensions match


Model successfully exported and saved to Models/Tempus_v2.pt


In [2]:
# %%
def load_and_predict_with_torchscript(script_path, out_of_sample_data, device, window_size):
    """
    Loads a TorchScript model and uses it for prediction on out-of-sample data.

    Parameters:
        script_path (str): Path to the saved TorchScript model
        out_of_sample_data (DataFrame): New data for prediction
        device (str): Device on which to run inference
        window_size (int): Window size used during training

    Returns:
        numpy.ndarray: Per-batch model outputs concatenated along axis 0

    Raises:
        ValueError: If the test loader built from `out_of_sample_data` yields no batches.
    """
    # map_location remaps stored tensors onto `device`, so a model exported on
    # a GPU can still be loaded on a CPU-only machine
    loaded_model = torch.jit.load(script_path, map_location=device)
    loaded_model.to(device)
    loaded_model.eval()

    print("TorchScript model loaded successfully")

    # Prepare the out-of-sample data with the same DataModule used for training
    # (DataModule is imported at the top of the notebook).
    data_module_test = DataModule(
        out_of_sample_data,
        window_size=window_size,
        batch_size=1,  # For prediction, we can use batch size of 1
        eval_size=0.0  # Intended to send all rows to the test loader - confirm against DataModule
    )

    # Run inference batch by batch without tracking gradients
    predictions = []
    with torch.no_grad():
        for X, _ in data_module_test.test_loader:
            output = loaded_model(X.to(device))
            predictions.append(output.cpu().numpy())

    if not predictions:
        # np.concatenate would fail with an opaque message on an empty list
        raise ValueError(
            "No prediction batches were produced; the out-of-sample data may be "
            "too short for the requested window_size"
        )

    # Concatenate all predictions
    return np.concatenate(predictions, axis=0)


# Out-of-sample demo: pull one year of data for a single ticker and run the
# exported TorchScript model on it. This is only an example - replace it with
# your actual out-of-sample data (e.g. the most recent data not used in training).
ticker = "IONQ"  # Replace with your ticker of interest
out_of_sample_data, raw_stock_data = TickerData(ticker, years=1, prediction_window=5).process_all()

# Load the model and make predictions
predictions = load_and_predict_with_torchscript(
    "Models/Tempus_v2.pt",
    out_of_sample_data,
    device,
    50,
)

print(f"Generated {len(predictions)} predictions for {ticker}")

# Visualize the predictions
plt.figure(figsize=(12, 6))
plt.plot(predictions[:, 0], label='Predicted Price Change (%)')
plt.gca().set(
    title=f'Price Change Predictions for {ticker} using TorchScript Model',
    xlabel='Time Steps',
    ylabel='Price Change (%)',
)
plt.legend()
plt.grid(True)
plt.show()


TorchScript model loaded successfully


ValueError: Length of values (0) does not match length of index (163)

In [14]:
# Echo the current `predictions` array (assigned by the TorchScript inference cell) for inspection
predictions

array([[[ 197.23183 ]],

       [[ 199.64954 ]],

       [[ 219.9546  ]],

       [[ 245.14612 ]],

       [[ 243.0559  ]],

       [[ 222.05608 ]],

       [[ 204.10439 ]],

       [[ 166.6733  ]],

       [[ 142.82838 ]],

       [[ 103.549255]],

       [[ 106.09485 ]],

       [[ 112.11592 ]],

       [[  65.137634]],

       [[  59.818314]],

       [[  42.377377]],

       [[  67.400314]],

       [[  28.211246]],

       [[ 335.3945  ]],

       [[ 277.64224 ]],

       [[ 174.39905 ]],

       [[ 372.317   ]],

       [[ 456.91788 ]],

       [[ 523.6588  ]],

       [[ 526.3262  ]],

       [[ 662.19684 ]],

       [[ 727.921   ]],

       [[ 839.13196 ]],

       [[1142.3556  ]],

       [[1283.5662  ]],

       [[1299.2537  ]],

       [[1823.0692  ]],

       [[2044.4244  ]],

       [[2401.3284  ]],

       [[2903.4006  ]],

       [[3192.3923  ]],

       [[3339.8413  ]],

       [[3675.4111  ]],

       [[4101.0884  ]],

       [[4474.4575  ]],

       [[4557.9507  ]],



In [58]:
# Derive trading signals from the predictions already stored in 'Predicted'.
# Buy (1) when a prediction exists and is more than 5% above the close.
training_data['entry_signal'] = np.where(
    training_data['Predicted'].notna()
    & training_data['Predicted'].gt(training_data['Close'] * 1.05),
    1,
    0,
)

# Sell (1) when a prediction exists and is more than 5% below the close.
training_data['exit_signal'] = np.where(
    training_data['Predicted'].notna()
    & training_data['Predicted'].lt(training_data['Close'] * 0.95),
    1,
    0,
)


In [None]:
# Ask the model for predictions, then keep only the (Date, Ticker) rows present in both frames
preds_df = model.get_predictions(training_data)
merged_df = stock_data.merge(preds_df, how='inner', on=['Date', 'Ticker'])

In [None]:
# Create a combined plot with stock prices and prediction markers
def plot_combined_predictions(data, ticker):
    """
    Plot one ticker's closing price with markers for predicted signals.

    Rows are split by the 'Predicted' class code (2 = buy, 1 = sell, 0 = hold)
    and by whether 'Predicted' equals 'Actual'; each of the six groups gets its
    own marker trace on top of the price line.

    Parameters:
        data (DataFrame): Must provide 'Ticker', 'Date', 'Close', 'Predicted' and 'Actual' columns.
        ticker (str): Ticker symbol to filter `data` on.

    Returns:
        None after showing the figure, or an error message string when
        `ticker` is not a string.
    """
    # Guard clause: only string tickers are accepted
    if not isinstance(ticker, str):
        return "Ticker provided is not a valid value"

    # Filter for a particular ticker
    data = data[data['Ticker'] == ticker]

    # Create figure
    fig = go.Figure()

    # Plot stock price trend line
    fig.add_trace(go.Scatter(
        x=data['Date'],
        y=data['Close'],
        mode='lines',
        name='Stock Price',
        line=dict(width=1)
    ))

    # (label, class code, marker symbol, colour when correct, size when correct, size when incorrect)
    signal_styles = [
        ('Buy', 2, 'triangle-up', 'green', 10, 8),
        ('Sell', 1, 'triangle-down', 'red', 10, 8),
        ('Hold', 0, 'circle', 'blue', 8, 6),
    ]

    for label, code, symbol, color, correct_size, incorrect_size in signal_styles:
        signals = data[data['Predicted'] == code]
        is_correct = signals['Predicted'] == signals['Actual']
        correct = signals[is_correct]
        incorrect = signals[~is_correct]

        # Take 'Close' from the subset itself: looking it up again through the
        # index labels returns extra rows whenever labels repeat, which leaves
        # x and y with different lengths.
        fig.add_trace(go.Scatter(
            x=correct['Date'],
            y=correct['Close'],
            mode='markers',
            name=f'Correct {label} Signal',
            marker=dict(symbol=symbol, size=correct_size, color=color)
        ))

        fig.add_trace(go.Scatter(
            x=incorrect['Date'],
            y=incorrect['Close'],
            mode='markers',
            name=f'Incorrect {label} Signal',
            marker=dict(symbol=symbol, size=incorrect_size, color='gray', opacity=0.2)
        ))

    # Update layout
    fig.update_layout(
        title=f'{ticker} Stock Price - Actual/Predicted Signals',
        #xaxis_title='Date',
        yaxis_title='Price (USD)',
        template='plotly_dark',
        height=600,
        legend=dict(orientation="h", yanchor="bottom", y=1.02)
    )

    fig.show()

# Render the combined price/signal chart for a single ticker
plot_combined_predictions(data=merged_df, ticker='PLTR')

In [None]:
from Components.BackTesting import BackTesting
import pandas as pd

#merged_df = pd.read_csv('Data/NASDAQ_100_PredictictionsData.csv')

# Backtest a single ticker on the merged price/prediction frame
initial_capital, ticker = 10000.0, 'PLTR'
backtester = BackTesting(merged_df, ticker, initial_capital)
results, _ = backtester.run_simulation()

# plot_performance() returns three figures; show them in the order returned
trades_fig, value_fig, exposure_fig = backtester.plot_performance()
for performance_fig in (trades_fig, value_fig, exposure_fig):
    performance_fig.show()

In [None]:
class TCNBlock(nn.Module):
    """
    Residual block of two dilated 1-D convolutions.

    Each convolution is followed by batch normalisation, ReLU and dropout. The
    block input is added back to the result (through a 1x1 convolution when the
    channel counts differ) and a final ReLU is applied to the sum.

    Parameters:
        input_dim (int): Number of input channels.
        output_dim (int): Number of output channels.
        kernel_size (int): Kernel size of both convolutions.
        dilation (int): Dilation of both convolutions.
        padding (int): Padding of both convolutions. The caller must pick it so
            the sequence length is preserved, otherwise the residual addition
            in `forward` cannot line up.
        dropout (float): Dropout probability after each activation.
    """

    def __init__(self, input_dim, output_dim, kernel_size, dilation, padding, dropout=0.2):
        super().__init__()

        # First conv stage
        self.conv1 = nn.Conv1d(
            in_channels=input_dim,
            out_channels=output_dim,
            kernel_size=kernel_size,
            dilation=dilation,
            padding=padding
        )
        self.norm1 = nn.BatchNorm1d(output_dim)
        self.relu1 = nn.ReLU()
        self.dropout1 = nn.Dropout(dropout)

        # Second conv stage
        self.conv2 = nn.Conv1d(
            in_channels=output_dim,
            out_channels=output_dim,
            kernel_size=kernel_size,
            dilation=dilation,
            padding=padding
        )
        self.norm2 = nn.BatchNorm1d(output_dim)
        self.relu2 = nn.ReLU()
        self.dropout2 = nn.Dropout(dropout)

        # Project the skip path with a 1x1 conv only when channel counts differ
        self.residual = nn.Conv1d(input_dim, output_dim, 1) if input_dim != output_dim else nn.Identity()

    def forward(self, x):
        """Apply the block to `x`, which is fed directly to Conv1d (channels before sequence length)."""
        # Skip path
        residual = self.residual(x)

        # First conv stage
        out = self.conv1(x)
        out = self.norm1(out)
        out = self.relu1(out)
        out = self.dropout1(out)

        # Second conv stage (ReLU applied once; a repeated ReLU is a no-op)
        out = self.conv2(out)
        out = self.norm2(out)
        out = self.relu2(out)
        out = self.dropout2(out)

        # Add the skip path and apply the final activation
        return self.relu1(out + residual)

In [None]:
class EchoStateNetwork(nn.Module):
    """
    Echo State Network layer with fixed random reservoir and trainable readout.

    The input weights `W_in` and reservoir weights `W` are registered as
    buffers (not trained); only the linear readout `W_out` is trainable. With
    `bidirectional=True` a second, independently initialised reservoir reads
    the sequence in reverse, and a linear layer merges both directions.

    Parameters:
        input_size (int): Number of input features per time step.
        reservoir_size (int): Number of reservoir units.
        output_size (int): Number of output features per time step.
        spectral_radius (float): Target spectral radius of the reservoir matrix.
        sparsity (float): Fraction of reservoir entries that receive a random value.
        noise (float): Scale of the Gaussian noise added inside every reservoir
            step. NOTE(review): it is added regardless of train/eval mode, so
            outputs are stochastic whenever `noise` is non-zero.
        bidirectional (bool): Whether to add the reverse-direction reservoir.
    """

    def __init__(self, input_size, reservoir_size, output_size, spectral_radius=0.9,
                 sparsity=0.1, noise=0.001, bidirectional=False):
        super().__init__()

        self.input_size = input_size
        self.reservoir_size = reservoir_size
        self.output_size = output_size
        self.spectral_radius = spectral_radius
        self.sparsity = sparsity
        self.noise = noise
        self.bidirectional = bidirectional

        # Input weights (fixed)
        self.register_buffer('W_in', self._initialize_input_weights())

        # Reservoir weights (fixed)
        self.register_buffer('W', self._initialize_reservoir_weights())

        # Output weights (trainable)
        self.W_out = nn.Linear(reservoir_size, output_size)

        if bidirectional:
            # Second set of weights for backward direction
            self.register_buffer('W_in_reverse', self._initialize_input_weights())
            self.register_buffer('W_reverse', self._initialize_reservoir_weights())
            self.W_out_reverse = nn.Linear(reservoir_size, output_size)
            # Combined output
            self.W_combined = nn.Linear(output_size * 2, output_size)

    def _initialize_input_weights(self):
        """Return a Xavier-uniform (reservoir_size, input_size) input matrix."""
        W_in = torch.zeros(self.reservoir_size, self.input_size)
        W_in = torch.nn.init.xavier_uniform_(W_in)
        return W_in

    def _initialize_reservoir_weights(self):
        """Return a sparse random reservoir matrix rescaled to the target spectral radius."""
        # Fill a random subset of entries with standard-normal values
        W = torch.zeros(self.reservoir_size, self.reservoir_size)
        num_connections = int(self.sparsity * self.reservoir_size * self.reservoir_size)
        indices = torch.randperm(self.reservoir_size * self.reservoir_size)[:num_connections]
        rows = indices // self.reservoir_size
        cols = indices % self.reservoir_size
        values = torch.randn(num_connections)
        W[rows, cols] = values

        # Scale to desired spectral radius
        eigenvalues = torch.linalg.eigvals(W)
        max_eigenvalue = torch.max(torch.abs(eigenvalues))
        # A matrix with no non-zero eigenvalue (e.g. no connections at all)
        # cannot be rescaled; dividing by zero would turn every entry into NaN.
        if max_eigenvalue > 0:
            W = W * (self.spectral_radius / max_eigenvalue)
        return W

    def _reservoir_step(self, x, h_prev, W_in, W):
        """Execute one step of the reservoir"""
        # h_new = tanh(W_in @ x + W @ h_prev + noise)
        h_new = torch.tanh(torch.mm(x, W_in.t()) + torch.mm(h_prev, W.t()) +
                           self.noise * torch.randn(h_prev.shape, device=h_prev.device))
        return h_new

    def forward(self, x):
        """
        x: input tensor of shape (batch_size, seq_len, input_size)

        Returns a tensor of shape (batch_size, seq_len, output_size).
        """
        batch_size, seq_len, _ = x.size()

        # Forward pass
        h = torch.zeros(batch_size, self.reservoir_size, device=x.device)
        outputs_forward = []

        for t in range(seq_len):
            h = self._reservoir_step(x[:, t], h, self.W_in, self.W)
            outputs_forward.append(self.W_out(h))

        outputs_forward = torch.stack(outputs_forward, dim=1)  # (batch_size, seq_len, output_size)

        if not self.bidirectional:
            return outputs_forward

        # Backward pass for bidirectional ESN
        h_reverse = torch.zeros(batch_size, self.reservoir_size, device=x.device)
        outputs_reverse = []

        for t in range(seq_len - 1, -1, -1):
            h_reverse = self._reservoir_step(x[:, t], h_reverse, self.W_in_reverse, self.W_reverse)
            outputs_reverse.append(self.W_out_reverse(h_reverse))

        # Steps were collected last-to-first; flip once to restore time order
        outputs_reverse.reverse()
        outputs_reverse = torch.stack(outputs_reverse, dim=1)  # (batch_size, seq_len, output_size)

        # Combine forward and backward outputs
        combined = torch.cat((outputs_forward, outputs_reverse), dim=2)
        return self.W_combined(combined)

In [None]:
#ticker.get_balance_sheet(freq='quarterly')
#ticker.get_calendar()
#ticker.get_cash_flow(freq='quarterly')
#earnings_data = ticker.get_earnings_dates()
#income_statement = ticker.get_income_stmt(freq='yearly').T
#ticker.get_institutional_holders()
#ticker.get_recommendations()
#ticker.get_sustainability()

In [None]:
# define a function to fetch the options data for a given ticker symbol
#def fetch_options_data(ticker_symbol):
    #ticker = yf.Ticker(ticker_symbol)
#    options_dates = ticker.options
#    options_data = ticker.option_chain(date='2025-03-21')
#    return options_data.calls, options_data.puts
##ionq_stock_data = ionq_stock_data.sort_values(by='Date', ascending=False)