In [1]:
import numpy as np
import pandas as pd
import plotly.graph_objects as go
import plotly.express as px
from altair.vegalite.v5.theme import theme
from plotly.subplots import make_subplots
import matplotlib.pyplot as plt
from sympy.printing.dot import template

# Custom libraries
from Components.TrainModel import DataModule, TEMPUS, torchscript_predict
from Components.TickerData import TickerData, upload_data_sql, fetch_sql_data
from Components.BackTesting import BackTesting

# Torch ML libraries
import torch
import torch.nn as nn
from torch.optim import AdamW

# Choose the compute device once for the notebook. TF32 matmul/cuDNN kernels are
# switched on only in the CUDA branch.
if torch.cuda.is_available():
    device = "cuda"
    torch.backends.cuda.matmul.allow_tf32 = True
    torch.backends.cudnn.allow_tf32 = True
else:
    device = "cpu"

In [None]:
#TODO: Compute feature importance with SHAP values and plot it
#TODO: Add an Echo State Network (ESN) layer to the model
#TODO: Randomly sample 50 tickers, run the backtest for all of them, and plot the results; report the average Sharpe ratio and other metrics
#TODO: Build a Streamlit page for future predictions
#TODO: Explore more features for the model

In [8]:
# Parse every HTML table on the Nasdaq-100 Wikipedia article and keep the one at position 4.
# NOTE(review): the table position is hard-coded; a change in the page layout would
# silently select a different table.
tickers = pd.read_html("https://en.wikipedia.org/wiki/Nasdaq-100")[4]
# Second column (position 1) as a flat 1-D array — presumably the ticker symbols; confirm against the page.
nasdaq_tickers = tickers.iloc[:, 1].to_numpy(copy=True)

In [21]:
# Parse every HTML table on the Russell 1000 Index Wikipedia article and keep the one at position 3.
# NOTE(review): the table position is hard-coded; a change in the page layout would
# silently select a different table.
tickers = pd.read_html("https://en.wikipedia.org/wiki/Russell_1000_Index")[3]
# Second column (position 1) as a flat 1-D array — presumably the ticker symbols; confirm against the page.
rusell_tickers = tickers.iloc[:, 1].to_numpy(copy=True)

In [65]:
# Parse every HTML table on the "List of S&P 500 companies" Wikipedia article and keep the first one.
# NOTE(review): the table position is hard-coded; a change in the page layout would
# silently select a different table.
tickers = pd.read_html("https://en.wikipedia.org/wiki/List_of_S%26P_500_companies")[0]
# First column (position 0) as a flat 1-D array — presumably the ticker symbols; confirm against the page.
SnP_tickers = tickers.iloc[:, 0].to_numpy(copy=True)

In [2]:
# Parse every HTML table on the "List of S&P 600 companies" Wikipedia article and keep the first one.
# NOTE(review): the table position is hard-coded; a change in the page layout would
# silently select a different table.
tickers = pd.read_html("https://en.wikipedia.org/wiki/List_of_S%26P_600_companies")[0]
# First column (position 0) as a flat 1-D array — presumably the ticker symbols; confirm against the page.
SnP600_tickers = tickers.iloc[:, 0].to_numpy(copy=True)

In [None]:
# Join the Nasdaq-100 and S&P 500 symbol arrays end to end (duplicates are not removed).
# NOTE(review): the following cell reassigns `tickers` to a hard-coded list.
tickers = np.concatenate([nasdaq_tickers, SnP_tickers])

In [2]:
# Build the training set: process each ticker separately, then stack the per-ticker frames.
tickers = ['IONQ','QBTS','RGTI']
training_dfs, stocks_dfs = [], []
for ticker in tickers:
    # process_all() returns a pair: (feature frame used for training, raw price frame)
    training_data, raw_stock_data = TickerData(ticker, years=4, prediction_window=5).process_all()
    training_dfs.append(training_data)
    stocks_dfs.append(raw_stock_data)

# Keep each frame's own index when stacking (pd.concat's default behaviour).
training_data = pd.concat(training_dfs)
stock_data = pd.concat(stocks_dfs)

In [3]:
# Show the combined training frame; as the cell's last expression it is rendered by the notebook.
training_data

Unnamed: 0_level_0,Ticker,shifted_prices,ema_20,ema_50,ema_200,stoch_rsi,macd,b_percent,State,adx,keltner_upper,keltner_lower,Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
2021-05-13 00:00:00-04:00,IONQ,9.94,10.376058,10.522860,10.640468,0.014079,-0.143110,0.165770,2,46.110095,10.773494,9.933493,10.08
2021-05-14 00:00:00-04:00,IONQ,10.00,10.345005,10.504317,10.634593,0.000000,-0.150827,0.168324,2,47.249459,10.734723,9.910722,10.05
2021-05-17 00:00:00-04:00,IONQ,10.00,10.312147,10.484540,10.628279,0.000000,-0.159143,0.148785,2,47.597354,10.676202,9.904202,10.00
2021-05-18 00:00:00-04:00,IONQ,9.97,10.277657,10.463577,10.621530,0.000000,-0.167833,0.139752,2,47.989313,10.604109,9.908108,9.95
2021-05-19 00:00:00-04:00,IONQ,10.00,10.242642,10.441868,10.614450,0.000000,-0.175920,0.148204,2,48.504499,10.517596,9.925595,9.91
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2025-03-24 00:00:00-04:00,RGTI,7.92,9.645730,9.859334,6.206588,0.963041,-0.261180,0.684199,1,35.936467,11.780730,7.510729,9.78
2025-03-25 00:00:00-04:00,RGTI,7.81,9.662327,9.857791,6.242542,0.926631,-0.218570,0.681606,1,29.882775,11.767327,7.557326,9.82
2025-03-26 00:00:00-04:00,RGTI,8.49,9.616391,9.831211,6.271771,0.726009,-0.233749,0.529544,1,25.738092,11.753391,7.479391,9.18
2025-03-27 00:00:00-04:00,RGTI,8.15,9.507211,9.777830,6.293644,0.439021,-0.299616,0.351471,1,22.246792,11.592211,7.422211,8.47


In [5]:
# %%
# Import stock_data dataframe into an Azure SQL database table using SQLAlchemy
# NOTE(review): the saved output of this cell shows AAP rows starting in 2005 and about
# 2.37M uploaded records, which does not match the three-ticker, 4-year `stock_data`
# built earlier in this notebook — the cell was presumably run against a different
# `stock_data`; confirm before re-running.
upload_data_sql(stock_data,"SNP600_1day")

Sample data to be uploaded:
        Date Ticker       Open       High        Low      Close  Volume
0 2005-04-08    AAP  29.404375  29.438700  29.043973  29.118341  207750
1 2005-04-11    AAP  29.141222  29.244197  29.089738  29.175547  358050
2 2005-04-12    AAP  29.164119  29.661818  28.946732  29.495918  550350
3 2005-04-13    AAP  29.415814  29.558833  28.860906  28.918116  676350
4 2005-04-14    AAP  29.021105  29.175564  28.408990  28.466196  715650
Successfully uploaded 2372517 records to SNP600_1day table


In [None]:
# NOTE(review): `os` is not referenced anywhere else in the visible notebook — confirm it is still needed.
import os
# Presumably reads back the table written by upload_data_sql (verify in Components.TickerData).
# The return value is not assigned, so it is only displayed as the cell output.
fetch_sql_data('SNP600_1day')

In [4]:
# Hyperparameters for this training run. The values were taken from a tuning result recorded as:
#Best config: {'lr': 4.390449033248878e-05, 'hidden_size': 256, 'num_layers': 1, 'dropout': 0.3477694988633191, 'weight_decay': 0.0001801390872725824, 'batch_size': 16, 'window_size': 10, 'grad_clip_norm': 0.8393802881451728}
# NOTE(review): that record lists window_size=10 while 50 is used here, and it names the
# clipping value 'grad_clip_norm' whereas the key below is "clip_size" — confirm both
# differences are intentional and that TEMPUS reads "clip_size".
config = dict(
    lr=4.390449033248878e-05,
    weight_decay=0.0001801390872725824,
    hidden_size=256,
    num_layers=1,
    dropout=0.3477694988633191,
    batch_size=16,
    window_size=50,
    clip_size=0.8393802881451728,
    epochs=20,
    device="cuda" if torch.cuda.is_available() else "cpu",
)

# Windowed train/test loaders built from the combined training frame.
data_module = DataModule(
    training_data,
    window_size=config["window_size"],
    batch_size=config["batch_size"],
)
# The feature count is only known once the data module exists, so it is added to the config afterwards.
config["input_size"] = data_module.num_features

# Model, loss and optimiser.
model = TEMPUS(config, scaler=data_module.scaler)
criterion = nn.MSELoss()
optimizer = AdamW(
    model.parameters(),
    lr=config["lr"],
    weight_decay=config["weight_decay"],
)

# Run training for the configured number of epochs and keep the returned history.
history = model.train_model(
    data_module.train_loader,
    data_module.test_loader,
    criterion,
    optimizer,
    config["epochs"],
)

  param_schemas = callee.param_schemas()
  param_schemas = callee.param_schemas()


Training Epochs:   0%|          | 0/20 [00:00<?, ?it/s]

KeyboardInterrupt: 

In [None]:
# Build the training-history figure from the model and render it.
# Requires `model` from the training cell.
training_fig = model.plot_training_history()
training_fig.show()

In [None]:
# Export the trained TEMPUS model
# The file written here ("Models/Tempus_v2.pt") is the one the later prediction cells
# pass to torchscript_predict as model_path.
script_path = model.export_model_to_torchscript(
    save_path="Models/Tempus_v2.pt",
    data_loader=data_module.test_loader,
    device="cpu"
)

In [66]:
import random

# Backtest the exported model on a random sample of tickers.
# NOTE(review): the sample is drawn from `SnP_tickers` (the S&P 500 table), not
# `SnP600_tickers`, and no random seed is set, so every run draws a different sample.
sample_size = min(50, len(SnP_tickers))  # random.sample raises if asked for more items than exist
sampled_tickers = random.sample(list(SnP_tickers), sample_size)
initial_capital = 1000.0

# Per-ticker results are collected in their own lists so that `preds_dfs` and `returns`
# only ever hold the final concatenated DataFrames.
pred_frames = []
return_frames = []
for ticker in sampled_tickers:
    out_of_sample_data, raw_stock_data = TickerData(ticker, years=4, prediction_window=5,prediction_mode=True).process_all()

    # process_all() yields None when a ticker cannot be fetched or processed; skip those tickers.
    if out_of_sample_data is None or raw_stock_data is None:
        continue

    # Load the exported model and predict on this ticker's data
    preds_df = torchscript_predict(
        model_path="Models/Tempus_v2.pt",
        input_df=out_of_sample_data,
        device="cpu",
        window_size=50,
        target_col="shifted_prices"
    )
    # Attach the raw OHLCV columns by index
    preds_df = pd.merge(preds_df, raw_stock_data[['Open', 'High', 'Low', 'Volume','Close']], left_index=True, right_index=True, how='left')
    pred_frames.append(preds_df)

    backtester = BackTesting(preds_df, ticker, initial_capital, pct_change_entry=0.05, pct_change_exit=0.03)
    backtester.run_simulation()
    bt_results = pd.DataFrame(backtester.pf.returns())
    # Compound the per-period returns into a cumulative return in percent. The returns
    # column is addressed by position so this does not depend on it being labelled 0.
    period_returns = bt_results.iloc[:, 0]
    bt_results['cumulative_return'] = ((1 + period_returns).cumprod() - 1).to_numpy() * 100
    bt_results['ticker'] = ticker
    return_frames.append(bt_results)

if not pred_frames:
    # pd.concat on an empty list fails with a generic message; say what actually went wrong.
    raise ValueError("No sampled ticker produced predictions; nothing to concatenate")

preds_dfs = pd.concat(pred_frames, ignore_index=False)
returns = pd.concat(return_frames, ignore_index=False)

$BF.B: possibly delisted; no price data found  (1d 2021-04-05 -> 2025-04-04)
BF.B: $BF.B: possibly delisted; no earnings dates found


AttributeError: Could not fetch data for ticker BF.B, returning empty dataframes
Error while processing the data for BF.B
Error while merging data for BF.B; error: "['Date', 'State', 'shifted_prices'] not in index"
Error while processing the data for NWS
Error while merging data for NWS; error: "['State'] not in index"


In [67]:
# Visualise the sampled backtests: one cumulative-return curve per ticker, then a summary
# of how many tickers finished above zero.

# Range-selector buttons shown above the time axis.
range_buttons = [
    dict(count=1, label="1m", step="month", stepmode="backward"),
    dict(count=6, label="6m", step="month", stepmode="backward"),
    dict(count=1, label="YTD", step="year", stepmode="todate"),
    dict(count=1, label="1y", step="year", stepmode="backward"),
    dict(step="all"),
]

# reset_index() exposes the frame's index as the 'index' column used for the x axis.
fig = px.line(
    returns.reset_index(),
    x='index',
    y='cumulative_return',
    color='ticker',
    title='Cumulative Returns by Ticker',
    labels={'index': 'Date', 'cumulative_return': 'Cumulative Return'},
)
fig.update_layout(
    xaxis_title='Date',
    yaxis_title='Cumulative Return (%)',
    showlegend=False,
    height=600,
    template='ggplot2',
    xaxis={
        'rangeselector': {'buttons': range_buttons},
        'rangeslider': {'visible': False},
        'type': "date",
    },
)
fig.show()

# Final cumulative return of each ticker (last row per group).
last_returns = returns.groupby('ticker')['cumulative_return'].last()

# Tickers ending strictly above zero vs. at or below zero.
positive_count = (last_returns > 0).sum()
negative_count = (last_returns <= 0).sum()
total_count = len(last_returns)

# Tabular view of the final returns, best performer first.
last_returns_df = (
    last_returns
    .reset_index()
    .rename(columns={'ticker': 'Ticker', 'cumulative_return': 'Final Return'})
    .sort_values('Final Return', ascending=False)
)

# Pie chart of the positive/negative split.
fig_pie = px.pie(
    values=[positive_count, negative_count],
    names=['Positive', 'Negative'],
    title='Proportion of Tickers with Positive vs Negative Returns',
    color_discrete_sequence=['green', 'red'],
    template='ggplot2',
)
fig_pie.update_traces(textinfo='percent+label')
fig_pie.update_layout(showlegend=False)
fig_pie.show()

# Text summary; the division is skipped when there is nothing to summarise.
if total_count == 0:
    print("No ticker data available for analysis")
else:
    positive_proportion = positive_count / total_count
    print(f"Proportion of tickers with positive cumulative returns: {positive_proportion:.2%}")
    print(f"Positive tickers: {positive_count} out of {total_count}")
    print(f"Negative tickers: {negative_count} out of {total_count}")

Proportion of tickers with positive cumulative returns: 72.92%
Positive tickers: 35 out of 48
Negative tickers: 13 out of 48


In [43]:
# Pick one of the backtested tickers at random and plot its predictions.
# Relies on `random` having been imported by the sampling backtest cell.
selected_ticker = random.choice(preds_dfs['Ticker'].unique())

# Rows belonging to the chosen ticker only.
preds_df = preds_dfs[preds_dfs['Ticker'] == selected_ticker]

# (column, legend label, line style); a style of None leaves Plotly's default line.
trace_specs = [
    ('Predicted', 'Predicted', dict(color="Grey")),
    ('Close', 'Close (Unshifted)', dict(color="Blue")),
    ('Actual', 'Close (Shifted)', None),
]

fig = go.Figure()
for column, label, line_style in trace_specs:
    fig.add_trace(
        go.Scatter(x=preds_df.index, y=preds_df[column], mode='lines', name=label, line=line_style)
    )
fig.update_layout(
    title=f'Prediction for {selected_ticker}',
    xaxis_title='Date',
    yaxis_title='Price (USD)',
    height=600,
    legend={'orientation': "h", 'yanchor': "bottom", 'y': 1.02},
    template='ggplot2',
)
fig.show()

In [11]:
# Single-ticker backtest of the exported model.
# `BackTesting` and `pd` are already imported in the notebook's first cell, so they are
# not re-imported here.
# NOTE(review): unlike the sampled backtest cell, TickerData is built here without
# prediction_mode=True and the exit threshold is 0.02 rather than 0.03 — confirm both are intended.
ticker = 'PLTR'
out_of_sample_data, raw_stock_data = TickerData(ticker, years=1, prediction_window=5).process_all()
if out_of_sample_data is None or raw_stock_data is None:
    # The sampled backtest cell shows process_all() can yield None for a ticker that could
    # not be processed; fail here with a clear message instead of deep inside prediction.
    raise ValueError(f"Could not build model inputs for {ticker}; cannot run the backtest")

preds_df = torchscript_predict(
    model_path="Models/Tempus_v2.pt",
    input_df=out_of_sample_data,
    device="cpu",
    window_size=50,
    target_col="shifted_prices"
)
# Attach the raw OHLCV columns by index
preds_df = pd.merge(preds_df, raw_stock_data[['Open', 'High', 'Low', 'Volume','Close']], left_index=True, right_index=True, how='left')

initial_capital = 1000.0
backtester = BackTesting(preds_df, ticker, initial_capital,pct_change_entry=0.05,pct_change_exit=0.02)
backtester.run_simulation()
# The three figures are built but deliberately not shown; uncomment to display them.
trades_fig, value_fig, exposure_fig = backtester.plot_performance()
#trades_fig.show()
#value_fig.show()
#exposure_fig.show()

Running vectorbt backtest for PLTR


In [None]:
import quantstats as qs

# Keep the single-strategy return series under its own name so that the multi-ticker
# `returns` DataFrame built by the sampling backtest cell (and read by the plotting cell)
# is not overwritten when this cell runs.
strategy_returns = backtester.pf.returns()

#html = qs.reports.full(strategy_returns, "NDAQ")
# NOTE(review): "NDAQ" is passed as the benchmark symbol — confirm this is the intended benchmark.
# The metrics table is the cell's last expression, so the notebook renders it.
pd.DataFrame(qs.reports.metrics(strategy_returns, "NDAQ",mode='full',rf=0.0025, display=False))


In [None]:
class TCNBlock(nn.Module):
    """Residual block made of two dilated 1-D convolutions.

    Each convolution is followed by batch normalisation, ReLU and dropout. The block
    input is added back to the result (through a 1x1 convolution when the channel
    counts differ) and a final ReLU is applied.

    Args:
        input_dim: Number of input channels.
        output_dim: Number of output channels of both convolutions.
        kernel_size: Kernel size of both convolutions.
        dilation: Dilation of both convolutions.
        padding: Zero padding added to both sides by both convolutions. The residual
            addition only works when each convolution preserves the sequence length,
            i.e. when ``2 * padding == dilation * (kernel_size - 1)``.
        dropout: Dropout probability used after each activation.

    Shape:
        Input ``(batch, input_dim, length)``; output ``(batch, output_dim, length)``.
    """

    def __init__(self, input_dim, output_dim, kernel_size, dilation, padding, dropout=0.2):
        super().__init__()

        # First conv -> norm -> ReLU -> dropout stage
        self.conv1 = nn.Conv1d(
            in_channels=input_dim,
            out_channels=output_dim,
            kernel_size=kernel_size,
            dilation=dilation,
            padding=padding
        )
        self.norm1 = nn.BatchNorm1d(output_dim)
        self.relu1 = nn.ReLU()
        self.dropout1 = nn.Dropout(dropout)

        # Second conv -> norm -> ReLU -> dropout stage
        self.conv2 = nn.Conv1d(
            in_channels=output_dim,
            out_channels=output_dim,
            kernel_size=kernel_size,
            dilation=dilation,
            padding=padding
        )
        self.norm2 = nn.BatchNorm1d(output_dim)
        self.relu2 = nn.ReLU()
        self.dropout2 = nn.Dropout(dropout)

        # Project the skip path with a 1x1 convolution only when the channel counts differ
        self.residual = nn.Conv1d(input_dim, output_dim, 1) if input_dim != output_dim else nn.Identity()

    def forward(self, x):
        """Apply both convolution stages and add the skip connection.

        Args:
            x: Tensor of shape ``(batch, input_dim, length)``.

        Returns:
            Tensor of shape ``(batch, output_dim, length)``; values are non-negative
            because of the final ReLU.
        """
        # Skip path, computed from the untouched input
        residual = self.residual(x)

        # First stage
        out = self.conv1(x)
        out = self.norm1(out)
        out = self.relu1(out)
        out = self.dropout1(out)

        # Second stage. ReLU is applied once; a second application would be a no-op.
        out = self.conv2(out)
        out = self.norm2(out)
        out = self.relu2(out)
        out = self.dropout2(out)

        # Merge with the skip path. relu1 is stateless, so reusing it here is safe.
        return self.relu1(out + residual)

In [None]:
class EchoStateNetwork(nn.Module):
    """Echo State Network with fixed random reservoir and trainable linear readout.

    The input weights and the recurrent reservoir weights are created once at
    construction and stored as buffers, so they are not returned by ``parameters()``
    and are not updated by an optimiser. Only the ``nn.Linear`` readout layers are
    trainable.

    Args:
        input_size: Number of features per time step.
        reservoir_size: Number of reservoir units.
        output_size: Number of outputs produced per time step.
        spectral_radius: Target largest absolute eigenvalue of the reservoir matrix.
        sparsity: Fraction of reservoir matrix entries that are filled with random
            values; the rest stay zero.
        noise: Standard deviation of the Gaussian noise added to the pre-activation at
            every step. It is added on every call, regardless of train/eval mode;
            pass ``0`` for deterministic outputs.
        bidirectional: If true, a second independent reservoir processes the sequence
            in reverse and both readouts are merged by an extra linear layer.
    """

    def __init__(self, input_size, reservoir_size, output_size, spectral_radius=0.9,
                 sparsity=0.1, noise=0.001, bidirectional=False):
        super().__init__()

        self.input_size = input_size
        self.reservoir_size = reservoir_size
        self.output_size = output_size
        self.spectral_radius = spectral_radius
        self.sparsity = sparsity
        self.noise = noise
        self.bidirectional = bidirectional

        # Input weights (fixed)
        self.register_buffer('W_in', self._initialize_input_weights())

        # Reservoir weights (fixed)
        self.register_buffer('W', self._initialize_reservoir_weights())

        # Output weights (trainable)
        self.W_out = nn.Linear(reservoir_size, output_size)

        if bidirectional:
            # Second, independent set of weights for the backward direction
            self.register_buffer('W_in_reverse', self._initialize_input_weights())
            self.register_buffer('W_reverse', self._initialize_reservoir_weights())
            self.W_out_reverse = nn.Linear(reservoir_size, output_size)
            # Merges the forward and backward readouts back to output_size
            self.W_combined = nn.Linear(output_size * 2, output_size)

    def _initialize_input_weights(self):
        """Return a Xavier-uniform ``(reservoir_size, input_size)`` input weight matrix."""
        W_in = torch.zeros(self.reservoir_size, self.input_size)
        return torch.nn.init.xavier_uniform_(W_in)

    def _initialize_reservoir_weights(self):
        """Return a sparse random reservoir matrix scaled to ``spectral_radius``.

        ``int(sparsity * reservoir_size**2)`` randomly chosen entries receive standard
        normal values. The matrix is then rescaled so its largest absolute eigenvalue
        equals ``spectral_radius``. When that eigenvalue is (numerically) zero, for
        example with ``sparsity=0``, the matrix is returned unscaled instead of being
        turned into NaN/inf by a division by zero.
        """
        size = self.reservoir_size
        W = torch.zeros(size, size)
        num_connections = int(self.sparsity * size * size)
        # Pick distinct flat positions, then convert them to (row, col) pairs
        indices = torch.randperm(size * size)[:num_connections]
        rows = indices // size
        cols = indices % size
        W[rows, cols] = torch.randn(num_connections)

        # Scale to the desired spectral radius, guarding against a zero radius
        max_eigenvalue = torch.linalg.eigvals(W).abs().max()
        if max_eigenvalue > torch.finfo(W.dtype).eps:
            W = W * (self.spectral_radius / max_eigenvalue)
        return W

    def _reservoir_step(self, x, h_prev, W_in, W):
        """Execute one step of the reservoir.

        Computes ``tanh(x @ W_in.T + h_prev @ W.T + noise * eps)`` with ``eps`` drawn
        from a standard normal distribution.

        Args:
            x: Input at the current step, shape ``(batch, input_size)``.
            h_prev: Previous reservoir state, shape ``(batch, reservoir_size)``.
            W_in: Input weight matrix to use.
            W: Reservoir weight matrix to use.

        Returns:
            The new reservoir state, shape ``(batch, reservoir_size)``.
        """
        pre_activation = torch.mm(x, W_in.t()) + torch.mm(h_prev, W.t())
        if self.noise:
            # Skip sampling entirely when noise is 0; the term would be all zeros anyway
            pre_activation = pre_activation + self.noise * torch.randn_like(h_prev)
        return torch.tanh(pre_activation)

    def _run_reservoir(self, x, time_steps, W_in, W, readout):
        """Drive one reservoir over ``time_steps`` and return the readouts in visiting order."""
        h = torch.zeros(x.size(0), self.reservoir_size, device=x.device)
        outputs = []
        for t in time_steps:
            h = self._reservoir_step(x[:, t], h, W_in, W)
            outputs.append(readout(h))
        return outputs

    def forward(self, x):
        """
        x: input tensor of shape (batch_size, seq_len, input_size)

        Returns a tensor of shape (batch_size, seq_len, output_size).
        """
        # Unpacking also enforces that the input is 3-dimensional
        batch_size, seq_len, _ = x.size()

        # Forward direction: t = 0 .. seq_len - 1
        forward_steps = self._run_reservoir(x, range(seq_len), self.W_in, self.W, self.W_out)
        outputs_forward = torch.stack(forward_steps, dim=1)  # (batch_size, seq_len, output_size)

        if not self.bidirectional:
            return outputs_forward

        # Backward direction: t = seq_len - 1 .. 0. Outputs are collected in visiting
        # order and reversed once, so position t lines up with the forward output at t.
        reverse_steps = self._run_reservoir(
            x, range(seq_len - 1, -1, -1), self.W_in_reverse, self.W_reverse, self.W_out_reverse
        )
        reverse_steps.reverse()
        outputs_reverse = torch.stack(reverse_steps, dim=1)  # (batch_size, seq_len, output_size)

        # Combine forward and backward outputs
        combined = torch.cat((outputs_forward, outputs_reverse), dim=2)
        return self.W_combined(combined)

In [None]:
#ticker.get_balance_sheet(freq='quarterly')
#ticker.get_calendar()
#ticker.get_cash_flow(freq='quarterly')
#earnings_data = ticker.get_earnings_dates()
#income_statement = ticker.get_income_stmt(freq='yearly').T
#ticker.get_institutional_holders()
#ticker.get_recommendations()
#ticker.get_sustainability()

In [None]:
# define a function to fetch the options data for a given ticker symbol
#def fetch_options_data(ticker_symbol):
    #ticker = yf.Ticker(ticker_symbol)
#    options_dates = ticker.options
#    options_data = ticker.option_chain(date='2025-03-21')
#    return options_data.calls, options_data.puts
##ionq_stock_data = ionq_stock_data.sort_values(by='Date', ascending=False)