In [None]:
import numpy as np
import pandas as pd
import plotly.graph_objects as go
import plotly.express as px
from plotly.subplots import make_subplots
import matplotlib.pyplot as plt

# Custom libraries
from Components.TrainModel import DataModule, TEMPUS, torchscript_predict
from Components.TickerData import TickerData, upload_data_sql, fetch_sql_data
from Components.BackTesting import BackTesting
from Components.MarketRegimes import MarketRegimes

# Torch ML libraries
import torch
import torch.nn as nn
from torch.optim import AdamW

# Select the compute device once for the notebook: the GPU when PyTorch can
# see CUDA, otherwise the CPU.
if torch.cuda.is_available():
    device = "cuda"
    # Allow TF32 for matmul and cuDNN kernels on the CUDA path.
    torch.backends.cuda.matmul.allow_tf32 = True
    torch.backends.cudnn.allow_tf32 = True
else:
    device = "cpu"

In [None]:
#TODO: Feature importance with SHAP values and plot
#TODO: Streamlit Page for future prediction
#TODO: Add Williams %R to the TickerData
#TODO: Data is typically split into rolling time windows – e.g. training on 2005–2015 data, validating on 2016–2017, and testing on 2018–2019. Walk-forward (rolling) validation is used to account for time-dependence and non-stationarity (models may be retrained periodically as new data comes in).

In [None]:
# Read the tables on the Nasdaq-100 Wikipedia page and keep the one at
# position 4.
# NOTE(review): the table position is hard-coded and will silently pick a
# different table if the page layout changes — confirm it is still the
# constituents table.
tickers = pd.read_html("https://en.wikipedia.org/wiki/Nasdaq-100")[4]
# Second column of that table, flattened to a 1-D array (presumably the
# ticker symbols — verify against the page).
nasdaq_universe = tickers.iloc[:, 1].to_numpy()
# Draw a reproducible sample without replacement. The size is clamped to the
# number of rows available because choice(..., replace=False) raises when
# asked for more items than exist.
nasdaq_tickers = np.random.default_rng(42).choice(
    nasdaq_universe, size=min(50, len(nasdaq_universe)), replace=False
)

In [None]:
# Read the tables on the Russell 1000 Index Wikipedia page and keep the one
# at position 3.
# NOTE(review): the table position is hard-coded and will silently pick a
# different table if the page layout changes — confirm it is still the
# constituents table.
tickers = pd.read_html("https://en.wikipedia.org/wiki/Russell_1000_Index")[3]
# Second column of that table, flattened to a 1-D array (presumably the
# ticker symbols — verify against the page).
rusell_universe = tickers.iloc[:, 1].to_numpy()
# Draw a reproducible sample without replacement. The size is clamped to the
# number of rows available because choice(..., replace=False) raises when
# asked for more items than exist.
rusell_tickers = np.random.default_rng(42).choice(
    rusell_universe, size=min(50, len(rusell_universe)), replace=False
)

In [None]:
# Read the tables on the "List of S&P 500 companies" Wikipedia page and keep
# the first one.
# NOTE(review): the table position is hard-coded and will silently pick a
# different table if the page layout changes — confirm it is still the
# constituents table.
tickers = pd.read_html("https://en.wikipedia.org/wiki/List_of_S%26P_500_companies")[0]
# First column of that table, flattened to a 1-D array (presumably the
# ticker symbols — verify against the page).
SnP_universe = tickers.iloc[:, 0].to_numpy()
# Draw a reproducible sample without replacement. The size is clamped to the
# number of rows available because choice(..., replace=False) raises when
# asked for more items than exist.
SnP_tickers = np.random.default_rng(42).choice(
    SnP_universe, size=min(50, len(SnP_universe)), replace=False
)

In [None]:
# Read the tables on the "List of S&P 600 companies" Wikipedia page and keep
# the first one.
# NOTE(review): the table position is hard-coded and will silently pick a
# different table if the page layout changes — confirm it is still the
# constituents table.
tickers = pd.read_html("https://en.wikipedia.org/wiki/List_of_S%26P_600_companies")[0]
# First column of that table, flattened to a 1-D array (presumably the
# ticker symbols — verify against the page).
SnP600_universe = tickers.iloc[:, 0].to_numpy()
# Draw a reproducible sample without replacement. The size is clamped to the
# number of rows available because choice(..., replace=False) raises when
# asked for more items than exist.
SnP600_tickers = np.random.default_rng(42).choice(
    SnP600_universe, size=min(50, len(SnP600_universe)), replace=False
)

In [None]:
# Pool the four index samples into one array; np.unique drops symbols that
# appear in more than one index and returns them sorted.
tickers = np.unique(
    np.concatenate((nasdaq_tickers, SnP_tickers, rusell_tickers, SnP600_tickers))
)

In [None]:
#tickers = ['IONQ','QBTS','RGTI']
# Feature columns requested from TickerData for every ticker.
indicators = ['ema_20', 'ema_50', 'ema_200', 'stoch_rsi', 'macd', 'b_percent', 'keltner_lower', 'keltner_upper','adx','pcf',
              'dte','roe','roa','pts','pe','eps_surprise','Close']

# Per-ticker results are collected in lists and concatenated once after the loop.
training_dfs = []
stocks_dfs = []
for ticker in tickers:
    ticker_training_data, raw_stock_data = TickerData(
        ticker, years=2, prediction_window=5, indicator_list=indicators
    ).process_all()
    # The backtesting cell treats a None processed frame from process_all()
    # as "no data for this ticker"; skip such tickers here too so the two
    # lists stay aligned and contain only real frames.
    if ticker_training_data is None:
        continue
    training_dfs.append(ticker_training_data)
    stocks_dfs.append(raw_stock_data)

# Fail with a clear message instead of pd.concat's generic error.
if not training_dfs:
    raise ValueError("No training data could be built for any of the requested tickers")

# Keep each frame's own index (ignore_index=False) rather than renumbering rows.
training_data = pd.concat(training_dfs, ignore_index=False)
#stock_data = pd.concat(stocks_dfs, ignore_index=False)
training_data

In [None]:
#Best config: {'lr': 4.390449033248878e-05, 'hidden_size': 256, 'num_layers': 1, 'dropout': 0.3477694988633191, 'weight_decay': 0.0001801390872725824, 'batch_size': 16, 'window_size': 10, 'grad_clip_norm': 0.8393802881451728}
# NOTE(review): the recorded best config above uses window_size 10 and the key
# 'grad_clip_norm'; the dict below uses window_size 5 and the key 'clip_size'.
# Confirm both differences are intentional and that TEMPUS reads 'clip_size'.

# Pick the training device: CUDA first, then Apple MPS when it is actually
# available, otherwise CPU. Falling back to "mps" unconditionally fails on
# machines that have neither accelerator.
if torch.cuda.is_available():
    training_device = "cuda"
elif torch.backends.mps.is_available():
    training_device = "mps"
else:
    training_device = "cpu"

# Hyperparameters passed to TEMPUS; "input_size" is filled in below once the
# data module knows how many features it has.
config = {
    "lr": 4.390449033248878e-05,
    "weight_decay": 0.0001801390872725824,
    "hidden_size": 256, # old was 256
    "num_layers": 1, # old was 1
    "dropout": 0.3477694988633191,
    "batch_size": 16, # old was 16
    "window_size": 5,
    "clip_size": 0.8393802881451728,
    "epochs": 20,
    "device": training_device
}

# Build the data module from the concatenated training frame.
data_module = DataModule(training_data, window_size=config["window_size"], batch_size=config["batch_size"])
config["input_size"] = data_module.num_features

# Instantiate the model
model = TEMPUS(config,scaler=data_module.scaler)
# Train Model
history = model.train_model(data_module.train_loader, data_module.val_loader, data_module.test_loader, config["epochs"])

In [None]:
# Build the figure from the model's plot_training_history() and render it
# inline.
training_fig = model.plot_training_history()
training_fig.show()

In [None]:
# Export the trained TEMPUS model to TorchScript, passing the test loader and
# targeting the CPU.
# NOTE(review): this writes "Models/Echo_v1.0.pt", while the prediction cells
# further down load "Models/Tempus_v2.1.pt" / "Models/Tempus_v2.2.pt" — confirm
# which artifact the backtests are meant to evaluate.
script_path = model.export_model_to_torchscript(
    save_path="Models/Echo_v1.0.pt",
    data_loader=data_module.test_loader,
    device="cpu"
)

In [None]:
import random
# Draw up to 10 tickers at random from the Nasdaq sample for backtesting.
# NOTE(review): nasdaq_tickers is also part of the ticker pool the training
# data is built from, so these names are not strictly unseen by the model —
# confirm this is the intended evaluation set.
nasdaq_pool = list(nasdaq_tickers)
sampled_tickers = random.sample(nasdaq_pool, min(10, len(nasdaq_pool)))
# Starting cash handed to every per-ticker backtest.
initial_capital = 1000.0

# Per-ticker results are collected in lists and concatenated once after the loop.
preds_frames = []
return_frames = []
for ticker in sampled_tickers:
    out_of_sample_data, raw_stock_data = TickerData(ticker, years=4, prediction_window=5).process_all()

    # Skip tickers for which process_all() returned no processed frame.
    if out_of_sample_data is None:
        continue

    # Load the exported model and predict on this ticker's data.
    # NOTE(review): window_size=50 here differs from the window_size of 5 in
    # the training config cell — confirm it matches the model stored at
    # model_path.
    preds_df = torchscript_predict(
        model_path="Models/Tempus_v2.1.pt",
        input_df=out_of_sample_data,
        device="cpu",
        window_size=50,
        target_col="shifted_prices"
    )
    # Attach the raw OHLCV columns by index; a left join keeps every prediction row.
    preds_df = pd.merge(preds_df, raw_stock_data[['Open', 'High', 'Low', 'Volume','Close']], left_index=True, right_index=True, how='left')
    preds_frames.append(preds_df)

    backtester = BackTesting(preds_df, ticker, initial_capital, pct_change_entry=0.05, pct_change_exit=0.03)
    backtester.run_simulation()
    # Column 0 holds the per-period returns; compound them into a cumulative
    # return expressed in percent.
    bt_results = pd.DataFrame(backtester.pf.returns())
    bt_results['cumulative_return'] = np.array(((1 + bt_results[0]).cumprod() - 1)*100)
    bt_results['ticker'] = ticker
    return_frames.append(bt_results)

# Fail with a clear message instead of pd.concat's "No objects to concatenate".
if not preds_frames:
    raise ValueError("None of the sampled tickers produced data to backtest")

preds_dfs = pd.concat(preds_frames, ignore_index=False)
returns = pd.concat(return_frames, ignore_index=False)

In [None]:
# Visualise the per-ticker cumulative returns from the backtest, then
# summarise how many tickers finished above zero.

# Range-selector shortcuts shown above the time axis.
range_buttons = [
    dict(count=1, label="1m", step="month", stepmode="backward"),
    dict(count=6, label="6m", step="month", stepmode="backward"),
    dict(count=1, label="YTD", step="year", stepmode="todate"),
    dict(count=1, label="1y", step="year", stepmode="backward"),
    dict(step="all"),
]
date_axis = dict(
    rangeselector=dict(buttons=range_buttons),
    rangeslider=dict(visible=False),
    type="date",
)

# One line per ticker; reset_index() exposes the frame's index as the
# 'index' column used for the x axis.
fig = px.line(
    returns.reset_index(),
    x='index',
    y='cumulative_return',
    color='ticker',
    title='Cumulative Returns by Ticker',
    labels={'index': 'Date', 'cumulative_return': 'Cumulative Return'},
)
fig.update_layout(
    xaxis_title='Date',
    yaxis_title='Cumulative Return (%)',
    showlegend=False,
    height=600,
    template='ggplot2',
    xaxis=date_axis,
)
fig.show()

# Final cumulative return of each ticker (last row per group).
last_returns = returns.groupby('ticker')['cumulative_return'].last()

# Split the tickers into winners (> 0) and the rest (<= 0).
positive_count = (last_returns > 0).sum()
negative_count = (last_returns <= 0).sum()
total_count = len(last_returns)

# Tabular view of the final returns, best performer first.
last_returns_df = (
    last_returns
    .reset_index()
    .set_axis(['Ticker', 'Final Return'], axis=1)
    .sort_values('Final Return', ascending=False)
)

# Pie chart of the positive / negative split.
fig_pie = px.pie(
    values=[positive_count, negative_count],
    names=['Positive', 'Negative'],
    title='Proportion of Tickers with Positive vs Negative Returns',
    color_discrete_sequence=['green', 'red'],
    template='ggplot2',
)
fig_pie.update_traces(textinfo='percent+label')
fig_pie.update_layout(showlegend=False)
fig_pie.show()

# Text summary; guarded so an empty result set does not divide by zero.
if total_count <= 0:
    print("No ticker data available for analysis")
else:
    positive_proportion = positive_count / total_count
    print(f"Proportion of tickers with positive cumulative returns: {positive_proportion:.2%}")
    print(f"Positive tickers: {positive_count} out of {total_count}")
    print(f"Negative tickers: {negative_count} out of {total_count}")

In [None]:
# Pick one of the backtested tickers at random and keep only its rows.
selected_ticker = random.choice(preds_dfs['Ticker'].unique())
preds_df = preds_dfs.loc[preds_dfs['Ticker'] == selected_ticker]

# Overlay the model output, the raw close and the shifted target on one
# chart. The third trace keeps Plotly's default line styling.
fig = go.Figure(data=[
    go.Scatter(x=preds_df.index, y=preds_df['Predicted'], mode='lines', name='Predicted', line=dict(color="Grey")),
    go.Scatter(x=preds_df.index, y=preds_df['Close'], mode='lines', name='Close (Unshifted)', line=dict(color="Blue")),
    go.Scatter(x=preds_df.index, y=preds_df['Actual'], mode='lines', name='Close (Shifted)'),
])
layout_options = dict(
    title=f'Prediction for {selected_ticker}',
    xaxis_title='Date',
    yaxis_title='Price (USD)',
    height=600,
    legend=dict(orientation="h", yanchor="bottom", y=1.02),
    template='ggplot2',
)
fig.update_layout(**layout_options)
fig.show()

In [None]:
from Components.BackTesting import BackTesting
import pandas as pd
# Single-ticker run in prediction mode: one year of data for PLTR.
ticker = 'PLTR'
out_of_sample_data, raw_stock_data = TickerData(
    ticker,
    years=1,
    prediction_window=5,
    prediction_mode=True,
).process_all()

# Run the exported TorchScript model on the processed frame.
preds_df = torchscript_predict(
    model_path="Models/Tempus_v2.2.pt",
    input_df=out_of_sample_data,
    device="cpu",
    window_size=50,
    prediction_mode=True,
)

# Attach the raw OHLCV columns by index (left join keeps every prediction
# row), then add the close shifted 5 rows earlier — the same number as the
# prediction_window passed to TickerData above.
ohlcv = raw_stock_data[['Open', 'High', 'Low', 'Volume','Close']]
preds_df = preds_df.merge(ohlcv, left_index=True, right_index=True, how='left')
preds_df['shifted_prices'] = preds_df['Close'].shift(-5)

In [None]:
# Plot the prediction against the shifted and unshifted close for the
# single-ticker run.
fig = go.Figure(data=[
    go.Scatter(x=preds_df.index, y=preds_df['Predicted'], mode='lines', name='Predicted', line=dict(color="Grey")),
    go.Scatter(x=preds_df.index, y=preds_df['shifted_prices'], mode='lines', name='Close (Shifted)', line=dict(color="Blue")),
    go.Scatter(x=preds_df.index, y=preds_df['Close'], mode='lines', name='Close (Unshifted)', line=dict(color="Orange")),
])
fig.update_layout(template='ggplot2')
fig.show()

In [None]:
import quantstats as qs

# Backtest the single-ticker predictions and produce a quantstats report.
# Relies on `preds_df` and `ticker` from the single-ticker prediction cell and
# on `initial_capital` from the multi-ticker backtest cell.
backtester = BackTesting(preds_df, ticker, initial_capital, pct_change_entry=0.05,pct_change_exit=0.02)
backtester.run_simulation()
# NOTE(review): this rebinds `returns`, which the cumulative-returns plotting
# cell expects to be the multi-ticker DataFrame; re-running that cell after
# this one will fail.
returns = backtester.pf.returns()
# Drop the timezone from the index before handing the series to quantstats.
returns.index = returns.index.tz_localize(None)

#html = qs.reports.full(returns, "NDAQ")
# Benchmark against the ticker that was actually backtested rather than a
# hard-coded symbol, so changing `ticker` keeps the report consistent.
qs.reports.basic(returns, ticker, rf=0.0025, display=False)


In [None]:
# Fetch the earnings calendar and normalise its column names.
# `ticker_obj` is created here (same symbol as the financial-metrics cell) so
# this cell works on a fresh kernel; previously it depended on a later cell
# having been run first.
import yfinance as yf

ticker_obj = yf.Ticker('CCEP')
earnings_data = (
    ticker_obj.get_earnings_dates()
    .reset_index()
    .rename(columns={"Earnings Date": "Date", "EPS Estimate": "eps_estimate", "Reported EPS": "eps",
                     "Surprise(%)": "eps_surprise"})
    .sort_values('Date')
)
earnings_data

In [None]:
import yfinance as yf

ticker_obj = yf.Ticker('CCEP')

# Quarterly income statement, transposed so each row is one reporting date,
# with that date exposed as a 'Date' column and rows sorted oldest first.
q_income_stmt = ticker_obj.get_income_stmt(freq='quarterly').T
q_income_stmt = q_income_stmt.reset_index().rename(columns={"index": "Date"}).sort_values('Date')

# Same treatment for the quarterly balance sheet.
q_balance_sheet = ticker_obj.get_balance_sheet(freq='quarterly').T
q_balance_sheet = q_balance_sheet.reset_index().rename(columns={"index": "Date"}).sort_values('Date')

# Index both statements by reporting date so every ratio below pairs figures
# from the same quarter. Aligning on the positional row labels left over from
# reset_index() would pair different quarters whenever the two statements do
# not cover exactly the same dates.
income_by_date = q_income_stmt.set_index('Date')
balance_by_date = q_balance_sheet.set_index('Date')

# Combine all metrics into a DataFrame
# NOTE(review): 'ttm_eps' is computed from single-quarter figures, not a
# trailing-twelve-month sum, and 'dte' uses CurrentLiabilities as the
# numerator — confirm these definitions are intended.
financial_metrics = pd.DataFrame({
    'ttm_eps' : income_by_date['NetIncome'] / income_by_date['BasicAverageShares'],
    'pcf': balance_by_date['TotalCapitalization'] / income_by_date['OperatingIncome'],
    'dte': balance_by_date['CurrentLiabilities'] / balance_by_date['StockholdersEquity'],
    'roe': income_by_date['NetIncome'] / balance_by_date['StockholdersEquity'],
    'roa': income_by_date['NetIncome'] / balance_by_date['TotalAssets'],
    'pts': balance_by_date['TotalCapitalization'] / income_by_date['TotalRevenue'],
    'evEBITDA': (balance_by_date['TotalCapitalization'] + balance_by_date['TotalDebt'] - balance_by_date[
    'CashAndCashEquivalents']) / income_by_date['EBITDA']
})
# Expose the reporting date as a timezone-aware column and return to a plain
# integer index.
financial_metrics['Date'] = financial_metrics.index.tz_localize('America/New_York')
financial_metrics = financial_metrics.reset_index(drop=True)
# Display only the quarters for which every metric could be computed; the
# stored frame keeps the incomplete rows.
financial_metrics.dropna()

In [None]:
import os
# %%
# Import stock_data dataframe into an Azure SQL database table using SQLAlchemy
#upload_data_sql(stock_data,"SNP600_1day")
#SNP500_1day = fetch_sql_data('SNP500_1day')
#SNP600_1day = fetch_sql_data('SNP600_1day')
#russell2000_1day = fetch_sql_data('russell2000_1day')
#dowjones_1day = fetch_sql_data('dowjones_1day')
#nasdaq_1day = fetch_sql_data('nasdaq_1day')
#stock_data = pd.concat([SNP500_1day, SNP600_1day, dowjones_1day, nasdaq_1day], ignore_index=True)
# Remove duplicates based on the 'Date' and 'Ticker' columns
#stock_data = stock_data[~stock_data.index.duplicated(keep='first')]
# Before conversion
#print("Column types before:", [type(col).__name__ for col in training_data.columns])

# Apply conversion
#training_data.columns = [str(col) for col in training_data.columns]

# After conversion
#print("Column types after:", [type(col).__name__ for col in training_data.columns])