In [None]:
import numpy as np
import pandas as pd
import plotly.graph_objects as go
import plotly.express as px
from plotly.subplots import make_subplots
from sklearn.utils.class_weight import compute_class_weight

# Custom libraries
from Components.ModelDataset import DataModule
from Components.TrainModel import LSTMClassifierModel
from Components.TickerData import TickerData
from Components.BackTesting import BackTesting

# Torch ML libraries
import torch
import torch.nn as nn
from torch.optim import AdamW

In [None]:
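# TODO: Feature importance with SHAP values, plus a plot of the results
# TODO: Hyperparameter tuning (a Ray Tune search exists below; the best config is not yet fed back into training)
# TODO: Class weights (applied to the main training loss; the Ray Tune trials still use an unweighted loss)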

In [None]:
# Use Apple's Metal backend (MPS) when PyTorch reports it as available; otherwise stay on the CPU.
if torch.backends.mps.is_available():
    device = "mps"
else:
    device = "cpu"
print(f"Using {device} device")

In [None]:
# Read every HTML table on the Nasdaq-100 Wikipedia article, take the table at position 4
# and keep its column at position 1 as a flat NumPy array.
# NOTE(review): both positions are tied to the page's current layout -- confirm they still
# select the constituents table and its ticker column.
tickers = (
    pd.read_html("https://en.wikipedia.org/wiki/Nasdaq-100")[4]
    .iloc[:, 1]
    .to_numpy()
)

In [None]:
# Symbols to download; this replaces any ticker list built earlier in the notebook.
tickers = ['IONQ', 'QBTS', 'RGTI']

# process_all() yields a (training frame, raw stock frame) pair for each symbol.
processed_pairs = [TickerData(symbol, days=365).process_all() for symbol in tickers]
training_dfs = [pair[0] for pair in processed_pairs]
stocks_dfs = [pair[1] for pair in processed_pairs]

# Stack the per-ticker frames, keeping each frame's own index.
training_data = pd.concat(training_dfs, ignore_index=False)
stock_data = pd.concat(stocks_dfs, ignore_index=False)

In [None]:
# Load the saved training set from disk and promote its 'Date' column to the index.
training_data = pd.read_csv(
    "Data/NASDAQ_100_TrainingData_2022-3-24_2025_03_24.csv"
).set_index('Date')

In [None]:
# Load the saved raw stock data from disk and promote its 'Date' column to the index.
stock_data = pd.read_csv(
    "Data/NASDAQ_100_StockData_2022-3-24_2025_03_24.csv"
).set_index('Date')

In [None]:
# Wrap the training frame in the project's DataModule (seq_length=10, batch_size=32)
# and pull out the three loaders it exposes.
data_module = DataModule(training_data, seq_length=10, batch_size=32)
train_loader = data_module.train_loader
eval_loader = data_module.eval_loader
test_loader = data_module.test_loader

# 'balanced' class weights derived from the training split's labels only,
# stored as a float tensor on the selected device for use in the loss.
train_labels = np.array(data_module.train_dataset.labels)
classes = np.unique(train_labels)
class_weights = compute_class_weight(
    class_weight='balanced',
    classes=classes,
    y=train_labels,
)
weights_tensor = torch.FloatTensor(class_weights).to(device)

In [None]:
 # Define model parameters
input_size = 33    # number of features
hidden_size = 64
num_layers = 2
num_classes = 3    # multiclass classification

# Instantiate the unified model
model = LSTMClassifierModel(input_size, hidden_size, num_layers, num_classes)
#model = CNNClassifierModel(input_size, num_classes)
#model = ConvLSTMClassifierModel(input_size, hidden_size, num_layers, num_classes)

# Loss and optimizer.
# The loss is weighted with `weights_tensor`, the balanced class weights computed from the
# training split in the DataModule cell, so no weights are derived from eval/test rows here.
# NOTE(review): `weights_tensor` lives on `device`; this cell does not move `model` there --
# presumably train_model() handles device placement, confirm.
criterion = nn.CrossEntropyLoss(weight=weights_tensor)
optimizer = AdamW(model.parameters(), lr=0.0001, weight_decay=1e-5)

# Train for 20 epochs; the returned history is what plot_training_history() consumes.
history = model.train_model(train_loader, eval_loader, test_loader, criterion, optimizer, epochs=20)

In [None]:
# Function to get predictions on the entire dataset
def get_predictions(model, df, seq_length=10):
    """Slide a fixed-length window over ``df`` and classify the row that follows each window.

    For every position ``i`` in ``[seq_length, len(df))`` the rows ``i - seq_length .. i - 1``
    (all columns except 'Ticker', cast to float32) are fed to ``model`` as a tensor of shape
    ``(1, seq_length, n_columns)``; the prediction is reported against row ``i``.

    Args:
        model: Callable classifier exposing ``eval()``. It must return class scores of shape
            ``(1, n_classes)``. If it exposes ``parameters()``, inputs are moved to the
            device of its first parameter; otherwise they stay on the CPU.
        df: DataFrame with a 'Ticker' column, a 'Target' column and numeric feature columns.
            Its index provides the value reported as 'Date'.
        seq_length: Number of consecutive rows in each input window.

    Returns:
        DataFrame with columns 'Date', 'Ticker', 'Actual', 'Predicted', 'Confidence'
        (softmax probability of the predicted class), 'entry_signal' (Predicted == 2)
        and 'exit_signal' (Predicted == 1). Empty when ``len(df) <= seq_length``.
    """
    # Drop 'Ticker' and convert to float32 once instead of once per window.
    # NOTE(review): 'Target' stays in the feature matrix -- confirm DataModule builds its
    # training windows from the same columns.
    # NOTE(review): windows are taken over consecutive rows of `df` regardless of 'Ticker', so
    # in a multi-ticker frame a window can span two tickers -- confirm this matches training.
    features = df.drop(columns=['Ticker']).to_numpy(dtype=np.float32)

    # Feed inputs on the same device as the model's weights so inference also works after the
    # model has been moved to an accelerator.
    try:
        model_device = next(model.parameters()).device
    except (AttributeError, StopIteration):
        model_device = torch.device("cpu")

    predictions = []
    confidences = []

    # Switching to eval mode and disabling autograd are loop-invariant, so do them once.
    model.eval()
    with torch.no_grad():
        for i in range(seq_length, len(df)):
            window = features[i - seq_length:i]
            sequence_tensor = torch.tensor(window, dtype=torch.float32).unsqueeze(0).to(model_device)

            output = model(sequence_tensor)
            probabilities = torch.softmax(output, dim=1)  # Convert outputs to probabilities
            confidence, pred = torch.max(probabilities, 1)  # Confidence and predicted class

            predictions.append(pred.item())
            confidences.append(confidence.item())

    # Row i of the input frame supplies the date, ticker and actual label for window i.
    preds_df = pd.DataFrame({
        'Date': df.index[seq_length:].tolist(),
        'Ticker': df['Ticker'].iloc[seq_length:].tolist(),
        'Actual': df['Target'].iloc[seq_length:].tolist(),
        'Predicted': predictions,
        'Confidence': confidences,
    })
    preds_df['entry_signal'] = preds_df['Predicted'] == 2  # Buy signal
    preds_df['exit_signal'] = preds_df['Predicted'] == 1  # Sell signal

    return preds_df

# Score every window of the training frame with the trained model
preds_df = get_predictions(model, training_data, seq_length=10)

# Keep only the (Date, Ticker) pairs present in both the raw stock data and the predictions
merged_df = stock_data.merge(preds_df, how='inner', on=['Date', 'Ticker'])

In [None]:
# Plot training, evaluation and testing metrics
def plot_training_history(history, y_true=None, y_pred=None):
    """Build a two-panel figure: loss curves on the left, F1 curves on the right.

    Args:
        history: Mapping providing the series 'train_loss', 'eval_loss', 'test_loss',
            'eval_f1' and 'test_f1'; each is plotted against its position.
        y_true: Accepted but not used by this function.
        y_pred: Accepted but not used by this function.

    Returns:
        The plotly figure holding both subplots (it is not shown here).
    """
    fig = make_subplots(rows=1, cols=2, subplot_titles=('Loss', 'F1 Score'))

    # (history key, legend label, line colour, subplot column)
    curves = (
        ('train_loss', 'Train Loss', 'blue', 1),
        ('eval_loss', 'Eval Loss', 'orange', 1),
        ('test_loss', 'Test Loss', 'green', 1),
        ('eval_f1', 'Eval F1 Score', 'orange', 2),
        ('test_f1', 'Test F1 Score', 'green', 2),
    )
    for key, label, colour, column in curves:
        trace = go.Scatter(y=history[key], name=label, line=dict(color=colour))
        fig.add_trace(trace, row=1, col=column)

    fig.update_layout(
        title='Training Metrics',
        xaxis_title='Epochs',
        height=700,
        template='plotly_white',
        legend=dict(orientation="h", yanchor="bottom", y=1.02)
    )

    # One y-axis title per panel
    for column, axis_title in ((1, "Loss"), (2, "F1")):
        fig.update_yaxes(title_text=axis_title, row=1, col=column)

    return fig
# Build the loss / F1 figure from the training history; as the cell's last expression it is
# rendered as the cell output.
training_history_fig = plot_training_history(history)
training_history_fig

In [None]:
# Create a combined plot with stock prices and prediction markers
def plot_combined_predictions(data, ticker):
    """Show one ticker's closing price with a marker for every predicted signal.

    Predicted classes are drawn as 2 = buy (triangle-up), 1 = sell (triangle-down) and
    0 = hold (circle). A prediction equal to 'Actual' gets a solid coloured marker; a
    mismatch gets a smaller, faint gray marker of the same shape.

    Args:
        data: DataFrame with the columns 'Ticker', 'Date', 'Close', 'Predicted', 'Actual'.
        ticker: Symbol to plot. Any ``str`` (including subclasses such as ``numpy.str_``).

    Returns:
        None after showing the figure, or the message string
        "Ticker provided is not a valid value" when ``ticker`` is not a string.
    """
    # Guard clause: keep the original contract of returning a message for non-string input.
    if not isinstance(ticker, str):
        return "Ticker provided is not a valid value"

    # Filter for a particular ticker
    data = data[data['Ticker'] == ticker]

    fig = go.Figure()

    # Plot stock price trend line
    fig.add_trace(go.Scatter(
        x=data['Date'],
        y=data['Close'],
        mode='lines',
        name='Stock Price',
        line=dict(width=1)
    ))

    # (predicted class, label, marker symbol, colour when correct, size correct, size incorrect)
    signal_styles = (
        (2, 'Buy', 'triangle-up', 'green', 10, 8),
        (1, 'Sell', 'triangle-down', 'red', 10, 8),
        (0, 'Hold', 'circle', 'blue', 8, 6),
    )

    for predicted_class, label, symbol, colour, correct_size, incorrect_size in signal_styles:
        signals = data[data['Predicted'] == predicted_class]
        correct = signals[signals['Predicted'] == signals['Actual']]
        incorrect = signals[signals['Predicted'] != signals['Actual']]

        # Take x and y from the same subset so they always have equal length, even when the
        # frame's index contains repeated labels (a label-based lookup would expand those).
        fig.add_trace(go.Scatter(
            x=correct['Date'],
            y=correct['Close'],
            mode='markers',
            name=f'Correct {label} Signal',
            marker=dict(symbol=symbol, size=correct_size, color=colour)
        ))

        fig.add_trace(go.Scatter(
            x=incorrect['Date'],
            y=incorrect['Close'],
            mode='markers',
            name=f'Incorrect {label} Signal',
            marker=dict(symbol=symbol, size=incorrect_size, color='gray', opacity=0.2)
        ))

    # Update layout
    fig.update_layout(
        title=f'{ticker} Stock Price - Actual/Predicted Signals',
        #xaxis_title='Date',
        yaxis_title='Price (USD)',
        template='plotly_dark',
        height=600,
        legend=dict(orientation="h", yanchor="bottom", y=1.02)
    )

    fig.show()

# Draw the price line and signal markers for a single ticker from the merged frame
plot_combined_predictions(data=merged_df, ticker='PLTR')

In [None]:
from Components.BackTesting import BackTesting
import pandas as pd

#merged_df = pd.read_csv('Data/NASDAQ_100_PredictictionsData.csv')

# Back-test a single ticker on the merged price/prediction frame with a fixed starting capital
ticker = 'CRWD'
initial_capital = 10000.0
backtester = BackTesting(merged_df, ticker, initial_capital)

# run_simulation() returns a pair; only its first element is kept
results, _ = backtester.run_simulation()

# plot_performance() returns three figures; show them in the order returned
trades_fig, value_fig, exposure_fig = backtester.plot_performance()
for performance_fig in (trades_fig, value_fig, exposure_fig):
    performance_fig.show()

In [None]:
# %%
import ray
from ray import tune
from ray.air import session
#from ray.air.checkpoint import Checkpoint
from ray.tune.schedulers import ASHAScheduler
from Components.TrainModel import TunableLSTMClassifier
import os
from functools import partial


# Shared epoch loop used by the Ray Tune training function below
def _run_epoch(model, loader, criterion, device, optimizer=None):
    """Run one pass over ``loader`` and return ``(mean loss per batch, accuracy)``.

    With an ``optimizer`` the model is put in training mode and updated after every batch;
    without one the model is put in eval mode and gradients are disabled.
    """
    training = optimizer is not None
    model.train(training)

    loss_sum = 0.0
    correct = 0
    total = 0

    with torch.set_grad_enabled(training):
        for inputs, targets in loader:
            inputs, targets = inputs.to(device), targets.to(device)

            if training:
                optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, targets)
            if training:
                loss.backward()
                optimizer.step()

            loss_sum += loss.item()
            _, predicted = outputs.max(1)
            total += targets.size(0)
            correct += predicted.eq(targets).sum().item()

    return loss_sum / len(loader), correct / total


# Define a training function for Ray Tune
def train_lstm(config, input_size=33, num_classes=3, train_data=None, val_data=None, test_data=None):
    """Train one hyperparameter configuration and report per-epoch metrics to Ray Tune.

    Args:
        config: Mapping with the keys 'hidden_size', 'num_layers', 'dropout_rate',
            'batch_size', 'lr' and 'weight_decay'.
        input_size: Value passed to the model as 'input_size'.
        num_classes: Value passed to the model as 'num_classes'.
        train_data: Frame handed to DataModule, which supplies both the training and the
            validation loader used here.
        val_data: Accepted but not used by this function.
        test_data: Accepted but not used by this function.

    Raises:
        ValueError: If the training or validation loader contains no batches.
    """
    # Set up device
    device = "mps" if torch.backends.mps.is_available() else "cpu"

    # Create model with the hyperparameter configuration
    model = TunableLSTMClassifier({
        "input_size": input_size,
        "hidden_size": config["hidden_size"],
        "num_layers": config["num_layers"],
        "num_classes": num_classes,
        "dropout_rate": config["dropout_rate"]
    }).to(device)

    # Set up data loaders
    data_module = DataModule(train_data, seq_length=10, batch_size=config["batch_size"])
    train_loader = data_module.train_loader
    val_loader = data_module.eval_loader

    # Fail fast with a clear message rather than dividing by zero after a full epoch.
    if len(train_loader) == 0 or len(val_loader) == 0:
        raise ValueError(
            "DataModule produced an empty training or validation loader "
            f"(batch_size={config['batch_size']})"
        )

    # Set up loss and optimizer
    # NOTE(review): the loss here is unweighted, whereas the main training cell passes class
    # weights to CrossEntropyLoss -- confirm the difference is intended.
    criterion = nn.CrossEntropyLoss()
    optimizer = AdamW(
        model.parameters(),
        lr=config["lr"],
        weight_decay=config["weight_decay"]
    )

    # Limit epochs for tuning
    num_epochs = 10
    for epoch in range(num_epochs):
        train_loss, train_accuracy = _run_epoch(model, train_loader, criterion, device, optimizer)
        val_loss, val_accuracy = _run_epoch(model, val_loader, criterion, device)

        # Report metrics to Ray Tune
        session.report({
            "val_accuracy": val_accuracy,
            "val_loss": val_loss,
            "train_accuracy": train_accuracy,
            "train_loss": train_loss,
            "epoch": epoch
        })


In [None]:
# %%
# Initialize Ray. ignore_reinit_error lets this cell be re-run in a live kernel, where Ray
# is already initialised and a plain ray.init() would raise.
ray.init(ignore_reinit_error=True)

# Define the hyperparameter search space
config = {
    "lr": tune.loguniform(1e-5, 1e-2),
    "hidden_size": tune.choice([32, 64, 128, 256]),
    "num_layers": tune.choice([1, 2, 3]),
    "dropout_rate": tune.uniform(0.1, 0.5),
    "weight_decay": tune.loguniform(1e-6, 1e-3),
    "batch_size": tune.choice([16, 32, 64, 128])
}

# Configure the ASHA scheduler
scheduler = ASHAScheduler(
    max_t=10,  # Maximum number of epochs; train_lstm also runs 10 epochs per trial
    grace_period=1,
    reduction_factor=2
)

# Set up the tuner
tuner = tune.Tuner(
    tune.with_resources(
        # Bind the fixed arguments so Ray Tune only has to supply `config`
        partial(
            train_lstm,
            input_size=33,
            num_classes=3,
            train_data=training_data,
            val_data=None,
            test_data=None
        ),
        resources={"cpu": 2, "gpu": 0}  # Adjust based on your hardware
    ),
    tune_config=tune.TuneConfig(
        metric="val_accuracy",
        mode="max",
        scheduler=scheduler,
        num_samples=50,  # Number of hyperparameter combinations to try
        # Trial directory names: trainable name plus the first 4 characters of the trial id
        trial_dirname_creator=lambda trial: f"{trial.trainable_name}_{trial.trial_id[:4]}"
    ),
    param_space=config
)

# Run the hyperparameter search
results = tuner.fit()


In [None]:
# %%
# Pick the trial with the highest validation accuracy and print its configuration
best_result = results.get_best_result("val_accuracy", "max")
best_config = best_result.config
print("Best config:", best_config)

# Unpack the winning hyperparameters into individual names
(
    best_lr,
    best_hidden_size,
    best_num_layers,
    best_dropout,
    best_weight_decay,
    best_batch_size,
) = (
    best_config[key]
    for key in ("lr", "hidden_size", "num_layers", "dropout_rate", "weight_decay", "batch_size")
)

# One row per trial, with the sampled values under "config/<name>" columns
df_results = results.get_dataframe()

import matplotlib.pyplot as plt

plt.figure(figsize=(15, 10))

# 2 x 3 grid: one scatter of validation accuracy per hyperparameter.
# (hyperparameter name, x-axis label, whether the x-axis is logarithmic)
panels = (
    ("lr", "Learning Rate", True),
    ("hidden_size", "Hidden Size", False),
    ("num_layers", "Number of Layers", False),
    ("dropout_rate", "Dropout Rate", False),
    ("weight_decay", "Weight Decay", True),
    ("batch_size", "Batch Size", False),
)
for position, (param_name, axis_label, log_x) in enumerate(panels, start=1):
    plt.subplot(2, 3, position)
    plt.scatter(df_results[f"config/{param_name}"], df_results["val_accuracy"])
    if log_x:
        plt.xscale("log")
    plt.xlabel(axis_label)
    plt.ylabel("Validation Accuracy")

plt.tight_layout()
plt.show()


In [None]:
#ticker.get_balance_sheet(freq='quarterly')
#ticker.get_calendar()
#ticker.get_cash_flow(freq='quarterly')
#earnings_data = ticker.get_earnings_dates()
#income_statement = ticker.get_income_stmt(freq='yearly').T
#ticker.get_institutional_holders()
#ticker.get_recommendations()
#ticker.get_sustainability()

In [None]:
# define a function to fetch the options data for a given ticker symbol
#def fetch_options_data(ticker_symbol):
    #ticker = yf.Ticker(ticker_symbol)
#    options_dates = ticker.options
#    options_data = ticker.option_chain(date='2025-03-21')
#    return options_data.calls, options_data.puts
##ionq_stock_data = ionq_stock_data.sort_values(by='Date', ascending=False)