# Deeptech M&A Momentum: Backtest Engine and Evaluation

## Phase 5: Backtesting, metric calculation, and final reporting

This notebook represents the culmination of the project, linking the derived trading signals (Phase 4) with real-world market prices (using Yahoo Finance) to calculate the strategy's historical performance.

**Key Deliverables:**
1. Full backtest execution using the optimal frequency signal.
2. Calculation of key financial metrics: Sharpe Ratio, Cumulative Return, and Max Drawdown.
3. Visualization of strategy performance vs. S\&P 500 benchmark.

---

### Setup and configuration

In [13]:
# --- Imports ---
from pathlib import Path
import sys
from typing import Dict, List, Tuple

import numpy as np
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import polars as pl 

sys.path.append(str(Path.cwd().parent.parent / "src"))
import market_data
import metrics

In [14]:
# --- Configuration ---
# Initial Capital for P&L tracking (fixed notional concept)
INITIAL_CAPITAL_USD = 100_000_000 
print(f"Initial Capital for Backtest: ${INITIAL_CAPITAL_USD:,.0f}")

# The user-defined mapping linking Deeptech Sectors to tradable ETFs/Indexes
SECTOR_TICKER_MAP: Dict[str, str] = {
    "Gene Therapy / CRISPR": "ARKG",
    "Advanced Computing / Quantum Computing": "QTUM",
    "Green Hydrogen Infrastructure": "HYDR",
    "Energy Transition Metals": "REMX",
    "Advanced Battery Chemistry / Storage": "LIT",
    "Advanced Manufacturing": "PRNT",
    "Aerospace Defense Systems Integration": "ITA",
    "Advanced Materials": "FXZ"
}
BENCHMARK_TICKER = market_data.BENCHMARK_TICKER # ^GSPC (S&P 500)

# Set the optimal frequency based on Phase 3/4 exploration (default to 3mo/quarterly)
OPTIMAL_FREQ = "3mo" 
# CORRECTED: Load from data/outputs/ with the new filename format
SIGNAL_FILE = Path(f"../../data/outputs/4.0_discrete_signals_{OPTIMAL_FREQ}.csv") 

# The historical period for the backtest
START_DATE = "2018-01-01"
END_DATE = "2024-01-01"
# Use a weekly interval ('1wk') for market data to align with the periodic signals
MARKET_INTERVAL = '1wk' 

print(f"Optimal Signal Frequency Selected: {OPTIMAL_FREQ}")
print(f"Backtesting Interval: {MARKET_INTERVAL}")

Initial Capital for Backtest: $100,000,000
Optimal Signal Frequency Selected: 3mo
Backtesting Interval: 1wk


### Utility function: backtest engine and metric calculation


In [15]:
# ### Utility Function: Backtest Engine (Fixed Capital)

# %%
def simple_backtest_engine(
    df_signal: pl.DataFrame,
    df_prices: pl.DataFrame,
    initial_capital: float
) -> Tuple[pl.DataFrame, Dict[str, float]]:
    """
    Executes a simplified, equally weighted, long-only backtest using simple returns 
    and tracking P&L based on fixed initial capital.
    """
    
    # 1. Calculate Simple Returns
    df_returns = df_prices.sort("Ticker", "Date").with_columns(
        (pl.col("Adj_Close") / pl.col("Adj_Close").shift(1).over("Ticker") - 1).alias("simple_return")
    ).drop_nulls()

    # 2. Prepare the signal data (Rename and map)
    df_signal_map = df_signal.with_columns([
        pl.col("announced_date").str.to_date().alias("Date"),
        pl.col("deeptech_sector").replace(SECTOR_TICKER_MAP).alias("Ticker")
    ]).select(["Date", "Ticker", "discrete_signal"])

    # 3. Merge signals with returns (event alignment)
    df_returns_filtered = df_returns.filter(pl.col("Date").is_in(df_signal_map.get_column("Date").to_list()))

    df_merged = df_signal_map.join(
        df_returns_filtered,
        on=["Date", "Ticker"],
        how="inner"
    ).sort("Date")
    
    print(f"  Merged {len(df_merged):,} periods of signals and returns.")

    # 4. Calculate Strategy Weights and Returns
    df_long_count = df_merged.group_by("Date").agg(
        pl.sum("discrete_signal").alias("long_position_count")
    )
    df_merged = df_merged.join(df_long_count, on="Date", how="left")

    df_merged = df_merged.with_columns(
        pl.when(pl.col("long_position_count") > 0)
        .then(pl.col("discrete_signal") / pl.col("long_position_count"))
        .otherwise(0.0)
        .alias("asset_weight")
    )

    df_strategy_returns = df_merged.with_columns(
        (pl.col("asset_weight") * pl.col("simple_return")).alias("weighted_return")
    ).group_by("Date").agg(
        pl.sum("weighted_return").alias("Strategy_Return")
    ).sort("Date")
    
    # 5. Benchmark Return (S&P 500)
    df_benchmark_returns = df_returns.filter(pl.col("Ticker") == BENCHMARK_TICKER).select("Date", pl.col("simple_return").alias("Benchmark_Return"))
    
    # Final consolidated returns
    df_consolidated = df_strategy_returns.join(df_benchmark_returns, on="Date", how="inner").drop_nulls()
    
    # 6. Calculate Metrics
    strategy_metrics = metrics.calculate_all_metrics(df_consolidated.get_column("Strategy_Return"))
    
    # 7. Cumulative Return with guard
    strategy_cum_series = (1 + df_consolidated.get_column("Strategy_Return")).cum_prod()
    strategy_metrics["cumulative_return"] = strategy_cum_series[-1] - 1 if len(strategy_cum_series) else 0.0
    
    return df_consolidated, strategy_metrics

### Step 5.1 & 5.2: Execute backtest and calculate metrics

In [None]:
from importlib import reload
reload(metrics)
print("=" * 60)
print(f"PHASE 5 | Running Backtest for Optimal Frequency: {OPTIMAL_FREQ}")
print("=" * 60)

if not SIGNAL_FILE.exists():
    raise FileNotFoundError(f"Discrete signal file not found at {SIGNAL_FILE}. Please run Phase 4 first.")

# 1. Load the discrete signal file
df_signal = pl.read_csv(SIGNAL_FILE)
print(f"✓ Loaded {len(df_signal):,} discrete signals.")

# 2. Define the tickers to fetch
tickers_to_fetch = list(SECTOR_TICKER_MAP.values()) + [BENCHMARK_TICKER]

# 3. Fetch historical market data
df_prices = market_data.get_historical_prices(
    tickers_to_fetch, 
    START_DATE, 
    END_DATE,
    interval=MARKET_INTERVAL
)
if df_prices.is_empty():
    sys.exit("Market data retrieval failed. Cannot run backtest.")

# 4. Run the backtest engine
df_returns, strategy_metrics = simple_backtest_engine(df_signal, df_prices, INITIAL_CAPITAL_USD)

# 5. Benchmark Metrics (S&P 500)
benchmark_returns = df_returns.get_column("Benchmark_Return")
benchmark_metrics = metrics.calculate_all_metrics(benchmark_returns)
benchmark_cum_series = (1 + benchmark_returns).cum_prod()
benchmark_metrics["cumulative_return"] = benchmark_cum_series[-1] - 1 if len(benchmark_cum_series) else 0.0

# Helpers
pct = lambda v: f"{(0.0 if v is None else v) * 100:.2f}%"
num = lambda v: f"{(0.0 if v is None else v):.2f}"

# --- Strategy Performance ---
print("\n--- Strategy Performance Metrics (Deeptech M&A Momentum) ---")
print(f"Time Horizon: {df_returns.get_column('Date').min()} to {df_returns.get_column('Date').max()}")
print(f"Total Periods: {len(df_returns):,}")
print(f"Annualization Factor: {metrics.ANNUALIZATION_FACTOR} (Weekly Data)")
print(f"Strategy Cumulative Return: {pct(strategy_metrics.get('cumulative_return'))}")
print(f"Strategy Sharpe Ratio:      {num(strategy_metrics.get('sharpe_ratio'))}")
print(f"Strategy Sortino Ratio:     {num(strategy_metrics.get('sortino_ratio'))}")
print(f"Strategy Max Drawdown:      {pct(strategy_metrics.get('max_drawdown'))}")
print(f"Strategy Hit Ratio:         {pct(strategy_metrics.get('hit_ratio'))}")

# --- Benchmark Performance ---
print("\n--- Benchmark Performance Metrics (S&P 500) ---")
print(f"Benchmark Cumulative Return: {pct(benchmark_metrics.get('cumulative_return'))}")
print(f"Benchmark Sharpe Ratio:      {num(benchmark_metrics.get('sharpe_ratio'))}")
print(f"Benchmark Sortino Ratio:     {num(benchmark_metrics.get('sortino_ratio'))}")
print(f"Benchmark Max Drawdown:      {pct(benchmark_metrics.get('max_drawdown'))}")
print(f"Benchmark Hit Ratio:         {pct(benchmark_metrics.get('hit_ratio'))}" )

PHASE 5 | Running Backtest for Optimal Frequency: 3mo
✓ Loaded 248 discrete signals.
Fetching 1wk data for 9 tickers...
✓ Fetched 2,598 market data points.
  Merged 21 periods of signals and returns.
✓ Fetched 2,598 market data points.
  Merged 21 periods of signals and returns.


TypeError: unsupported operand type(s) for *: 'NoneType' and 'float'