In [26]:
import pandas as pd
import numpy as np
import itertools
import os
from datetime import datetime
from statsmodels.tsa.stattools import coint, adfuller
from sklearn.metrics import mean_squared_error
import matplotlib.pyplot as plt
import seaborn as sns
import itertools
from scipy.stats import linregress
from joblib import Parallel, delayed
from tqdm import tqdm
import numba

In [27]:
class PairSelector:
    def __init__(self, pair_metadata_df, df_main, coint_threshold=0.05, corr_threshold=0.7, half_life_threshold=20):
        """
        Parameters:
        - pair_metadata_df: DataFrame containing precomputed pair info (cointegration p-values, correlations, half-life).
        - coint_threshold: p-value threshold for cointegration.
        - corr_threshold: correlation threshold for correlation.
        - half_life_threshold: Max allowed half-life for selected pairs.
        """
        self.pair_df = pair_metadata_df
        self.df = df_main
        self.coint_threshold = coint_threshold
        self.corr_threshold = corr_threshold
        self.half_life_threshold = half_life_threshold

    def select_pairs(self, current_date):
        """
        Filters pairs based on hybrid selection: 
        Cointegration p-value, correlation threshold, and half-life constraint.
        Returns a list of tuples: (permno_black, permno_white)
        """
        # Correct way to extract group_ids based on trading_start date
        group_ids = self.df[
            (self.df['trading_start'] <= current_date) & 
            (self.df['trading_start'] > current_date - pd.offsets.QuarterEnd(1))
        ]['group_id'].unique()
        
        if len(group_ids) == 0:
            return []  # No group found for this date
    
        # Filter pairs based on group_ids
        pairs_on_group = self.pair_df[self.pair_df['group_id'].isin(group_ids)]
        
        selected = pairs_on_group[
            (pairs_on_group['p_value'] < self.coint_threshold) &
            (pairs_on_group['correlation'] >= self.corr_threshold) &
            (pairs_on_group['half_life'] <= self.half_life_threshold)
        ]
        
        return list(zip(selected['permno_black'], selected['permno_white']))


In [50]:
def generate_signals(z_diff, threshold):
    signals = np.empty(z_diff.shape, dtype=object)
    for i in range(len(z_diff)):
        if z_diff[i] >= threshold:
            signals[i] = 'short_black_long_white'
        elif z_diff[i] <= -threshold:
            signals[i] = 'long_black_short_white'
        else:
            signals[i] = ''
    return signals

def process_group_signal(group_id, group_df_main_dict, df_pairs_group, z_col, zscore_threshold, df_main):
    if group_id not in group_df_main_dict:
        return pd.DataFrame()  # Safe exit if group_id missing

    group_df_main = group_df_main_dict[group_id]
    z_map = group_df_main.set_index('permno')[z_col].to_dict()

    # Fast mapping instead of merging
    df_pairs_group['z_black'] = df_pairs_group['permno_black'].map(z_map)
    df_pairs_group['z_white'] = df_pairs_group['permno_white'].map(z_map)
    df_pairs_group['z_diff'] = df_pairs_group['z_black'] - df_pairs_group['z_white']

    # ✅ Assign 'date' directly from df_main using group_id mapping
    group_dates = df_main[df_main['group_id'] == group_id]['date'].unique()
    if len(group_dates) == 1:
        df_pairs_group['date'] = group_dates[0]
    elif len(group_dates) > 1:
        df_pairs_group['date'] = min(group_dates)  # or use max() depending on your logic
    else:
        df_pairs_group['date'] = pd.NaT

    # Generate signals efficiently
    z_diff_arr = df_pairs_group['z_diff'].values
    df_pairs_group['signal'] = generate_signals(z_diff_arr, zscore_threshold)
    df_pairs_group = df_pairs_group[df_pairs_group['signal'] != '']  # Filter valid signals only

    return df_pairs_group.dropna(subset=['signal', 'date'])  # Ensure 'date' is available

class SignalGenerator:
    def __init__(self, df_main, df_pairs, zscore_method='ou', zscore_threshold=1.5, lookback_period=20):
        self.df_main = df_main
        self.df_pairs = df_pairs
        self.zscore_method = zscore_method
        self.zscore_threshold = zscore_threshold
        self.lookback_period = lookback_period
        self.precomputed_signals = None

    def precompute_signals_parallel(self, horizon=5, n_jobs=4):
        z_col = f'z_{self.zscore_method}_{horizon}d_lb{self.lookback_period}'
        
        # Precompute the group dictionary to avoid repeated filtering
        group_df_main_dict = {
            group_id: df_group[['permno', z_col]].dropna()
            for group_id, df_group in self.df_main.groupby('group_id')
        }

        group_ids = self.df_pairs['group_id'].unique()

        parallel_results = Parallel(n_jobs=n_jobs)(
            delayed(process_group_signal)(
                group_id,
                group_df_main_dict,
                self.df_pairs[self.df_pairs['group_id'] == group_id],
                z_col,
                self.zscore_threshold,
                self.df_main  # Pass df_main for correct date assignment
            )
            for group_id in tqdm(group_ids, desc="Processing Groups")
        )


        # Concatenate non-empty results only
        results = [df for df in parallel_results if not df.empty]
        if results:
            self.precomputed_signals = pd.concat(results).reset_index(drop=True)
        else:
            self.precomputed_signals = pd.DataFrame()  # Handle case when no signals generated

    def generate_signals(self, date):
        if self.precomputed_signals is None or self.precomputed_signals.empty:
            raise ValueError("Signals not precomputed. Run `precompute_signals_parallel()` first.")

        signals_today = self.precomputed_signals[self.precomputed_signals['date'] == date]
        return signals_today[['date', 'permno_black', 'permno_white', 'signal', 'z_diff']].to_dict('records')

    def get_current_z_diff(self, trade, date, horizon):
        z_col = f'z_{self.zscore_method}_{horizon}d_lb{self.lookback_period}'

        z_black = self.df_main.loc[
            (self.df_main['permno'] == trade['permno_black']) & 
            (self.df_main['date'] == date), 
            z_col
        ]
        z_white = self.df_main.loc[
            (self.df_main['permno'] == trade['permno_white']) & 
            (self.df_main['date'] == date), 
            z_col
        ]

        if z_black.empty or z_white.empty:
            return None

        return z_black.values[0] - z_white.values[0]


In [48]:
class WeightManager:
    def __init__(self, total_capital=1_000_000_000, liquidity_constraint=0.1):
        """
        Parameters:
        - total_capital: Total capital available for the quarter.
        - liquidity_constraint: Maximum 10% of 20-day average volume.
        """
        self.total_capital = total_capital
        self.liquidity_constraint = liquidity_constraint

    def allocate_portfolio_weights(self, portfolios):
        """
        Allocates capital across portfolios based on the number of pairs.
        portfolios: Dict of {portfolio_id: list_of_pairs}
        Returns: {portfolio_id: allocated_capital}
        """
        total_pairs = sum(len(pairs) for pairs in portfolios.values())
        allocation = {}
        for portfolio_id, pairs in portfolios.items():
            allocation[portfolio_id] = (len(pairs) / total_pairs) * self.total_capital
        return allocation

    def allocate_pair_weights(self, pairs_volatility, available_capital):
        """
        Allocates capital within a portfolio based on inverse volatility of pairs.
        pairs_volatility: Dict of {pair: volatility}
        available_capital: Capital allocated to the portfolio.
        Returns: {pair: allocated_investment}
        """
        inv_vols = {pair: 1 / vol if vol > 0 else 0 for pair, vol in pairs_volatility.items()}
        total_inv_vol = sum(inv_vols.values())
        allocation = {}
        for pair, inv_vol in inv_vols.items():
            allocation[pair] = (inv_vol / total_inv_vol) * available_capital
        return allocation

    def apply_liquidity_constraint(self, investment, adv20):
        """
        Adjust investment if it exceeds the liquidity constraint.
        """
        max_investment = adv20 * self.liquidity_constraint
        return min(investment, max_investment)

In [30]:
class BudgetManager:
    def __init__(self, initial_capital=1_000_000_000,
                 t_cost_per_share=0.01,
                 long_spread=0.015, short_spread=0.010,
                 base_rate=0.05):                       # ↖ Fed-funds proxy
        self.cash   = initial_capital
        self.t_cost = t_cost_per_share
        self.long_spread  = long_spread
        self.short_spread = short_spread

    def _transaction_cost(self, shares):                 # cost = shares*fee
        return shares * self.t_cost

    def _fin_cost(self, notional, days_held, side, fed_rate):
        spread = self.long_spread if side == "long" else self.short_spread
        daily_r = (self.base_rate + spread) / 252        # trading-day basis
        return notional * daily_r * days_held

    # public helpers
    def book_entry(self, cash_out):  
        self.cash -= cash_out
        
    def book_exit (self, cash_in ):  
        self.cash += cash_in
        
    def available_cash(self):        
        return self.cash

In [31]:
class ExitManager:
    def __init__(self, max_holding_days=20):
        """
        Parameters:
        - max_holding_days: Maximum number of days a trade can remain open.
        """
        self.max_holding_days = max_holding_days

    def check_exit_conditions(self, trade, current_zscore, current_date):
        """
        Checks exit conditions for a trade.
        - trade: Dictionary containing trade details.
        - current_zscore: Current z-diff value for the pair.
        - current_date: Current trading date.
        
        Returns:
        - exit_flag: Boolean indicating if trade should be closed.
        - exit_reason: Reason for exit ('mean_reversion', 'max_holding', 'other').
        """
        holding_days = (current_date - trade['entry_date']).days

        # Condition 1: Z-score mean reversion toward zero
        if trade['side'] == 'short_black_long_white' and current_zscore <= 0:
            return True, 'mean_reversion'
        elif trade['side'] == 'long_black_short_white' and current_zscore >= 0:
            return True, 'mean_reversion'

        # Condition 2: Max holding period reached
        if holding_days >= self.max_holding_days:
            return True, 'max_holding'

        return False, 'hold'

In [32]:
class TradeLog:
    def __init__(self):
        self.records = []

    def record_trade(self, trade_details):
        """
        Stores trade information.
        """
        self.records.append(trade_details)

    def to_dataframe(self):
        """
        Converts trade log to pandas DataFrame.
        """
        return pd.DataFrame(self.records)

    def record(self, rec_dict): 
        self._log.append(rec_dict)
        
    def df(self):              
        return pd.DataFrame(self._log)

In [33]:
class BacktestEngine:
    def __init__(self, df, pair_selector, signal_generator, weight_manager, budget_manager, exit_manager, trade_log, horizon=5):
        self.df = df
        self.pair_selector = pair_selector
        self.signal_generator = signal_generator
        self.weight_manager = weight_manager
        self.budget_manager = budget_manager
        self.exit_manager = exit_manager
        self.trade_log = trade_log
        self.open_trades = []
        self.horizon = horizon

    def run(self, start_date, end_date):
        self.df['trading_start'] = pd.to_datetime(self.df['trading_start'], errors='coerce')

        for cur_dt in pd.date_range(start_date, end_date):  # business days
            # 1) Select pairs based on cointegration, correlation, and half-life filters
            pairs = self.pair_selector.select_pairs(cur_dt)
            print("pairs: ", pairs)

            # Use precomputed signals directly from SignalGenerator (much faster)
            signals = self.signal_generator.generate_signals(cur_dt)
            print("signals: ", signals)

            # 2) Determine position sizes for each signal based on volatility
            vol_map = {(s['permno_black'], s['permno_white']):
                       self._spread_vol(s['permno_black'], s['permno_white'],
                                        cur_dt, self.signal_generator.lookback_period)
                       for s in signals}

            portfolio_cap = self.budget_manager.available_cash()
            alloc_per_pair = self.weight_manager.allocate_pair_weights(vol_map, portfolio_cap)
            print("allocation: ", alloc_per_pair)

            # 3) Enter New Positions
            for sig in signals:
                if self.budget_manager.available_cash() <= 0:
                    break
                key = (sig['permno_black'], sig['permno_white'])
                print("preparing to enter: ", key)
                if any((t['permno_black'], t['permno_white']) == key for t in self.open_trades):
                    continue  # already open

                notional = alloc_per_pair.get(key, 0)
                if notional <= 0:
                    continue

                # 50/50 capital split between both legs
                inv_black = inv_white = notional / 2

                px_black = self._price(sig['permno_black'], cur_dt)
                px_white = self._price(sig['permno_white'], cur_dt)

                if np.isnan(px_black) or np.isnan(px_white):
                    continue

                sh_black = inv_black / px_black
                sh_white = inv_white / px_white

                # Transaction costs
                entry_tc = self.budget_manager._transaction_cost(sh_black) + self.budget_manager._transaction_cost(sh_white)

                print("Costs: ", entry_tc)

                self.budget_manager.book_entry(notional + entry_tc)

                trade = dict(
                    entry_date=cur_dt,
                    permno_black=sig['permno_black'],
                    permno_white=sig['permno_white'],
                    side=sig['signal_side'],  # Use precomputed 'signal_side'
                    z_diff_entry=sig['z_diff'],
                    investment_black=inv_black,
                    investment_white=inv_white,
                    shares_black=sh_black,
                    shares_white=sh_white,
                    entry_price_black=px_black,
                    entry_price_white=px_white,
                    entry_transaction_cost=entry_tc,
                    status='open'
                )
                self.open_trades.append(trade)
                self.trade_log.record(trade)

                print("Entered: ", trade)

            # 4) Exit Logic
            for trade in self.open_trades[:]:
                cur_z = self.signal_generator.get_current_z_diff(trade, cur_dt, self.horizon)
                print("z score: ", cur_z)
                exit_flag, reason = self.exit_manager.check_exit_conditions(trade, cur_z, cur_dt)

                if not exit_flag:
                    continue

                # Fetch exit prices
                px_b = self._price(trade['permno_black'], cur_dt)
                px_w = self._price(trade['permno_white'], cur_dt)
                if np.isnan(px_b) or np.isnan(px_w):
                    continue  # Cannot price; defer exit

                days_held = (cur_dt - trade['entry_date']).days or 1

                # Calculate exit transaction cost
                exit_tc = (self.budget_manager._transaction_cost(trade['shares_black']) + 
                           self.budget_manager._transaction_cost(trade['shares_white']))

                # Financing Costs
                fed_rate_today = self._get_fed_rate(cur_dt)
                fin_cost_long = self.budget_manager._fin_cost(trade['investment_white'], days_held, 'long', fed_rate_today)
                fin_cost_short = self.budget_manager._fin_cost(trade['investment_black'], days_held, 'short', fed_rate_today)
                total_fin_cost = fin_cost_long + fin_cost_short

                # Calculate PnL
                if trade['side'] == 'short_black_long_white':
                    pnl_black = (trade['entry_price_black'] - px_b) * trade['shares_black']
                    pnl_white = (px_w - trade['entry_price_white']) * trade['shares_white']
                else:  # 'long_black_short_white'
                    pnl_black = (px_b - trade['entry_price_black']) * trade['shares_black']
                    pnl_white = (trade['entry_price_white'] - px_w) * trade['shares_white']

                gross_pnl = pnl_black + pnl_white
                net_pnl = gross_pnl - exit_tc - total_fin_cost

                self.budget_manager.book_exit(trade['investment_black'] + trade['investment_white'] + gross_pnl - exit_tc - total_fin_cost)

                # Update Trade Record
                trade.update(dict(
                    exit_date=cur_dt,
                    exit_price_black=px_b,
                    exit_price_white=px_w,
                    exit_transaction_cost=exit_tc,
                    total_financing_cost=total_fin_cost,
                    gross_pnl=gross_pnl,
                    net_pnl=net_pnl,
                    z_diff_exit=cur_z,
                    exit_reason=reason,
                    status='closed'
                ))
                self.trade_log.record(trade)
                self.open_trades.remove(trade)
                print(trade)
                print("------------------------")

    # ──────────────────────────────#
    # Helpers
    # ──────────────────────────────#
    def _price(self, permno, date):
        row = self.df[(self.df.permno == permno) & (self.df.date == date)]
        return row.adj_prc.values[0] if not row.empty else np.nan

    def _spread_vol(self, p1, p2, date, lookback=20):
        end = pd.to_datetime(date)
        start = end - pd.Timedelta(days=lookback * 2)
        sub = self.df[(self.df.date.between(start, end - pd.Timedelta(days=1))) & 
                      (self.df.permno.isin([p1, p2]))]
        if sub.empty:
            return np.nan
        s1 = sub[sub.permno == p1].sort_values('date').adj_prc
        s2 = sub[sub.permno == p2].sort_values('date').adj_prc
        if len(s1) < lookback or len(s2) < lookback:
            return np.nan
        return np.std(s1.values[-lookback:] - s2.values[-lookback:])

    def _get_fed_rate(self, date):
        row = self.df[self.df.date == date]
        return row.fed_funds_rate.values[0] if not row.empty else 0.0

In [34]:
df_pairs = pd.read_csv('corr_coin.csv')
df_merged = pd.read_csv('final_backtest_data.csv')

In [35]:
df_merged[['date', 'trading_start', 'group_id', 'permno', 'fed_funds_rate']]

Unnamed: 0,date,trading_start,group_id,permno,fed_funds_rate
0,2015-04-01,2015-04-01,2015-Q1-08,10001,0.12
1,2015-04-02,2015-04-01,2015-Q1-08,10001,0.12
2,2015-04-06,2015-04-01,2015-Q1-08,10001,0.13
3,2015-04-07,2015-04-01,2015-Q1-08,10001,0.12
4,2015-04-08,2015-04-01,2015-Q1-08,10001,0.12
...,...,...,...,...,...
4789498,2024-12-24,2024-10-01,2024-Q3-00,93436,4.33
4789499,2024-12-26,2024-10-01,2024-Q3-00,93436,4.33
4789500,2024-12-27,2024-10-01,2024-Q3-00,93436,4.33
4789501,2024-12-30,2024-10-01,2024-Q3-00,93436,4.33


In [36]:
df_pairs

Unnamed: 0,group_id,permno_1,permno_2,correlation,adf_stat,p_value,n_obs,half_life
0,2015-Q1-08,10158,78693,0.558967,-1.482951,0.541928,63,4.292377
1,2015-Q1-08,10158,79909,0.515229,-1.884244,0.339509,63,4.895030
2,2015-Q1-08,10158,89307,0.574179,-1.674454,0.444320,63,15.371619
3,2015-Q1-08,10158,89828,0.521051,-2.202160,0.205462,63,4.476190
4,2015-Q1-08,10158,89946,0.511613,-1.394363,0.584975,63,13.421107
...,...,...,...,...,...,...,...,...
2853355,2024-Q2-07,14694,15585,0.576794,-1.665551,0.448944,64,13.012950
2853356,2024-Q2-07,14694,18102,0.560853,-1.491468,0.537728,64,4.654117
2853357,2024-Q2-07,17000,91184,0.538966,-1.926046,0.319909,64,5.775842
2853358,2024-Q2-07,22290,83762,0.508963,-0.396444,0.910691,64,43.719888


In [37]:
def filter_columns_for_backtest(df, zscore_methods, lookback_periods, horizons):
    # Always Required Columns
    base_cols = ['date', 'permno', 'trading_start', 'group_id', 'adj_prc', 'fed_funds_rate', 'adv20']
    
    # Dynamically Generate Required Z-Score Columns
    zscore_cols = [
        f'z_{method}_{horizon}d_lb{lb}' 
        for method in zscore_methods 
        for horizon in horizons 
        for lb in lookback_periods
    ]
    
    # Add Future Return Columns for Each Horizon (if needed for signal generation or evaluation)
    future_return_cols = [f'future_cumret_{horizon}d' for horizon in horizons]

    # Final List of Columns Needed
    required_cols = base_cols + zscore_cols + future_return_cols

    # Filter Only Existing Columns (in case some z-scores haven't been computed yet)
    existing_cols = [col for col in required_cols if col in df.columns]

    return df[existing_cols].copy()

In [38]:
# Add random half_life values between 1 and 20
df_pairs.rename(columns={'permno_1': 'permno_black', 'permno_2': 'permno_white'}, inplace=True)

In [39]:
df_pairs

Unnamed: 0,group_id,permno_black,permno_white,correlation,adf_stat,p_value,n_obs,half_life
0,2015-Q1-08,10158,78693,0.558967,-1.482951,0.541928,63,4.292377
1,2015-Q1-08,10158,79909,0.515229,-1.884244,0.339509,63,4.895030
2,2015-Q1-08,10158,89307,0.574179,-1.674454,0.444320,63,15.371619
3,2015-Q1-08,10158,89828,0.521051,-2.202160,0.205462,63,4.476190
4,2015-Q1-08,10158,89946,0.511613,-1.394363,0.584975,63,13.421107
...,...,...,...,...,...,...,...,...
2853355,2024-Q2-07,14694,15585,0.576794,-1.665551,0.448944,64,13.012950
2853356,2024-Q2-07,14694,18102,0.560853,-1.491468,0.537728,64,4.654117
2853357,2024-Q2-07,17000,91184,0.538966,-1.926046,0.319909,64,5.775842
2853358,2024-Q2-07,22290,83762,0.508963,-0.396444,0.910691,64,43.719888


In [40]:
# -------------------------------
# Parameter Options
# -------------------------------
COINTEGRATION_THRESHOLDS = [0.01]#, 0.05, 0.10]  # p-values
CORRELATION_THRESHOLDS = [0.5]#, 0.6, 0.7, 0.8, 0.9]  # correlation values
ZSCORE_METHODS = ['ou', 'classical']
ZSCORE_THRESHOLDS = [1.0]#, 1.5, 2.0]
LOOKBACK_PERIODS = [5]#, 10, 20]
HORIZONS = [5]#, 10, 20]
MAX_HOLDING_DAYS_LIST = [5]#, 10, 20]
INITIAL_CAPITAL = 1_000_000_000

df_merged_filtered = filter_columns_for_backtest(
    df_merged,
    ZSCORE_METHODS,
    LOOKBACK_PERIODS,
    HORIZONS
)

In [41]:
df_merged_filtered.dropna(inplace=True)

# Ensure df_pairs only contains group_ids that exist in df_merged_filtered
valid_group_ids = df_merged_filtered['group_id'].unique()
df_pairs = df_pairs[df_pairs['group_id'].isin(valid_group_ids)]

In [51]:
# -------------------------------
output_dir = "backtest_results"
os.makedirs(output_dir, exist_ok=True)

run_counter = 1

param_combinations = itertools.product(
    COINTEGRATION_THRESHOLDS,
    CORRELATION_THRESHOLDS,
    ZSCORE_METHODS,
    ZSCORE_THRESHOLDS,
    LOOKBACK_PERIODS,
    HORIZONS,
    MAX_HOLDING_DAYS_LIST
)

for (
    COINT_THRESHOLD,
    CORR_THRESHOLD,
    ZSCORE_METHOD,
    ZSCORE_THRESHOLD,
    LOOKBACK_PERIOD,
    HORIZON,
    MAX_HOLDING_DAYS
) in param_combinations:

    print(f"\nRunning Backtest with Parameters:")
    print(f"Cointegration Threshold (p-value): {COINT_THRESHOLD}, Correlation Threshold: {CORR_THRESHOLD}")
    print(f"Z-Score Method: {ZSCORE_METHOD}, Threshold: {ZSCORE_THRESHOLD}")
    print(f"Lookback: {LOOKBACK_PERIOD}, Horizon: {HORIZON}, Max Holding Days: {MAX_HOLDING_DAYS}")

    # Initialize Hybrid Pair Selector
    pair_selector = PairSelector(
        pair_metadata_df=df_pairs,
        df_main=df_merged_filtered,
        coint_threshold=COINT_THRESHOLD,
        corr_threshold=CORR_THRESHOLD,
        half_life_threshold=MAX_HOLDING_DAYS
    )

    signal_generator = SignalGenerator(
        df_main=df_merged_filtered,
        df_pairs=df_pairs,
        zscore_method=ZSCORE_METHOD,
        zscore_threshold=ZSCORE_THRESHOLD,
        lookback_period=LOOKBACK_PERIOD
    )
    signal_generator.precompute_signals_parallel(horizon=HORIZON, n_jobs=4)  # adjust n_jobs based on your CPU cores

    weight_manager = WeightManager(total_capital=INITIAL_CAPITAL)
    budget_manager = BudgetManager(initial_capital=INITIAL_CAPITAL)
    exit_manager = ExitManager(max_holding_days=MAX_HOLDING_DAYS)
    trade_log = TradeLog()

    backtest_engine = BacktestEngine(
        df=df_merged_filtered,
        pair_selector=pair_selector,
        signal_generator=signal_generator,
        weight_manager=weight_manager,
        budget_manager=budget_manager,
        exit_manager=exit_manager,
        trade_log=trade_log,
        horizon=HORIZON
    )

    # Training date cutoff
    backtest_engine.run(start_date='2015-01-01', end_date='2021-12-31')

    # Retrieve Results
    results_df = trade_log.to_dataframe()

    # Create a DataFrame for Hyperparameters
    hyperparams = {
        'COINT_THRESHOLD': COINT_THRESHOLD,
        'CORR_THRESHOLD': CORR_THRESHOLD,
        'ZSCORE_METHOD': ZSCORE_METHOD,
        'ZSCORE_THRESHOLD': ZSCORE_THRESHOLD,
        'LOOKBACK_PERIOD': LOOKBACK_PERIOD,
        'HORIZON': HORIZON,
        'MAX_HOLDING_DAYS': MAX_HOLDING_DAYS,
        'INITIAL_CAPITAL': INITIAL_CAPITAL
    }
    hyperparam_df = pd.DataFrame([hyperparams])

    # Concatenate Hyperparameters and Trade Log
    combined_df = pd.concat([hyperparam_df, results_df], ignore_index=True)

    # Save to CSV
    filename = f"backtest_run_{run_counter}.csv"
    combined_df.to_csv(os.path.join(output_dir, filename), index=False)
    run_counter += 1

    print(f"✅ Results saved to {filename}")


Running Backtest with Parameters:
Cointegration Threshold (p-value): 0.01, Correlation Threshold: 0.5
Z-Score Method: ou, Threshold: 1.0
Lookback: 5, Horizon: 5, Max Holding Days: 5







[A[A[A[A[Aps:   0%|                                | 0/217 [00:00<?, ?it/s]




[A[A[A[A[Aps:   1%|▏                       | 2/217 [00:00<00:12, 17.11it/s]




[A[A[A[A[Aps:   2%|▍                       | 4/217 [00:00<00:12, 17.20it/s]




[A[A[A[A[Aps:   3%|▋                       | 6/217 [00:00<00:15, 13.27it/s]




[A[A[A[A[Aps:   4%|▉                       | 8/217 [00:00<00:15, 13.38it/s]




[A[A[A[A[Aps:   5%|█                      | 10/217 [00:00<00:14, 14.30it/s]




[A[A[A[A[Aps:   6%|█▎                     | 12/217 [00:00<00:18, 11.34it/s]




[A[A[A[A[Aps:   6%|█▍                     | 14/217 [00:01<00:17, 11.64it/s]




[A[A[A[A[Aps:   7%|█▋                     | 16/217 [00:01<00:19, 10.56it/s]




[A[A[A[A[Aps:   8%|█▉                     | 18/217 [00:01<00:17, 11.38it/s]




[A[A[A[A[Aps:   9%|██                     | 20/217 [00:01<00:20,  9.49it/s]




[A[A[A[A[Aps:  10%|██▎                    | 22/217 [00:

pairs:  [(10104, 11154), (10104, 12558), (10104, 41355), (10104, 44601), (10104, 46674), (10104, 73139), (10104, 75460), (10104, 82581), (10104, 89070), (10107, 41355), (10107, 93295), (10138, 10302), (10138, 11600), (10138, 11896), (10138, 14076), (10138, 14702), (10138, 21135), (10138, 34032), (10138, 36468), (10138, 38659), (10138, 39642), (10138, 43772), (10138, 44601), (10138, 44644), (10138, 45751), (10138, 47466), (10138, 62148), (10138, 75853), (10138, 76230), (10138, 76261), (10138, 76804), (10138, 77037), (10138, 80286), (10138, 80320), (10138, 82581), (10138, 84769), (10138, 86211), (10138, 86356), (10138, 86889), (10138, 87487), (10138, 88873), (10138, 89070), (10138, 91041), (10138, 91063), (10138, 91277), (10138, 92852), (10145, 11370), (10145, 12340), (10145, 13314), (10145, 14256), (10145, 16126), (10145, 17005), (10145, 23975), (10145, 31500), (10145, 32870), (10145, 42534), (10145, 52230), (10145, 57809), (10145, 68196), (10145, 75591), (10145, 76639), (10145, 77917),

KeyError: 'date'

In [None]:
def evaluate_performance(trade_log_df, risk_free_rate=0.0, market_returns=None):
    """
    Computes key performance metrics from a trade log DataFrame.

    Parameters:
    - trade_log_df: DataFrame with trade results. Must have 'pnl', 'entry_date', 'exit_date' columns.
    - risk_free_rate: Annualized risk-free rate (e.g., 0.02 for 2%).
    - market_returns: Optional Series or DataFrame with market returns aligned by date.

    Returns:
    - Dictionary of performance metrics.
    """

    # Ensure 'pnl' exists
    if 'pnl' not in trade_log_df.columns:
        raise ValueError("Trade log must contain 'pnl' column.")

    pnl_series = trade_log_df['pnl']
    returns = pnl_series / trade_log_df['investment']

    # Basic Metrics
    total_pnl = pnl_series.sum()
    hit_rate = (pnl_series > 0).mean()

    # Calculate Daily Returns for Time Series Metrics (if exit dates are available)
    if 'exit_date' in trade_log_df.columns and 'entry_date' in trade_log_df.columns:
        trade_log_df['holding_days'] = (pd.to_datetime(trade_log_df['exit_date']) - pd.to_datetime(trade_log_df['entry_date'])).dt.days
        annual_factor = 252  # Assuming 252 trading days

        # Sharpe Ratio
        avg_return = returns.mean()
        std_dev = returns.std()
        sharpe_ratio = (avg_return - risk_free_rate / annual_factor) / std_dev if std_dev > 0 else np.nan
        sharpe_ratio *= np.sqrt(annual_factor)  # Annualize

        # Sortino Ratio
        downside_std = returns[returns < 0].std()
        sortino_ratio = (avg_return - risk_free_rate / annual_factor) / downside_std if downside_std > 0 else np.nan
        sortino_ratio *= np.sqrt(annual_factor)
    else:
        sharpe_ratio = sortino_ratio = np.nan

    # Drawdown Calculations
    cumulative_returns = (1 + returns.fillna(0)).cumprod()
    peak = cumulative_returns.cummax()
    drawdown = (peak - cumulative_returns) / peak
    max_drawdown = drawdown.max()
    avg_drawdown = drawdown.mean()

    # CAPM Alpha & Beta (if market returns provided)
    if market_returns is not None and 'exit_date' in trade_log_df.columns:
        aligned_dates = pd.to_datetime(trade_log_df['exit_date'])
        market_aligned = market_returns.reindex(aligned_dates).fillna(0)

        if len(market_aligned) == len(returns):
            beta, alpha, _, _, _ = linregress(market_aligned.values, returns.values)
        else:
            alpha = beta = np.nan
    else:
        alpha = beta = np.nan

    return {
        'Total PnL': total_pnl,
        'Hit Rate': hit_rate,
        'Sharpe Ratio': sharpe_ratio,
        'Sortino Ratio': sortino_ratio,
        'Max Drawdown': max_drawdown,
        'Average Drawdown': avg_drawdown,
        'CAPM Alpha': alpha,
        'CAPM Beta': beta
    }