In [1]:
import pandas as pd
import numpy as np
from scipy.optimize import minimize
from tqdm import tqdm
import seaborn as sns
import datetime

import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
df = pd.read_csv('final_dataset_backtest.csv')
df

Unnamed: 0,date,permno,ticker,prc,retx,shrout,cfacpr,vol,vwretd,vwretx,...,ou_forecast_20d,z_score_20d,fed_funds_rate,actual_vol_1d_lag1,actual_vol_5d,actual_vol_5d_lag1,actual_vol_10d,actual_vol_10d_lag1,actual_vol_20d,actual_vol_20d_lag1
0,1986-04-01,10008,GACO,-18.50,0.042254,2945.0,1.0,47.95,-0.011717,-0.011773,...,,0.000000,7.49,,,,,,,
1,1986-04-02,10008,GACO,-18.00,-0.027027,2945.0,1.0,231.00,0.001289,0.001186,...,-0.002341,0.000000,7.45,,,,,,,
2,1986-04-03,10008,GACO,-18.25,0.013889,2945.0,1.0,3.50,-0.009560,-0.009571,...,-0.002341,0.000000,7.44,,,,,,,
3,1986-04-04,10008,GACO,-18.25,0.000000,2945.0,1.0,39.50,-0.013887,-0.014239,...,-0.002341,0.000000,6.97,,,,,,,
4,1986-04-07,10008,GACO,-18.25,0.000000,2945.0,1.0,68.00,-0.002678,-0.002752,...,-0.002341,0.000000,7.09,,0.056338,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
14686405,2024-12-24,93436,TSLA,462.28,0.073572,3210060.0,1.0,593515.06,0.010566,0.010521,...,0.001722,-0.114998,4.33,,0.131718,0.106439,0.157805,0.145334,0.170305,0.167895
14686406,2024-12-26,93436,TSLA,454.13,-0.017630,3210060.0,1.0,763922.73,0.000346,0.000282,...,0.001722,-0.317544,4.33,,0.095352,0.131718,0.152724,0.157805,0.172878,0.170305
14686407,2024-12-27,93436,TSLA,431.66,-0.049479,3210060.0,1.0,823703.45,-0.010692,-0.010775,...,0.001722,0.117134,4.33,,0.111126,0.095352,0.161946,0.152724,0.182332,0.172878
14686408,2024-12-30,93436,TSLA,417.41,-0.033012,3210060.0,1.0,647054.52,-0.009878,-0.009900,...,0.001722,0.186010,4.33,,0.110521,0.111126,0.159511,0.161946,0.186401,0.182332


In [3]:
# Column-name helper (to get relevant columns based on the horizon (1/5/10/20 days)
def get_forecast_columns(horizon:int, include_actual_vol:bool=False):
    garch  = f"garch_vol_{horizon}d_lag1"
    ou     = f"ou_forecast_{horizon}d"
    z      = f"z_score_{horizon}d"
    if include_actual_vol:
        av        = f"actual_vol_{horizon}d"
        av_lag    = f"{av}_lag1"
        return garch, ou, z, av, av_lag
    return garch, ou, z

In [4]:
def prepare_trade_data(df: pd.DataFrame,
                       horizon: int = 5,
                       z_threshold: float = 1.5,
                       keep_actual_vol: bool = False) -> pd.DataFrame:
    """
    Clean the raw DataFrame so it is ready for the back-test.

    Parameters
    ----------
    df              : raw input DataFrame
    horizon         : forecast horizon in days (1 / 5 / 10 / 20 …)
    keep_actual_vol : if True also require the realised-volatility columns
                      (actual_vol_{horizon}d and its lag-1)

    Returns
    -------
    df_clean : tidy DataFrame with
               • date → datetime64
               • no NaNs in the columns needed by the trading engine
    """
    
    # --- figure-out which columns are needed ------------------------------
    garch_col, ou_col, z_col = get_forecast_columns(horizon)
    required_cols = ['date', 'permno', 'group_id', 'adj_prc',
                     garch_col, ou_col, z_col]

    if keep_actual_vol:
        _, _, _, act_vol, act_vol_lag = get_forecast_columns(horizon,
                                                             include_actual_vol=True)
        required_cols += [act_vol, act_vol_lag]

    # --- validate the DataFrame -------------------------------------------
    missing = [c for c in required_cols if c not in df.columns]
    if missing:
        raise ValueError(f"Missing required column(s) for horizon {horizon}: {missing}")

    # --- drop rows with any NaN in those columns ---------------------------
    df_clean = (df
                .dropna(subset=required_cols)
                .copy())

    # --- make sure 'date' is proper datetime -------------------------------
    if not pd.api.types.is_datetime64_any_dtype(df_clean['date']):
        df_clean['date'] = pd.to_datetime(df_clean['date'])

    # Vectorized signal assignment
    df_clean['signal'] = 0
    df_clean.loc[df_clean[z_col] <= -z_threshold, 'signal'] = 1    # Long signal
    df_clean.loc[df_clean[z_col] >= z_threshold, 'signal'] = -1    # Short signal

    return df_clean

In [5]:
# Constants (parameters)
TCOST_PER_SHARE = 0.10          # $0.10 in & $0.10 out
FIN_SPREAD_LONG = 0.015         # +1.5 % over fed-funds when long
FIN_SPREAD_SHORT = 0.010        # +1.0 % rebate when short
CALENDAR_DAYS   = 365           # for financing accrual
TRADING_DAYS    = 252           # for Sharpe etc.

In [6]:
# Handle cash, costs, portfolios and stock weights
class Finance:
    def __init__(self, initial_aum: float, garch_col: str, ou_col: str):
        self.cash = initial_aum
        self.positions = {}
        self.port_w = {}
        self.prev_q = None
        self.garch_col = garch_col
        self.ou_col = ou_col

    def debit(self, amount: float):
        self.cash = max(self.cash - amount, 0)

    def credit(self, amount: float):
        self.cash += amount

    def calc_cost(self, side: int, shares: int, price: float, ff_rate: float, event: str):
        spread = FIN_SPREAD_LONG if side == 1 else FIN_SPREAD_SHORT
        fin_cost = t_cost = 0.0
        if event in ("open", "hold"):
            fin_cost = price * shares * ((ff_rate + spread) / CALENDAR_DAYS)
        if event in ("open", "close"):
            t_cost = TCOST_PER_SHARE * shares
        return fin_cost, t_cost

    # ────────────────────────────────────────────────────────────────────────────────
    def quarter_weights(self, day_slice: pd.DataFrame, method: str = 'inv_vol', short_bias_factor: float = 1.5):
        """
        Portfolio-level capital allocation with optional short bias.
        
        - method: 'inv_vol' (Inverse Volatility), 'rsk_par' (Risk Parity), 'mean_var' (Mean-Variance)
        - short_bias_factor: Multiplier to favor groups with more short signals (default: 1.5)
        """
        risk = day_slice.groupby('group_id')[self.garch_col].mean()
        groups = risk.index.tolist()
    
        # Calculate the average signal per group to detect short dominance
        signal_avg = day_slice.groupby('group_id')['signal'].mean()  # Negative avg → more shorts
    
        # Compute bias adjustment: Increase weight for groups with negative average signals
        bias_adjustment = 1 + (-signal_avg.clip(upper=0)) * (short_bias_factor - 1)
        # Example: 
        # If avg signal = -1 → bias_adjustment = 1 + (1) * (1.5 - 1) = 1.5
        # If avg signal = 0.5 → bias_adjustment = 1 + 0 = 1
    
        if method in ['inv_vol', 'rsk_par']:
            inv = 1 / risk.replace(0, np.nan)
            adjusted_weights = inv * bias_adjustment
            return (adjusted_weights / adjusted_weights.sum()).to_dict()
    
        elif method == 'mean_var':
            mu = day_slice.groupby('group_id')[self.ou_col].mean().values
            cov = np.diag(risk.values ** 2)
    
            n = len(mu)
            def objective(w, lam=0.1):
                return w @ cov @ w - lam * (w @ mu)
    
            constraints = [{'type': 'eq', 'fun': lambda w: np.sum(w) - 1}]
            bounds = [(0, 1) for _ in range(n)]
    
            res = minimize(objective, x0=np.ones(n) / n, bounds=bounds, constraints=constraints)
            weights = res.x if res.success else np.ones(n) / n
            weights *= bias_adjustment.values  # Apply bias adjustment to optimized weights
            weights /= weights.sum()  # Normalize to sum to 1
    
            return dict(zip(groups, weights))
    
        else:
            raise ValueError(f"Unknown weighting method: {method}")


    # ────────────────────────────────────────────────────────────────────────────────
    def stock_weights(self, group_slice: pd.DataFrame, method: str = 'inv_vol'):
        """
        Stock-level capital allocation within a group:
        - method: 'inv_vol', 'rsk_par', 'mean_var'
        """
        score = group_slice[self.ou_col] / group_slice[self.garch_col]
        score = score.replace([np.inf, -np.inf], np.nan).fillna(0)

        long_m = (group_slice['signal'] == 1) & (score > 0)
        short_m = (group_slice['signal'] == -1) & (score < 0)

        w = {}
        if method == 'inv_vol' or method == 'rsk_par':
            if long_m.any():
                s = score[long_m]
                w.update(dict(zip(
                    group_slice.loc[long_m, 'permno'], s / s.sum()
                )))
            if short_m.any():
                s = -score[short_m]
                w.update(dict(zip(
                    group_slice.loc[short_m, 'permno'], s / s.sum()
                )))
            return w

        elif method == 'mean_var':
            mu = group_slice[self.ou_col].values
            sigma = group_slice[self.garch_col].values
            cov = np.diag(sigma ** 2)

            n = len(mu)
            def objective(weights, lam=0.1):
                return weights @ cov @ weights - lam * (weights @ mu)

            constraints = [{'type': 'eq', 'fun': lambda weights: np.sum(weights) - 1}]
            bounds = [(0, 1) for _ in range(n)]

            res = minimize(objective, x0=np.ones(n) / n, bounds=bounds, constraints=constraints)
            if res.success:
                return dict(zip(group_slice['permno'], res.x))
            else:
                return dict(zip(group_slice['permno'], np.ones(n) / n))

        else:
            raise ValueError(f"Unknown weighting method: {method}")

In [7]:
# ────────────────────────────────────────────────────────────────────────────────
def should_invest_and_shares(alloc, price, adv, liquidity_cap=0.1):
    """
    Decide if investment should be made and calculate shares to trade.

    Parameters:
    - alloc: Allocated capital for the stock.
    - price: Current price of the stock.
    - adv: 20-day average daily volume.
    - liquidity_cap: Max percentage of ADV allowed to trade (default: 10%).

    Returns:
    - should_invest (bool): True if the position meets liquidity requirements.
    - shares (int): Number of shares to trade.
    """
    shares_cap = int(adv * liquidity_cap)
    shares = int(alloc / price)
    shares = min(shares, shares_cap)

    return (shares > 0), shares

def exit_signal(z: float, side: int, thr: float, pnl_pct: float,
                hold_days: int, max_hold: int, min_profit: float) -> bool:
    """
    Exit decision rule combining z-score, profit threshold, and max holding period.

    Parameters
    ----------
    z          : Current z–score.
    side       : +1 for long, –1 for short.
    thr        : Z-score threshold for exit signal.
    pnl_pct    : Current % PnL (signed based on position side).
    hold_days  : Number of days the position has been held.
    max_hold   : Maximum holding days allowed before forced exit.
    min_profit : Minimum % profit required to exit on z-signal (default 10%).

    Returns
    -------
    bool : True if the position should be exited.
    """

    # 1. Exit if z-score suggests mean reversion AND profit condition is met
    z_exit = (side == 1 and z < thr) or (side == -1 and z > -thr)
    profit_exit = pnl_pct >= min_profit
    exit_on_signal = z_exit and profit_exit

    if exit_on_signal:
        return True, 'z_signal_profit'
        
    if hold_days >= max_hold:
        return True, 'max_hold'
    
    return False, 'none'  # Keep position open

In [8]:
# Back-test (only 3 args!)
def backtest(df: pd.DataFrame, 
              aum: float = 100_000_000.0, 
              horizon: int = 5, 
              z_threshold: float = 1.5, 
              portfolio_weight_method: str = 'inv_vol', 
              stock_weight_method: str = 'inv_vol',
             max_hold: int = 5, min_profit: float = 0.05):

    df = prepare_trade_data(df, horizon=horizon, z_threshold = z_threshold)
    # df columns cleaned. Example: ['date', 'permno', 'group_id', 'adj_prc', 'z_score_5d', ...]
    print('Data prep complete')
    garch_col, ou_col, z_col = get_forecast_columns(horizon)
    print("column names: ", garch_col, ou_col, z_col)
    # Example: garch_col = 'garch_vol_5d_lag1', ou_col = 'ou_forecast_5d', z_col = 'z_score_5d'

    ff_col = 'fed_funds_rate'

    df = df.sort_values('date').copy()
    # df is now sorted by date. Example: df['date'].head() → [1975-01-02, 1975-01-02, 1975-01-02, ...]

    df['date'] = pd.to_datetime(df['date'])
    # Ensures 'date' is datetime. Example: df['date'].dtype → datetime64[ns]

    fin = Finance(aum, garch_col, ou_col)
    # Example: fin.cash → 100,000,000.0

    log = []

    for date, day in tqdm(df.groupby('date', sort=True), total=df['date'].nunique()):
        # Example: date = Timestamp('1975-01-02')
        #          day = DataFrame with today's trading data

        if date.to_period('Q') != fin.prev_q:
            fin.prev_q = date.to_period('Q')
            fin.port_w = fin.quarter_weights(day, method = portfolio_weight_method, 
                                             short_bias_factor = 1.5)
            # Example: fin.port_w = {'Tech': 0.25, 'Finance': 0.35, 'Energy': 0.40}

        price = day.set_index('permno')['adj_prc'].to_dict()
        # Example: price = {10001: 45.5, 10002: 60.3, 10003: 75.2}

        z = day.set_index('permno')[z_col].to_dict()
        # Example: z = {10001: 1.6, 10002: -2.0, 10003: 0.8}

        ff = day['fed_funds_rate'].mean()
        # Example: ff = 5.5

        # Exit Logic
        for perm_id, position in list(fin.positions.items()):
        
            if perm_id not in price:
                continue
        
            px   = price[perm_id]
            zz   = z.get(perm_id, np.nan)
            ffD  = ff
            side = position['side']
        
            # Daily financing ---------------------------------------------------
            fin_cost, _ = fin.calc_cost(side, position['sh'], px, ffD, 'hold')
            fin.debit(fin_cost)
            position['financing_cost_total'] += fin_cost
            position['cumulative_cost']      += fin_cost
        
            pnl_pct   = (px - position['entry_price']) / (position['entry_price'] + position['cumulative_cost']) * side
            hold_days = (date - position['entry_date']).days
        
            should_exit, exit_reason = exit_signal(zz, side, z_threshold, pnl_pct, hold_days, 
                                                   max_hold, min_profit)
            if not should_exit:
                continue  # Keep position open 
                
            # Transaction cost to close ----------------------------------------
            _, t_cost = fin.calc_cost(side, position['sh'], px, ffD, 'close')
            fin.debit(t_cost)
            position['transaction_cost_close'] = t_cost
            position['cumulative_cost']       += t_cost
        
            # Cash-flow on close -----------------------------------------------
            if side == 1:                                    # LONG
                proceeds  = position['sh'] * px
                fin.credit(proceeds)
                gross_pnl = proceeds - position['invest']
            else:                                            # SHORT
                buyback   = position['sh'] * px
                fin.debit(buyback)
                proceeds  = position['invest']               # cash received at entry
                gross_pnl = proceeds - buyback
        
            # Re-compute AUM **after** the trade --------------------------------
            current_aum = fin.cash + sum(
                open_pos['side'] * open_pos['sh'] *
                price.get(pn, px)            # px fallback if intraday missing
                for pn, open_pos in fin.positions.items()
            )
        
            # Log trade ---------------------------------------------------------
            log.append({
                **position,
                'exit_date'     : date,
                'exit_price'    : px,
                'gross_pnl'     : gross_pnl,
                'net_pnl'       : gross_pnl - position['cumulative_cost'],
                'holding_days'  : hold_days,
                'gross_return'  : pnl_pct,
                'net_return'    : (gross_pnl - position['cumulative_cost']) /
                                  abs(position['invest']),
                'exit_reason'   : exit_reason,
                'status'        : 'closed',
                'aum_after'     : current_aum
            })
        
            del fin.positions[perm_id]

        
        # Entry logic
        for gid, grp in day.groupby('group_id'):
        
            sig = grp[grp['signal'] != 0]
            if sig.empty:
                continue
        
            cap_grp = fin.cash * fin.port_w.get(gid, 0)        # $ allocated to group
            weights = fin.stock_weights(sig, method = stock_weight_method)                   # {permno: w}
        
            for _, row in sig.iterrows():
                perm = row.permno
                if perm in fin.positions or weights.get(perm, 0) == 0:
                    continue
            
                alloc = cap_grp * weights[perm]  # Allocated capital to this stock
                should_invest, sh = should_invest_and_shares(alloc, row.adj_prc, row.adv20)
            
                if not should_invest:
                    continue  # Skip this stock if investment isn't feasible under liquidity constraint
            
                side = int(np.sign(row.signal))
                px = row.adj_prc
        
                # *** entry day: transaction cost only, NO financing yet ***
                fin_cost_open = 0.0
                _, t_cost_open = fin.calc_cost(side, sh, px, ff, 'open')
        
                if side == 1:          # LONG → we pay for the shares up-front
                    cash_need = sh * px + t_cost_open          # shares + open cost
                    if fin.cash < cash_need:
                        continue
                    fin.debit(cash_need)                       # ↓ cash
                    invested_or_proceeds = sh * px             # store purchase cost
                else:                  # SHORT → we *receive* sale proceeds today
                    proceeds = sh * px
                    fin.credit(proceeds)                       # ↑ cash
                    fin.debit(t_cost_open)                     # pay open cost
                    invested_or_proceeds = proceeds            # store sale proceeds
        
                # record position
                fin.positions[perm] = dict(
                    entry_date            = date,
                    group_id              = gid,
                    side                  = side,
                    entry_price           = px,
                    sh                    = sh,
                    invest                = invested_or_proceeds,
                    financing_cost_total  = 0.0,
                    transaction_cost_open = t_cost_open,
                    transaction_cost_close= 0.0,  # fill on exit
                    cumulative_cost       = t_cost_open,  # Starts with open transaction cost
                    status                = 'open'
                )


    # Append still-open positions to log
    for perm, pos in fin.positions.items():
        log.append({**pos, 'exit_date': pd.NaT, 'exit_price': np.nan, 
                    'transaction_cost_close': 0.0, 'status': 'open'})

    trades = pd.DataFrame(log)

    print(f"Final cash: ${fin.cash:,.0f}")
    return trades

In [9]:
def summarize_positions(trades: pd.DataFrame):
    """
    Summarizes the trade book by side (Long/Short) and status (Open/Closed).
    Prints count, total investment, and total PnL.

    Parameters:
    - trades: DataFrame containing trade records with columns:
      ['entry_date', 'group_id', 'side', 'entry_price', 'sh', 'invest', 
       'fin_cost', 'transaction_cost_open', 'status', 'exit_date', 
       'exit_price', 'transaction_cost_close', 'gross_pnl']
    """
    summary = []

    for side_val, side_name in [(-1, 'Short'), (1, 'Long')]:
        for status_val in ['open', 'closed']:
            filtered = trades[(trades['side'] == side_val) & (trades['status'] == status_val)]
            
            count = len(filtered)
            total_investment = filtered['invest'].sum() + filtered['cumulative_cost'].sum()
            total_pnl = filtered['net_pnl'].sum() if 'net_pnl' in filtered.columns else 0.0

            summary.append({
                'Side': side_name,
                'Status': status_val.capitalize(),
                'Count': count,
                'Total Investment': f"${total_investment:,.2f}",
                'Total PnL': f"${total_pnl:,.2f}"
            })

    summary_df = pd.DataFrame(summary)
    display(summary_df)  
    return summary_df

In [10]:
trades_10 = backtest(df, aum=100_000_000_000, horizon=10, z_threshold=1.2, 
                     portfolio_weight_method='rsk_par', stock_weight_method='rsk_par', 
                     max_hold = 25, min_profit = 0.08)

trades_10

Data prep complete
column names:  garch_vol_10d_lag1 ou_forecast_10d z_score_10d


100%|████████████████████████████████████| 12546/12546 [00:36<00:00, 344.05it/s]


Final cash: $96,739,384,289


Unnamed: 0,entry_date,group_id,side,entry_price,sh,invest,financing_cost_total,transaction_cost_open,transaction_cost_close,cumulative_cost,status,exit_date,exit_price,gross_pnl,net_pnl,holding_days,gross_return,net_return,exit_reason,aum_after
0,1975-05-15,1975-Q1-01,1,0.720588,5,3.602940,0.586585,0.5,0.5,1.586585,closed,1975-06-03,0.926470,1.029412,-0.557173,19.0,0.113925,-0.154644,z_signal_profit,1.000000e+11
1,1975-06-26,1975-Q1-01,1,0.926470,23,21.308819,3.548453,2.3,2.3,8.148453,closed,1975-07-10,1.544117,14.205879,6.057426,14.0,0.091167,0.284268,z_signal_profit,1.000000e+11
2,1975-08-07,1975-Q2-07,1,13.500000,10,135.000000,34.919041,1.0,1.0,36.919041,closed,1975-09-02,11.500000,-20.000000,-56.919041,26.0,-0.040470,-0.421623,max_hold,1.000000e+11
3,1975-08-19,1975-Q2-00,-1,3.475961,21,72.995179,22.352532,2.1,2.1,26.552532,closed,1975-09-15,3.318894,3.298407,-23.254125,27.0,0.005624,-0.318571,max_hold,1.000000e+11
4,1975-08-20,1975-Q2-06,-1,13.000000,14,182.000000,57.550324,1.4,1.4,60.350324,closed,1975-09-15,14.416667,-19.833333,-80.183658,26.0,-0.019690,-0.440570,max_hold,1.000000e+11
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
37344,2024-12-24,2024-Q3-01,1,9.240000,156,1441.440000,79.945619,15.6,0.0,95.545619,open,NaT,,,,,,,,
37345,2024-12-26,2024-Q3-08,1,3.580000,5161,18476.380000,477.980713,516.1,0.0,994.080713,open,NaT,,,,,,,,
37346,2024-12-30,2024-Q3-01,1,0.485000,1652,801.220000,6.882956,165.2,0.0,172.082956,open,NaT,,,,,,,,
37347,2024-12-30,2024-Q3-03,1,0.687400,1821,1251.755400,14.352596,182.1,0.0,196.452596,open,NaT,,,,,,,,


In [11]:
summary_table_exit = trades_10.groupby(['exit_reason', 'side']).agg(
    total_count=('side', 'count'),
    total_net_pnl=('net_pnl', 'sum'),
    total_investment=('invest', 'sum'),
    total_cost=('cumulative_cost', 'sum')
).reset_index()

summary_table_exit['total_investment_sum'] = summary_table_exit['total_investment'] + summary_table_exit['total_cost']

# Optional: Replace side values for better readability if side is stored as 1/-1
summary_table_exit['side'] = summary_table_exit['side'].map({1: 'Long', -1: 'Short'})

summary_table_exit

Unnamed: 0,exit_reason,side,total_count,total_net_pnl,total_investment,total_cost,total_investment_sum
0,max_hold,Short,11665,-968245400.0,15834790000.0,1949657000.0,17784450000.0
1,max_hold,Long,22521,-2580758000.0,12157030000.0,1223461000.0,13380490000.0
2,z_signal_profit,Short,1127,41442860.0,211787500.0,1224770.0,213012200.0
3,z_signal_profit,Long,1278,237187100.0,554381100.0,3442728.0,557823900.0


In [12]:
summary = summarize_positions(trades_10)

Unnamed: 0,Side,Status,Count,Total Investment,Total PnL
0,Short,Open,421,"$42,729,031.69",$0.00
1,Short,Closed,12792,"$17,997,461,337.24","$-926,802,568.37"
2,Long,Open,337,"$28,760,508.66",$0.00
3,Long,Closed,23799,"$13,938,316,408.40","$-2,343,570,681.06"


In [13]:
# Add Year Column
trades_10['year'] = trades_10['entry_date'].dt.year

# summary calculation function
def calculate_summary(df_year):
    # Filter out NaT and ensure dates belong to the current year
    entry_dates = df_year['entry_date'].dropna().dt.normalize()
    exit_dates = df_year['exit_date'].dropna().dt.normalize()

    # Keep only dates within the year being processed
    year_value = df_year['year'].iloc[0]
    entry_dates = entry_dates[entry_dates.dt.year == year_value]
    exit_dates = exit_dates[exit_dates.dt.year == year_value]

    # Calculate unique trading days correctly
    trading_days = pd.concat([entry_dates, exit_dates]).drop_duplicates().nunique()

    
    gross_return_mean = df_year['gross_return'].mean()
    gross_return_std = df_year['gross_return'].std(ddof=0)

    sharpe_ratio = (gross_return_mean / gross_return_std) * np.sqrt(trading_days) if gross_return_std != 0 else np.nan

    return pd.Series({
        'num_long': (df_year['side'] == 1).sum(),
        'num_short': (df_year['side'] == -1).sum(),
        'total_trades': len(df_year),
        'total_inv': df_year['invest'].sum(),
        'financing_cost': df_year['financing_cost_total'].sum(),
        'transaction_entry_cost': df_year['transaction_cost_open'].sum(),
        'transaction_exit_cost': df_year['transaction_cost_close'].sum(),
        'cumulative_cost': df_year[['transaction_cost_open', 'transaction_cost_close', 'financing_cost_total']].sum(axis=1).sum(),
        'total_income': df_year['exit_price'].multiply(df_year['sh'], fill_value=0).sum(),
        'trading_days': trading_days,
        'sharpe_ratio': sharpe_ratio,
        'total_closed': (df_year['status'] == 'closed').sum(),
        'total_open': (df_year['status'] == 'open').sum(),
        'net_pnl': (df_year['net_pnl'].sum())
    })

# Group by Year and Apply Summary Function
summary_table = trades_10.groupby('year').apply(calculate_summary).reset_index()

# Round Sharpe Ratio for readability
summary_table['sharpe_ratio'] = summary_table['sharpe_ratio'].round(2)

# Final Summary Table
summary_table

  summary_table = trades_10.groupby('year').apply(calculate_summary).reset_index()


Unnamed: 0,year,num_long,num_short,total_trades,total_inv,financing_cost,transaction_entry_cost,transaction_exit_cost,cumulative_cost,total_income,trading_days,sharpe_ratio,total_closed,total_open,net_pnl
0,1975,4.0,4.0,8.0,771.5271,200.2916,16.6,16.6,233.4916,758.333,14.0,1.55,8.0,0.0,-232.2451
1,1976,4.0,15.0,19.0,4146.74,916.5508,45.3,42.9,1004.751,3637.332,28.0,0.97,17.0,2.0,-972.8237
2,1977,5.0,7.0,12.0,78400.81,20958.09,23.5,23.5,21005.09,74878.28,19.0,-1.82,12.0,0.0,-17675.59
3,1978,13.0,10.0,23.0,2692.303,1115.581,51.3,51.3,1218.181,2500.711,33.0,1.66,23.0,0.0,-1175.8
4,1979,15.0,3.0,18.0,1414.144,895.1936,24.4,24.4,943.9936,1495.28,31.0,1.08,18.0,0.0,-857.4994
5,1980,21.0,17.0,38.0,23151.16,15361.27,139.9,139.9,15641.07,23448.48,57.0,1.33,38.0,0.0,-15531.33
6,1981,29.0,16.0,45.0,18442.4,13112.28,126.7,126.7,13365.68,18056.91,69.0,2.37,45.0,0.0,-13036.31
7,1982,31.0,16.0,47.0,35356.03,15060.59,249.3,249.3,15559.19,28524.65,73.0,1.44,47.0,0.0,-7889.071
8,1983,114.0,71.0,185.0,147216.5,63763.61,897.9,894.7,65556.21,146760.9,158.0,1.0,184.0,1.0,-66022.34
9,1984,126.0,100.0,226.0,1740858.0,747050.6,1164.0,1154.4,749369.0,1493231.0,195.0,2.44,220.0,6.0,-515328.0
