#### This backtest is simple: it allocates $1 million to every trade and does not impose any constraints.

#### It is being reworked to test with a proper fixed AUM and trading constraints.

#### The trend-following backtest in the repo is the latest version, with AUM, trading constraints and transaction costs.

In [1]:
import pandas as pd
import numpy as np
from scipy.optimize import minimize
from tqdm import tqdm
import seaborn as sns
import datetime

import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
# Load the full backtest dataset (path is relative to the working directory).
df = pd.read_csv('final_dataset_backtest.csv')
# Bare expression: in a notebook this renders a preview of the loaded frame.
df

Unnamed: 0,date,permno,ticker,prc,retx,shrout,cfacpr,vol,vwretd,vwretx,...,ou_forecast_20d,z_score_20d,fed_funds_rate,actual_vol_1d_lag1,actual_vol_5d,actual_vol_5d_lag1,actual_vol_10d,actual_vol_10d_lag1,actual_vol_20d,actual_vol_20d_lag1
0,1986-04-01,10008,GACO,-18.50,0.042254,2945.0,1.0,47.95,-0.011717,-0.011773,...,,0.000000,7.49,,,,,,,
1,1986-04-02,10008,GACO,-18.00,-0.027027,2945.0,1.0,231.00,0.001289,0.001186,...,-0.002341,0.000000,7.45,,,,,,,
2,1986-04-03,10008,GACO,-18.25,0.013889,2945.0,1.0,3.50,-0.009560,-0.009571,...,-0.002341,0.000000,7.44,,,,,,,
3,1986-04-04,10008,GACO,-18.25,0.000000,2945.0,1.0,39.50,-0.013887,-0.014239,...,-0.002341,0.000000,6.97,,,,,,,
4,1986-04-07,10008,GACO,-18.25,0.000000,2945.0,1.0,68.00,-0.002678,-0.002752,...,-0.002341,0.000000,7.09,,0.056338,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
14686923,2024-12-24,93436,TSLA,462.28,0.073572,3210060.0,1.0,593515.06,0.010566,0.010521,...,0.001722,-0.114998,4.33,,0.131718,0.106439,0.157805,0.145334,0.170305,0.167895
14686924,2024-12-26,93436,TSLA,454.13,-0.017630,3210060.0,1.0,763922.73,0.000346,0.000282,...,0.001722,-0.317544,4.33,,0.095352,0.131718,0.152724,0.157805,0.172878,0.170305
14686925,2024-12-27,93436,TSLA,431.66,-0.049479,3210060.0,1.0,823703.45,-0.010692,-0.010775,...,0.001722,0.117134,4.33,,0.111126,0.095352,0.161946,0.152724,0.182332,0.172878
14686926,2024-12-30,93436,TSLA,417.41,-0.033012,3210060.0,1.0,647054.52,-0.009878,-0.009900,...,0.001722,0.186010,4.33,,0.110521,0.111126,0.159511,0.161946,0.186401,0.182332


In [3]:
def prepare_trade_data_bef(df, z_score_col='z_score_5d'):
    """
    Build a cleaned copy of the input suitable for the backtest.

    Rows with a NaN in any column the backtest relies on are discarded, and
    the 'date' column is converted to datetime when it is not one already.
    The input DataFrame itself is left untouched.

    Parameters:
    - df: Raw DataFrame.
    - z_score_col: Name of the Z-Score column that drives the trading signals.

    Returns:
    - A new DataFrame without NaNs in 'date', 'permno', 'group_id',
      'adj_prc' and z_score_col, with 'date' as a datetime column.

    Raises:
    - ValueError: if any of those columns is absent from df.
    """
    needed = ['date', 'permno', 'group_id', 'adj_prc', z_score_col]

    # Fail early, naming every absent column at once.
    absent = [name for name in needed if name not in df.columns]
    if absent:
        raise ValueError(f"Missing required columns: {absent}")

    # Work on a copy so the date conversion below never touches the caller's frame.
    cleaned = df.dropna(subset=needed).copy()

    # Nothing more to do when the dates are already datetime-typed.
    if pd.api.types.is_datetime64_any_dtype(cleaned['date']):
        return cleaned

    cleaned['date'] = pd.to_datetime(cleaned['date'])
    return cleaned

In [4]:
def get_daily_groups(df, z_score_col):
    """
    Split the DataFrame into one sub-frame per distinct 'date' value.

    Rows with a NaN in 'permno', 'group_id', 'adj_prc', 'date' or
    z_score_col are dropped before grouping.

    Parameters:
    - df: DataFrame holding the columns listed above.
    - z_score_col: Name of the Z-Score column.

    Returns:
    - dict mapping each date value to the DataFrame of that date's rows.

    Raises:
    - ValueError: if z_score_col is not a column of df.
    """
    if z_score_col not in df.columns:
        raise ValueError(f"Column '{z_score_col}' not found in DataFrame.")

    complete_rows = df.dropna(
        subset=['permno', 'group_id', 'adj_prc', 'date', z_score_col]
    )
    # Iterating a groupby yields (key, sub-frame) pairs.
    return {day: frame for day, frame in complete_rows.groupby('date')}

def trade_signal(z_score, threshold=1.5):
    """
    Map a z-score to an entry side.

    Returns 'Long' when z_score is at or below -threshold, 'Short' when it is
    at or above threshold, and None otherwise. The long test is evaluated
    first.
    """
    if z_score <= -threshold:
        return 'Long'
    return 'Short' if z_score >= threshold else None

def exit_signal(z_score, side):
    """
    Tell whether an open position on the given side should be closed.

    A 'Long' position exits once z_score is zero or above; a 'Short' position
    exits once z_score is zero or below. Any other side never exits.
    """
    long_reverted = side == 'Long' and z_score >= 0
    short_reverted = side == 'Short' and z_score <= 0
    return long_reverted or short_reverted

def backtest(df, z_score_col='z_score_5d', investment_per_trade=1_000_000, z_threshold=1.5):
    """
    Run a per-stock z-score backtest and return the log of closed trades.

    Dates are processed in ascending order. On each date, positions whose
    exit condition is met (see ``exit_signal``) are closed first; afterwards a
    position is opened for every stock without an open position whose z-score
    produces an entry side (see ``trade_signal``). Because entries run after
    exits, a stock closed on a date can be re-opened on that same date.

    Parameters:
    - df: DataFrame with 'date', 'permno', 'group_id', 'adj_prc' and
      z_score_col columns. Rows with NaNs in those columns are ignored.
      Assumes at most one row per (date, permno) -- TODO confirm upstream.
    - z_score_col: Name of the Z-Score column used for entries and exits.
    - investment_per_trade: Target notional per trade; the position holds the
      whole number of shares that fits into it at the entry price.
    - z_threshold: Entry threshold forwarded to ``trade_signal``.

    Returns:
    - DataFrame with one row per closed trade (entry/exit date and price,
      permno, group_id, side, shares, investment, total_return). Positions
      still open after the last date are not reported.
    """
    df = df.sort_values('date')
    daily_groups = get_daily_groups(df, z_score_col)
    unique_dates = sorted(daily_groups.keys())

    open_positions = {}  # permno -> details of the currently open position
    trade_log = []       # one dict per closed trade

    for date in tqdm(unique_dates):
        day_data = daily_groups[date].set_index('permno')

        # Exit Logic
        # Snapshot the candidates first: open_positions is mutated in the loop.
        exiting_permnos = [permno for permno in open_positions if permno in day_data.index]
        for permno in exiting_permnos:
            row = day_data.loc[permno]
            z = row[z_score_col]
            pos = open_positions[permno]

            if exit_signal(z, pos['side']):
                exit_price = row.adj_prc
                shares = pos['shares']
                investment = pos['investment']
                # Longs gain when the price rises, shorts when it falls.
                total_return = shares * exit_price - investment if pos['side'] == 'Long' else investment - shares * exit_price

                trade_log.append({
                    'entry_date': pos['entry_date'], 'exit_date': date,
                    'permno': permno, 'group_id': pos['group_id'], 'side': pos['side'],
                    'entry_price': pos['entry_price'], 'exit_price': exit_price,
                    'shares': shares, 'investment': investment,
                    'total_return': total_return
                })
                del open_positions[permno]

        # Entry Logic
        for permno, row in day_data.iterrows():
            if permno in open_positions:
                continue
            z = row[z_score_col]
            signal = trade_signal(z, z_threshold)
            if not signal:
                continue
            price = row.adj_prc
            # A zero price cannot be sized: the division below would raise
            # and abort the whole backtest, so such rows are skipped.
            if price == 0:
                continue
            shares = int(investment_per_trade / price)
            # Also rejects prices too high to buy one share and negative prices.
            if shares <= 0:
                continue

            open_positions[permno] = {
                'entry_date': date, 'group_id': row.group_id,
                'entry_price': price, 'shares': shares,
                'investment': shares * price, 'side': signal
            }

    return pd.DataFrame(trade_log)

In [5]:
# Z-score column used both for cleaning the data and for the trade signals.
z_sc = 'z_score_5d'

# Prepare the data once before backtesting: checks the required columns,
# drops rows with NaNs in them and makes sure 'date' is a datetime column.
df_prepared = prepare_trade_data_bef(df, z_score_col=z_sc)

# Run the backtest with the default investment per trade and an entry
# threshold of 1.5 on the z-score.
trade_results = backtest(df_prepared, z_score_col=z_sc, 
                                  z_threshold = 1.5)

# Show the trade log (one row per closed trade).
trade_results

100%|█████████████████████████████████████| 12547/12547 [02:23<00:00, 87.30it/s]


Unnamed: 0,entry_date,exit_date,permno,group_id,side,entry_price,exit_price,shares,investment,total_return
0,1975-04-11,1975-04-14,43721,1975-Q1-10,Short,11.093750,11.218750,90140,999990.62500,-11267.500000
1,1975-04-14,1975-04-15,41486,1975-Q1-05,Long,3.875000,3.750000,258064,999998.00000,-32258.000000
2,1975-04-18,1975-04-21,53612,1975-Q1-07,Long,9.093750,9.062500,109965,999994.21875,-3436.406250
3,1975-04-25,1975-04-28,53575,1975-Q1-01,Short,3.937500,3.906250,253968,999999.00000,7936.500000
4,1975-04-30,1975-05-01,46674,1975-Q1-01,Long,4.938272,5.004115,202500,1000000.00000,13333.333333
...,...,...,...,...,...,...,...,...,...,...
101737,2024-12-26,2024-12-27,22659,2024-Q3-08,Long,3.580000,2.820000,279329,999997.82000,-212290.040000
101738,2024-12-26,2024-12-30,17701,2024-Q3-00,Short,56.030000,56.020000,17847,999967.41000,178.470000
101739,2024-12-27,2024-12-30,23415,2024-Q3-09,Long,6.640000,6.270000,150602,999997.28000,-55722.740000
101740,2024-12-27,2024-12-30,22424,2024-Q3-01,Short,34.190000,34.210000,29248,999989.12000,-584.960000
