In [684]:
import pandas as pd
import numpy as np
import yfinance as yf
import seaborn as sns

from sklearn.cluster import DBSCAN
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
import plotly.graph_objects as go

import statsmodels.api as sm
from statsmodels.tsa.stattools import coint

import os

I use the constituents of the NIFTY Midcap 100 index as the universe from which to find suitable trading pairs.

Step 1: Load Data

In [685]:
def download_hourly_data(tickers, start, end, interval):
    """
    Download price history for each ticker with yfinance.

    Args:
        tickers (iterable): Ticker symbols passed one by one to ``yf.download``.
        start (str): Start date forwarded to ``yf.download``.
        end (str): End date forwarded to ``yf.download``.
        interval (str): Bar interval forwarded to ``yf.download`` (e.g. "1h").

    Returns:
        dict: Maps each ticker to its downloaded DataFrame. Tickers whose download
        came back empty or raised an exception are left out.
    """
    downloaded = {}
    for symbol in tickers:
        print(f"Downloading data for {symbol}...")
        try:
            frame = yf.download(symbol, start=start, end=end, interval=interval)
            if frame.empty:
                # Nothing came back for this symbol; leave it out of the result
                continue
            downloaded[symbol] = frame
        except Exception as err:
            # Best effort: report the failure and carry on with the next ticker
            print(f"Error downloading {symbol}: {err}")
    return downloaded

def save_to_csv(data, folder):
    """
    Write every DataFrame in ``data`` to ``<folder>/<ticker>.csv``.

    Args:
        data (dict): Maps ticker -> DataFrame.
        folder (str): Destination directory; created if it does not exist.
    """
    os.makedirs(folder, exist_ok=True)
    for ticker in data:
        filepath = os.path.join(folder, f"{ticker}.csv")
        data[ticker].to_csv(filepath)
        print(f"Saved {ticker} data to {filepath}")

def load_data_from_folder(folder):
    """
    Load the 'Adj Close' column of every CSV in ``folder`` into one DataFrame.

    Files are read in sorted filename order so the column order of the result
    (and anything order-sensitive downstream) is reproducible across machines.
    The column name is the filename without its ".csv" extension and without a
    trailing ".NS" exchange suffix, e.g. "ACC.NS.csv" -> "ACC".

    Args:
        folder (str): Directory containing one CSV per ticker. Each CSV must have a
            "Datetime" column (used as the index) and an "Adj Close" column.

    Returns:
        pd.DataFrame: One column per ticker, restricted to timestamps for which
        every ticker has a value (rows with any NaN are dropped).
    """
    data = {}
    # os.listdir() returns entries in arbitrary order; sort for determinism
    for filename in sorted(os.listdir(folder)):
        if not filename.endswith(".csv"):
            continue
        ticker = filename[:-len(".csv")]
        if ticker.endswith(".NS"):
            ticker = ticker[:-len(".NS")]
        filepath = os.path.join(folder, filename)
        df = pd.read_csv(filepath, index_col="Datetime", parse_dates=True)
        data[ticker] = df["Adj Close"]  # Only keep the 'Adj Close' column
    df_combined = pd.DataFrame(data)
    return df_combined.dropna()  # Keep only timestamps shared by all tickers

# Define folder path and tickers
folder_path = "nifty_midcap_100_hourly_data"
nifty_midcap_100_tickers = [
    "ACC.NS", "ABCAPITAL.NS", "ABFRL.NS", "ALKEM.NS", "APLAPOLLO.NS", "APOLLOTYRE.NS", "ASHOKLEY.NS",
    "ASTRAL.NS", "AUBANK.NS", "AUROPHARMA.NS", "BALKRISIND.NS", "BANDHANBNK.NS", "BANKINDIA.NS", "BANKBARODA.NS",
    "BDL.NS", "BHARATFORG.NS", "BHARTIHEXACOM.NS", "BIOCON.NS", "CGPOWER.NS", "COCHINSHIP.NS", "COFORGE.NS",
    "COLPAL.NS", "CONCOR.NS", "CUMMINSIND.NS", "DELHIVERY.NS", "DIXON.NS", "ESCORTS.NS", "EXIDEIND.NS",
    "FEDERALBNK.NS", "FACT.NS", "NYKAA.NS", "GMRINFRA.NS", "GODREJPROP.NS", "HDFCAMC.NS", "HINDPETRO.NS",
    "HINDZINC.NS", "HUDCO.NS", "IDBI.NS", "IDFCFIRSTB.NS", "INDIANB.NS", "INDHOTEL.NS", "IOB.NS", "INDRENEW.NS",
    "IGL.NS", "INDUSTOWER.NS", "IRB.NS", "JSWINFRA.NS", "JUBLFOOD.NS", "KALYAN.NS", "KPITTECH.NS", "L&TFH.NS",
    "LICHSGFIN.NS", "LUPIN.NS", "M&MFIN.NS", "MRPL.NS", "MANKIND.NS", "MARICO.NS", "MAXFIN.NS", "MAXHEALTH.NS",
    "MAZDOCK.NS", "MPHASIS.NS", "MRF.NS", "MUTHOOTFIN.NS", "NLCINDIA.NS", "NMDC.NS", "OBEROIRLTY.NS", "OIL.NS",
    "PAYTM.NS", "OFSS.NS", "PAGEIND.NS", "PATANJALI.NS", "POLICYBZR.NS", "PERSISTENT.NS", "PETRONET.NS",
    "PHOENIXLTD.NS", "PIIND.NS", "POLYCAB.NS", "POONAWALLA.NS", "PRESTIGE.NS", "RVNL.NS", "SBICARD.NS", "SJVN.NS",
    "SOLARINDS.NS", "SONACOMS.NS", "SRF.NS", "SAIL.NS", "SUNDARMFIN.NS", "SUPREMEIND.NS", "SUZLON.NS", "TATACHEM.NS",
    "TATACOMM.NS", "TATAELXSI.NS", "TATATECH.NS", "TORNTPOWER.NS", "TUBEINVEST.NS", "UPL.NS", "IDEA.NS", "VOLTAS.NS",
    "YESBANK.NS"
]
start_date = "2024-01-01"
end_date = "2024-12-31"
interval = "1h"

# Download once and cache to disk; later runs reuse the cached CSVs
if not os.path.exists(folder_path):
    print(f"Folder {folder_path} not found. Downloading data...")
    raw_price_data = download_hourly_data(nifty_midcap_100_tickers, start_date, end_date, interval)
    save_to_csv(raw_price_data, folder_path)
else:
    print(f"Folder {folder_path} found. Loading data from folder...")

# Always build the combined 'Adj Close' frame from the cached CSVs. The download
# step returns a dict of per-ticker frames, which cannot be sliced by date below,
# so both branches must go through the same loader.
price_data = load_data_from_folder(folder_path)

# Training window: everything up to and including 2024-06-30
train_data = price_data[:"2024-06-30"]

Folder nifty_midcap_100_hourly_data found. Loading data from folder...


Step 2: Clustering closely related stocks through DBSCAN

In [686]:
# Compute Features for Clustering
def compute_features(price_data):
    """
    Summarise each price column by the mean and standard deviation of its returns.

    Args:
        price_data (pd.DataFrame): Prices, one column per stock.

    Returns:
        pd.DataFrame: Indexed by stock, with columns "Mean Return" and "Volatility";
        stocks for which either statistic is NaN are dropped.
    """
    # Bar-to-bar percentage changes; rows containing any NaN are discarded
    period_returns = price_data.pct_change().dropna()

    summary = pd.concat(
        [period_returns.mean(), period_returns.std()],
        axis=1,
        keys=["Mean Return", "Volatility"],
    )
    return summary.dropna()

# Perform DBSCAN clustering
def dbscan_clustering(features, eps=0.05, min_samples=3):
    """
    Cluster stocks with DBSCAN on standardised features.

    The input frame is not modified. A pre-existing 'Cluster' column (e.g. from a
    previous run of this cell) is ignored rather than being scaled and clustered
    as if it were a feature.

    Args:
        features (pd.DataFrame): One row per stock, numeric feature columns.
        eps (float): DBSCAN neighbourhood radius, in standardised feature units.
        min_samples (int): DBSCAN minimum neighbourhood size for a core point.

    Returns:
        tuple: (copy of the features with a 'Cluster' column appended,
        array of cluster labels). DBSCAN labels noise points as -1.
    """
    # Work on the feature columns only, leaving the caller's frame untouched
    feature_matrix = features.drop(columns=['Cluster'], errors='ignore')

    # Standardize features so eps means the same thing on every axis
    scaled_features = StandardScaler().fit_transform(feature_matrix)

    # Apply DBSCAN
    clusters = DBSCAN(eps=eps, min_samples=min_samples).fit_predict(scaled_features)

    # Attach the labels to a new frame instead of mutating the input
    labelled_features = feature_matrix.assign(Cluster=clusters)

    return labelled_features, clusters

def visualize_clusters(features, clusters):
    """
    Scatter-plot mean return against volatility, one Plotly trace per cluster.

    The input frame is not modified; labels are attached to a temporary copy.

    Args:
        features (pd.DataFrame): Must contain 'Mean Return' and 'Volatility' columns.
        clusters (array-like): Cluster label per row of ``features``; -1 is shown as "Noise".
    """
    labelled = features.assign(Cluster=clusters)

    # Create a scatter plot
    fig = go.Figure()

    for cluster_label in sorted(labelled['Cluster'].unique()):
        cluster_data = labelled[labelled['Cluster'] == cluster_label]
        fig.add_trace(go.Scatter(
            x=cluster_data['Mean Return'],
            y=cluster_data['Volatility'],
            mode='markers',
            marker=dict(size=10, opacity=0.7),
            name=f"Cluster {cluster_label}" if cluster_label != -1 else "Noise"
        ))

    # Update layout (the universe here is NIFTY Midcap 100, not NIFTY 100)
    fig.update_layout(
        title="DBSCAN Clustering of NIFTY Midcap 100 Stocks",
        xaxis_title="Mean Return",
        yaxis_title="Volatility",
        legend=dict(title="Clusters"),
        template="plotly_white",
        height=600,
        width=800
    )

    # Show the plot
    fig.show()

# Per-stock feature table (mean return, volatility) from the training window
train_features = compute_features(train_data).dropna()

# Group stocks with similar return/volatility profiles
clustered_features, cluster_labels = dbscan_clustering(train_features, min_samples=2, eps=0.2)

# Show the resulting clusters
print("Visualizing clusters...")
visualize_clusters(train_features, cluster_labels)

Visualizing clusters...


Step 3: Identifying Suitable Pairs Within Each Cluster

In [687]:
def plot_cluster_correlation_heatmaps(clustered_features, price_data):
    """
    Show one annotated Plotly correlation heatmap per cluster.

    Args:
        clustered_features (pd.DataFrame): Indexed by stock, with a 'Cluster' column.
        price_data (pd.DataFrame): Historical prices, one column per stock.
    """
    labels = clustered_features['Cluster']
    # Label -1 marks DBSCAN noise points, which do not form a cluster
    cluster_ids = [label for label in labels.unique() if label != -1]

    for cluster_id in cluster_ids:
        # Price columns of the stocks that belong to this cluster
        members = clustered_features.loc[labels == cluster_id].index
        corr = price_data[members].corr()

        heatmap = go.Heatmap(
            z=corr.values,
            x=corr.columns,
            y=corr.index,
            colorscale='RdBu',
            zmin=-1,  # pin the colour scale to the full correlation range
            zmax=1,
            colorbar=dict(title="Correlation"),
            text=corr.round(2).values,  # values printed inside the cells
            texttemplate="%{text}",
            textfont=dict(size=10)
        )
        fig = go.Figure(data=heatmap)

        fig.update_layout(
            title=f"Correlation Heatmap for Cluster {cluster_id}",
            xaxis_title="Stocks",
            yaxis_title="Stocks",
            width=800,
            height=600,
            template="plotly_white"
        )

        fig.show()

# Within-cluster price correlations over the training window
print("Plotting correlation heatmaps for each cluster...")
plot_cluster_correlation_heatmaps(clustered_features=clustered_features, price_data=train_data)

Plotting correlation heatmaps for each cluster...


In [688]:
def test_cointegration_within_clusters(clustered_features, price_data, significance_level=0.05):
    """
    Test every pair of stocks inside each cluster for cointegration.

    Args:
        clustered_features (pd.DataFrame): Indexed by stock, with a 'Cluster' column.
        price_data (pd.DataFrame): Historical prices, one column per stock.
        significance_level (float): A pair is kept when its p-value is below this.

    Returns:
        dict: Maps cluster id -> list of (stock1, stock2, p_value) tuples for the
        pairs that passed; clusters with no passing pair map to an empty list.
    """
    labels = clustered_features['Cluster']
    cointegration_results = {}

    for cluster_id in labels.unique():
        if cluster_id == -1:
            # -1 is DBSCAN's noise label, not a cluster
            continue

        cluster_stocks = clustered_features.loc[labels == cluster_id].index
        cluster_data = price_data[cluster_stocks]

        print(f"Running cointegration tests for Cluster {cluster_id}...")
        significant_pairs = []

        # Each unordered pair is visited once: stock2 only ranges over later stocks
        for position, stock1 in enumerate(cluster_stocks):
            series1 = cluster_data[stock1]
            for stock2 in cluster_stocks[position + 1:]:
                # coint returns (statistic, p-value, critical values); only the p-value is used
                p_value = coint(series1, cluster_data[stock2])[1]
                if p_value < significance_level:
                    significant_pairs.append((stock1, stock2, p_value))

        cointegration_results[cluster_id] = significant_pairs

    return cointegration_results

# Pairwise cointegration tests inside every cluster, on the training window
print("Running cointegration tests for each cluster...")
cointegration_results = test_cointegration_within_clusters(clustered_features, train_data)

# Report only the clusters that produced at least one cointegrated pair
for cluster_id, results in cointegration_results.items():
    if not results:
        continue
    print(f"\nCluster {cluster_id} - Cointegrated Pairs:")
    for stock1, stock2, p_value in results:
        print(f"{stock1} - {stock2}, P-Value: {p_value}")

Running cointegration tests for each cluster...
Running cointegration tests for Cluster 0...
Running cointegration tests for Cluster 1...
Running cointegration tests for Cluster 2...
Running cointegration tests for Cluster 3...
Running cointegration tests for Cluster 4...
Running cointegration tests for Cluster 5...
Running cointegration tests for Cluster 6...
Running cointegration tests for Cluster 7...
Running cointegration tests for Cluster 8...
Running cointegration tests for Cluster 9...
Running cointegration tests for Cluster 10...
Running cointegration tests for Cluster 11...
Running cointegration tests for Cluster 12...
Running cointegration tests for Cluster 13...
Running cointegration tests for Cluster 14...
Running cointegration tests for Cluster 15...
Running cointegration tests for Cluster 16...
Running cointegration tests for Cluster 17...

Cluster 0 - Cointegrated Pairs:
ABCAPITAL - VOLTAS, P-Value: 0.0011543268173838817
BIOCON - SUPREMEIND, P-Value: 0.001045441601435077

Step 4: Picking the best pair to work with

In [689]:
def fetch_data(stock1, stock2, folder_path="nifty_midcap_100_hourly_data"):
    """
    Load the adjusted close prices of two stocks from the cached CSV folder.

    Args:
        stock1 (str): Ticker for the first stock, without the ".NS" suffix.
        stock2 (str): Ticker for the second stock, without the ".NS" suffix.
        folder_path (str): Folder containing "<ticker>.NS.csv" files. Defaults to
            the folder this notebook downloads its data into.

    Returns:
        pd.DataFrame: Columns ``stock1`` and ``stock2`` holding 'Adj Close', indexed
        by the CSV's "Datetime" column and restricted to timestamps present for both.

    Raises:
        FileNotFoundError: If either CSV file is missing (raised by ``pd.read_csv``).
    """
    def _load_adj_close(ticker):
        # Files are stored under the Yahoo symbol, i.e. with the ".NS" suffix
        filepath = os.path.join(folder_path, f"{ticker}.NS.csv")
        return pd.read_csv(filepath, index_col="Datetime", parse_dates=True)["Adj Close"]

    # Combine into a single DataFrame aligned on the timestamp index
    combined_df = pd.DataFrame({
        stock1: _load_adj_close(stock1),
        stock2: _load_adj_close(stock2)
    })

    # Keep only timestamps where both stocks have a price
    return combined_df.dropna()

def plot_normalized_prices(data, stock1, stock2):
    """
    Draw both stocks on one Plotly chart, each rebased to 1.0 at the first row.

    Args:
        data (pd.DataFrame): Price data containing a column for each of the two stocks.
        stock1 (str): Ticker (column name) of the first stock.
        stock2 (str): Ticker (column name) of the second stock.
    """
    # Rebase: every column is divided by its own first value
    rebased = data / data.iloc[0]

    fig = go.Figure()

    # One line per stock, first stock first
    for ticker in (stock1, stock2):
        fig.add_trace(go.Scatter(
            x=rebased.index,
            y=rebased[ticker],
            mode='lines',
            name=ticker,
            line=dict(width=2)
        ))

    fig.update_layout(
        title=f"Normalized Prices of {stock1} and {stock2}",
        xaxis_title="Date",
        yaxis_title="Normalized Price",
        legend=dict(font=dict(size=12)),
        template="plotly_white",
        height=600,
        width=1000
    )

    fig.show()

# Candidate pairs to inspect visually.
# NOTE(review): only the first two pairs appear in the cointegration output printed
# above; the others presumably come from runs with different settings - confirm.
pairs = [['ABCAPITAL', 'VOLTAS'], ['BIOCON', 'SUPREMEIND'], ['PETRONET', 'SUNDARMFIN'], ['ACC', 'HDFCAMC'], ['ASTRAL', 'HDFCAMC'], ['HDFCAMC', 'IGL'], ['IGL', 'MUTHOOTFIN'], ['COFORGE', 'MPHASIS'], ['CONCOR', 'NMDC']]

for pair in pairs:
    # Stop at 2024-06-30, the same cut-off as train_data, so that the back-test
    # period (which starts on 2024-07-01) cannot influence which pair is picked.
    pair_train_data = fetch_data(pair[0], pair[1])["2024-01-01":"2024-06-30"]
    plot_normalized_prices(pair_train_data, pair[0], pair[1])

I picked PETRONET and SUNDARMFIN after comparing the cointegration p-values of all candidate pairs and inspecting how strongly each pair's normalized prices mean-revert in the plots above.

Step 5: Backtest the pair-trading strategy

In [690]:

def strategy_backtest(data, stock1, stock2, entry_threshold=2, exit_threshold=0.5, initial_cash=100000, debug=False):
    """
    Back-test an intraday z-score pairs-trading strategy on hourly data in a single pass.

    For every bar the loop re-estimates the hedge ratio (only on the 9 o'clock bar of a
    Monday, using strictly earlier data), computes the spread and its z-score against the
    preceding bars, derives the target position, executes the resulting trades and marks
    the portfolio to market. A position is only held while the rolling Hurst exponent of
    both stocks is below 0.5, and everything is closed on the last bar of each day.
    After the loop the portfolio value is plotted against the benchmark.

    Accounting conventions used by the loop:
        * Opening a position debits cash by 1.001 x (long notional + short notional);
          closing a position is not charged.
        * A short leg is carried at |quantity| x (2 x short price - current price),
          i.e. the notional set aside at entry plus the unrealised profit or loss.

    Args:
        data (pd.DataFrame): DataFrame with hourly price data for both stocks. Not modified.
        stock1 (str): Ticker (column name) for the first stock.
        stock2 (str): Ticker (column name) for the second stock.
        entry_threshold (float): Z-score threshold for entering a position.
        exit_threshold (float): Z-score range for exiting a position.
        initial_cash (float): Starting wallet amount in rupees.
        debug (bool): If True, print the portfolio state after every bar.

    Returns:
        pd.DataFrame: Per-bar cash, holdings, leg values, portfolio value, hedge ratio,
        spread, z-score, position and rolling Hurst exponents.
    """
    def calculate_hedge_ratio(stock1_data, stock2_data):
        """
        Calculate the hedge ratio (beta) by regressing stock1 on stock2 with an intercept.
        """
        X = sm.add_constant(stock2_data)
        model = sm.OLS(stock1_data, X).fit()
        # params is label-indexed (constant first, slope second); take the slope by
        # position explicitly, since integer keys on a label index are deprecated.
        hedge_ratio = model.params.iloc[1]
        return hedge_ratio

    def hurst_exponent(ts, max_lag=20):
        """
        Estimate the Hurst Exponent for a given time series.

        The estimate is the slope of log(std of lag-differences) against log(lag)
        for lags 2 .. max_lag - 1.

        Args:
            ts (pd.Series): Time series data.
            max_lag (int): Exclusive upper bound of the lags considered.

        Returns:
            float: Hurst Exponent value.
        """
        lags = range(2, max_lag)
        tau = [np.std(ts.diff(lag).dropna()) for lag in lags]
        hurst = np.polyfit(np.log(lags), np.log(tau), 1)[0]
        return hurst

    def rolling_hurst_exponent(ts, window_size=50, max_lag=20):
        """
        Calculate the rolling Hurst Exponent for a given time series.

        The value at position i is computed from the ``window_size`` observations
        before i (the observation at i itself is excluded).

        Args:
            ts (pd.Series): Time series data.
            window_size (int): Size of the rolling window.
            max_lag (int): Maximum lag to consider for the calculation.

        Returns:
            pd.Series: Rolling Hurst Exponent values; NaN for the first ``window_size`` positions.
        """
        hurst_values = []
        for i in range(len(ts)):
            if i < window_size:
                hurst_values.append(np.nan)  # Not enough data for calculation
            else:
                window_ts = ts.iloc[i - window_size:i]
                hurst = hurst_exponent(window_ts, max_lag)
                hurst_values.append(hurst)
        return pd.Series(hurst_values, index=ts.index)

    def plot_portfolio_vs_benchmark(results, title="Portfolio vs Benchmark Performance"):
        """
        Create and display a Plotly graph for portfolio value and benchmark returns.

        The benchmark series is read from a local CSV cache, which is downloaded
        with yfinance first if the file does not exist yet.

        Args:
            results (pd.DataFrame): DataFrame containing backtesting results, including 'Portfolio Value'.
            title (str): Title of the graph (default: "Portfolio vs Benchmark Performance").
        """

        # Fetch benchmark data (cached on disk after the first download)
        # NOTE(review): "^NSEMDCP50" looks like the Yahoo symbol for NIFTY Midcap 50,
        # while the chart labels the series as NIFTY Midcap 100 - confirm the symbol.
        file_path = "nifty_midcap_100_data.csv"
        if not os.path.exists(file_path):
            nifty_midcap_100_data = yf.download("^NSEMDCP50", start="2024-07-01", interval="1h")
            nifty_midcap_100_data["Adj Close"].to_csv(file_path)
        benchmark_data = pd.read_csv(file_path, index_col=0, parse_dates=True)[:"2025-01-01"]["Adj Close"]

        fig = go.Figure()

        # Add portfolio value line
        fig.add_trace(go.Scatter(
            x=results.index,
            y=results['Portfolio Value'],
            mode='lines',
            name='Portfolio Value',
            line=dict(width=2, color='blue')
        ))

        # Normalize benchmark data to start at the same value as the portfolio
        benchmark_normalized = benchmark_data / benchmark_data.iloc[0] * results['Portfolio Value'].iloc[0]

        # Add benchmark line
        fig.add_trace(go.Scatter(
            x=benchmark_data.index,
            y=benchmark_normalized,
            mode='lines',
            name='NIFTY Midcap 100 Benchmark',
            line=dict(width=2, color='black', dash='dash')
        ))

        # Update layout
        fig.update_layout(
            title=title,
            xaxis_title="Date",
            yaxis_title="Value (₹)",
            template="plotly_white",
            legend=dict(font=dict(size=12)),
            height=600,
            width=1000
        )

        # Display the plot
        fig.show()

        # Display final portfolio and benchmark values
        final_portfolio_value = results['Portfolio Value'].iloc[-1]
        final_benchmark_value = benchmark_normalized.iloc[-1]
        print(f"Final Portfolio Value: ₹{final_portfolio_value:,.2f}")
        print(f"Final Benchmark Value: ₹{final_benchmark_value:,.2f}")

    # Work on a copy so the caller's frame is left untouched, and ensure the index is datetime
    data = data.copy()
    data.index = pd.to_datetime(data.index)

    # Initialize portfolio and signals.
    # Monetary and quantity columns start as floats: they receive float values inside
    # the loop, and writing floats into integer columns is a deprecated implicit upcast.
    portfolio = pd.DataFrame(index=data.index)
    portfolio['Cash'] = float(initial_cash)
    portfolio['Stock1 Holdings'] = 0.0
    portfolio['Stock2 Holdings'] = 0.0
    portfolio['Stock1 Value'] = 0.0
    portfolio['Stock2 Value'] = 0.0
    portfolio['Stock1 Short Price'] = 0.0
    portfolio['Stock2 Short Price'] = 0.0
    portfolio['Portfolio Value'] = float(initial_cash)
    portfolio['Hedge Ratio'] = np.nan
    portfolio['Spread'] = np.nan
    portfolio['Z-Score'] = np.nan
    portfolio['Position'] = 0

    # Calculate rolling Hurst Exponent for both stocks
    portfolio['Hurst Stock1'] = rolling_hurst_exponent(data[stock1], window_size=50)
    portfolio['Hurst Stock2'] = rolling_hurst_exponent(data[stock2], window_size=50)

    # Add a variable to track the intended position
    intended_position = 0  # Keeps track of the desired position based on Z-score

    # Iterate through the data (row 0 keeps its initial values and serves as the first "previous" row)
    for i in range(1, len(data)):
        current_time = data.index[i]
        stock1_price = data[stock1].iloc[i]
        stock2_price = data[stock2].iloc[i]

        # Use Hurst Exponent to determine mean-reverting state
        hurst_stock1 = portfolio['Hurst Stock1'].iloc[i]
        hurst_stock2 = portfolio['Hurst Stock2'].iloc[i]

        # Check mean-reverting condition (False while either Hurst value is still NaN)
        is_mean_reverting = (hurst_stock1 < 0.5) and (hurst_stock2 < 0.5)

        # Check if the current timestamp is the last of the day
        is_end_of_day = (i == len(data) - 1) or (data.index[i + 1].date() != current_time.date())

        # Recalculate hedge ratio at the first hour on Mondays
        if current_time.weekday() == 0 and current_time.hour == 9:  # Assuming 9 AM is the market opening hour
            # Use all data available until the current time
            available_data = data.loc[:current_time - pd.Timedelta(hours=1)]  # Exclude the current hour
            if len(available_data) > 1:  # Ensure there's enough data to calculate the hedge ratio
                hedge_ratio = calculate_hedge_ratio(available_data[stock1], available_data[stock2])
                portfolio.loc[current_time, 'Hedge Ratio'] = hedge_ratio

        # Use the most recent hedge ratio (NaN until the first one has been estimated)
        hedge_ratio = portfolio['Hedge Ratio'].ffill().iloc[i]
        spread = stock1_price - abs(hedge_ratio) * stock2_price
        portfolio.loc[current_time, 'Spread'] = spread

        # Z-score of the current spread against the preceding bars only.
        # Needs at least 20 prior bars; the look-back window is capped at 50 bars.
        if i >= 20:
            rolling_mean = portfolio['Spread'].iloc[:i].rolling(window=min(i,50)).mean().iloc[-1]
            rolling_std = portfolio['Spread'].iloc[:i].rolling(window=min(i,50)).std().iloc[-1]
            z_score = (spread - rolling_mean) / rolling_std
        else:
            z_score = 0  # Set Z-score to 0 until enough data is available

        portfolio.loc[current_time, 'Z-Score'] = z_score

        # Generate signals unless it's end of day.
        # Between the exit and entry thresholds the previous intention is kept.
        if is_mean_reverting and not is_end_of_day:
            if z_score > entry_threshold:
                intended_position = -1
            elif z_score < -entry_threshold:
                intended_position = 1
            elif abs(z_score) < exit_threshold:
                intended_position = 0
        else:
            # Flat at the end of the day and whenever the pair is not mean-reverting
            intended_position = 0

        # Set the actual position to the intended position
        position = intended_position
        portfolio.loc[current_time, 'Position'] = position

        # Execute trades and update portfolio
        prev_row = portfolio.iloc[i - 1]

        if position != prev_row['Position']:
            # Handle exiting the previous position
            if prev_row['Position'] == 1:  # Exiting a long Stock1, short Stock2 position
                cash_from_stock1 = prev_row['Stock1 Holdings'] * stock1_price
                cash_from_stock2 = abs(prev_row['Stock2 Holdings']) * stock2_price

                # Add profits/losses from the short Stock2 position
                cash_from_stock2 += 2 * abs(prev_row['Stock2 Holdings']) * (prev_row['Stock2 Short Price'] - stock2_price)

                # Update cash with proceeds from squaring off
                portfolio.loc[current_time, 'Cash'] = prev_row['Cash'] + cash_from_stock1 + cash_from_stock2

            elif prev_row['Position'] == -1:  # Exiting a short Stock1, long Stock2 position
                cash_from_stock1 = abs(prev_row['Stock1 Holdings']) * stock1_price
                cash_from_stock2 = prev_row['Stock2 Holdings'] * stock2_price

                # Add profits/losses from the short Stock1 position
                cash_from_stock1 += 2 * abs(prev_row['Stock1 Holdings']) * (prev_row['Stock1 Short Price'] - stock1_price)

                # Update cash with proceeds from squaring off
                portfolio.loc[current_time, 'Cash'] = prev_row['Cash'] + cash_from_stock1 + cash_from_stock2

            else:  # prev_row['Position'] == 0 (No prior position)
                portfolio.loc[current_time, 'Cash'] = prev_row['Cash']

            # Reset holdings and short prices when exiting a position
            portfolio.loc[current_time, 'Stock1 Holdings'] = 0
            portfolio.loc[current_time, 'Stock2 Holdings'] = 0
            portfolio.loc[current_time, 'Stock1 Short Price'] = 0
            portfolio.loc[current_time, 'Stock2 Short Price'] = 0

            # Handle entering the new position
            if position == 1:  # Entering a long Stock1, short Stock2 position
                # Size on 99.9% of cash so the 0.1% entry surcharge below still fits
                num_stock1 = ( 0.999 * portfolio.loc[current_time, 'Cash']) // (stock1_price + abs(hedge_ratio) * stock2_price)
                num_stock2 = -num_stock1 * abs(hedge_ratio)

                # Update holdings
                portfolio.loc[current_time, 'Stock1 Holdings'] = num_stock1
                portfolio.loc[current_time, 'Stock2 Holdings'] = num_stock2

                # Record the short price for Stock2
                portfolio.loc[current_time, 'Stock2 Short Price'] = stock2_price

                # Update cash after entering the position (num_stock2 is negative, so both notionals are debited)
                portfolio.loc[current_time, 'Cash'] -= 1.001 * ((num_stock1 * stock1_price) - (num_stock2 * stock2_price))

            elif position == -1:  # Entering a short Stock1, long Stock2 position
                num_stock1 = -((0.999 * portfolio.loc[current_time, 'Cash']) // (stock1_price + abs(hedge_ratio) * stock2_price))
                num_stock2 = -num_stock1 * abs(hedge_ratio)

                # Update holdings
                portfolio.loc[current_time, 'Stock1 Holdings'] = num_stock1
                portfolio.loc[current_time, 'Stock2 Holdings'] = num_stock2

                # Record the short price for Stock1
                portfolio.loc[current_time, 'Stock1 Short Price'] = stock1_price

                # Update cash after entering the position (num_stock1 is negative, so both notionals are debited)
                portfolio.loc[current_time, 'Cash'] -= 1.001 * ((num_stock2 * stock2_price) - (num_stock1 * stock1_price))

        else:
            # If no position change, carry forward previous holdings, cash, and short prices
            portfolio.loc[current_time, 'Stock1 Holdings'] = prev_row['Stock1 Holdings']
            portfolio.loc[current_time, 'Stock2 Holdings'] = prev_row['Stock2 Holdings']
            portfolio.loc[current_time, 'Cash'] = prev_row['Cash']
            portfolio.loc[current_time, 'Stock1 Short Price'] = prev_row['Stock1 Short Price']
            portfolio.loc[current_time, 'Stock2 Short Price'] = prev_row['Stock2 Short Price']

        # Explicitly update stock values (short legs: entry notional plus unrealised P&L)
        if portfolio.loc[current_time, 'Stock1 Holdings'] < 0:
            portfolio.loc[current_time, 'Stock1 Value'] = -portfolio.loc[current_time, 'Stock1 Holdings'] * (stock1_price + 2 * (portfolio.loc[current_time, 'Stock1 Short Price'] - stock1_price))
        else:
            portfolio.loc[current_time, 'Stock1 Value'] = portfolio.loc[current_time, 'Stock1 Holdings'] * stock1_price

        if portfolio.loc[current_time, 'Stock2 Holdings'] < 0:
            portfolio.loc[current_time, 'Stock2 Value'] = -portfolio.loc[current_time, 'Stock2 Holdings'] * (stock2_price + 2 * (portfolio.loc[current_time, 'Stock2 Short Price'] - stock2_price))
        else:
            portfolio.loc[current_time, 'Stock2 Value'] = portfolio.loc[current_time, 'Stock2 Holdings'] * stock2_price

        # Update portfolio value
        portfolio.loc[current_time, 'Portfolio Value'] = portfolio.loc[current_time, 'Cash'] + \
                                                        portfolio.loc[current_time, 'Stock1 Value'] + \
                                                        portfolio.loc[current_time, 'Stock2 Value']

        # Print debug information for each iteration
        if debug:
            print(f"Date: {current_time}")
            print(f"Position: {portfolio.loc[current_time, 'Position']:.2f}")
            print(f"Stock1 Holdings: {portfolio.loc[current_time, 'Stock1 Holdings']}")
            print(f"Stock2 Holdings: {portfolio.loc[current_time, 'Stock2 Holdings']}")
            print(f"Cash in Hand: {portfolio.loc[current_time, 'Cash']:.2f}")
            print(f"Portfolio Value: {portfolio.loc[current_time, 'Portfolio Value']:.2f}")
            print("-" * 40)

    # Plot portfolio value vs benchmark
    plot_portfolio_vs_benchmark(portfolio)

    return portfolio

# The pair selected for trading
stock1, stock2 = "PETRONET", "SUNDARMFIN"

# Out-of-sample window: everything from 2024-07-01 onwards
test_data = fetch_data(stock1, stock2).loc["2024-07-01":]

# Run the back-test on the out-of-sample window
results = strategy_backtest(test_data, stock1, stock2, exit_threshold=0.5, entry_threshold=2)

Final Portfolio Value: ₹110,114.16
Final Benchmark Value: ₹100,844.04
