In [33]:
import pandas as pd
import yfinance as yf
import seaborn as sns
from datetime import date, timedelta
import numpy as np
import matplotlib.pyplot as plt
import datetime
import time
import scipy.stats as stats
from statsmodels.tsa.stattools import adfuller
import empyrical as emp
# Removed duplicate datetime import

# Date calculation
d2 = date.today() - timedelta(days=200)
d2 = d2.strftime("%Y-%m-%d")

# Plot settings
plt.rcParams['figure.figsize'] = [20, 10]

# Fix for warnings (if you want to suppress them)
import warnings
warnings.filterwarnings('ignore', category=FutureWarning)

In [5]:
def lines():
    for y, color, linestyle in [(0, 'black', '-'),
                                (1.0, 'green', '-'),
                                (2.5, 'green', '--'),
                                (3, 'magenta', '--'),
                                (-1.0, 'red', '--'),
                                (-2.5, 'red', '--'),
                                (-3, 'magenta', '--')]:
        plt.axhline(y=y, color=color, linestyle=linestyle)

In [6]:
path =r"ind_nifty500list.csv"
tickers=pd.read_csv(path)
tickers['Symbol']=tickers['Symbol']+str('.NS')
download=(tickers['Symbol'].to_list())

In [7]:
df=yf.download(tickers=download,start=d2)['Close']

[***                    7%                       ]  37 of 499 completedHTTP Error 404: 
[*********************100%***********************]  499 of 499 completed

8 Failed downloads:
['TV18BRDCST.NS', 'GLS.NS', 'SUVENPHAR.NS', 'GMRINFRA.NS', 'HBLPOWER.NS', 'CENTURYTEX.NS', 'ZOMATO.NS', 'IDFC.NS']: YFTzMissingError('possibly delisted; no timezone found')


In [8]:
df.fillna(method='bfill',inplace=True)

In [9]:
# Calculate percentage change
df2 = df.pct_change().fillna(method='bfill')
# Calculate correlation matrix
corr = df2.corr()

# Print the columns of the correlation DataFrame
#print("Columns in correlation DataFrame:", corr.columns)
# Solution: Rename the index before resetting
corr.index.name = None  # Temporarily remove the index name
corr_long = corr.stack().reset_index()  # Now stack and reset index
corr_long.columns = ['stock1', 'stock2', 'correlation']  # Rename columns for clarity

# Optionally, drop duplicate pairs (if you want unique pairs)
corr_long = corr_long[corr_long['stock1'] < corr_long['stock2']]

# Display the long format DataFrame
print("\nLong Format Correlation DataFrame:")
print(corr_long)


Long Format Correlation DataFrame:
               stock1         stock2  correlation
1           360ONE.NS     3MINDIA.NS     0.165880
2           360ONE.NS    AARTIIND.NS     0.436480
3           360ONE.NS       AAVAS.NS     0.057410
4           360ONE.NS         ABB.NS     0.392783
5           360ONE.NS  ABBOTINDIA.NS     0.273614
...               ...            ...          ...
238628        ZEEL.NS   ZFCVINDIA.NS     0.172358
238629        ZEEL.NS   ZYDUSLIFE.NS     0.317949
239118  ZENSARTECH.NS   ZFCVINDIA.NS     0.141413
239119  ZENSARTECH.NS   ZYDUSLIFE.NS     0.264682
239609   ZFCVINDIA.NS   ZYDUSLIFE.NS     0.224323

[119805 rows x 3 columns]


In [10]:
##The final dataframe without repetitions
Finale=corr_long.loc[corr_long['correlation']!=1]
Finale=Finale.sort_values(by='correlation', ascending=False)
Finale=Finale.drop_duplicates('correlation')

In [11]:
##Selecting stocks with atleast 0.5 corrcoef value
Finale=Finale.loc[Finale['correlation']>0.5]
Finale

Unnamed: 0,stock1,stock2,correlation
172863,PFC.NS,RECLTD.NS,0.919762
112672,IOB.NS,UCOBANK.NS,0.888001
46289,CENTRALBK.NS,IOB.NS,0.882905
46522,CENTRALBK.NS,UCOBANK.NS,0.870669
114544,IRCON.NS,RAILTEL.NS,0.857993
...,...,...,...
84004,GMDCLTD.NS,IDFCFIRSTB.NS,0.500071
6931,ADANIPORTS.NS,BIOCON.NS,0.500067
129191,KEC.NS,NAM-INDIA.NS,0.500045
130190,KFINTECH.NS,OBEROIRLTY.NS,0.500022


In [12]:
# Function to get stock data for a given stock from the DataFrame
def get_stock_data(stock_name, df):
    return np.array(df[stock_name])

# Function to calculate correlation between two stocks
def calculate_correlation(stock1_name, stock2_name, df2):
    return df2[stock1_name].corr(df2[stock2_name])

# Function to retrieve industry names for two stocks
def get_industry_names(stock1_name, stock2_name, tickers):
    industry1_row = tickers.loc[tickers['Symbol'] == stock1_name]
    industry2_row = tickers.loc[tickers['Symbol'] == stock2_name]

    if industry1_row.empty or industry2_row.empty:
        raise ValueError(f"Industry info missing for {stock1_name} or {stock2_name}")

    industry1 = industry1_row.iloc[0]['Industry']
    industry2 = industry2_row.iloc[0]['Industry']
    return industry1, industry2

# Function to calculate residuals between two stocks
def calculate_residuals(stock1, stock2):
    return np.divide(stock1, stock2, out=np.zeros_like(stock1), where=stock2 != 0)

# Function to determine Target Ratio and Stop Loss Ratio
def calculate_ratios(residuals, stock1, stock2):
    current_ratio = stock1[-1] / stock2[-1] if stock2[-1] != 0 else 0
    mean_ratio = np.mean(residuals)
    std_ratio = np.std(residuals)

    if residuals[-1] > mean_ratio:
        target_ratio = mean_ratio + std_ratio
        sl_ratio = mean_ratio + 3 * std_ratio
    else:
        target_ratio = mean_ratio - std_ratio
        sl_ratio = mean_ratio - 3 * std_ratio

    return target_ratio, sl_ratio, current_ratio

# Function to perform ADF test on the residuals
def perform_adf_test(residuals, debug=False):
    if np.any(np.isnan(residuals)):
        raise ValueError("NaNs found in residuals before ADF test.")
        
    residuals_zscore = stats.zscore(residuals)
    result = adfuller(residuals_zscore)

    if debug:
        print('ADF Statistic:', result[0])
        print('p-value:', result[1])
        print('Critical Values:')
        for key, value in result[4].items():
            print(f'\t{key}: {value:.3f}')

    adf_passed = result[1] < 0.05
    return adf_passed, result[1]

# Main function to run pair trading logic for a stock pair
def PairTrade(i, Finale, df, df2, tickers, debug=False):
    stock1_name = Finale['stock1'].iloc[i]
    stock2_name = Finale['stock2'].iloc[i]

    stock1 = get_stock_data(stock1_name, df)
    stock2 = get_stock_data(stock2_name, df)

    correlation = calculate_correlation(stock1_name, stock2_name, df2)
    industry1, industry2 = get_industry_names(stock1_name, stock2_name, tickers)

    residuals = calculate_residuals(stock1, stock2)
    target_ratio, sl_ratio, current_ratio = calculate_ratios(residuals, stock1, stock2)

    adf_passed, p_value = perform_adf_test(residuals, debug)

    if adf_passed:
        if debug:
            print("ADF test passed")
            print("Target Distance from Current:", abs((target_ratio - current_ratio) / current_ratio))
    else:
        if debug:
            print("ADF test failed")

    return (
        stock1_name,
        stock2_name,
        p_value,
        residuals[-1],
        correlation,
        industry1,
        industry2,
        current_ratio,
        sl_ratio,
        target_ratio
    )

# Function to run pair trading for all pairs
def run_pair_trading(Finale, df, df2, tickers, debug=False):
    Trades = []
    for i in range(len(Finale)):
        try:
            trade_result = PairTrade(i, Finale, df, df2, tickers, debug)
            Trades.append(trade_result)
        except Exception as e:
            if debug:
                print(f"Error in pair {i}: {e}")
    return Trades

In [13]:
DailyTrades=pd.DataFrame(Trades,columns=['stock1','stock2','pvalue','zscore','correlation','Sector1','Sector2','Current Ratio','SL Ratio','Target Ratio'])
##Here we only select the pairs that have a pvalue of 0.05 or lesser 
DailyTrades=DailyTrades.loc[DailyTrades['pvalue']<=0.05]

NameError: name 'Trades' is not defined

In [None]:
DailyTrades

In [None]:
TheFinalTrades = DailyTrades.loc[
    (abs(DailyTrades['zscore']) > 4) &
    (abs(DailyTrades['pvalue']) < 0.05) &
    (DailyTrades['correlation'] > 0.7)
]

In [None]:
TheFinalTrades

In [None]:
def build_simple_portfolio(df):
    df = df.copy()

    df["Expected Return"] = abs(df["Target Ratio"] - df["Current Ratio"]) / df["Current Ratio"]
    df["Risk"] = abs(df["SL Ratio"] - df["Current Ratio"]) / df["Current Ratio"]
    df["Sharpe Score"] = df["Expected Return"] / df["Risk"].replace(0, np.nan)

    # 👇 Relaxed correlation threshold
    df = df[
        (df["correlation"] > 0.70) &
        (df["pvalue"] < 0.05) &
        (df["zscore"].abs() > 2)
    ]

    df = df.dropna(subset=["Sharpe Score"])
    df = df.sort_values(by="Sharpe Score", ascending=False).head(3).reset_index(drop=True)

    return df

In [None]:
def simulate_portfolio_performance(portfolio, capital_per_trade=1000000):
    results = []
    
    for _, row in portfolio.iterrows():
        entry = row["Current Ratio"]
        target = row["Target Ratio"]
        stop = row["SL Ratio"]
        z = row["zscore"]
        
        # Determine direction
        if z > 0:
            direction = "Short Ratio"  # Expect ratio to go down
            take_profit = entry - abs(entry - target)
            stop_loss = entry + abs(entry - stop)
        else:
            direction = "Long Ratio"  # Expect ratio to go up
            take_profit = entry + abs(entry - target)
            stop_loss = entry - abs(entry - stop)
        
        # Simulate outcome (hit target or SL)
        # For now, assume all trades hit target
        final_ratio = target  # or set to stop to simulate loss
        hit_target = True if final_ratio == target else False

        # Calculate return %
        pnl_percent = (final_ratio - entry) / entry
        if direction == "Short Ratio":
            pnl_percent *= -1

        profit = pnl_percent * capital_per_trade

        results.append({
            "stock1": row["stock1"],
            "stock2": row["stock2"],
            "direction": direction,
            "entry": entry,
            "target": target,
            "stop": stop,
            "hit_target": hit_target,
            "return_%": pnl_percent * 100,
            "profit": profit
        })
    
    result_df = pd.DataFrame(results)
    total_return = result_df["profit"].sum()
    avg_return = result_df["return_%"].mean()
    win_rate = result_df["hit_target"].mean() * 100

    print("📈 Portfolio Performance Summary")
    print("--------------------------------")
    print(f"Total PnL: ${total_return:.2f}")
    print(f"Average Return: {avg_return:.2f}%")
    print(f"Success Rate: {win_rate:.2f}%")
    
    return result_df
FinalPortfolio = build_simple_portfolio(DailyTrades)
PerformanceReport = simulate_portfolio_performance(FinalPortfolio, capital_per_trade=1000000)
print(PerformanceReport)

In [36]:
def simulate_portfolio_performance_fixed(
    portfolio, initial_capital=30_000_000, position_size_pct=0.33, 
    win_rate=0.5, seed=42, holding_days=30
):
    """Enhanced simulation with proper sequential trading and drawdown calculation."""
    np.random.seed(seed)
    
    # Initialize portfolio tracking
    total_days = len(portfolio) * holding_days
    dates = pd.date_range(start=pd.Timestamp.today(), periods=total_days, freq='D')
    portfolio_values = [initial_capital]
    current_capital = initial_capital
    trade_logs = []
    
    # Process each trade sequentially
    for i, row in portfolio.iterrows():
        entry, target, stop = row["Current Ratio"], row["Target Ratio"], row["SL Ratio"]
        direction = "Short Ratio" if row["zscore"] > 0 else "Long Ratio"
        position_value = current_capital * position_size_pct
        
        # Simulate trade outcome
        hit_target = np.random.rand() < win_rate
        final_ratio = target if hit_target else stop
        pnl_percent = (final_ratio - entry) / entry
        if direction == "Short Ratio": pnl_percent *= -1
        
        profit = pnl_percent * position_value
        current_capital += profit
        
        # Log trade details
        trade_logs.append({
            "Trade": i + 1, "Pair": f"{row['stock1'][:6]}-{row['stock2'][:6]}",
            "Direction": direction, "Hit Target": hit_target,
            "Return (%)": pnl_percent * 100, "Profit (₹L)": profit / 100_000,
            "Portfolio Value (₹Cr)": current_capital / 10_000_000
        })
        
        # Create daily portfolio evolution
        daily_change = profit / holding_days
        for day in range(holding_days):
            if len(portfolio_values) < len(dates):
                portfolio_values.append(portfolio_values[-1] + daily_change)
    
    # Ensure correct length
    portfolio_values = portfolio_values[:len(dates)]
    portfolio_ts = pd.Series(portfolio_values, index=dates)
    daily_returns = portfolio_ts.pct_change().dropna()
    
    # Calculate performance metrics
    total_return = (portfolio_ts.iloc[-1] / portfolio_ts.iloc[0] - 1) * 100
    metrics = {
        "Total Return (%)": total_return,
        "Annualized Return (%)": emp.annual_return(daily_returns) * 100,
        "Volatility (%)": emp.annual_volatility(daily_returns) * 100,
        "Sharpe Ratio": emp.sharpe_ratio(daily_returns),
        "Max Drawdown (%)": emp.max_drawdown(daily_returns) * 100,
        "Win Rate (%)": sum([t["Hit Target"] for t in trade_logs]) / len(trade_logs) * 100,
        "Final Value (₹Cr)": portfolio_ts.iloc[-1] / 10_000_000
    }
    
    return pd.DataFrame(trade_logs), pd.Series(metrics), portfolio_ts

# Run simulation
trade_log, performance, portfolio_series = simulate_portfolio_performance_fixed(FinalPortfolio)

print("📈 Portfolio Performance Summary")
print("=" * 35)
for metric, value in performance.items():
    print(f"{metric:20}: {value:8.2f}")

print(f"\n📊 Trade Details:")
print(trade_log)

📈 Portfolio Performance Summary
Total Return (%)    :     3.44
Annualized Return (%):    10.04
Volatility (%)      :     1.01
Sharpe Ratio        :     9.52
Max Drawdown (%)    :    -1.40
Win Rate (%)        :    33.33
Final Value (₹Cr)   :     3.10

📊 Trade Details:
   Trade           Pair    Direction  Hit Target  Return (%)  Profit (₹L)  \
0      1  LXCHEM-RENUKA  Short Ratio        True    5.279981     5.227181   
1      2  ADANIE-ADANIP  Short Ratio       False    9.434511     9.502908   
2      3  LICHSG-SJVN.N  Short Ratio       False   -4.397599    -4.567388   

   Portfolio Value (₹Cr)  
0               3.052272  
1               3.147301  
2               3.101627  
