# In [None]:
# ============================================================
# CLEAN RESEARCH CODE
# NIFTY 50 Tracking with SNR + Transaction Costs
# ============================================================

# Requirements:
# pip install pandas numpy statsmodels scipy

import numpy as np
import pandas as pd
import statsmodels.api as sm
from pathlib import Path

# -------------------------------
# CONFIGURATION
# -------------------------------

# Minimum number of index observations required in the estimation window
# (about one trading year).
ESTIMATION_WINDOW = 252
# Transaction cost per unit of turnover: 20 bps, quoted round-trip.
TC_COST = 20 / 10_000
# Portfolio sizes (number of stocks held) evaluated at every rebalance.
K_LIST = list(range(10, 31, 5))

# -------------------------------
# LOAD DATA
# -------------------------------

def load_data(cons_path="nifty50_cons.csv",
              index_path="nifty50_index_prices_2021_2025.csv"):
    """
    Load the constituent snapshots and the index price history.

    Parameters
    ----------
    cons_path : path-like
        CSV of constituent snapshots; its "Date" column is parsed as dates.
    index_path : path-like
        CSV of index prices; must contain "date" and "close" columns.

    Returns
    -------
    (cons, index) : tuple of DataFrames
        `cons` is returned as read. `index` is sorted by "date" and gains a
        "ret" column holding the log return of "close" versus the previous
        row; rows without a usable date or return are removed.
    """
    cons = pd.read_csv(cons_path, parse_dates=["Date"])
    index = pd.read_csv(index_path, parse_dates=["date"])

    # diff() compares each row with the one before it, so the rows have to be
    # in chronological order for the result to be a period return.
    index = index.sort_values("date", kind="stable")
    index["ret"] = np.log(index["close"]).diff()

    # Only drop rows that lack a date or a return; a gap in some unrelated
    # column must not silently remove a valid observation.
    index = index.dropna(subset=["date", "ret"])

    return cons, index


# -------------------------------
# SNR COMPUTATION
# -------------------------------

def compute_snr(stock_returns, index_returns, beta):
    """
    SNR = (beta^2 * var(index)) / var(residual)

    The two return Series are matched on their shared index labels and any
    observation where either value is missing is discarded before the
    variances are computed. The residual is stock - beta * index, using the
    supplied `beta` (no regression is run here).

    Parameters
    ----------
    stock_returns : pandas.Series
    index_returns : pandas.Series
    beta : float

    Returns
    -------
    float
        The signal-to-noise ratio. NaN when fewer than two paired
        observations remain; inf when the residual variance is exactly zero
        and the signal is positive.
    """
    stock, market = stock_returns.align(index_returns, join="inner")

    # A single NaN would turn both sample variances into NaN.
    valid = stock.notna() & market.notna()
    y = stock[valid].to_numpy(dtype=float)
    x = market[valid].to_numpy(dtype=float)

    # ddof=1 needs at least two points to be defined.
    if y.size < 2:
        return np.nan

    resid = y - beta * x
    sigma_eps = np.var(resid, ddof=1)
    sigma_mkt = np.var(x, ddof=1)

    # A perfectly explained stock has zero residual variance; report inf
    # (or NaN for 0/0) without emitting a runtime warning.
    with np.errstate(divide="ignore", invalid="ignore"):
        return (beta ** 2 * sigma_mkt) / sigma_eps


# -------------------------------
# PORTFOLIO WEIGHTS (PROJECTION)
# -------------------------------

def projection_weights(stock_returns, index_returns):
    """
    Closed-form via OLS:
    r_b = X w + e

    Regresses the index returns on the stock returns (no intercept) and
    rescales the coefficients so they sum to one.

    Parameters
    ----------
    stock_returns : pandas.DataFrame
        One column per stock, indexed like `index_returns`.
    index_returns : pandas.Series
        Benchmark returns.

    Returns
    -------
    numpy.ndarray
        Weights in the column order of `stock_returns`, summing to 1.

    Raises
    ------
    ValueError
        If no complete observation is shared by the inputs, or if the fitted
        coefficients sum to (numerically) zero or a non-finite value, so the
        weights cannot be normalised.
    """
    # Keep only dates present in both inputs instead of failing on the first
    # date that is missing from the benchmark.
    stocks, bench = stock_returns.align(index_returns, join="inner", axis=0)

    # Least squares is undefined on rows with a missing value.
    complete = stocks.notna().all(axis=1) & bench.notna()
    X = stocks[complete].to_numpy(dtype=float)
    y = bench[complete].to_numpy(dtype=float)

    if X.shape[0] == 0:
        raise ValueError("no overlapping complete observations to fit weights")

    # Only the coefficients are needed, so solve the no-intercept
    # least-squares problem directly.
    w, *_ = np.linalg.lstsq(X, y, rcond=None)

    total = w.sum()
    if not np.isfinite(total) or abs(total) < 1e-12:
        raise ValueError("fitted weights cannot be normalised (sum is zero or non-finite)")

    return w / total


# -------------------------------
# TURNOVER & TC
# -------------------------------

def turnover(prev_w, new_w):
    """
    Sum of absolute weight changes between two portfolios.

    Parameters
    ----------
    prev_w : array-like, pandas.Series or None
        Previous weights. None means there is no previous portfolio.
    new_w : array-like or pandas.Series
        New weights.

    Returns
    -------
    float
        0.0 when `prev_w` is None, otherwise sum(|new_w - prev_w|).
        When both inputs are Series they are matched by label and a label
        missing from one side counts as a weight of 0.0 there; otherwise the
        inputs are compared position by position.
    """
    if prev_w is None:
        return 0.0

    if isinstance(prev_w, pd.Series) and isinstance(new_w, pd.Series):
        # A name entering or leaving the portfolio is a full trade in that
        # name; without the fill its change would be NaN and dropped from
        # the sum.
        prev_w, new_w = prev_w.align(new_w, join="outer", fill_value=0.0)

    change = np.asarray(new_w, dtype=float) - np.asarray(prev_w, dtype=float)
    return float(np.abs(change).sum())


# -------------------------------
# MAIN BACKTEST
# -------------------------------

def run_backtest():
    """
    Run the rolling SNR-ranked tracking backtest.

    Consecutive snapshot dates in the constituents table define the periods:
    the earlier date closes the estimation window (400 calendar days long)
    and the later date closes the out-of-sample window. Periods with fewer
    than ESTIMATION_WINDOW index observations in the estimation window are
    skipped. For every K in K_LIST the K highest-SNR symbols are held with
    equal weights.

    NOTE: stock-level returns are not wired in yet. Index returns stand in
    for each stock's returns in the SNR step and for the portfolio's daily
    returns, so tracking_error stays 0 until real returns are substituted.

    Returns
    -------
    pandas.DataFrame
        One row per (rebalance date, K) with columns: rebalance_date, K,
        tracking_error, turnover, tc, net_return.
    """
    cons, index = load_data()

    # Work with Timestamps so the date arithmetic below is well-defined;
    # a missing date cannot delimit a window and is dropped.
    rebalance_dates = pd.DatetimeIndex(cons["Date"].dropna().unique()).sort_values()

    results = []

    prev_weights = {k: None for k in K_LIST}

    for est_end, reb_date in zip(rebalance_dates[:-1], rebalance_dates[1:]):
        est_start = est_end - pd.Timedelta(days=400)

        idx_est = index[
            (index["date"] >= est_start) &
            (index["date"] <= est_end)
        ]

        idx_oos = index[
            (index["date"] > est_end) &
            (index["date"] <= reb_date)
        ]

        if len(idx_est) < ESTIMATION_WINDOW:
            continue

        snapshot = cons[cons["Date"] == est_end]

        # Placeholder: user must merge stock return data here
        # stock_returns: DataFrame [date x symbol]
        # ---------------------------------------------------
        # This is intentionally left modular for NSE price input
        # ---------------------------------------------------

        # Compute SNR ranking
        snr_scores = {}
        for _, row in snapshot.iterrows():
            symbol = row["Security Symbol"]
            beta = row["Beta"]

            # --- replace with actual stock returns ---
            stock_ret = idx_est["ret"]  # proxy placeholder
            score = compute_snr(
                stock_ret,
                idx_est["ret"],
                beta
            )

            # NaN compares false against everything, which would leave the
            # sort order below undefined; unrankable symbols are left out.
            if pd.notna(score):
                snr_scores[symbol] = score

        ranked = sorted(snr_scores, key=snr_scores.get, reverse=True)
        if not ranked:
            continue

        # Daily benchmark returns over the holding period.
        bench_daily = idx_oos["ret"]
        # --- replace with actual portfolio returns ---
        port_daily = bench_daily  # proxy placeholder

        for k in K_LIST:
            selected = ranked[:k]

            # Equal-weight placeholder (projection step modular).
            # Weights are keyed by symbol and sized by the names actually
            # selected, so a change in membership registers as turnover.
            w = pd.Series(1.0 / len(selected), index=selected)

            prev_w = prev_weights[k]
            if prev_w is None:
                to = turnover(None, w)
            else:
                # A symbol absent on one side has weight 0 there.
                prev_aligned, new_aligned = prev_w.align(
                    w, join="outer", fill_value=0.0
                )
                to = turnover(prev_aligned, new_aligned)

            # NOTE(review): TC_COST is described as a round-trip cost while
            # `to` counts buys and sells separately -- confirm the scaling.
            tc = TC_COST * to

            ret_p = port_daily.mean() - tc

            # Tracking error is the dispersion of the daily active returns
            # (population std); a std of one scalar is always 0.
            active = port_daily - bench_daily

            results.append({
                "rebalance_date": reb_date,
                "K": k,
                "tracking_error": active.std(ddof=0),
                "turnover": to,
                "tc": tc,
                "net_return": ret_p
            })

            prev_weights[k] = w

    return pd.DataFrame(results)


# -------------------------------
# RUN
# -------------------------------

if __name__ == "__main__":
    # Folder that receives the exported results (machine-specific location).
    output_dir = Path("/Users/siddharthadatta/Library/CloudStorage/OneDrive-Personal/MFE tasks/Capstone project/Project_code/WQU_Capstone_Project")

    # Run the backtest, save every row as CSV, then preview the first rows.
    df_results = run_backtest()
    df_results.to_csv(
        output_dir.joinpath("nifty50_tracking_results.csv"),
        index=False,
    )
    print(df_results.head())
