In [2]:
# ============================================================
# BEST INDEX TO INVEST (Final Error-Proof Version + Case A/B)
# ============================================================

import warnings
from datetime import datetime, timedelta
import numpy as np
import pandas as pd
import yfinance as yf

warnings.filterwarnings("ignore")

# ============================================================
# DATE RANGE & RISK-FREE RATE
# ============================================================
# Analysis window as ISO "YYYY-MM-DD" strings: from 3650 days (10 x 365)
# before today up to today.
START_DATE = format(datetime.today() - timedelta(days=365 * 10), "%Y-%m-%d")
END_DATE = format(datetime.today(), "%Y-%m-%d")

# Annual risk-free rate subtracted from the annualised return in the
# Sharpe and Sortino ratios.
RISK_FREE_RATE = 0.05

# ============================================================
# ETF SYMBOLS (Reliable + Working)
# ============================================================
# Display name of each index -> ticker symbol passed to yf.download().
ETF_INDEX_SYMBOLS = {
    "NIFTY 50": "NIFTYBEES.NS",
    "NIFTY NEXT 50": "JUNIORBEES.NS",
    "NIFTY MIDCAP 100": "MIDCAPETF.NS",
    "NIFTY SMALLCAP 100": "SMALLCAP.NS",
}

# ============================================================
# SAFE PRICE EXTRACTOR
# ============================================================
def extract_price(df):
    """Pull a single float price Series out of a downloaded price table.

    Args:
        df: A pandas Series (returned as-is, cast to float) or a DataFrame
            whose columns are either flat or a MultiIndex.

    Returns:
        A float Series. Preference order is "Adj Close", then "Close",
        then the first numeric column. An empty float Series is returned
        when nothing usable is found.
    """
    if isinstance(df, pd.Series):
        return df.astype(float)

    preferred = ("Adj Close", "Close")

    if isinstance(df.columns, pd.MultiIndex):
        # The price-field label may sit on any level of the column index
        # (e.g. (field, ticker) or (ticker, field)), so probe every level
        # instead of assuming level 1.
        for col in preferred:
            for level in range(df.columns.nlevels):
                if col not in df.columns.get_level_values(level):
                    continue
                picked = df.xs(col, level=level, axis=1)
                if isinstance(picked, pd.DataFrame):
                    # Several tickers under the same field: keep the first.
                    picked = picked.iloc[:, 0]
                return picked.astype(float)
    else:
        for col in preferred:
            if col in df.columns:
                picked = df[col]
                if isinstance(picked, pd.DataFrame):
                    # Duplicate column labels yield a frame; keep the first.
                    picked = picked.iloc[:, 0]
                return picked.astype(float)

    # Last resort: the first numeric column, whatever it is called.
    num = df.select_dtypes(include="number")
    if not num.empty:
        return num.iloc[:, 0].astype(float)

    return pd.Series(dtype=float)

# ============================================================
# DOWNLOAD ETF
# ============================================================
def download_etf(symbol):
    """Download the price history of one ticker as a clean float Series.

    Args:
        symbol: Ticker symbol passed to ``yf.download``.

    Returns:
        A float Series of prices between START_DATE and END_DATE with
        missing values removed, or an empty float Series when the download
        produced no usable data.
    """
    df = yf.download(symbol, start=START_DATE, end=END_DATE, progress=False)
    # Defensive: treat a missing result the same as an empty one.
    if df is None or df.empty:
        return pd.Series(dtype=float)

    price = extract_price(df)
    # Callers expect a one-dimensional series; if the extractor handed back
    # a frame (e.g. multi-level columns), collapse it to its first column.
    if isinstance(price, pd.DataFrame):
        if price.shape[1] == 0:
            return pd.Series(dtype=float)
        price = price.iloc[:, 0]

    # dropna() already removes every gap, so no fill step is needed after it.
    return price.dropna().astype(float)

# ============================================================
# METRICS
# ============================================================
def compute_cagr(series):
    """Return the compound annual growth rate of a date-indexed price series.

    Growth is measured from the first to the last non-missing observation,
    with the elapsed time taken from the index in years of 365.25 days.

    Args:
        series: Price Series whose index supports date subtraction
            (the difference must expose ``.days``).

    Returns:
        The CAGR as a float, or NaN when it is undefined: fewer than two
        observations, no elapsed time, a non-positive starting price, or a
        negative ending price.
    """
    s = series.dropna()
    if len(s) < 2:
        return float("nan")

    years = (s.index[-1] - s.index[0]).days / 365.25
    start = float(s.iloc[0])
    end = float(s.iloc[-1])

    # Guard the division by `years` and by `start`, and the fractional power
    # of a negative ratio.
    if years <= 0 or start <= 0 or end < 0:
        return float("nan")

    return float((end / start) ** (1 / years) - 1)

def downside_std(returns):
    """Return the standard deviation of the negative returns only.

    Args:
        returns: Series of periodic returns.

    Returns:
        ``std()`` of the values below zero as a float, or 0.0 when there
        are no negative values.
    """
    losses = returns[returns < 0]
    if losses.empty:
        return 0.0
    return float(losses.std())

def rolling_recovery_days(series):
    """Return the longest time, in days, the series stayed below a prior high.

    The series is walked in order while tracking the running peak and the
    date it was last reached (ties count as a new peak). For every
    observation below the peak, the day count since that date is measured;
    the maximum is returned. A drawdown still open at the end of the series
    is included.

    Args:
        series: Date-indexed price Series; missing values are dropped.

    Returns:
        The maximum number of days as a float; 0.0 for an empty series or
        one that never falls below its running peak.
    """
    series = series.dropna().astype(float)
    # Nothing to measure; avoids indexing into an empty array below.
    if series.empty:
        return 0.0

    values = series.values
    dates = series.index

    peak = float(values[0])
    last_peak = dates[0]
    max_days = 0

    for dt, val in zip(dates, values):
        if val >= peak:
            peak = val
            last_peak = dt
        else:
            max_days = max(max_days, (dt - last_peak).days)

    return float(max_days)

def calculate_metrics(price):
    """Compute the return and risk metrics used for scoring one price series.

    Returns and volatility are annualised with a factor of 252. Sharpe and
    Sortino use the return in excess of RISK_FREE_RATE; Calmar divides the
    annualised return by the absolute maximum drawdown. A ratio is NaN when
    its denominator is zero (Calmar: when there is no drawdown).

    Args:
        price: Date-indexed price Series.

    Returns:
        dict with keys "CAGR_10Y", "Sharpe", "Sortino", "Calmar",
        "Volatility", "Max_Drawdown" and "Recovery_Days".
    """
    periodic = price.pct_change().dropna()
    annualiser = np.sqrt(252)

    ann_ret = float(periodic.mean() * 252)
    ann_vol = float(periodic.std() * annualiser)
    excess = ann_ret - RISK_FREE_RATE

    if ann_vol:
        sharpe = excess / ann_vol
    else:
        sharpe = np.nan

    downside = downside_std(periodic) * np.sqrt(252)
    if downside:
        sortino = excess / downside
    else:
        sortino = np.nan

    # Drawdown relative to the running maximum; the minimum is the deepest.
    running_high = price.cummax()
    max_dd = float((price / running_high - 1).min())

    if max_dd < 0:
        calmar = ann_ret / abs(max_dd)
    else:
        calmar = np.nan

    metrics = {}
    metrics["CAGR_10Y"] = compute_cagr(price)
    metrics["Sharpe"] = sharpe
    metrics["Sortino"] = sortino
    metrics["Calmar"] = calmar
    metrics["Volatility"] = ann_vol
    metrics["Max_Drawdown"] = max_dd
    metrics["Recovery_Days"] = rolling_recovery_days(price)
    return metrics

# ============================================================
# SCORING
# ============================================================
def normalize(v, cap):
    """Scale v by cap and clamp to [0, 1]; a missing v scores 0."""
    if pd.isna(v):
        return 0
    ratio = v / cap
    return max(0, min(ratio, 1))
def inverse(v, cap):
    """Score "lower is better": 1 - v / cap clamped to [0, 1]; a missing v scores 0."""
    if pd.isna(v):
        return 0
    complement = 1 - v / cap
    return max(0, min(complement, 1))
def dd_norm(v):
    """Score a drawdown: 1 at no drawdown, falling to 0 at a magnitude of 0.5 or more."""
    remaining = 1 - abs(v) / 0.5
    capped = min(remaining, 1)
    return max(0, capped)

def score(m):
    """Combine a metrics dict into a single 0-100 composite score.

    Each metric is mapped to [0, 1] by its normaliser, multiplied by its
    weight and summed; the total is divided by the sum of the weights,
    scaled to 100 and rounded to two decimals.

    Args:
        m: dict holding "CAGR_10Y", "Sharpe", "Sortino", "Calmar",
            "Volatility", "Max_Drawdown" and "Recovery_Days".

    Returns:
        The rounded composite score.
    """
    weights = {
        "CAGR_10Y": 25,
        "Sharpe": 15,
        "Sortino": 10,
        "Calmar": 10,
        "Volatility": 10,
        "Max_Drawdown": 10,
        "Recovery_Days": 5,
    }

    # Metric -> function turning the raw value into a 0..1 sub-score.
    # Higher-is-better metrics use normalize, lower-is-better use inverse.
    scorers = (
        ("CAGR_10Y", lambda v: normalize(v, 0.20)),
        ("Sharpe", lambda v: normalize(v, 1.5)),
        ("Sortino", lambda v: normalize(v, 2.0)),
        ("Calmar", lambda v: normalize(v, 1.5)),
        ("Volatility", lambda v: inverse(v, 0.30)),
        ("Max_Drawdown", dd_norm),
        ("Recovery_Days", lambda v: inverse(v, 600)),
    )

    total = 0
    for key, to_unit in scorers:
        total += to_unit(m[key]) * weights[key]

    return round((total / sum(weights.values())) * 100, 2)

# ============================================================
# MAIN EXECUTION
# ============================================================
# Download every configured ETF, compute its metrics and composite score,
# then rank the indices by score (best first).
print("\nüìà ANALYZING ALL NSE INDICES USING ETF DATA...\n")

results = []

for index, symbol in ETF_INDEX_SYMBOLS.items():
    print(f"‚û° {index} ‚Üí ({symbol})")

    price = download_etf(symbol)
    # At least two observations are required: with fewer, returns are empty
    # and the elapsed time used for CAGR is zero.
    if len(price) < 2:
        print("   ‚ö† No valid data. Skipping.\n")
        continue

    m = calculate_metrics(price)
    m["Index"] = index
    m["Score"] = score(m)

    results.append(m)
    print("   ‚úî Metrics calculated.\n")

# Without any result there is no "Score" column to sort on and no best
# index to report, so stop with a clear message instead of a KeyError.
if not results:
    raise SystemExit("No ETF data could be downloaded; nothing to rank.")

df = pd.DataFrame(results).sort_values("Score", ascending=False)

# ============================================================
# PRINT RANKING
# ============================================================
# Print one line per index in ranking order, then highlight the top row.
print("\nüèÜ BEST INDEX TO INVEST (10-Year Ranking):\n")
for _, ranked in df.iterrows():
    cagr_pct = ranked["CAGR_10Y"] * 100
    print(
        f"{ranked['Index']} ‚Üí Score: {ranked['Score']} | "
        f"CAGR: {cagr_pct:.2f}% | Sharpe: {ranked['Sharpe']:.2f}"
    )

# df is sorted by Score descending, so the first row is the winner.
best_index = df.iloc[0]
best_cagr_pct = best_index["CAGR_10Y"] * 100

print("\nüî• BEST INDEX:", best_index["Index"])
print(f"‚≠ê Score: {best_index['Score']} | CAGR: {best_cagr_pct:.2f}%")

# ============================================================
# CASE A / CASE B LOGIC
# ============================================================
# Ask which index the user holds; matching is case-insensitive and ignores
# surrounding whitespace.
user_choice = input("\nüëâ Enter the index you have invested in: ").strip().upper()

valid_indices = df["Index"].str.upper().tolist()

banner = "=============================="
print("\n" + banner)
print("üîç Checking your invested index...")
print(banner + "\n")

if user_choice not in valid_indices:
    # CASE B: the entered index is not among the analysed ones.
    print(f"‚ùå Your index '{user_choice}' could NOT be analyzed.")
    print("This may be because:")
    for reason in (
        "‚Ä¢ Its ETF is not available OR",
        "‚Ä¢ It has insufficient historical data OR",
        "‚Ä¢ It has been delisted",
    ):
        print(reason)

    print("\nüëâ Recommendation:")
    print(f"Switch to: **{best_index['Index']}** (Best Score: {best_index['Score']})")
else:
    # CASE A: the entered index was analysed; report it and compare it
    # with the top-ranked index.
    matches = df[df["Index"].str.upper() == user_choice]
    row = matches.iloc[0]

    print(f"‚úÖ Your index ({user_choice}) is valid and analyzed.")
    print(f"üìä Score: {row['Score']}")
    print(f"üìà CAGR: {row['CAGR_10Y']*100:.2f}%")

    is_best = row["Index"] == best_index["Index"]
    if not is_best:
        print("\n‚ö† Your index is good but NOT the top performer.")
        print(f"üëâ Top Recommended Index: {best_index['Index']} (Score {best_index['Score']})")
    else:
        print("\nüíö GREAT! You already invested in the BEST index!")



üìà ANALYZING ALL NSE INDICES USING ETF DATA...

‚û° NIFTY 50 ‚Üí (NIFTYBEES.NS)
   ‚úî Metrics calculated.

‚û° NIFTY NEXT 50 ‚Üí (JUNIORBEES.NS)
   ‚úî Metrics calculated.

‚û° NIFTY MIDCAP 100 ‚Üí (MIDCAPETF.NS)
   ‚úî Metrics calculated.

‚û° NIFTY SMALLCAP 100 ‚Üí (SMALLCAP.NS)
   ‚úî Metrics calculated.


üèÜ BEST INDEX TO INVEST (10-Year Ranking):

NIFTY 50 ‚Üí Score: 46.54 | CAGR: 13.95% | Sharpe: 0.32
NIFTY NEXT 50 ‚Üí Score: 43.21 | CAGR: 14.33% | Sharpe: 0.60
NIFTY MIDCAP 100 ‚Üí Score: 29.34 | CAGR: 6.38% | Sharpe: 0.16
NIFTY SMALLCAP 100 ‚Üí Score: 22.87 | CAGR: 5.65% | Sharpe: 0.13

üî• BEST INDEX: NIFTY 50
‚≠ê Score: 46.54 | CAGR: 13.95%

üîç Checking your invested index...

‚úÖ Your index (NIFTY 50) is valid and analyzed.
üìä Score: 46.54
üìà CAGR: 13.95%

üíö GREAT! You already invested in the BEST index!
