In [None]:
import yfinance as yf
import pandas as pd
import numpy as np
from IPython.display import display, HTML

# 1. Setup & Data Fetching
# Tickers ranked by performance metrics (from best to worst)
tickers = ['USMV', 'SCHD', 'VDC', 'VOO', 'QUAL', 'SPMO', 'AVDV', 'XLP',
           'VUG', 'XLV', 'VTI', 'AVUV', 'VXUS', 'QQQM', 'SPGP', 'VT',
           'JQUA', 'DYNF']

# Download data
raw_data = yf.download(tickers, start="2000-01-01", end="2025-01-01")

# Pick the price table: 'Adj Close' when the download provides it, otherwise
# 'Close'. A frame with flat (non-MultiIndex) columns and no 'Adj Close' is
# passed through unchanged -- presumably already one price column per ticker.
price_fields = raw_data.columns.get_level_values(0)
if 'Adj Close' in price_fields:
    data = raw_data['Adj Close']
elif not isinstance(raw_data.columns, pd.MultiIndex):
    data = raw_data
elif 'Close' in price_fields:
    data = raw_data['Close']
else:
    raise ValueError("Neither 'Adj Close' nor 'Close' found in data")

# A ticker with no prices at all is an all-NaN column; left in place it makes
# the row-wise dropna() below discard every single date.
empty_tickers = [t for t in data.columns if data[t].isna().all()]
if empty_tickers:
    print(f"Warning: no price data for {empty_tickers}; dropping them")
    data = data.drop(columns=empty_tickers)

# fill_method=None: do not forward-fill missing prices before differencing
# (the implicit 'pad' default is deprecated and turns gaps into fake 0% days).
# dropna() then keeps only dates on which every remaining ticker has a return,
# i.e. the history common to all tickers.
returns = data.pct_change(fill_method=None).dropna()

# 2. Define Your Two Portfolios
# Weights are fractions of the portfolio and should sum to 1.0.
# Portfolio A
weights_a = {'VTI': 0.85, 'VXUS': 0.15}
# Portfolio B
# NOTE(review): this dict previously also held 'VONG': 0.1. That made the
# weights sum to 1.10, and because VONG is not in the downloaded `tickers`
# its weight met no return column and was skipped by the weighted sum. The
# allocation below is therefore exactly what was being simulated. If VONG is
# really wanted, add it to `tickers` and trim another weight so the total
# stays at 1.0.
weights_b = {
    'SPMO': 0.1,
    'VTI': 0.4,
    'VXUS': 0.05,
    'AVUV': 0.1,
    'AVDV': 0.1,
    'JQUA': 0.1,
    'DYNF': 0.15,
}

def simulate_future(returns, weights, years=30, block_size=20):
    """Simulate one future wealth path by stitching random blocks of history.

    Contiguous runs of ``block_size`` historical days are drawn at random
    (with replacement) and concatenated until ``years * 252`` daily portfolio
    returns are available; those returns are then compounded.

    Args:
        returns: DataFrame of daily simple returns, one column per ticker.
        weights: Mapping of ticker -> portfolio weight. Weights are used as
            given (not normalised). A weight whose ticker has no column in
            ``returns`` contributes nothing; a warning is issued for it.
        years: Length of the simulated path in years of 252 trading days.
        block_size: Number of consecutive historical days per sampled block.

    Returns:
        pd.Series of cumulative wealth, one entry per simulated day, where
        the first entry is ``1 + first simulated return`` (starting wealth
        of 1.0 is implied, not included).

    Raises:
        ValueError: If ``block_size`` is smaller than 1 or ``returns`` has
            fewer than ``block_size`` rows.
    """
    import warnings

    n_days = years * 252
    n_obs = len(returns)
    if block_size < 1 or n_obs < block_size:
        raise ValueError(
            f"block_size must be >= 1 and returns must have at least "
            f"block_size rows (block_size={block_size}, rows={n_obs})"
        )

    weight_vec = pd.Series(weights, dtype=float)
    missing = [t for t in weight_vec.index if t not in returns.columns]
    if missing:
        warnings.warn(
            f"Tickers {missing} have weights but no return data; "
            "their allocation is ignored",
            stacklevel=2,
        )

    # The weighted portfolio return per historical day does not depend on
    # which block is drawn, so compute it once instead of once per block.
    # Ticker/weight mismatches become NaN products, which sum() skips.
    portfolio_returns = (returns * weight_vec).sum(axis=1).to_numpy()

    # Ceiling division: enough whole blocks to cover n_days.
    n_blocks = max(0, -(-n_days // block_size))
    # randint's upper bound is exclusive; "+ 1" lets the final block
    # (starting at n_obs - block_size) be drawn too.
    block_starts = np.random.randint(0, n_obs - block_size + 1, size=n_blocks)
    # Expand every start into its block of consecutive day positions, then
    # trim the overshoot of the last block.
    day_positions = (block_starts[:, None] + np.arange(block_size)).ravel()[:n_days]
    sim_returns = portfolio_returns[day_positions]

    # Convert to cumulative wealth index
    wealth_index = (1 + pd.Series(sim_returns)).cumprod()
    return wealth_index

# Length, in years, of the underwater streak that calculate_metrics flags.
year_streak = 5


def calculate_metrics(path, risk_free_rate=0.025, trading_days=252):
    """Calculate return, risk and drawdown metrics for one wealth path.

    The path is treated as wealth grown from an implied starting value of
    1.0 that is not itself part of the series, so ``path.iloc[-1]`` is the
    total growth multiple and ``len(path) / trading_days`` the elapsed years.

    Args:
        path: pd.Series of cumulative wealth, one entry per trading day.
        risk_free_rate: Annual risk-free rate used by the Sharpe and
            Sortino ratios.
        trading_days: Trading days per year (integer); used for
            annualisation and for cutting the path into years and decades.

    Returns:
        dict of metrics. Ratios whose denominator is zero (or undefined) and
        decades the path does not reach are reported as 0. The streak flag
        is stored under ``f'Has_{year_streak}Yr_Streak'``, using the
        module-level ``year_streak``.
    """
    # Drawdown series: fractional distance below the running peak (<= 0).
    running_max = path.cummax()
    drawdowns = path / running_max - 1
    dd = drawdowns.min()

    # Drawdown area: summed drawdown magnitude over all days.
    drawdown_area = drawdowns.abs().sum()

    # Ulcer Index: root-mean-square drawdown over the whole path, in percent.
    ulcer_index = np.sqrt((drawdowns ** 2).sum() / len(path)) * 100

    # Final Growth
    growth = path.iloc[-1]

    # CAGR over the actual path length rather than an assumed 30 years.
    n_years = len(path) / trading_days
    cagr = growth ** (1 / n_years) - 1

    # Daily returns volatility. The first day's return is not included
    # because the implied base of 1.0 is not part of the path.
    daily_returns = (path / path.shift(1) - 1).dropna()
    volatility_annual = daily_returns.std() * np.sqrt(trading_days)

    # Sharpe Ratio (CAGR in excess of the risk-free rate per unit volatility)
    excess_return = cagr - risk_free_rate
    sharpe_ratio = excess_return / volatility_annual if volatility_annual > 0 else 0

    # Sortino Ratio: same numerator, but volatility of negative days only.
    downside_returns = daily_returns[daily_returns < 0]
    downside_volatility = (
        downside_returns.std() * np.sqrt(trading_days)
        if len(downside_returns) > 0 else 0
    )
    sortino_ratio = excess_return / downside_volatility if downside_volatility > 0 else 0

    # Win Rate (% of days with positive returns)
    win_rate = (daily_returns > 0).sum() / len(daily_returns) * 100 if len(daily_returns) > 0 else 0

    # Positive Years %: wealth at the end of each complete year versus the
    # end of the previous one (1.0 before the first year), so every complete
    # year -- including the last -- is counted.
    year_end_values = path.iloc[trading_days - 1::trading_days].to_numpy()
    if len(year_end_values) > 0:
        year_start_values = np.concatenate(([1.0], year_end_values[:-1]))
        yearly_returns = year_end_values / year_start_values - 1
        positive_years_pct = (yearly_returns > 0).sum() / len(yearly_returns) * 100
    else:
        positive_years_pct = 0

    # Recovery Factor = Total Return / Max Drawdown magnitude
    recovery_factor = (growth - 1) / abs(dd) if dd != 0 else 0

    # Decade CAGRs, annualised over the days each decade actually covers.
    decade_cagrs = []
    last_pos = len(path) - 1
    for decade_start in (0, 10, 20):
        # Position of the last day *before* the decade; -1 means the
        # implied starting wealth of 1.0.
        start_pos = decade_start * trading_days - 1
        end_pos = min((decade_start + 10) * trading_days - 1, last_pos)
        if end_pos <= start_pos:
            decade_cagrs.append(0)
            continue
        start_value = path.iloc[start_pos] if start_pos >= 0 else 1.0
        span_years = (end_pos - start_pos) / trading_days
        decade_growth = path.iloc[end_pos] / start_value
        decade_cagrs.append(
            decade_growth ** (1 / span_years) - 1 if decade_growth > 0 else 0
        )

    # Martin Ratio as defined here: total return / Ulcer Index.
    martin_ratio = (growth - 1) / ulcer_index if ulcer_index > 0 else 0

    # Compounding Efficiency Score = Total Return / Drawdown Area
    efficiency = (growth - 1) / drawdown_area if drawdown_area > 0 else 0

    # Underwater streaks: runs of consecutive days below the running peak.
    # Padding with 0 on both sides makes every run show up as a +1 step
    # (its start) followed by a -1 step (one past its end).
    underwater = (path < running_max).to_numpy()
    steps = np.diff(np.concatenate(([0], underwater.astype(np.int8), [0])))
    run_lengths = np.flatnonzero(steps == -1) - np.flatnonzero(steps == 1)

    max_streak = int(run_lengths.max()) if run_lengths.size else 0
    has_xyr_streak = max_streak >= trading_days * year_streak

    # Total underwater days
    total_underwater_days = int(underwater.sum())

    return {
        'DD': dd,
        'Growth': growth,
        'CAGR': cagr,
        'Volatility_Annual': volatility_annual,
        'Sharpe_Ratio': sharpe_ratio,
        'Sortino_Ratio': sortino_ratio,
        'Win_Rate': win_rate,
        'Positive_Years_Pct': positive_years_pct,
        'Recovery_Factor': recovery_factor,
        'Decade1_CAGR': decade_cagrs[0],
        'Decade2_CAGR': decade_cagrs[1],
        'Decade3_CAGR': decade_cagrs[2],
        'Drawdown_Area': drawdown_area,
        'Ulcer_Index': ulcer_index,
        'Martin_Ratio': martin_ratio,
        'Efficiency_Score': efficiency,
        f'Has_{year_streak}Yr_Streak': has_xyr_streak,
        'Max_Underwater_Streak_Days': max_streak,
        'Total_Underwater_Days': total_underwater_days
    }

# 3. Run the Stress Test (e.g., 100 simulations for speed)
N_SIMULATIONS = 100

results_a = []
results_b = []

for _ in range(N_SIMULATIONS):
    # Each portfolio gets its own independently drawn future path.
    path_a = simulate_future(returns, weights_a)
    path_b = simulate_future(returns, weights_b)

    metrics_a = calculate_metrics(path_a)
    metrics_b = calculate_metrics(path_b)

    results_a.append(metrics_a)
    results_b.append(metrics_b)

# 4. Analyze the "Compounding Safety"
# One row per simulation, one column per metric.
df_a = pd.DataFrame(results_a)
df_b = pd.DataFrame(results_b)

# Calculate Probability of Ruin (% of sims where final growth < 1.0)
prob_ruin_a = (df_a['Growth'] < 1.0).sum() / len(df_a) * 100
prob_ruin_b = (df_b['Growth'] < 1.0).sum() / len(df_b) * 100


def _summary_rows(df, prob_ruin):
    """Return (label, formatted value) pairs summarising one portfolio.

    Both portfolio columns are built from this single list, so their rows
    and formatting cannot drift apart.

    Args:
        df: DataFrame of per-simulation metrics from calculate_metrics.
        prob_ruin: Percentage of simulations that ended below 1.0 growth.
    """
    growth_p5 = df['Growth'].quantile(0.05)
    mean_streak_days = df['Max_Underwater_Streak_Days'].mean()
    streak_pct = df[f'Has_{year_streak}Yr_Streak'].sum() / len(df) * 100
    return [
        ('CAGR (Mean)', f"{df['CAGR'].mean():.2%}"),
        ('CAGR (StdDev)', f"{df['CAGR'].std():.2%}"),
        ('Volatility Annual (Mean)', f"{df['Volatility_Annual'].mean():.2%}"),
        ('Sharpe Ratio (Mean)', f"{df['Sharpe_Ratio'].mean():.3f}"),
        ('Sortino Ratio (Mean)', f"{df['Sortino_Ratio'].mean():.3f}"),
        ('Win Rate % (Mean)', f"{df['Win_Rate'].mean():.1f}%"),
        ('Positive Years % (Mean)', f"{df['Positive_Years_Pct'].mean():.1f}%"),
        ('Recovery Factor (Mean)', f"{df['Recovery_Factor'].mean():.2f}x"),
        ('Decade 1 CAGR (Mean)', f"{df['Decade1_CAGR'].mean():.2%}"),
        ('Decade 2 CAGR (Mean)', f"{df['Decade2_CAGR'].mean():.2%}"),
        ('Decade 3 CAGR (Mean)', f"{df['Decade3_CAGR'].mean():.2%}"),
        ('Probability of Ruin (%)', f"{prob_ruin:.1f}%"),
        ('Max Drawdown (Mean)', f"{df['DD'].mean():.2%}"),
        ('Max Drawdown (StdDev)', f"{df['DD'].std():.2%}"),
        ('Total Growth (Mean)', f"{df['Growth'].mean():.2f}x"),
        ('Total Growth (5th %ile)', f"{growth_p5:.2f}x"),
        ('Growth $ from $10k (5th %ile)', f"${10000 * growth_p5:,.0f}"),
        ('Martin Ratio (Mean)', f"{df['Martin_Ratio'].mean():.4f}"),
        ('Efficiency Score (Mean)', f"{df['Efficiency_Score'].mean():.4f}"),
        (f'Probability of {year_streak}-Year Underwater Streak', f"{streak_pct:.1f}%"),
        ('Max Underwater Streak (Mean Days)', f"{mean_streak_days:.0f}"),
        ('Max Underwater Streak (Mean Years)', f"{mean_streak_days / 252:.1f}"),
        # Labelled as a mean: the value is averaged across simulations.
        ('Total Underwater Days (Mean)', f"{df['Total_Underwater_Days'].mean():.0f}"),
    ]


# Create summary statistics
rows_a = _summary_rows(df_a, prob_ruin_a)
rows_b = _summary_rows(df_b, prob_ruin_b)
summary_data = {
    'Metric': [label for label, value in rows_a],
    'A': [value for label, value in rows_a],
    'B': [value for label, value in rows_b],
}

summary_df = pd.DataFrame(summary_data)
display(HTML("<h2>Compounding Safety Analysis (30-Year Monte Carlo Simulation)</h2>"))
display(HTML(summary_df.to_html(index=False)))

[*********************100%***********************]  18 of 18 completed


Metric,A,B
CAGR (Mean),13.57%,14.23%
CAGR (StdDev),3.11%,2.66%
Volatility Annual (Mean),16.37%,14.77%
Sharpe Ratio (Mean),0.678,0.795
Sortino Ratio (Mean),0.972,1.163
Win Rate % (Mean),53.6%,54.0%
Positive Years % (Mean),80.6%,83.9%
Recovery Factor (Mean),218.04x,260.64x
Decade 1 CAGR (Mean),13.33%,13.90%
Decade 2 CAGR (Mean),13.00%,15.07%
