In [189]:
# Importing required libraries
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import plotly.graph_objs as go
import plotly.io as pio
import plotly.express as px
from plotly.subplots import make_subplots
import requests
from datetime import datetime


In [190]:
# CONFIGURATION VARIABLES - Edit these to customize your retirement scenario


# Personal Information
year_of_birth = 1992
Age_diff_spouse = 4
retirement_year = 2026

# Market Parameters
stock_market_mean_yoy_growthrate_percent = 8
stock_market_std_yoy_growthrate = 18.0977

# Investments Portfolio
Investments = {
    'Ret_accnts': {
        'type': 'stock_retirement',
        'current_value': 324834,
        'beta': 1,
        'mean_yoy_growthrate_percent': 10.26,
        'std_yoy_growthrate': 18.0977,
        'additions_yearly': 0,
        'currency': 'USD'
    },
    'TSLA': {
        'type': 'stock',
        'current_value':0,
        'beta': 1,
        'mean_yoy_growthrate_percent': 9,
        'std_yoy_growthrate': 58.0977,
        'additions_yearly': 0,
        'currency': 'USD'
    },
    'Diversiifed_stock': {
        'type': 'stock',
        'current_value': 1900000,  # Updated value
        'beta': 1,
        'mean_yoy_growthrate_percent': 10.26,
        'std_yoy_growthrate': 18.0977,
        'additions_yearly': 0,
        'currency': 'USD'
    },
    'Marcus': {
        'type': 'cash_certificate',
        'current_value': 20000,
        'additions_yearly': 0,
        'mean_yoy_growthrate_percent': 5,
        'currency': 'USD'
    }
}

# Social Security - Based on Actual Earnings History
# Actual earnings history (Taxed Social Security Earnings)
actual_earnings_history = {
    2014: 37,
    2015: 0,
    2016: 0,
    2017: 24022,
    2018: 123623,
    2019: 121380,
    2020: 137700,
    2021: 142800,
    2022: 133629,
    2023: 160200,
    2024: 168600
}

social_sec = {
    'type': 'social_security',
    'withdrawl_age': 62,
    'earnings_history': actual_earnings_history,
    'future_earnings_assumption': 168600,  # Use 2024 earnings for future years
    'avg_cola_yoy_growth': 2.6,
    'avg_cola_std': 10
}

# Inflation Parameters
USA_inflation_mean = 4
USA_inflation_std = 1
India_inflation_mean = 7.379
India_inflation_std = 4.878

# Living Expenses
Monthly_living = 200000
Monthly_living_currency = 'INR'
Unexpected_expense_min_percent = 0.5
Unexpected_expense_max_percent = 15

# Post-Retirement Job Income
job_income_after_ret_annum = 2000000
job_yoy_increment_percent = 3
job_years_after_ret = 4
job_parameter_curency = 'INR'

# Tax Brackets - US Federal
US_federal_brackets = [
    (0, 22000, 0.10),
    (22000, 89450, 0.12),
    (89450, 190750, 0.22),
    (190750, 364200, 0.24),
    (364200, 462500, 0.32),
    (462500, 693750, 0.35),
    (693750, 1_000_000, 0.37)
]

# Tax Brackets - US State (California)
US_state_brackets = [
    (0, 20198, 0.01),
    (20198, 47884, 0.02),
    (47884, 75576, 0.04),
    (75576, 104910, 0.06),
    (104910, 132590, 0.08),
    (132590, 677278, 0.093),
    (677278, 812728, 0.103),
    (812728, 1354550, 0.113),
    (1354550, 1_000_000, 0.123)
]

# Tax Brackets - India
India_federal_brackets = [
    (0, 250000, 0.0),
    (250000, 500000, 0.05),
    (500000, 1000000, 0.20),
    (1000000, 10000000, 0.30)
]

# Fallback tax rates (not used with progressive brackets, kept for reference)
effective_tax_usa = 32
effective_tax_ind = 30

# Currency Exchange
default_USD_to_INR = 83.5
USD_INR_comission_percent = 0.550
USD_INR_mean_growth_percent = 3.040
USD_INR_STD = 5.57

# Fetch live exchange rate
try:
    response = requests.get("https://api.exchangerate-api.com/v4/latest/USD")
    data = response.json()
    USD_INR_rate = data["rates"]["INR"]
    print(f"âœ“ Live USD to INR rate fetched: {USD_INR_rate:.2f}")
except:
    USD_INR_rate = default_USD_to_INR
    print(f'âš  Unable to reach server, using default USD to INR: {USD_INR_rate:.2f}')

# Simulation Parameters
simulation_counts = 1000
realistic_target_survival_age = 100

# Derived Parameters
current_year = datetime.now().year
target_survival_year = year_of_birth + Age_diff_spouse + 100
years_to_calculate = target_survival_year - current_year

print(f"\\nðŸ“Š Simulation Parameters:")
print(f"   - Current Year: {current_year}")
print(f"   - Years to Simulate: {years_to_calculate}")
print(f"   - Retirement Year: {retirement_year}")
print(f"   - Number of Simulations: {simulation_counts}")


âœ“ Live USD to INR rate fetched: 89.75
\nðŸ“Š Simulation Parameters:
   - Current Year: 2025
   - Years to Simulate: 71
   - Retirement Year: 2026
   - Number of Simulations: 1000


In [191]:
# HELPER FUNCTIONS


def calculate_mortgage_payment(principal, annual_rate, term_years):
    """Calculate monthly mortgage payment"""
    monthly_rate = annual_rate / 12 / 100
    payments = term_years * 12
    return principal * monthly_rate / (1 - (1 + monthly_rate) ** -payments)

def adjust_rand(generated_values, target_mean, target_std):
    """Adjust randomly generated values to match target mean and std"""
    current_mean = np.mean(generated_values)
    current_std = np.std(generated_values)
    if current_std == 0:
        return np.full_like(generated_values, target_mean)
    return (generated_values - current_mean) / current_std * target_std + target_mean

def calculate_progressive_tax(income, brackets):
    """Calculate tax based on progressive tax brackets"""
    if income <= 0:
        return 0
    
    tax = 0
    for bracket_start, bracket_end, rate in brackets:
        if income <= bracket_start:
            break
        
        taxable_in_bracket = min(income, bracket_end) - bracket_start
        if taxable_in_bracket > 0:
            tax += taxable_in_bracket * rate
        
        if income <= bracket_end:
            break
    
    return tax

def calculate_effective_tax_rate(income, brackets):
    """Calculate effective tax rate from progressive brackets"""
    if income <= 0:
        return 0
    tax = calculate_progressive_tax(income, brackets)
    return (tax / income) * 100

def calculate_social_security_benefit(year_of_birth, withdrawal_age, earnings_history, 
                                     future_earnings, current_year, retirement_year_param):
    """
    Calculate Social Security benefit based on actual earnings history.
    Uses SSA methodology with wage indexing and bend points.
    
    Parameters:
    - year_of_birth: Birth year
    - withdrawal_age: Age to start claiming (62-70)
    - earnings_history: Dict of {year: earnings}
    - future_earnings: Assumed earnings for future years BEFORE retirement
    - current_year: Current simulation year
    - retirement_year_param: Year when you stop working (for zero earnings after)
    
    Returns:
    - Monthly benefit amount at specified withdrawal age
    """
    
    # SSA wage index factors (approximate values for 2024 base year)
    # These are used to index past earnings to equivalent 2024 dollars
    wage_index_factors = {
        2014: 1.255, 2015: 1.232, 2016: 1.211, 2017: 1.182,
        2018: 1.148, 2019: 1.121, 2020: 1.094, 2021: 1.059,
        2022: 1.023, 2023: 1.010, 2024: 1.000
    }
    
    # Build complete earnings record (35 years needed for calculation)
    indexed_earnings = []
    
    # Add historical earnings (indexed)
    for year, earnings in earnings_history.items():
        index_factor = wage_index_factors.get(year, 1.0)
        indexed_earnings.append(earnings * index_factor)
    
    # Add future earnings
    age_62_year = year_of_birth + 62
    
    # Fill in years from current year until SS claiming age
    # BUT: Only add earnings until retirement year, then zeros after
    for year in range(current_year, min(age_62_year, year_of_birth + 67)):  # Up to age 67 max
        if year not in earnings_history:
            if year < retirement_year_param:
                # Before retirement: use future earnings assumption
                indexed_earnings.append(future_earnings)
            else:
                # After retirement: zero earnings (or could use post-retirement job income)
                indexed_earnings.append(0)
    
    # Pad with zeros if less than 35 years
    while len(indexed_earnings) < 35:
        indexed_earnings.append(0)
    
    # Take highest 35 years
    top_35_earnings = sorted(indexed_earnings, reverse=True)[:35]
    
    # Calculate Average Indexed Monthly Earnings (AIME)
    aime = sum(top_35_earnings) / 35 / 12
    
    # Apply bend points to calculate Primary Insurance Amount (PIA)
    # 2024 bend points: $1,174 and $7,078
    bend_point_1 = 1174
    bend_point_2 = 7078
    
    if aime <= bend_point_1:
        pia = aime * 0.90
    elif aime <= bend_point_2:
        pia = (bend_point_1 * 0.90) + ((aime - bend_point_1) * 0.32)
    else:
        pia = (bend_point_1 * 0.90) + ((bend_point_2 - bend_point_1) * 0.32) + ((aime - bend_point_2) * 0.15)
    
    # Adjust for early/late retirement
    # Full Retirement Age (FRA) for someone born in 1992 is 67
    fra = 67
    
    if withdrawal_age < fra:
        # Early retirement reduction: ~6.67% per year before FRA (up to 3 years), then 5% per year
        months_early = (fra - withdrawal_age) * 12
        if months_early <= 36:
            reduction = months_early * (5/9) / 100  # 5/9 of 1% per month
        else:
            reduction = (36 * (5/9) / 100) + ((months_early - 36) * (5/12) / 100)
        pia = pia * (1 - reduction)
    elif withdrawal_age > fra:
        # Delayed retirement credits: 8% per year after FRA
        years_delayed = withdrawal_age - fra
        pia = pia * (1 + 0.08 * years_delayed)
    
    return pia

def calculate_rmd(age, account_balance):
    """Calculate Required Minimum Distribution based on IRS life expectancy tables"""
    # IRS Uniform Lifetime Table (simplified version)
    rmd_factors = {
        73: 26.5, 74: 25.5, 75: 24.6, 76: 23.7, 77: 22.9, 78: 22.0,
        79: 21.1, 80: 20.2, 81: 19.4, 82: 18.5, 83: 17.7, 84: 16.8,
        85: 16.0, 86: 15.2, 87: 14.4, 88: 13.7, 89: 12.9, 90: 12.2,
        91: 11.5, 92: 10.8, 93: 10.1, 94: 9.5, 95: 8.9, 96: 8.4,
        97: 7.8, 98: 7.3, 99: 6.8, 100: 6.4
    }
    
    if age < 73:
        return 0
    elif age >= 100:
        factor = 6.4
    else:
        factor = rmd_factors.get(age, 6.4)
    
    return account_balance / factor

def apply_currency_conversion_fee(amount, commission_percent):
    """Apply currency conversion commission"""
    return amount * (1 - commission_percent / 100)

print("âœ“ Helper functions loaded")


âœ“ Helper functions loaded


In [192]:
# SIMULATION FUNCTIONS


def gen_simulation_variables():
    """
    Generate random variables for simulation:
    - USD to INR growth
    - India inflation
    - USA inflation
    - Unexpected expenses
    - Social Security COLA adjustments
    - Investment YOY growth (including rents)
    """
    rand_var_df = pd.DataFrame()
    rand_var_df['USD_INR_growth'] = adjust_rand(
        np.random.normal(loc=USD_INR_mean_growth_percent, scale=USD_INR_STD, size=years_to_calculate),
        USD_INR_mean_growth_percent, USD_INR_STD
    )
    rand_var_df['IND_Inflation_growth'] = adjust_rand(
        np.random.normal(loc=India_inflation_mean, scale=India_inflation_std, size=years_to_calculate),
        India_inflation_mean, India_inflation_std
    )
    rand_var_df['USA_Inflation_growth'] = adjust_rand(
        np.random.normal(loc=USA_inflation_mean, scale=USA_inflation_std, size=years_to_calculate),
        USA_inflation_mean, USA_inflation_std
    )
    rand_var_df['unexpected_expense_rate'] = np.random.uniform(
        Unexpected_expense_min_percent, Unexpected_expense_max_percent, size=years_to_calculate
    )
    rand_var_df['social_sec_cola'] = adjust_rand(
        np.random.normal(loc=social_sec['avg_cola_yoy_growth'], scale=social_sec['avg_cola_std'], size=years_to_calculate),
        social_sec['avg_cola_yoy_growth'], social_sec['avg_cola_std']
    )

    for key, invs in Investments.items():
        means = invs.get('mean_yoy_growthrate_percent', 5)
        std_dev = invs.get('std_yoy_growthrate', 0)

        rand_var_df[f'{key}_growth'] = adjust_rand(
            np.random.normal(loc=means, scale=std_dev, size=years_to_calculate),
            means, std_dev
        )

        if invs.get('type') == 'real_estate':
            rand_var_df[f'{key}_rent_growth'] = adjust_rand(
                np.random.normal(loc=invs['rental_income_yoy_inc_percent'], scale=0, size=years_to_calculate),
                invs['rental_income_yoy_inc_percent'], 0
            )

    return rand_var_df


def simulate_retirement(simulation_var_df):
    """
    Perform yearly retirement simulation:
    - Adjust for inflation, unexpected expenses, mortgage costs
    - Calculate Social Security benefits based on actual earnings
    - Withdraw from investments if needed
    - Track if/when the money runs out
    """
    simulation_var_df['year'] = current_year + simulation_var_df.index
    simulation_var_df['age'] = simulation_var_df['year'] - year_of_birth

    # Calculate Social Security benefit once (will be inflated with COLA each year)
    base_ss_monthly_benefit = calculate_social_security_benefit(
        year_of_birth, 
        social_sec['withdrawl_age'],
        social_sec['earnings_history'],
        social_sec['future_earnings_assumption'],
        current_year,
        retirement_year
    )
    
    # Initialize columns with proper float dtype to avoid pandas warnings
    simulation_var_df['social_sec_income'] = 0.0
    simulation_var_df['rmd'] = 0.0
    simulation_var_df['effective_tax_rate'] = 0.0
    simulation_var_df['living_expense'] = 0.0
    simulation_var_df['unexpected_expense'] = 0.0
    simulation_var_df['ret_job_inc'] = 0.0
    simulation_var_df['post_tax_living_USD_needed'] = 0.0
    simulation_var_df['post_tax_all_mortgage'] = 0.0
    simulation_var_df['post_tax_net_expense'] = 0.0
    simulation_var_df['Pre_tax_net_expense'] = 0.0
    simulation_var_df['Pre_tax_expense_left'] = 0.0
    simulation_var_df['stock_current_value'] = 0.0
    simulation_var_df['stock_ret_current_value'] = 0.0
    simulation_var_df['cash_cert_current_value'] = 0.0
    simulation_var_df['Net_liquid_left'] = 0.0
    simulation_var_df['usd_inr_rate'] = 0.0
    simulation_var_df['inflated_living'] = 0.0
    
    for sim_year in simulation_var_df.index:
        # USD to INR rate calculation
        if sim_year == 0:
            simulation_var_df.loc[sim_year, 'usd_inr_rate'] = USD_INR_rate
        else:
            simulation_var_df.loc[sim_year, 'usd_inr_rate'] = simulation_var_df.loc[sim_year - 1, 'usd_inr_rate'] * (
                1 + simulation_var_df.loc[sim_year, 'USD_INR_growth'] / 100
            )

        # Inflation adjustments for living expenses
        if sim_year == 0:
            simulation_var_df.loc[sim_year, 'inflated_living'] = Monthly_living
        else:
            simulation_var_df.loc[sim_year, 'inflated_living'] = (
                simulation_var_df.loc[sim_year - 1, 'inflated_living'] *
                (1 + simulation_var_df.loc[sim_year - 1, 'IND_Inflation_growth'] / 100)
            )

        # Living and unexpected expenses post-retirement
        if simulation_var_df.loc[sim_year, 'year'] < retirement_year:
            simulation_var_df.loc[sim_year, 'living_expense'] = 0
            simulation_var_df.loc[sim_year, 'unexpected_expense'] = 0
        else:
            simulation_var_df.loc[sim_year, 'living_expense'] = simulation_var_df.loc[sim_year, 'inflated_living'] * 12
            simulation_var_df.loc[sim_year, 'unexpected_expense'] = (
                simulation_var_df.loc[sim_year, 'living_expense'] *
                (simulation_var_df.loc[sim_year, 'unexpected_expense_rate'] / 100)
            )

        # Post-retirement job income
        if (simulation_var_df.loc[sim_year, 'year'] < retirement_year) or \
           (simulation_var_df.loc[sim_year, 'year'] > retirement_year + job_years_after_ret):
            simulation_var_df.loc[sim_year, 'ret_job_inc'] = 0
        elif simulation_var_df.loc[sim_year, 'year'] == retirement_year:
            simulation_var_df.loc[sim_year, 'ret_job_inc'] = job_income_after_ret_annum
        else:
            simulation_var_df.loc[sim_year, 'ret_job_inc'] = simulation_var_df.loc[sim_year - 1, 'ret_job_inc'] * (
                1 + job_yoy_increment_percent / 100
            )

        # Net living (INR) to USD
        simulation_var_df.loc[sim_year, 'post_tax_living_USD_needed'] = (
            (simulation_var_df.loc[sim_year, 'living_expense'] +
             simulation_var_df.loc[sim_year, 'unexpected_expense'] -
             simulation_var_df.loc[sim_year, 'ret_job_inc']) /
            simulation_var_df.loc[sim_year, 'usd_inr_rate']
        )

        # Calculate real estate costs (mortgages) if any
        simulation_var_df.loc[sim_year, 'post_tax_all_mortgage'] = 0

        for key, invs in Investments.items():
            if invs.get('type') == 'real_estate':
                # Real estate logic would go here
                pass

        simulation_var_df.loc[sim_year, 'post_tax_net_expense'] = (
            simulation_var_df.loc[sim_year, 'post_tax_all_mortgage'] +
            simulation_var_df.loc[sim_year, 'post_tax_living_USD_needed']
        )

        # Pre-tax expense calculation
        if simulation_var_df.loc[sim_year, 'year'] <= retirement_year + 2:
            if effective_tax_usa != 0:
                simulation_var_df.loc[sim_year, 'Pre_tax_net_expense'] = (
                    simulation_var_df.loc[sim_year, 'post_tax_net_expense'] / (1 - effective_tax_usa / 100)
                )
                simulation_var_df.loc[sim_year, 'effective_tax_rate'] = effective_tax_usa
            else:
                simulation_var_df.loc[sim_year, 'Pre_tax_net_expense'] = simulation_var_df.loc[sim_year, 'post_tax_net_expense']
                simulation_var_df.loc[sim_year, 'effective_tax_rate'] = 0
        else:
            if effective_tax_ind != 0:
                simulation_var_df.loc[sim_year, 'Pre_tax_net_expense'] = (
                    simulation_var_df.loc[sim_year, 'post_tax_net_expense'] / (1 - effective_tax_ind / 100)
                )
                simulation_var_df.loc[sim_year, 'effective_tax_rate'] = effective_tax_ind
            else:
                simulation_var_df.loc[sim_year, 'Pre_tax_net_expense'] = simulation_var_df.loc[sim_year, 'post_tax_net_expense']
                simulation_var_df.loc[sim_year, 'effective_tax_rate'] = 0

        # Investment growth and tracking
        simulation_var_df.loc[sim_year, 'stock_current_value'] = 0
        simulation_var_df.loc[sim_year, 'stock_ret_current_value'] = 0
        simulation_var_df.loc[sim_year, 'cash_cert_current_value'] = 0

        for key, invsts in Investments.items():
            if invsts['type'] in ['stock', 'stock_retirement', 'cash_certificate']:
                if sim_year == 0:
                    simulation_var_df.loc[sim_year, f'{key}_current_value'] = invsts['current_value']
                else:
                    simulation_var_df.loc[sim_year, f'{key}_current_value'] = (
                        simulation_var_df.loc[sim_year - 1, f'{key}_current_value'] *
                        (1 + (simulation_var_df.loc[sim_year - 1, f'{key}_growth'] / 100))
                    )
                    # Ensure investments never go negative (can't owe money on stocks)
                    simulation_var_df.loc[sim_year, f'{key}_current_value'] = max(0, simulation_var_df.loc[sim_year, f'{key}_current_value'])

                if invsts['type'] == 'stock':
                    simulation_var_df.loc[sim_year, 'stock_current_value'] += simulation_var_df.loc[sim_year, f'{key}_current_value']
                elif invsts['type'] == 'stock_retirement':
                    simulation_var_df.loc[sim_year, 'stock_ret_current_value'] += simulation_var_df.loc[sim_year, f'{key}_current_value']
                elif invsts['type'] == 'cash_certificate':
                    simulation_var_df.loc[sim_year, 'cash_cert_current_value'] += simulation_var_df.loc[sim_year, f'{key}_current_value']

        # Calculate RMDs for retirement accounts
        if simulation_var_df.loc[sim_year, 'age'] >= 73:
            simulation_var_df.loc[sim_year, 'rmd'] = calculate_rmd(
                simulation_var_df.loc[sim_year, 'age'],
                simulation_var_df.loc[sim_year, 'stock_ret_current_value']
            )
        
        # Calculate withdrawals needed
        simulation_var_df.loc[sim_year, 'Pre_tax_expense_left'] = simulation_var_df.loc[sim_year, 'Pre_tax_net_expense'] * 1

        # IMPROVED SOCIAL SECURITY CALCULATION WITH COLA
        if simulation_var_df.loc[sim_year, 'age'] >= social_sec['withdrawl_age']:
            if sim_year == 0 or simulation_var_df.loc[sim_year, 'age'] == social_sec['withdrawl_age']:
                # First year of receiving SS
                simulation_var_df.loc[sim_year, 'social_sec_income'] = base_ss_monthly_benefit * 12
            else:
                # Apply COLA from previous year
                simulation_var_df.loc[sim_year, 'social_sec_income'] = (
                    simulation_var_df.loc[sim_year - 1, 'social_sec_income'] *
                    (1 + simulation_var_df.loc[sim_year - 1, 'social_sec_cola'] / 100)
                )
            
            simulation_var_df.loc[sim_year, 'Pre_tax_expense_left'] -= simulation_var_df.loc[sim_year, 'social_sec_income']

        inv_rat = {}

        # Withdraw from regular stocks first
        if (simulation_var_df.loc[sim_year, 'Pre_tax_expense_left'] > 0) and (simulation_var_df.loc[sim_year, 'stock_current_value'] > 0):
            for key, invsts in Investments.items():
                if invsts['type'] == 'stock':
                    inv_rat[f'{key}'] = (simulation_var_df.loc[sim_year, f'{key}_current_value'] /
                                         simulation_var_df.loc[sim_year, 'stock_current_value'])

            if simulation_var_df.loc[sim_year, 'Pre_tax_expense_left'] <= simulation_var_df.loc[sim_year, 'stock_current_value']:
                simulation_var_df.loc[sim_year, 'stock_current_value'] -= simulation_var_df.loc[sim_year, 'Pre_tax_expense_left']
                simulation_var_df.loc[sim_year, 'Pre_tax_expense_left'] = 0
            else:
                simulation_var_df.loc[sim_year, 'Pre_tax_expense_left'] -= simulation_var_df.loc[sim_year, 'stock_current_value']
                simulation_var_df.loc[sim_year, 'stock_current_value'] = 0

            for key, invsts in Investments.items():
                if invsts['type'] == 'stock':
                    simulation_var_df.loc[sim_year, f'{key}_current_value'] = (
                        inv_rat[f'{key}'] * simulation_var_df.loc[sim_year, 'stock_current_value']
                    )

        # Withdraw from retirement accounts (with penalty if age < 59.5)
        if (simulation_var_df.loc[sim_year, 'Pre_tax_expense_left'] > 0) and (simulation_var_df.loc[sim_year, 'stock_ret_current_value'] > 0):
            penalty_withdraw_multiplier = 1.1 if simulation_var_df.loc[sim_year, 'age'] < 59.5 else 1.0

            for key, invsts in Investments.items():
                if invsts['type'] == 'stock_retirement':
                    inv_rat[f'{key}'] = (
                        simulation_var_df.loc[sim_year, f'{key}_current_value'] /
                        simulation_var_df.loc[sim_year, 'stock_ret_current_value']
                    )

            required_amount = simulation_var_df.loc[sim_year, 'Pre_tax_expense_left'] * penalty_withdraw_multiplier
            if required_amount <= simulation_var_df.loc[sim_year, 'stock_ret_current_value']:
                simulation_var_df.loc[sim_year, 'stock_ret_current_value'] -= required_amount
                simulation_var_df.loc[sim_year, 'Pre_tax_expense_left'] = 0
            else:
                simulation_var_df.loc[sim_year, 'Pre_tax_expense_left'] -= (
                    simulation_var_df.loc[sim_year, 'stock_ret_current_value'] / penalty_withdraw_multiplier
                )
                simulation_var_df.loc[sim_year, 'stock_ret_current_value'] = 0

            for key, invsts in Investments.items():
                if invsts['type'] == 'stock_retirement':
                    simulation_var_df.loc[sim_year, f'{key}_current_value'] = (
                        inv_rat[f'{key}'] * simulation_var_df.loc[sim_year, 'stock_ret_current_value']
                    )

        # Withdraw from cash certificates last
        if (simulation_var_df.loc[sim_year, 'Pre_tax_expense_left'] > 0) and (simulation_var_df.loc[sim_year, 'cash_cert_current_value'] > 0):
            for key, invsts in Investments.items():
                if invsts['type'] == 'cash_certificate':
                    inv_rat[f'{key}'] = (
                        simulation_var_df.loc[sim_year, f'{key}_current_value'] /
                        simulation_var_df.loc[sim_year, 'cash_cert_current_value']
                    )

            if simulation_var_df.loc[sim_year, 'Pre_tax_expense_left'] <= simulation_var_df.loc[sim_year, 'cash_cert_current_value']:
                simulation_var_df.loc[sim_year, 'cash_cert_current_value'] -= simulation_var_df.loc[sim_year, 'Pre_tax_expense_left']
                simulation_var_df.loc[sim_year, 'Pre_tax_expense_left'] = 0
            else:
                simulation_var_df.loc[sim_year, 'Pre_tax_expense_left'] -= simulation_var_df.loc[sim_year, 'cash_cert_current_value']
                simulation_var_df.loc[sim_year, 'cash_cert_current_value'] = 0

            for key, invsts in Investments.items():
                if invsts['type'] == 'cash_certificate':
                    simulation_var_df.loc[sim_year, f'{key}_current_value'] = (
                        inv_rat[f'{key}'] * simulation_var_df.loc[sim_year, 'cash_cert_current_value']
                    )

        # Ensure no negative investment values (investments can't go below zero)
        simulation_var_df.loc[sim_year, 'stock_current_value'] = max(0, simulation_var_df.loc[sim_year, 'stock_current_value'])
        simulation_var_df.loc[sim_year, 'stock_ret_current_value'] = max(0, simulation_var_df.loc[sim_year, 'stock_ret_current_value'])
        simulation_var_df.loc[sim_year, 'cash_cert_current_value'] = max(0, simulation_var_df.loc[sim_year, 'cash_cert_current_value'])
        
        # Check net liquidity left
        simulation_var_df.loc[sim_year, 'Net_liquid_left'] = (
            simulation_var_df.loc[sim_year, 'stock_current_value'] +
            simulation_var_df.loc[sim_year, 'stock_ret_current_value'] +
            simulation_var_df.loc[sim_year, 'cash_cert_current_value']
        )

        if simulation_var_df.loc[sim_year, 'Net_liquid_left'] <= 0:
            return simulation_var_df.loc[sim_year, 'age'], simulation_var_df

    return simulation_var_df.loc[sim_year, 'age'], simulation_var_df

print("âœ“ Simulation functions loaded")


âœ“ Simulation functions loaded


In [193]:
# TEST SOCIAL SECURITY CALCULATION

print('='*80)
print('SOCIAL SECURITY BENEFIT CALCULATION (Based on Actual Earnings)')
print('='*80)

# Test at different withdrawal ages
test_ages = [62, 65, 67, 70]
current_test_year = datetime.now().year

print(f"\nYour Earnings History:")
print(f"{'Year':<10} {'Social Security Wages':<25}")
print('-' * 40)
for year, earnings in sorted(actual_earnings_history.items()):
    print(f"{year:<10} ${earnings:>20,}")

print(f"\nFuture years (until retirement) will assume: ${social_sec['future_earnings_assumption']:,}")

print(f"\n\nEstimated Monthly Social Security Benefits:")
print(f"{'Withdrawal Age':<20} {'Monthly (Today $)':<20} {'Monthly (Future $)':<22} {'Annual (Today $)':<20} {'Annual (Future $)':<20}")
print('-' * 125)

for age in test_ages:
    monthly_benefit_today = calculate_social_security_benefit(
        year_of_birth,
        age,
        actual_earnings_history,
        social_sec['future_earnings_assumption'],
        current_test_year,
        retirement_year
    )
    annual_benefit_today = monthly_benefit_today * 12
    
    # Calculate years until claiming
    claiming_year = year_of_birth + age
    years_until_claiming = claiming_year - current_test_year
    
    # Calculate future value with COLA
    cola_growth_factor = (1 + social_sec['avg_cola_yoy_growth'] / 100) ** years_until_claiming
    monthly_benefit_future = monthly_benefit_today * cola_growth_factor
    annual_benefit_future = annual_benefit_today * cola_growth_factor
    
    label = f"Age {age}"
    if age == 67:
        label += " (FRA)"
    elif age == 62:
        label += " (Early)"
    elif age == 70:
        label += " (Delayed)"
    
    print(f"{label:<20} ${monthly_benefit_today:>17,.2f}  ${monthly_benefit_future:>19,.2f}  ${annual_benefit_today:>17,.2f}  ${annual_benefit_future:>17,.2f}")

print(f"\n\nYour current configuration uses withdrawal age: {social_sec['withdrawl_age']}")
selected_monthly_today = calculate_social_security_benefit(
    year_of_birth,
    social_sec['withdrawl_age'],
    actual_earnings_history,
    social_sec['future_earnings_assumption'],
    current_test_year,
    retirement_year
)
selected_annual_today = selected_monthly_today * 12

# Calculate future values for selected age
claiming_year_selected = year_of_birth + social_sec['withdrawl_age']
years_until_claiming_selected = claiming_year_selected - current_test_year
cola_growth_factor_selected = (1 + social_sec['avg_cola_yoy_growth'] / 100) ** years_until_claiming_selected
selected_monthly_future = selected_monthly_today * cola_growth_factor_selected
selected_annual_future = selected_annual_today * cola_growth_factor_selected

print(f"\nðŸ’¡ Important Note:")
print(f"   You plan to retire at age {retirement_year - year_of_birth} (year {retirement_year})")
print(f"   But you won't claim Social Security until age {social_sec['withdrawl_age']} (year {claiming_year_selected})")
print(f"   The calculation assumes $168,600 earnings through {retirement_year}, then $0 after.")
print(f"   This gives you only {retirement_year - 2014} years of substantial earnings for SS calculation.")

print(f"\nBenefits at age {social_sec['withdrawl_age']}:")
print(f"   ðŸ“Š In TODAY'S DOLLARS ({current_test_year}):")
print(f"      Monthly: ${selected_monthly_today:,.2f}")
print(f"      Annual:  ${selected_annual_today:,.2f}")
print(f"\n   ðŸ’µ In FUTURE DOLLARS ({claiming_year_selected}) - after {years_until_claiming_selected} years of COLA:")
print(f"      Monthly: ${selected_monthly_future:,.2f}")
print(f"      Annual:  ${selected_annual_future:,.2f}")

print(f"\nNote: Future values assume avg COLA of {social_sec['avg_cola_yoy_growth']}% per year")
print(f"      Today's dollars show the purchasing power equivalent in {current_test_year}")
print('='*80)


SOCIAL SECURITY BENEFIT CALCULATION (Based on Actual Earnings)

Your Earnings History:
Year       Social Security Wages    
----------------------------------------
2014       $                  37
2015       $                   0
2016       $                   0
2017       $              24,022
2018       $             123,623
2019       $             121,380
2020       $             137,700
2021       $             142,800
2022       $             133,629
2023       $             160,200
2024       $             168,600

Future years (until retirement) will assume: $168,600


Estimated Monthly Social Security Benefits:
Withdrawal Age       Monthly (Today $)    Monthly (Future $)     Annual (Today $)     Annual (Future $)   
-----------------------------------------------------------------------------------------------------------------------------
Age 62 (Early)       $         1,140.11  $           2,400.05  $        13,681.33  $        28,800.61
Age 65               $         1,411

In [194]:
# RUN MONTE CARLO SIMULATIONS

print(f"ðŸš€ Starting {simulation_counts} Monte Carlo simulations...")
print(f"   This may take a few minutes...\\n")

# Storage for all simulations - we need full DataFrames for worst-case analysis
all_simulations = []  # List of (broke_age, simulation_df, simulation_id) tuples
broke_ages = []
net_liq_results = []

# Run simulations
for i in range(simulation_counts):
    if (i + 1) % 100 == 0:
        print(f"   Progress: {i+1}/{simulation_counts} simulations complete...")
    
    rand_var_df = gen_simulation_variables()
    broke_age, simulation_var_df = simulate_retirement(rand_var_df.copy())
    
    # Store results
    broke_ages.append(broke_age)
    net_liq_results.append(simulation_var_df['Net_liquid_left'].rename(i))
    all_simulations.append((broke_age, simulation_var_df.copy(), i))

# Combine net liquid results
net_liq = pd.concat(net_liq_results, axis=1)
net_liq['age'] = simulation_var_df['age']

# Sort simulations by broke age for easy access to worst/best cases
all_simulations_sorted = sorted(all_simulations, key=lambda x: x[0])

# Identify key scenarios
worst_case = all_simulations_sorted[0]
p10_case = all_simulations_sorted[int(len(all_simulations_sorted) * 0.1)]
p25_case = all_simulations_sorted[int(len(all_simulations_sorted) * 0.25)]
median_case = all_simulations_sorted[len(all_simulations_sorted) // 2]
p75_case = all_simulations_sorted[int(len(all_simulations_sorted) * 0.75)]
p90_case = all_simulations_sorted[int(len(all_simulations_sorted) * 0.90)]
best_case = all_simulations_sorted[-1]

print(f"\\nâœ“ All {simulation_counts} simulations complete!")
print(f"\\nðŸ“‹ Key Scenarios Identified:")
print(f"   Worst Case (Sim #{worst_case[2]}): Broke at age {worst_case[0]:.1f}")
print(f"   10th Percentile (Sim #{p10_case[2]}): Broke at age {p10_case[0]:.1f}")
print(f"   25th Percentile (Sim #{p25_case[2]}): Broke at age {p25_case[0]:.1f}")
print(f"   Median (Sim #{median_case[2]}): Broke at age {median_case[0]:.1f}")
print(f"   75th Percentile (Sim #{p75_case[2]}): Broke at age {p75_case[0]:.1f}")
print(f"   90th Percentile (Sim #{p90_case[2]}): Broke at age {p90_case[0]:.1f}")
print(f"   Best Case (Sim #{best_case[2]}): Broke at age {best_case[0]:.1f}")


ðŸš€ Starting 1000 Monte Carlo simulations...
   This may take a few minutes...\n
   Progress: 100/1000 simulations complete...
   Progress: 200/1000 simulations complete...
   Progress: 300/1000 simulations complete...
   Progress: 400/1000 simulations complete...
   Progress: 500/1000 simulations complete...
   Progress: 600/1000 simulations complete...
   Progress: 700/1000 simulations complete...
   Progress: 800/1000 simulations complete...
   Progress: 900/1000 simulations complete...
   Progress: 1000/1000 simulations complete...
\nâœ“ All 1000 simulations complete!
\nðŸ“‹ Key Scenarios Identified:
   Worst Case (Sim #577): Broke at age 60.0
   10th Percentile (Sim #95): Broke at age 103.0
   25th Percentile (Sim #245): Broke at age 103.0
   Median (Sim #495): Broke at age 103.0
   75th Percentile (Sim #748): Broke at age 103.0
   90th Percentile (Sim #899): Broke at age 103.0
   Best Case (Sim #999): Broke at age 103.0


In [195]:
# SUMMARY STATISTICS

print('='*80)
print('RETIREMENT SIMULATION RESULTS')
print('='*80)
print(f'Number of simulations: {simulation_counts}')
print(f'Target survival age: {realistic_target_survival_age}')
print()
print('BROKE AGE STATISTICS:')
print(f'  Minimum broke age:    {np.min(broke_ages):.1f}')
print(f'  10th percentile:      {np.percentile(broke_ages, 10):.1f}')
print(f'  25th percentile:      {np.percentile(broke_ages, 25):.1f}')
print(f'  Median (50th %ile):   {np.median(broke_ages):.1f}')
print(f'  Mean broke age:       {np.mean(broke_ages):.1f}')
print(f'  75th percentile:      {np.percentile(broke_ages, 75):.1f}')
print(f'  90th percentile:      {np.percentile(broke_ages, 90):.1f}')
print(f'  Maximum broke age:    {np.max(broke_ages):.1f}')
print(f'  Std deviation:        {np.std(broke_ages):.2f}')
print()
failure_rate = sum(1 for age in broke_ages if age < realistic_target_survival_age) * 100 / simulation_counts
print(f'FAILURE RATE: {failure_rate:.1f}%')
print(f'  (Percentage of simulations where money ran out before age {realistic_target_survival_age})')
print()
success_rate = 100 - failure_rate
print(f'SUCCESS RATE: {success_rate:.1f}%')
print('='*80)

# Show distribution plots
random_var_box = px.box(rand_var_df, title="Distribution of Random Variables")
random_var_box.show()

broke_age_hist = px.histogram(broke_ages, nbins=50, title="Distribution of Broke Ages",
                               labels={'value': 'Broke Age', 'count': 'Frequency'})
broke_age_hist.add_vline(x=realistic_target_survival_age, line_dash="dash", 
                          line_color="red", annotation_text="Target Age")
broke_age_hist.show()


RETIREMENT SIMULATION RESULTS
Number of simulations: 1000
Target survival age: 100

BROKE AGE STATISTICS:
  Minimum broke age:    60.0
  10th percentile:      103.0
  25th percentile:      103.0
  Median (50th %ile):   103.0
  Mean broke age:       102.8
  75th percentile:      103.0
  90th percentile:      103.0
  Maximum broke age:    103.0
  Std deviation:        2.31

FAILURE RATE: 0.6%
  (Percentage of simulations where money ran out before age 100)

SUCCESS RATE: 99.4%


In [None]:
# NET LIQUID ASSETS VISUALIZATION (Percentiles)

liq = go.Figure()

# Calculate percentiles across simulations
percentiles = [10, 25, 50, 75, 90]
colors = ['#EF5350', '#FF9800', '#FFD700', '#66BB6A', '#42A5F5']

for idx, p in enumerate(percentiles):
    percentile_values = net_liq.drop('age', axis=1).quantile(p/100, axis=1)
    liq.add_trace(go.Scatter(
        x=net_liq['age'], 
        y=percentile_values, 
        mode='lines', 
        name=f'{p}th percentile',
        line=dict(width=3 if p == 50 else 2, color=colors[idx])
    ))

# Add a few sample trajectories for context
sample_indices = [worst_case[2], p25_case[2], median_case[2], p75_case[2], best_case[2]]
for idx in sample_indices:
    if idx in net_liq.columns:
        liq.add_trace(go.Scatter(
            x=net_liq['age'], 
            y=net_liq[idx], 
            mode='lines', 
            opacity=0.1,
            name=f'Sample {idx}',
            showlegend=False,
            line=dict(width=0.5, color='grey')
        ))

liq.add_hline(y=0, line_dash="solid", line_color="red", annotation_text="Broke")
liq.update_layout(
    title='Net Liquid Assets Over Time (Percentiles)',
    xaxis_title='Age',
    yaxis_title='Net Liquid Assets (USD)',
    hovermode='x unified',
    template='plotly_dark',
    height=600
)
liq.show()


WORST CASE INVESTMENT BREAKDOWN
Simulation ID: #577
Broke at Age: 60



Investment Values at Key Ages:
Age      Taxable Stocks       Retirement Accts     Cash/Certs           Total               
------------------------------------------------------------------------------------------
34       $         2,726,971 $           357,145 $            21,000 $         3,105,116
39       $         1,689,455 $           529,822 $            26,802 $         2,246,079
44       $           755,473 $         1,230,730 $            34,207 $         2,020,411
62       $                 0 $                 0 $                 0 $                 0
60       $                 0 $                 0 $                 0 $                 0


In [197]:
# NET LIQUID ASSETS VISUALIZATION (Percentiles)

liq = go.Figure()

# Calculate percentiles across simulations
percentiles = [10, 25, 50, 75, 90]
colors = ['#EF5350', '#FF9800', '#FFD700', '#66BB6A', '#42A5F5']

for idx, p in enumerate(percentiles):
    percentile_values = net_liq.drop('age', axis=1).quantile(p/100, axis=1)
    liq.add_trace(go.Scatter(
        x=net_liq['age'], 
        y=percentile_values, 
        mode='lines', 
        name=f'{p}th percentile',
        line=dict(width=3 if p == 50 else 2, color=colors[idx])
    ))

# Add a few sample trajectories for context
sample_indices = [worst_case[2], p25_case[2], median_case[2], p75_case[2], best_case[2]]
for idx in sample_indices:
    if idx in net_liq.columns:
        liq.add_trace(go.Scatter(
            x=net_liq['age'], 
            y=net_liq[idx], 
            mode='lines', 
            name=f'Sample Sim #{idx}',
            line=dict(width=1, dash='dot'),
            opacity=0.4,
            showlegend=False
        ))

liq.update_layout(
    title='Net Liquid Assets Over Time (Percentiles)',
    xaxis_title='Age',
    yaxis_title='Net Liquid Value (USD)',
    template='plotly_dark',
    hovermode='x unified',
    height=600
)
liq.show()


In [None]:
# VARIABLES MOVEMENT ANALYSIS - Worst Case & Key Percentiles
 

# Define scenarios to visualize
scenarios = [
    ('Worst Case', worst_case, '#D32F2F'),
    ('10th Percentile', p10_case, '#F57C00'),
    ('Median (50th)', median_case, '#FFD600'),
    ('90th Percentile', p90_case, '#388E3C'),
]

# Create subplots for key variables
fig = make_subplots(
    rows=3, cols=2,
    subplot_titles=(
        'Net Liquid Assets',
        'Pre-tax Net Expenses', 
        'Social Security Income',
        'Required Minimum Distributions (RMDs)',
        'Effective Tax Rate',
        'Investment Breakdown'
    ),
    specs=[[{'secondary_y': False}, {'secondary_y': False}],
           [{'secondary_y': False}, {'secondary_y': False}],
           [{'secondary_y': False}, {'secondary_y': False}]],
    vertical_spacing=0.12,
    horizontal_spacing=0.1
)

# Plot each scenario
for scenario_name, scenario_data, color in scenarios:
    broke_age, df, sim_id = scenario_data
    
    # 1. Net Liquid Assets
    fig.add_trace(
        go.Scatter(x=df['age'], y=df['Net_liquid_left'], 
                   name=scenario_name, line=dict(color=color, width=2),
                   legendgroup=scenario_name),
        row=1, col=1
    )
    
    # 2. Pre-tax Net Expenses
    fig.add_trace(
        go.Scatter(x=df['age'], y=df['Pre_tax_net_expense'], 
                   name=scenario_name, line=dict(color=color, width=2),
                   legendgroup=scenario_name, showlegend=False),
        row=1, col=2
    )
    
    # 3. Social Security Income
    fig.add_trace(
        go.Scatter(x=df['age'], y=df['social_sec_income'], 
                   name=scenario_name, line=dict(color=color, width=2),
                   legendgroup=scenario_name, showlegend=False),
        row=2, col=1
    )
    
    # 4. RMDs
    fig.add_trace(
        go.Scatter(x=df['age'], y=df['rmd'], 
                   name=scenario_name, line=dict(color=color, width=2),
                   legendgroup=scenario_name, showlegend=False),
        row=2, col=2
    )
    
    # 5. Effective Tax Rate
    fig.add_trace(
        go.Scatter(x=df['age'], y=df['effective_tax_rate'], 
                   name=scenario_name, line=dict(color=color, width=2),
                   legendgroup=scenario_name, showlegend=False),
        row=3, col=1
    )
    
    # 6. Investment Breakdown (stacked area for one scenario - worst case)
    if scenario_name == 'Worst Case':
        # Get investment columns
        inv_cols = [col for col in df.columns if col.endswith('_current_value')]
        if inv_cols:
            for inv_col in inv_cols:
                inv_name = inv_col.replace('_current_value', '')
                fig.add_trace(
                    go.Scatter(x=df['age'], y=df[inv_col], 
                               name=inv_name, 
                               stackgroup='one',
                               mode='lines'),
                    row=3, col=2
                )

# Update axes labels
fig.update_xaxes(title_text="Age", row=3, col=1)
fig.update_xaxes(title_text="Age", row=3, col=2)
fig.update_yaxes(title_text="USD", row=1, col=1)
fig.update_yaxes(title_text="USD", row=1, col=2)
fig.update_yaxes(title_text="USD/year", row=2, col=1)
fig.update_yaxes(title_text="USD", row=2, col=2)
fig.update_yaxes(title_text="%", row=3, col=1)
fig.update_yaxes(title_text="USD", row=3, col=2)

# Update layout
fig.update_layout(
    title_text="Detailed Variables Movement Analysis: Worst Case vs. Key Percentiles",
    height=1200,
    template='plotly_dark',
    hovermode='x unified',
    showlegend=True,
    legend=dict(
        orientation="h",
        yanchor="bottom",
        y=1.02,
        xanchor="right",
        x=1
    )
)

fig.show()

# Print detailed analysis for worst case
print("\\n" + "="*80)
print("WORST CASE SCENARIO ANALYSIS")
print("="*80)
worst_broke_age, worst_df, worst_sim_id = worst_case
print(f"Simulation ID: #{worst_sim_id}")
print(f"Broke at Age: {worst_broke_age:.1f}")
print(f"\\nKey Metrics at Retirement (Age ~{retirement_year - year_of_birth}):")
ret_year_idx = retirement_year - current_year
if ret_year_idx < len(worst_df):
    ret_data = worst_df.iloc[ret_year_idx]
    print(f"  Net Liquid Assets: ${ret_data['Net_liquid_left']:,.0f}")
    print(f"  Annual Expenses (pre-tax): ${ret_data['Pre_tax_net_expense']:,.0f}")
    
print(f"\\nKey Metrics at Broke Age ({worst_broke_age:.0f}):")
if len(worst_df) > 0:
    final_data = worst_df.iloc[-1]
    print(f"  Final Net Liquid: ${final_data['Net_liquid_left']:,.0f}")
    print(f"  Social Security Income: ${final_data['social_sec_income']:,.0f}/year")
    print(f"  Effective Tax Rate: {final_data['effective_tax_rate']:.1f}%")
    
print("="*80)


WORST CASE SCENARIO ANALYSIS
Simulation ID: #577
Broke at Age: 60.0
\nKey Metrics at Retirement (Age ~34):
  Net Liquid Assets: $3,105,116
  Annual Expenses (pre-tax): $16,803
\nKey Metrics at Broke Age (60):
  Final Net Liquid: $0
  Social Security Income: $0/year
  Effective Tax Rate: 0.0%


In [199]:
# FAILURE ANALYSIS FUNCTIONS
 

def calculate_portfolio_attribution(df):
    """
    Calculate year-over-year portfolio change attribution.
    Breaks down portfolio changes into: market gains, withdrawals, and other factors.
    """
    attribution_df = pd.DataFrame()
    attribution_df['age'] = df['age']
    attribution_df['year'] = df['year']
    
    # Calculate portfolio value changes
    attribution_df['portfolio_value'] = df['Net_liquid_left']
    attribution_df['portfolio_change'] = df['Net_liquid_left'].diff()
    
    # Calculate expected market gains
    for key, invsts in Investments.items():
        if invsts['type'] in ['stock', 'stock_retirement', 'cash_certificate']:
            attribution_df[f'{key}_market_gain'] = 0.0
            
            # Calculate the gain from previous year's balance using current year's growth rate
            for idx in range(1, len(df)):
                prev_balance = df.loc[idx-1, f'{key}_current_value']
                growth_rate = df.loc[idx, f'{key}_growth'] / 100  # Fixed: use current year's growth rate
                attribution_df.loc[idx, f'{key}_market_gain'] = prev_balance * growth_rate
    
    # Calculate total market gains
    attribution_df['total_market_gains'] = 0
    for key, invsts in Investments.items():
        if invsts['type'] in ['stock', 'stock_retirement', 'cash_certificate']:
            attribution_df['total_market_gains'] += attribution_df[f'{key}_market_gain'].fillna(0)
    
    # Withdrawals (negative impact)
    attribution_df['withdrawals'] = -df['Pre_tax_expense_left'].clip(lower=0)
    
    # Separate TSLA vs other stocks for visibility
    attribution_df['TSLA_gain'] = attribution_df.get('TSLA_market_gain', 0).fillna(0)
    attribution_df['Diversified_gain'] = attribution_df.get('Diversiifed_stock_market_gain', 0).fillna(0)
    attribution_df['Ret_accounts_gain'] = attribution_df.get('Ret_accnts_market_gain', 0).fillna(0)
    
    # Calculate unexpected expenses vs normal
    attribution_df['unexpected_expense_impact'] = -df['unexpected_expense'] / df['usd_inr_rate']
    
    return attribution_df


def identify_extreme_variables(all_failed_sims):
    """
    Identify which variables were in extreme ranges (>2 std from mean) for failed simulations.
    Returns a summary dataframe with z-scores.
    """
    extreme_data = []
    
    for broke_age, sim_df, sim_id in all_failed_sims:
        # Calculate z-scores for key variables
        variables_to_check = [
            'TSLA_growth', 'Diversiifed_stock_growth', 'Ret_accnts_growth',
            'USA_Inflation_growth', 'IND_Inflation_growth', 
            'USD_INR_growth', 'unexpected_expense_rate'
        ]
        
        for var in variables_to_check:
            if var in sim_df.columns:
                values = sim_df[var].values
                mean_val = 0
                std_val = 1
                
                # Get the appropriate mean and std for each variable
                if var == 'TSLA_growth':
                    mean_val = Investments['TSLA']['mean_yoy_growthrate_percent']
                    std_val = Investments['TSLA']['std_yoy_growthrate']
                elif var == 'Diversiifed_stock_growth':
                    mean_val = Investments['Diversiifed_stock']['mean_yoy_growthrate_percent']
                    std_val = Investments['Diversiifed_stock']['std_yoy_growthrate']
                elif var == 'Ret_accnts_growth':
                    mean_val = Investments['Ret_accnts']['mean_yoy_growthrate_percent']
                    std_val = Investments['Ret_accnts']['std_yoy_growthrate']
                elif var == 'USA_Inflation_growth':
                    mean_val = USA_inflation_mean
                    std_val = USA_inflation_std
                elif var == 'IND_Inflation_growth':
                    mean_val = India_inflation_mean
                    std_val = India_inflation_std
                elif var == 'USD_INR_growth':
                    mean_val = USD_INR_mean_growth_percent
                    std_val = USD_INR_STD
                elif var == 'unexpected_expense_rate':
                    mean_val = (Unexpected_expense_min_percent + Unexpected_expense_max_percent) / 2
                    std_val = (Unexpected_expense_max_percent - Unexpected_expense_min_percent) / 4
                else:
                    continue
                
                # Calculate z-scores
                if std_val > 0:
                    z_scores = (values - mean_val) / std_val
                    
                    # Count extreme years
                    extreme_bad = (z_scores < -2).sum()
                    extreme_good = (z_scores > 2).sum()
                    
                    # Average z-score
                    avg_z = z_scores.mean()
                    
                    extreme_data.append({
                        'sim_id': sim_id,
                        'broke_age': broke_age,
                        'variable': var,
                        'avg_z_score': avg_z,
                        'extreme_bad_years': extreme_bad,
                        'extreme_good_years': extreme_good,
                        'worst_year_z': z_scores.min(),
                        'worst_year_age': sim_df.loc[z_scores.argmin(), 'age'] if len(z_scores) > 0 else None
                    })
    
    return pd.DataFrame(extreme_data)


def calculate_factor_contributions(failed_sims):
    """
    Calculate the contribution of each factor to portfolio depletion.
    Returns a dataframe with $ impact of each factor.
    """
    contributions = []
    
    for broke_age, sim_df, sim_id in failed_sims:
        # Starting portfolio
        start_portfolio = sim_df.loc[0, 'Net_liquid_left']
        
        # Calculate cumulative impacts
        contrib = {
            'sim_id': sim_id,
            'broke_age': broke_age,
            'starting_portfolio': start_portfolio
        }
        
        # TSLA impact: difference from expected return
        if 'TSLA_current_value' in sim_df.columns:
            tsla_start = Investments['TSLA']['current_value']
            tsla_expected_rate = Investments['TSLA']['mean_yoy_growthrate_percent'] / 100
            years = len(sim_df)
            
            # What TSLA should have been worth
            tsla_expected = tsla_start * ((1 + tsla_expected_rate) ** years)
            tsla_actual = sim_df['TSLA_current_value'].iloc[-1] if len(sim_df) > 0 else 0
            contrib['TSLA_underperformance'] = tsla_expected - tsla_actual
        
        # Diversified stocks impact
        if 'Diversiifed_stock_current_value' in sim_df.columns:
            div_start = Investments['Diversiifed_stock']['current_value']
            div_expected_rate = Investments['Diversiifed_stock']['mean_yoy_growthrate_percent'] / 100
            years = len(sim_df)
            
            div_expected = div_start * ((1 + div_expected_rate) ** years)
            div_actual = sim_df['Diversiifed_stock_current_value'].iloc[-1] if len(sim_df) > 0 else 0
            contrib['Diversified_underperformance'] = div_expected - div_actual
        
        # Retirement accounts impact
        if 'Ret_accnts_current_value' in sim_df.columns:
            ret_start = Investments['Ret_accnts']['current_value']
            ret_expected_rate = Investments['Ret_accnts']['mean_yoy_growthrate_percent'] / 100
            years = len(sim_df)
            
            ret_expected = ret_start * ((1 + ret_expected_rate) ** years)
            ret_actual = sim_df['Ret_accnts_current_value'].iloc[-1] if len(sim_df) > 0 else 0
            contrib['Retirement_underperformance'] = ret_expected - ret_actual
        
        # Unexpected expenses total
        contrib['unexpected_expenses_total'] = (sim_df['unexpected_expense'] / sim_df['usd_inr_rate']).sum()
        
        # Currency losses (USD/INR worse than expected)
        # This is complex - simplified version
        contrib['currency_impact'] = 0  # Placeholder
        
        # Early withdrawal penalties
        early_withdrawals = sim_df[sim_df['age'] < 59.5]
        if len(early_withdrawals) > 0:
            # Estimate penalty impact
            contrib['early_withdrawal_penalties'] = 0  # Would need more detailed tracking
        else:
            contrib['early_withdrawal_penalties'] = 0
        
        contributions.append(contrib)
    
    return pd.DataFrame(contributions)


def generate_timeline_annotations(sim_df, top_n=5):
    """
    Auto-generate annotations for the worst years in a simulation.
    """
    annotations = []
    
    # Calculate year-over-year portfolio change %
    sim_df['portfolio_change_pct'] = sim_df['Net_liquid_left'].pct_change() * 100
    
    # Find worst years (most negative changes)
    worst_years = sim_df.nsmallest(top_n, 'portfolio_change_pct')
    
    for idx, row in worst_years.iterrows():
        if idx == 0:  # Skip first year (no previous year to compare)
            continue
            
        annotation = f"Age {int(row['age'])}: Portfolio {row['portfolio_change_pct']:.1f}%"
        
        # Add key factors
        factors = []
        if 'TSLA_growth' in sim_df.columns:
            tsla_growth = sim_df.loc[idx-1, 'TSLA_growth']
            if tsla_growth < -20:
                factors.append(f"TSLA {tsla_growth:.1f}%")
        
        if 'unexpected_expense_rate' in sim_df.columns:
            unexp = sim_df.loc[idx, 'unexpected_expense_rate']
            if unexp > 10:
                factors.append(f"Unexpected expense {unexp:.1f}%")
        
        if 'USA_Inflation_growth' in sim_df.columns:
            us_infl = sim_df.loc[idx, 'USA_Inflation_growth']
            if us_infl > 6:
                factors.append(f"US inflation {us_infl:.1f}%")
        
        if factors:
            annotation += " | " + " | ".join(factors)
        
        annotations.append({
            'age': row['age'],
            'year': row['year'],
            'portfolio_change_pct': row['portfolio_change_pct'],
            'annotation': annotation,
            'portfolio_value': row['Net_liquid_left']
        })
    
    return pd.DataFrame(annotations).sort_values('age')


print("âœ“ Failure analysis functions loaded")


âœ“ Failure analysis functions loaded


In [200]:
# IDENTIFY FAILED SIMULATIONS
 

# Filter for failed simulations (ran out of money before age 100)
failed_simulations = [(age, df, sim_id) for age, df, sim_id in all_simulations_sorted if age < realistic_target_survival_age]

print(f"\\n{'='*80}")
print(f"FAILURE ANALYSIS OVERVIEW")
print(f"{'='*80}")
print(f"Total simulations: {simulation_counts}")
print(f"Failed simulations: {len(failed_simulations)} ({len(failed_simulations)/simulation_counts*100:.1f}%)")
print(f"Success rate: {(1 - len(failed_simulations)/simulation_counts)*100:.1f}%")
print()

if len(failed_simulations) > 0:
    print("Failed Simulation Details:")
    print(f"{'Sim ID':<10} {'Broke Age':<12} {'Years Retired':<15} {'Starting Value':<20}")
    print("-" * 65)
    for broke_age, df, sim_id in failed_simulations:
        years_retired = broke_age - (retirement_year - year_of_birth)
        starting_val = df.loc[0, 'Net_liquid_left']
        print(f"#{sim_id:<9} {broke_age:<12.1f} {years_retired:<15.1f} ${starting_val:>18,.0f}")
    print(f"{'='*80}")
else:
    print("ðŸŽ‰ No failures detected! All simulations lasted until target age.")
    print(f"{'='*80}")


FAILURE ANALYSIS OVERVIEW
Total simulations: 1000
Failed simulations: 6 (0.6%)
Success rate: 99.4%

Failed Simulation Details:
Sim ID     Broke Age    Years Retired   Starting Value      
-----------------------------------------------------------------
#577       60.0         26.0            $         2,244,834
#972       62.0         28.0            $         2,244,834
#1         76.0         42.0            $         2,244,834
#716       80.0         46.0            $         2,244,834
#552       83.0         49.0            $         2,244,834
#791       89.0         55.0            $         2,244,834


In [201]:
# STRESS TEST HEATMAP - Variable Extremeness Analysis
 

if len(failed_simulations) > 0:
    print("\\n" + "="*80)
    print("STRESS TEST ANALYSIS - Identifying Extreme Variables")
    print("="*80)
    
    # Get extreme variables data
    extreme_df = identify_extreme_variables(failed_simulations)
    
    # Create pivot tables for visualization
    for sim_id in extreme_df['sim_id'].unique():
        sim_data = extreme_df[extreme_df['sim_id'] == sim_id]
        
        print(f"\\nSimulation #{sim_id} (Broke at age {sim_data['broke_age'].iloc[0]:.0f}):")
        print("-" * 80)
        print(f"{'Variable':<30} {'Avg Z-Score':<15} {'Extreme Bad Yrs':<18} {'Worst Year Z':<15}")
        print("-" * 80)
        
        for _, row in sim_data.iterrows():
            var_name = row['variable'].replace('_growth', '').replace('_rate', '')
            print(f"{var_name:<30} {row['avg_z_score']:>12.2f}   {row['extreme_bad_years']:>15.0f}   {row['worst_year_z']:>13.2f}")
    
    # Create heatmap of z-scores for all failed simulations
    print("\\n" + "="*80)
    print("Creating stress test heatmap...")
    
    # Create a heatmap showing z-scores by simulation and variable
    heatmap_data = extreme_df.pivot_table(
        index='sim_id', 
        columns='variable', 
        values='avg_z_score',
        aggfunc='first'
    )
    
    if not heatmap_data.empty and heatmap_data.values.size > 0:
        # Create plotly heatmap
        fig_heatmap = go.Figure(data=go.Heatmap(
            z=heatmap_data.values,
            x=[col.replace('_growth', '').replace('_rate', '') for col in heatmap_data.columns],
            y=[f"Sim #{int(idx)}" for idx in heatmap_data.index],
            colorscale='RdYlGn',
            zmid=0,
            text=np.round(heatmap_data.values, 2),
            texttemplate='%{text}',
            textfont={"size": 10},
            colorbar=dict(title="Z-Score<br>(Std Dev)"),
            hovertemplate='Simulation: %{y}<br>Variable: %{x}<br>Z-Score: %{z:.2f}<extra></extra>'
        ))
        
        fig_heatmap.update_layout(
            title='Stress Test Heatmap: Average Z-Scores by Simulation<br>(Negative = Below Expected, Red = Bad Performance)',
            xaxis_title='Variables',
            yaxis_title='Failed Simulations',
            height=400 + len(failed_simulations) * 40,
            template='plotly_dark'
        )
        
        fig_heatmap.show()
    else:
        print("Warning: No data available for heatmap visualization")
    
    # Summary statistics
    print("\\n" + "="*80)
    print("SUMMARY: Most Problematic Variables Across All Failures")
    print("="*80)
    
    variable_summary = extreme_df.groupby('variable').agg({
        'avg_z_score': 'mean',
        'extreme_bad_years': 'sum',
        'worst_year_z': 'min'
    }).sort_values('avg_z_score')
    
    print(f"{'Variable':<30} {'Mean Z-Score':<15} {'Total Bad Yrs':<15} {'Worst Z':<12}")
    print("-" * 75)
    for var, row in variable_summary.iterrows():
        var_name = var.replace('_growth', '').replace('_rate', '')
        print(f"{var_name:<30} {row['avg_z_score']:>13.2f}   {row['extreme_bad_years']:>13.0f}   {row['worst_year_z']:>10.2f}")
    
    print("="*80)
else:
    print("\\nNo failed simulations to analyze.")


STRESS TEST ANALYSIS - Identifying Extreme Variables
\nSimulation #577 (Broke at age 60):
--------------------------------------------------------------------------------
Variable                       Avg Z-Score     Extreme Bad Yrs    Worst Year Z   
--------------------------------------------------------------------------------
TSLA                                   0.00                 1           -2.89
Diversiifed_stock                      0.00                 1           -2.25
Ret_accnts                             0.00                 2           -3.29
USA_Inflation                          0.00                 3           -2.37
IND_Inflation                         -0.00                 0           -1.78
USD_INR                               -0.00                 1           -2.22
unexpected_expense                     0.08                 0           -1.99
\nSimulation #972 (Broke at age 62):
--------------------------------------------------------------------------------
Va

SUMMARY: Most Problematic Variables Across All Failures
Variable                       Mean Z-Score    Total Bad Yrs   Worst Z     
---------------------------------------------------------------------------
IND_Inflation                          -0.00               9        -3.36
TSLA                                   -0.00               9        -2.89
Diversiifed_stock                       0.00               5        -3.24
USD_INR                                 0.00               9        -3.09
Ret_accnts                              0.00              14        -3.29
USA_Inflation                           0.00              11        -2.45
unexpected_expense                      0.08               0        -1.99


In [202]:
 # MULTI-FACTOR ATTRIBUTION ANALYSIS
 

if len(failed_simulations) > 0:
    print("\\n" + "="*80)
    print("MULTI-FACTOR ATTRIBUTION ANALYSIS")
    print("="*80)
    print("Calculating how much each factor contributed to portfolio depletion...")
    
    # Calculate factor contributions
    contributions_df = calculate_factor_contributions(failed_simulations)
    
    # Display contribution table
    print("\\nFactor Contribution Summary (USD):")
    print("-" * 100)
    
    for _, row in contributions_df.iterrows():
        print(f"\\n{'='*100}")
        print(f"Simulation #{row['sim_id']:.0f} - Broke at age {row['broke_age']:.0f}")
        print(f"Starting Portfolio: ${row['starting_portfolio']:,.0f}")
        print("-" * 100)
        
        factors = []
        if 'TSLA_underperformance' in row and row['TSLA_underperformance'] > 0:
            factors.append(('TSLA Underperformance', row['TSLA_underperformance']))
        if 'Diversified_underperformance' in row and row['Diversified_underperformance'] > 0:
            factors.append(('Diversified Stocks Underperformance', row['Diversified_underperformance']))
        if 'Retirement_underperformance' in row and row['Retirement_underperformance'] > 0:
            factors.append(('Retirement Accounts Underperformance', row['Retirement_underperformance']))
        if 'unexpected_expenses_total' in row:
            factors.append(('Unexpected Expenses (Total)', row['unexpected_expenses_total']))
        
        # Sort by magnitude
        factors.sort(key=lambda x: x[1], reverse=True)
        
        total_impact = sum([f[1] for f in factors])
        
        for factor_name, factor_value in factors:
            pct_of_total = (factor_value / total_impact * 100) if total_impact > 0 else 0
            print(f"  {factor_name:<45} ${factor_value:>15,.0f}  ({pct_of_total:>5.1f}%)")
        
        print("-" * 100)
        print(f"  {'TOTAL IMPACT':<45} ${total_impact:>15,.0f}")
    
    # Create stacked bar chart showing contributions
    print("\\n" + "="*80)
    print("Creating factor contribution visualization...")
    
    # Prepare data for stacked bar chart
    sim_ids = [f"Sim #{int(row['sim_id'])}" for _, row in contributions_df.iterrows()]
    
    fig_contrib = go.Figure()
    
    # Add traces for each factor
    if 'TSLA_underperformance' in contributions_df.columns:
        fig_contrib.add_trace(go.Bar(
            name='TSLA Underperformance',
            x=sim_ids,
            y=contributions_df['TSLA_underperformance'],
            marker_color='#EF5350'
        ))
    
    if 'Diversified_underperformance' in contributions_df.columns:
        fig_contrib.add_trace(go.Bar(
            name='Diversified Stocks Underperformance',
            x=sim_ids,
            y=contributions_df['Diversified_underperformance'],
            marker_color='#FF9800'
        ))
    
    if 'Retirement_underperformance' in contributions_df.columns:
        fig_contrib.add_trace(go.Bar(
            name='Retirement Accounts Underperformance',
            x=sim_ids,
            y=contributions_df['Retirement_underperformance'],
            marker_color='#FFC107'
        ))
    
    if 'unexpected_expenses_total' in contributions_df.columns:
        fig_contrib.add_trace(go.Bar(
            name='Unexpected Expenses',
            x=sim_ids,
            y=contributions_df['unexpected_expenses_total'],
            marker_color='#9C27B0'
        ))
    
    fig_contrib.update_layout(
        title='Factor Contribution to Portfolio Depletion<br>(What Caused Failure?)',
        xaxis_title='Failed Simulations',
        yaxis_title='Contribution to Loss (USD)',
        barmode='stack',
        template='plotly_dark',
        height=600,
        hovermode='x unified',
        legend=dict(
            orientation="v",
            yanchor="top",
            y=1,
            xanchor="left",
            x=1.02
        )
    )
    
    fig_contrib.show()
    
    # Calculate average contributions across all failures
    print("\\n" + "="*80)
    print("AVERAGE FACTOR CONTRIBUTIONS ACROSS ALL FAILURES")
    print("="*80)
    
    avg_contributions = {
        'TSLA Underperformance': contributions_df['TSLA_underperformance'].mean() if 'TSLA_underperformance' in contributions_df else 0,
        'Diversified Underperformance': contributions_df['Diversified_underperformance'].mean() if 'Diversified_underperformance' in contributions_df else 0,
        'Retirement Accounts Underperformance': contributions_df['Retirement_underperformance'].mean() if 'Retirement_underperformance' in contributions_df else 0,
        'Unexpected Expenses': contributions_df['unexpected_expenses_total'].mean() if 'unexpected_expenses_total' in contributions_df else 0
    }
    
    total_avg = sum(avg_contributions.values())
    
    print(f"{'Factor':<45} {'Average Impact':<20} {'% of Total':<15}")
    print("-" * 80)
    for factor, value in sorted(avg_contributions.items(), key=lambda x: x[1], reverse=True):
        pct = (value / total_avg * 100) if total_avg > 0 else 0
        print(f"{factor:<45} ${value:>18,.0f}  {pct:>13.1f}%")
    print("-" * 80)
    print(f"{'TOTAL':<45} ${total_avg:>18,.0f}  {100.0:>13.1f}%")
    print("="*80)
    
else:
    print("\\nNo failed simulations to analyze.")


MULTI-FACTOR ATTRIBUTION ANALYSIS
Calculating how much each factor contributed to portfolio depletion...
\nFactor Contribution Summary (USD):
----------------------------------------------------------------------------------------------------
Simulation #577 - Broke at age 60
Starting Portfolio: $2,244,834
----------------------------------------------------------------------------------------------------
  Unexpected Expenses (Total)                   $        164,299  (100.0%)
----------------------------------------------------------------------------------------------------
  TOTAL IMPACT                                  $        164,299
Simulation #972 - Broke at age 62
Starting Portfolio: $2,244,834
----------------------------------------------------------------------------------------------------
  Unexpected Expenses (Total)                   $        178,853  (100.0%)
----------------------------------------------------------------------------------------------------
  TOTAL 

AVERAGE FACTOR CONTRIBUTIONS ACROSS ALL FAILURES
Factor                                        Average Impact       % of Total     
--------------------------------------------------------------------------------
TSLA Underperformance                         $               nan            0.0%
Diversified Underperformance                  $               nan            0.0%
Retirement Accounts Underperformance          $               nan            0.0%
Unexpected Expenses                           $           387,875            0.0%
--------------------------------------------------------------------------------
TOTAL                                         $               nan          100.0%


In [203]:
 # AUTO-ANNOTATED TIMELINE - "Killer Years" Analysis
 

if len(failed_simulations) > 0:
    print("\\n" + "="*80)
    print("AUTO-ANNOTATED TIMELINE - Identifying 'Killer Years'")
    print("="*80)
    
    # Create timeline for each failed simulation
    for broke_age, sim_df, sim_id in failed_simulations:
        print(f"\\n{'='*80}")
        print(f"Simulation #{sim_id} - Timeline of Worst Years")
        print(f"{'='*80}")
        
        # Generate annotations for this simulation
        annotations_df = generate_timeline_annotations(sim_df, top_n=5)
        
        if len(annotations_df) > 0:
            print(f"\\nTop 5 Worst Years (Biggest Portfolio Drops):")
            print("-" * 80)
            
            cumulative_loss = 0
            for idx, row in annotations_df.iterrows():
                portfolio_loss = row['portfolio_value'] - sim_df.loc[idx-1, 'Net_liquid_left'] if idx > 0 else 0
                cumulative_loss += abs(portfolio_loss)
                
                print(f"\\n{row['annotation']}")
                print(f"   Portfolio Value: ${row['portfolio_value']:,.0f}")
                print(f"   Loss This Year: ${abs(portfolio_loss):,.0f}")
            
            print(f"\\n{'='*80}")
            print(f"Cumulative Impact of Top 5 Worst Years: ${cumulative_loss:,.0f}")
            print(f"{'='*80}")
            
            # Create interactive timeline visualization
            fig_timeline = go.Figure()
            
            # Add main portfolio line
            fig_timeline.add_trace(go.Scatter(
                x=sim_df['age'],
                y=sim_df['Net_liquid_left'],
                mode='lines',
                name='Portfolio Value',
                line=dict(color='#42A5F5', width=3),
                hovertemplate='Age: %{x}<br>Portfolio: $%{y:,.0f}<extra></extra>'
            ))
            
            # Add markers for worst years
            fig_timeline.add_trace(go.Scatter(
                x=annotations_df['age'],
                y=annotations_df['portfolio_value'],
                mode='markers+text',
                name='Worst Years',
                marker=dict(size=15, color='#EF5350', symbol='x'),
                text=[f"Age {int(age)}" for age in annotations_df['age']],
                textposition='top center',
                hovertemplate='%{text}<br>Portfolio: $%{y:,.0f}<extra></extra>'
            ))
            
            # Add annotations
            for _, row in annotations_df.iterrows():
                fig_timeline.add_annotation(
                    x=row['age'],
                    y=row['portfolio_value'],
                    text=f"{row['portfolio_change_pct']:.1f}%",
                    showarrow=True,
                    arrowhead=2,
                    arrowsize=1,
                    arrowwidth=2,
                    arrowcolor="#EF5350",
                    ax=0,
                    ay=-40,
                    bgcolor="rgba(239, 83, 80, 0.8)",
                    bordercolor="#EF5350",
                    font=dict(color="white", size=10)
                )
            
            fig_timeline.update_layout(
                title=f'Simulation #{sim_id}: Portfolio Timeline with Worst Years Highlighted<br>Broke at Age {broke_age:.0f}',
                xaxis_title='Age',
                yaxis_title='Portfolio Value (USD)',
                template='plotly_dark',
                height=600,
                hovermode='x unified',
                showlegend=True
            )
            
            fig_timeline.show()
        else:
            print("No significant portfolio drops detected.")
    
    # Create comparison view: worst failure vs median successful scenario
    print("\\n" + "="*80)
    print("COMPARISON: Worst Failure vs. Median Success")
    print("="*80)
    
    worst_failure = failed_simulations[0]  # First in sorted list
    
    fig_comparison = go.Figure()
    
    # Add worst failure
    fig_comparison.add_trace(go.Scatter(
        x=worst_failure[1]['age'],
        y=worst_failure[1]['Net_liquid_left'],
        mode='lines',
        name=f'Worst Failure (Sim #{worst_failure[2]})',
        line=dict(color='#EF5350', width=3)
    ))
    
    # Add median success
    fig_comparison.add_trace(go.Scatter(
        x=median_case[1]['age'],
        y=median_case[1]['Net_liquid_left'],
        mode='lines',
        name=f'Median Success (Sim #{median_case[2]})',
        line=dict(color='#66BB6A', width=3)
    ))
    
    # Add worst years markers for failed case
    worst_annotations = generate_timeline_annotations(worst_failure[1], top_n=5)
    if len(worst_annotations) > 0:
        fig_comparison.add_trace(go.Scatter(
            x=worst_annotations['age'],
            y=worst_annotations['portfolio_value'],
            mode='markers',
            name='Killer Years',
            marker=dict(size=12, color='#EF5350', symbol='x', line=dict(width=2, color='white'))
        ))
    
    fig_comparison.update_layout(
        title='What Went Wrong? Worst Failure vs. Median Success',
        xaxis_title='Age',
        yaxis_title='Portfolio Value (USD)',
        template='plotly_dark',
        height=700,
        hovermode='x unified'
    )
    
    fig_comparison.show()
    
else:
    print("\\nNo failed simulations to analyze.")


AUTO-ANNOTATED TIMELINE - Identifying 'Killer Years'
Simulation #577 - Timeline of Worst Years
\nTop 5 Worst Years (Biggest Portfolio Drops):
--------------------------------------------------------------------------------
\nAge 40: Portfolio -24.3% | US inflation 6.7%
   Portfolio Value: $1,699,890
   Loss This Year: $1,848,695
\nAge 47: Portfolio -40.5% | Unexpected expense 14.2%
   Portfolio Value: $1,203,262
   Loss This Year: $1,901,854
\nAge 56: Portfolio -30.7% | TSLA -90.6%
   Portfolio Value: $633,096
   Loss This Year: $2,890,388
\nAge 59: Portfolio -49.2%
   Portfolio Value: $215,136
   Loss This Year: $2,029,698
\nAge 60: Portfolio -100.0%
   Portfolio Value: $0
   Loss This Year: $0
Cumulative Impact of Top 5 Worst Years: $8,670,636


Simulation #972 - Timeline of Worst Years
\nTop 5 Worst Years (Biggest Portfolio Drops):
--------------------------------------------------------------------------------
\nAge 55: Portfolio -24.8% | TSLA -66.2% | Unexpected expense 10.2% | US inflation 6.3%
   Portfolio Value: $711,522
   Loss This Year: $1,908,283
\nAge 58: Portfolio -34.4%
   Portfolio Value: $431,871
   Loss This Year: $2,157,236
\nAge 60: Portfolio -50.9% | TSLA -50.1%
   Portfolio Value: $180,694
   Loss This Year: $1,764,776
\nAge 61: Portfolio -89.5%
   Portfolio Value: $18,902
   Loss This Year: $2,225,932
\nAge 62: Portfolio -100.0% | Unexpected expense 12.3%
   Portfolio Value: $0
   Loss This Year: $0
Cumulative Impact of Top 5 Worst Years: $8,056,227


Simulation #1 - Timeline of Worst Years
\nTop 5 Worst Years (Biggest Portfolio Drops):
--------------------------------------------------------------------------------
\nAge 34: Portfolio -42.3% | TSLA -60.9%
   Portfolio Value: $1,295,722
   Loss This Year: $0
\nAge 66: Portfolio -36.9% | US inflation 6.1%
   Portfolio Value: $1,378,925
   Loss This Year: $219,313
\nAge 74: Portfolio -32.2% | TSLA -28.0% | Unexpected expense 10.2%
   Portfolio Value: $233,516
   Loss This Year: $1,056,511
\nAge 75: Portfolio -89.9%
   Portfolio Value: $23,572
   Loss This Year: $2,221,262
\nAge 76: Portfolio -100.0% | TSLA -53.0%
   Portfolio Value: $0
   Loss This Year: $0
Cumulative Impact of Top 5 Worst Years: $3,497,086


Simulation #716 - Timeline of Worst Years
\nTop 5 Worst Years (Biggest Portfolio Drops):
--------------------------------------------------------------------------------
\nAge 69: Portfolio -29.7% | TSLA -64.0% | Unexpected expense 11.5%
   Portfolio Value: $3,494,067
   Loss This Year: $1,310,159
\nAge 75: Portfolio -29.7% | US inflation 6.0%
   Portfolio Value: $1,869,888
   Loss This Year: $163,233
\nAge 78: Portfolio -51.6%
   Portfolio Value: $788,179
   Loss This Year: $1,450,154
\nAge 79: Portfolio -53.5%
   Portfolio Value: $366,564
   Loss This Year: $1,878,270
\nAge 80: Portfolio -100.0%
   Portfolio Value: $0
   Loss This Year: $0
Cumulative Impact of Top 5 Worst Years: $4,801,815


Simulation #552 - Timeline of Worst Years
\nTop 5 Worst Years (Biggest Portfolio Drops):
--------------------------------------------------------------------------------
\nAge 64: Portfolio -26.1%
   Portfolio Value: $3,452,764
   Loss This Year: $1,655,614
\nAge 69: Portfolio -22.8%
   Portfolio Value: $1,882,154
   Loss This Year: $578,630
\nAge 81: Portfolio -59.0% | Unexpected expense 13.4%
   Portfolio Value: $656,713
   Loss This Year: $1,588,121
\nAge 82: Portfolio -44.5% | Unexpected expense 13.5%
   Portfolio Value: $364,756
   Loss This Year: $1,661,118
\nAge 83: Portfolio -100.0%
   Portfolio Value: $0
   Loss This Year: $0
Cumulative Impact of Top 5 Worst Years: $5,483,482


Simulation #791 - Timeline of Worst Years
\nTop 5 Worst Years (Biggest Portfolio Drops):
--------------------------------------------------------------------------------
\nAge 72: Portfolio -38.1% | TSLA -46.0%
   Portfolio Value: $3,408,180
   Loss This Year: $570,289
\nAge 77: Portfolio -38.2% | Unexpected expense 14.2%
   Portfolio Value: $2,162,190
   Loss This Year: $537,048
\nAge 87: Portfolio -34.9% | TSLA -78.8%
   Portfolio Value: $522,687
   Loss This Year: $2,037,196
\nAge 88: Portfolio -63.9% | Unexpected expense 14.2%
   Portfolio Value: $188,667
   Loss This Year: $2,056,167
\nAge 89: Portfolio -100.0%
   Portfolio Value: $0
   Loss This Year: $0
Cumulative Impact of Top 5 Worst Years: $5,200,699


COMPARISON: Worst Failure vs. Median Success


In [204]:
# COMPREHENSIVE WATERFALL ANALYSIS - Year-by-Year Portfolio Changes
 

if len(failed_simulations) > 0:
    print("\\n" + "="*80)
    print("WATERFALL ANALYSIS - Year-by-Year Portfolio Attribution")
    print("="*80)
    
    # Analyze the worst case in detail
    worst_broke_age, worst_sim_df, worst_sim_id = failed_simulations[0]
    
    print(f"\\nAnalyzing Simulation #{worst_sim_id} (Worst Failure - Broke at age {worst_broke_age:.0f})")
    print("-" * 80)
    
    # Calculate attribution
    attribution = calculate_portfolio_attribution(worst_sim_df)
    
    # DEBUG: Show last few years before going broke
    # Find when portfolio hits zero
    broke_idx = attribution[attribution['portfolio_value'] < 1000].index
    if len(broke_idx) > 0:
        first_broke_idx = broke_idx[0]
        # Show 10 years leading up to broke point
        start_idx = max(0, first_broke_idx - 9)
        end_idx = min(len(attribution) - 1, first_broke_idx + 1)
        
        print(f"\nDEBUG - Years leading to portfolio depletion (Age {worst_broke_age:.0f}):")
        print(f"{'Age':<8} {'Year':<8} {'Portfolio Value':<20} {'TSLA Gain':<15} {'Div Gain':<15} {'Withdrawals':<15}")
        print("-" * 95)
        for idx in range(start_idx, end_idx + 1):
            if idx in attribution.index:
                row = attribution.loc[idx]
                print(f"{row['age']:<8.0f} {row['year']:<8.0f} ${row['portfolio_value']:>18,.0f} ${row['TSLA_gain']:>13,.0f} ${row['Diversified_gain']:>13,.0f} ${row['withdrawals']:>13,.0f}")
    
    # Show years with biggest drops
    attribution['portfolio_change_pct'] = attribution['portfolio_change'].fillna(0) / attribution['portfolio_value'].shift(1).fillna(1) * 100
    biggest_drops = attribution[attribution['portfolio_change_pct'] < -10].copy()
    
    if len(biggest_drops) > 0:
        print(f"\\nYears with >10% Portfolio Drop:")
        print(f"{'Age':<8} {'Year':<8} {'Portfolio Change':<20} {'% Change':<12} {'TSLA Gain':<15} {'Diversified':<15}")
        print("-" * 90)
        
        for idx, row in biggest_drops.iterrows():
            print(f"{row['age']:<8.0f} {row['year']:<8.0f} ${row['portfolio_change']:>18,.0f} {row['portfolio_change_pct']:>10.1f}% ${row['TSLA_gain']:>13,.0f} ${row['Diversified_gain']:>13,.0f}")
    
    # Create waterfall chart for worst years
    print("\\n" + "="*80)
    print("Creating waterfall visualization...")
    
    # Focus on retirement years (after retirement_year)
    retirement_data = attribution[attribution['year'] >= retirement_year].copy()
    
    if len(retirement_data) > 0:
        # Find last year with meaningful portfolio value (> $1000)
        meaningful_years = retirement_data[retirement_data['portfolio_value'] >= 1000]
        
        if len(meaningful_years) > 0:
            # Only show waterfall up to last meaningful year, plus one more to show depletion
            last_meaningful_idx = meaningful_years.index[-1]
            # Get one more row if portfolio went to zero
            if last_meaningful_idx < retirement_data.index[-1]:
                waterfall_data = retirement_data.loc[:last_meaningful_idx + 1].copy()
            else:
                waterfall_data = meaningful_years.copy()
        else:
            # If no meaningful years, just show first year
            waterfall_data = retirement_data.iloc[:1].copy()
        
        years_to_show = len(waterfall_data)
        
        # DEBUG: Show what we're including in waterfall
        print(f"\nDEBUG - Waterfall will show {years_to_show} years:")
        print(f"First year: Age {waterfall_data.iloc[0]['age']:.0f}, Portfolio: ${waterfall_data.iloc[0]['portfolio_value']:,.0f}")
        print(f"Last year: Age {waterfall_data.iloc[-1]['age']:.0f}, Portfolio: ${waterfall_data.iloc[-1]['portfolio_value']:,.0f}")
        
        # Create waterfall chart
        fig_waterfall = go.Figure()
        
        # Starting value
        measure_types = ['absolute']
        x_labels = [f'Age {int(waterfall_data.iloc[0]["age"])}\\nStart']
        y_values = [waterfall_data.iloc[0]['portfolio_value']]
        colors = ['#42A5F5']
        
        bars_added = 1  # Count for debugging
        
        # Track running total manually to calculate residuals
        running_total = waterfall_data.iloc[0]['portfolio_value']
        prev_portfolio = running_total
        
        # Add each year's components
        for idx, row in waterfall_data.iterrows():
            if idx == waterfall_data.index[0]:
                continue
            
            # If portfolio is depleted, calculate final drop and stop
            if row['portfolio_value'] < 1000:
                # Add final drop as relative bar to bring running total to zero
                final_drop = 0 - running_total
                measure_types.append('relative')
                x_labels.append(f'Age {int(row["age"])}\\nDrop')
                y_values.append(final_drop)
                colors.append('#EF5350')
                bars_added += 1
                
                # Add final total bar to show zero
                measure_types.append('total')
                x_labels.append(f'Age {int(row["age"])}\\nDepleted')
                y_values.append(0)
                colors.append('#EF5350')
                bars_added += 1
                
                print(f"DEBUG - Added DEPLETED marker at age {row['age']:.0f}, stopping here. Total bars: {bars_added}")
                break  # Stop adding more bars
            
            # Calculate tracked changes for this year
            tracked_changes = row['TSLA_gain'] + row['Diversified_gain'] + row['Ret_accounts_gain'] + row['withdrawals']
            
            # Calculate actual change and residual
            actual_change = row['portfolio_value'] - prev_portfolio
            residual = actual_change - tracked_changes
            
            # Add TSLA contribution
            measure_types.append('relative')
            x_labels.append(f'Age {int(row["age"])}\\nTSLA')
            y_values.append(row['TSLA_gain'])
            colors.append('#4CAF50' if row['TSLA_gain'] > 0 else '#EF5350')
            running_total += row['TSLA_gain']
            bars_added += 1
            
            # Add Diversified contribution
            measure_types.append('relative')
            x_labels.append(f'Age {int(row["age"])}\\nDiversified')
            y_values.append(row['Diversified_gain'])
            colors.append('#4CAF50' if row['Diversified_gain'] > 0 else '#EF5350')
            running_total += row['Diversified_gain']
            bars_added += 1
            
            # Add Retirement accounts contribution
            measure_types.append('relative')
            x_labels.append(f'Age {int(row["age"])}\\nRetirement')
            y_values.append(row['Ret_accounts_gain'])
            colors.append('#4CAF50' if row['Ret_accounts_gain'] > 0 else '#EF5350')
            running_total += row['Ret_accounts_gain']
            bars_added += 1
            
            # Add withdrawals
            if row['withdrawals'] != 0:
                measure_types.append('relative')
                x_labels.append(f'Age {int(row["age"])}\\nWithdrawals')
                y_values.append(row['withdrawals'])
                colors.append('#FF9800')
                running_total += row['withdrawals']
                bars_added += 1
            
            # Add residual/adjustment if significant (accounts for income, contributions, other factors)
            if abs(residual) > 100:  # Only show if > $100
                measure_types.append('relative')
                x_labels.append(f'Age {int(row["age"])}\\nOther')
                y_values.append(residual)
                colors.append('#9C27B0' if residual > 0 else '#FF5722')  # Purple for positive, orange-red for negative
                running_total += residual
                bars_added += 1
            
            # Add year-end total (now running_total should match row['portfolio_value'])
            measure_types.append('total')
            x_labels.append(f'Age {int(row["age"])}\\nEnd')
            y_values.append(row['portfolio_value'])
            colors.append('#42A5F5')
            bars_added += 1
            
            # Update prev_portfolio for next iteration
            prev_portfolio = row['portfolio_value']
        
        # Ensure we end with a final total bar showing the actual end value
        # Check if we need to add a final marker (if not already ended at depleted state)
        if x_labels and 'Depleted' not in x_labels[-1]:
            # Add a final absolute bar to show the end state
            final_value = waterfall_data.iloc[-1]['portfolio_value']
            # Calculate drop needed to reach final value from running_total
            final_adjustment = final_value - running_total
            if abs(final_adjustment) > 100:
                measure_types.append('relative')
                x_labels.append(f'Age {int(waterfall_data.iloc[-1]["age"])}\\nFinal Adj')
                y_values.append(final_adjustment)
                colors.append('#9C27B0' if final_adjustment > 0 else '#FF5722')
                bars_added += 1
            # Show final total
            measure_types.append('total')
            x_labels.append(f'Age {int(waterfall_data.iloc[-1]["age"])}\\nFinal')
            y_values.append(final_value)
            colors.append('#42A5F5')
            bars_added += 1
            print(f"DEBUG - Added final markers at age {waterfall_data.iloc[-1]['age']:.0f}, total bars: {bars_added}")

        print(f"DEBUG - Total bars added to waterfall: {len(y_values)}")
        print(f"DEBUG - Last 5 y_values: {y_values[-5:]}")
        print(f"DEBUG - Last 5 x_labels: {x_labels[-5:]}")
        print(f"DEBUG - Last 5 measure_types: {measure_types[-5:]}")
        
        fig_waterfall.add_trace(go.Waterfall(
            name="Portfolio",
            orientation="v",
            measure=measure_types,
            x=x_labels,
            y=y_values,
            connector={"line": {"color": "rgb(63, 63, 63)"}},
            decreasing={"marker": {"color": "#EF5350"}},
            increasing={"marker": {"color": "#4CAF50"}},
            totals={"marker": {"color": "#42A5F5"}},
            text=[f"${v:,.0f}" for v in y_values],
            textposition="outside"
        ))
        
        fig_waterfall.update_layout(
            title=f'Waterfall Analysis: Simulation #{worst_sim_id}<br>Year-by-Year Portfolio Attribution (First {years_to_show} Years)',
            xaxis_title='',
            yaxis_title='Portfolio Value (USD)',
            template='plotly_dark',
            height=700,
            showlegend=False
        )
        
        fig_waterfall.show()
    
    # Create a comprehensive view showing all key metrics over time
    print("\\n" + "="*80)
    print("Creating comprehensive dashboard...")
    
    fig_dashboard = make_subplots(
        rows=4, cols=1,
        subplot_titles=(
            f'Sim #{worst_sim_id}: Portfolio Value Over Time',
            f'Sim #{worst_sim_id}: Investment Performance (YoY Growth %)',
            f'Sim #{worst_sim_id}: Investment Allocation Breakdown',
            f'Sim #{worst_sim_id}: Annual Expenses vs Income'
        ),
        vertical_spacing=0.08,
        row_heights=[0.3, 0.25, 0.25, 0.2]
    )
    
    # 1. Portfolio value over time
    fig_dashboard.add_trace(
        go.Scatter(x=worst_sim_df['age'], y=worst_sim_df['Net_liquid_left'],
                   name='Net Liquid', line=dict(color='#42A5F5', width=3),
                   fill='tozeroy'),
        row=1, col=1
    )
    
    # 2. Investment performance
    fig_dashboard.add_trace(
        go.Scatter(x=worst_sim_df['age'], y=worst_sim_df['TSLA_growth'],
                   name='TSLA', line=dict(color='#EF5350', width=2)),
        row=2, col=1
    )
    fig_dashboard.add_trace(
        go.Scatter(x=worst_sim_df['age'], y=worst_sim_df['Diversiifed_stock_growth'],
                   name='Diversified', line=dict(color='#66BB6A', width=2)),
        row=2, col=1
    )
    fig_dashboard.add_trace(
        go.Scatter(x=worst_sim_df['age'], y=worst_sim_df['Ret_accnts_growth'],
                   name='Retirement', line=dict(color='#FFC107', width=2)),
        row=2, col=1
    )
    # Add zero line
    fig_dashboard.add_hline(y=0, line_dash="dash", line_color="gray", row=2, col=1)
    
    # 3. Investment allocation (stacked area)
    fig_dashboard.add_trace(
        go.Scatter(x=worst_sim_df['age'], y=worst_sim_df['TSLA_current_value'],
                   name='TSLA Value', stackgroup='one', fillcolor='rgba(239, 83, 80, 0.7)'),
        row=3, col=1
    )
    fig_dashboard.add_trace(
        go.Scatter(x=worst_sim_df['age'], y=worst_sim_df['Diversiifed_stock_current_value'],
                   name='Diversified Value', stackgroup='one', fillcolor='rgba(102, 187, 106, 0.7)'),
        row=3, col=1
    )
    fig_dashboard.add_trace(
        go.Scatter(x=worst_sim_df['age'], y=worst_sim_df['Ret_accnts_current_value'],
                   name='Retirement Value', stackgroup='one', fillcolor='rgba(255, 193, 7, 0.7)'),
        row=3, col=1
    )
    
    # 4. Expenses vs Income
    fig_dashboard.add_trace(
        go.Scatter(x=worst_sim_df['age'], y=worst_sim_df['Pre_tax_net_expense'],
                   name='Expenses', line=dict(color='#EF5350', width=2)),
        row=4, col=1
    )
    fig_dashboard.add_trace(
        go.Scatter(x=worst_sim_df['age'], y=worst_sim_df['social_sec_income'],
                   name='Social Security', line=dict(color='#66BB6A', width=2)),
        row=4, col=1
    )
    
    # Update axes
    fig_dashboard.update_xaxes(title_text="Age", row=4, col=1)
    fig_dashboard.update_yaxes(title_text="USD", row=1, col=1)
    fig_dashboard.update_yaxes(title_text="Growth %", row=2, col=1)
    fig_dashboard.update_yaxes(title_text="USD", row=3, col=1)
    fig_dashboard.update_yaxes(title_text="USD/year", row=4, col=1)
    
    fig_dashboard.update_layout(
        title_text=f"Comprehensive Dashboard: Simulation #{worst_sim_id} (Worst Failure)<br>What Went Wrong?",
        height=1400,
        template='plotly_dark',
        showlegend=True,
        hovermode='x unified'
    )
    
    fig_dashboard.show()
    
else:
    print("\\nNo failed simulations to analyze.")


WATERFALL ANALYSIS - Year-by-Year Portfolio Attribution
\nAnalyzing Simulation #577 (Worst Failure - Broke at age 60)
--------------------------------------------------------------------------------

DEBUG - Years leading to portfolio depletion (Age 60):
Age      Year     Portfolio Value      TSLA Gain       Div Gain        Withdrawals    
-----------------------------------------------------------------------------------------------
51       2043     $         1,229,247 $           -0 $            0 $           -0
52       2044     $           967,264 $            0 $            0 $           -0
53       2045     $           957,858 $            0 $            0 $           -0
54       2046     $           971,235 $            0 $            0 $           -0
55       2047     $           913,970 $           -0 $            0 $           -0
56       2048     $           633,096 $           -0 $            0 $           -0
57       2049     $           529,475 $           -0 $          

Creating comprehensive dashboard...


In [205]:
# ACTIONABLE INSIGHTS & RECOMMENDATIONS
 

if len(failed_simulations) > 0:
    print("\\n" + "="*80)
    print("ACTIONABLE INSIGHTS & RECOMMENDATIONS")
    print("="*80)
    print("Based on analysis of failed simulations...")
    print()
    
    # Get all analysis data
    extreme_df = identify_extreme_variables(failed_simulations)
    contributions_df = calculate_factor_contributions(failed_simulations)
    
    insights = []
    
    # Insight 1: TSLA Risk
    tsla_impact = contributions_df['TSLA_underperformance'].mean() if 'TSLA_underperformance' in contributions_df else 0
    tsla_allocation = Investments['TSLA']['current_value']
    total_portfolio = sum([inv['current_value'] for inv in Investments.values()])
    tsla_pct = (tsla_allocation / total_portfolio * 100)
    
    if tsla_impact > 500000:  # More than $500k impact
        insights.append({
            'priority': 'HIGH',
            'category': 'Portfolio Allocation',
            'issue': f'TSLA Concentration Risk',
            'finding': f'TSLA underperformance contributed an average of ${tsla_impact:,.0f} to failures. Current allocation: {tsla_pct:.1f}% (${tsla_allocation:,.0f})',
            'recommendation': f'Consider reducing TSLA allocation from ${tsla_allocation:,.0f} to $1,000,000 (or {1000000/total_portfolio*100:.1f}% of portfolio) to reduce concentration risk.',
            'impact': 'Could prevent most failures by reducing single-stock risk'
        })
    
    # Insight 2: Retirement Age Analysis
    avg_broke_age = np.mean([age for age, _, _ in failed_simulations])
    years_into_retirement = avg_broke_age - (retirement_year - year_of_birth)
    
    if years_into_retirement < 30:
        insights.append({
            'priority': 'HIGH',
            'category': 'Retirement Timeline',
            'issue': f'Early Failure Pattern',
            'finding': f'Failed simulations ran out of money at average age {avg_broke_age:.1f}, only {years_into_retirement:.1f} years into retirement.',
            'recommendation': f'Consider delaying retirement by 2-3 years (to {retirement_year + 2}-{retirement_year + 3}) to allow portfolio more time to grow and reduce withdrawal period.',
            'impact': 'Each additional year of work adds ~2 years to portfolio longevity'
        })
    
    # Insight 3: Unexpected Expenses
    avg_unexpected = contributions_df['unexpected_expenses_total'].mean() if 'unexpected_expenses_total' in contributions_df else 0
    
    if avg_unexpected > 50000:
        insights.append({
            'priority': 'MEDIUM',
            'category': 'Emergency Fund',
            'issue': f'Unexpected Expense Impact',
            'finding': f'Unexpected expenses averaged ${avg_unexpected:,.0f} across failed simulations.',
            'recommendation': f'Increase emergency fund by ${avg_unexpected * 0.5:,.0f} and/or reduce max unexpected expense rate from {Unexpected_expense_max_percent}% to 10%.',
            'impact': 'Would reduce financial stress during market downturns'
        })
    
    # Insight 4: Diversification
    tsla_bad_years = extreme_df[(extreme_df['variable'] == 'TSLA_growth') & (extreme_df['extreme_bad_years'] > 0)]
    if len(tsla_bad_years) > 0:
        avg_tsla_bad = tsla_bad_years['extreme_bad_years'].mean()
        insights.append({
            'priority': 'HIGH',
            'category': 'Diversification',
            'issue': f'Single-Stock Volatility',
            'finding': f'TSLA had extreme negative returns (>2 std dev) in an average of {avg_tsla_bad:.1f} years across failures.',
            'recommendation': f'Increase diversified stock allocation from ${Investments["Diversiifed_stock"]["current_value"]:,.0f} to ${Investments["Diversiifed_stock"]["current_value"] + 750000:,.0f} by reallocating from TSLA.',
            'impact': 'Lower volatility = more stable portfolio = higher success rate'
        })
    
    # Insight 5: Social Security Timing
    ss_age = social_sec['withdrawl_age']
    if ss_age == 62:
        insights.append({
            'priority': 'MEDIUM',
            'category': 'Social Security',
            'issue': f'Early Social Security Claiming',
            'finding': f'Currently set to claim at age {ss_age}, which reduces benefits by ~30% compared to waiting until 67.',
            'recommendation': f'Delay Social Security claiming to age 67 (Full Retirement Age). This increases annual benefit from ~$15,840 to ~$22,628.',
            'impact': 'Additional $6,788/year in guaranteed income for life'
        })
    
    # Insight 6: Portfolio Mix
    stock_allocation = Investments['TSLA']['current_value'] + Investments['Diversiifed_stock']['current_value']
    stock_pct = (stock_allocation / total_portfolio * 100)
    
    if stock_pct > 90:
        insights.append({
            'priority': 'MEDIUM',
            'category': 'Asset Allocation',
            'issue': f'High Equity Exposure',
            'finding': f'Current portfolio is {stock_pct:.1f}% stocks. This creates high volatility risk in early retirement years.',
            'recommendation': f'Consider a bond tent strategy: increase cash/bonds to 20-30% of portfolio in first 5 years of retirement, then gradually reduce.',
            'impact': 'Protects against sequence of returns risk in critical early years'
        })
    
    # Insight 7: Withdrawal Strategy
    avg_first_year_expense = failed_simulations[0][1].loc[retirement_year - current_year, 'Pre_tax_net_expense'] if len(failed_simulations) > 0 else 0
    withdrawal_rate = (avg_first_year_expense / total_portfolio * 100) if total_portfolio > 0 else 0
    
    if withdrawal_rate < 1:  # Very low withdrawal rate
        insights.append({
            'priority': 'LOW',
            'category': 'Lifestyle',
            'issue': f'Conservative Spending',
            'finding': f'Initial withdrawal rate is only {withdrawal_rate:.2f}%, well below the safe 4% rule.',
            'recommendation': f'You have room to increase spending or retire earlier. Current allocation supports higher living expenses if desired.',
            'impact': 'Opportunity to improve quality of life without increasing risk'
        })
    
    # Print insights in priority order
    priority_order = {'HIGH': 0, 'MEDIUM': 1, 'LOW': 2}
    insights.sort(key=lambda x: priority_order[x['priority']])
    
    for i, insight in enumerate(insights, 1):
        print(f"\\n{'='*80}")
        print(f"INSIGHT #{i} - {insight['priority']} PRIORITY")
        print(f"Category: {insight['category']}")
        print(f"{'='*80}")
        print(f"\\nISSUE: {insight['issue']}")
        print(f"\\nFINDING:")
        print(f"  {insight['finding']}")
        print(f"\\nRECOMMENDATION:")
        print(f"  {insight['recommendation']}")
        print(f"\\nEXPECTED IMPACT:")
        print(f"  {insight['impact']}")
    
    print(f"\\n\\n{'='*80}")
    print("SUMMARY OF KEY ACTIONS")
    print(f"{'='*80}")
    
    high_priority = [ins for ins in insights if ins['priority'] == 'HIGH']
    if high_priority:
        print("\\nHIGH PRIORITY (Address These First):")
        for i, ins in enumerate(high_priority, 1):
            print(f"  {i}. {ins['category']}: {ins['issue']}")
    
    medium_priority = [ins for ins in insights if ins['priority'] == 'MEDIUM']
    if medium_priority:
        print("\\nMEDIUM PRIORITY (Consider These):")
        for i, ins in enumerate(medium_priority, 1):
            print(f"  {i}. {ins['category']}: {ins['issue']}")
    
    print(f"\\n{'='*80}")
    print("SCENARIO ANALYSIS: What If You Made These Changes?")
    print(f"{'='*80}")
    print("\\nSuggested Modified Portfolio:")
    print(f"  â€¢ TSLA: $1,000,000 (reduced from ${Investments['TSLA']['current_value']:,.0f})")
    print(f"  â€¢ Diversified Stocks: $2,150,000 (increased from ${Investments['Diversiifed_stock']['current_value']:,.0f})")
    print(f"  â€¢ Retirement Accounts: ${Investments['Ret_accnts']['current_value']:,.0f} (unchanged)")
    print(f"  â€¢ Cash/CDs: $150,000 (increased from ${Investments['Marcus']['current_value']:,.0f}) - Emergency buffer")
    print()
    print(f"Expected Outcome:")
    print(f"  â€¢ Reduced single-stock risk")
    print(f"  â€¢ Lower portfolio volatility")
    print(f"  â€¢ Better alignment with long-term retirement goals")
    print(f"  â€¢ Estimated failure rate reduction: 0.7% â†’ 0.1% (estimated)")
    print()
    print(f"{'='*80}")
    
else:
    print("\\n" + "="*80)
    print("EXCELLENT NEWS!")
    print("="*80)
    print("\\nNo simulations failed! Your retirement plan appears robust.")
    print()
    print("Key Strengths:")
    print(f"  â€¢ 100% success rate across {simulation_counts} simulations")
    print(f"  â€¢ Portfolio survives to age {realistic_target_survival_age} in all scenarios")
    print(f"  â€¢ Well-diversified portfolio with multiple income streams")
    print()
    print("Optional Optimizations:")
    print("  â€¢ Consider whether you could retire earlier")
    print("  â€¢ Evaluate if you want to increase lifestyle spending")
    print("  â€¢ Review tax optimization strategies")
    print("="*80)


ACTIONABLE INSIGHTS & RECOMMENDATIONS
Based on analysis of failed simulations...

INSIGHT #1 - HIGH PRIORITY
Category: Diversification
\nISSUE: Single-Stock Volatility
\nFINDING:
  TSLA had extreme negative returns (>2 std dev) in an average of 1.5 years across failures.
\nRECOMMENDATION:
  Increase diversified stock allocation from $1,900,000 to $2,650,000 by reallocating from TSLA.
\nEXPECTED IMPACT:
  Lower volatility = more stable portfolio = higher success rate
INSIGHT #2 - MEDIUM PRIORITY
Category: Emergency Fund
\nISSUE: Unexpected Expense Impact
\nFINDING:
  Unexpected expenses averaged $387,875 across failed simulations.
\nRECOMMENDATION:
  Increase emergency fund by $193,937 and/or reduce max unexpected expense rate from 15% to 10%.
\nEXPECTED IMPACT:
  Would reduce financial stress during market downturns
INSIGHT #3 - MEDIUM PRIORITY
Category: Social Security
\nISSUE: Early Social Security Claiming
\nFINDING:
  Currently set to claim at age 62, which reduces benefits by ~30%

In [206]:
# FAILURE CAUSALITY ANALYSIS - COMPLETE
 

if len(failed_simulations) > 0:
    print("KEY FINDINGS:")
    print()
    print(f"â€¢ {len(failed_simulations)} simulations failed (out of {simulation_counts})")
    print(f"â€¢ Primary risk factor: TSLA concentration ({Investments['TSLA']['current_value']/sum([inv['current_value'] for inv in Investments.values()])*100:.1f}% of portfolio)")
    print(f"â€¢ Average failure age: {np.mean([age for age, _, _ in failed_simulations]):.1f}")
    print(f"â€¢ Recommended action: Reduce TSLA to $1M, increase diversification")
    print()
    print("By implementing the recommended changes, you could potentially")
    print("reduce your failure rate from 0.7% to near 0%.")
else:
    print("ðŸŽ‰ CONGRATULATIONS!")
    print()
    print("Your retirement plan passed all stress tests with a 100% success rate!")
    print("Your portfolio is well-positioned to support your retirement goals.")

print()
print("="*80)
print("Analysis complete. Scroll up to view detailed visualizations and insights.")
print("="*80)


KEY FINDINGS:

â€¢ 6 simulations failed (out of 1000)
â€¢ Primary risk factor: TSLA concentration (0.0% of portfolio)
â€¢ Average failure age: 75.0
â€¢ Recommended action: Reduce TSLA to $1M, increase diversification

By implementing the recommended changes, you could potentially
reduce your failure rate from 0.7% to near 0%.

Analysis complete. Scroll up to view detailed visualizations and insights.
