In [0]:
%pip install --upgrade --force-reinstall numpy==1.26.0
# Restart the Python process after the forced reinstall; presumably this is so the
# pinned numpy build is the one that later cells import — TODO confirm.
# NOTE(review): `dbutils` is not defined anywhere in this notebook; it looks like the
# Databricks-provided helper, so this cell is expected to fail outside that environment.
dbutils.library.restartPython()

In [0]:
%pip install --upgrade --force-reinstall cvxpy==1.5.2
# Restart the Python process after the forced reinstall; presumably this is so the
# pinned cvxpy build is the one that later cells import — TODO confirm.
# NOTE(review): --force-reinstall also reinstalls cvxpy's dependencies; verify that the
# numpy pin from the previous cell survives (the version cell below displays it).
dbutils.library.restartPython()

In [0]:
!pip install pandas yfinance matplotlib

In [0]:
# Show which interpreter and which numpy / cvxpy builds this kernel actually loaded.
import sys
display(dict(python_version=sys.version))

import numpy
display(dict(numpy_version=numpy.__version__))

import cvxpy
display(dict(cvxpy_version=cvxpy.__version__))

# task A

In [0]:
# Import required libraries
import numpy as np
import pandas as pd
import yfinance as yf
import cvxpy as cp
from datetime import datetime
import matplotlib.pyplot as plt
import matplotlib.dates as mdates

# 1. Data Collection Function
def fetch_stock_data(tickers, start_date, end_date):
    """Download closing prices and one market-cap figure per ticker from Yahoo Finance.

    Args:
        tickers: Iterable of ticker symbols passed to ``yf.download`` / ``yf.Ticker``.
        start_date: Start of the price history, forwarded to ``yf.download``.
        end_date: End of the price history, forwarded to ``yf.download``.

    Returns:
        ``(price_data, market_caps)`` where ``price_data`` is the ``'Close'`` slice of
        the download and ``market_caps`` is a ``pd.Series`` keyed by ticker. A ticker
        whose lookup raises, or whose info has no ``'marketCap'`` entry, maps to NaN.

    NOTE(review): the cap is whatever ``Ticker.info`` reports when the cell runs, not a
    value as of ``start_date`` — confirm that is acceptable for initial weights.
    """
    print(f"Downloading price data for {len(tickers)} stocks...")
    closes = yf.download(tickers, start=start_date, end=end_date)['Close']

    print("Fetching market cap data...")
    caps_by_ticker = {}
    for symbol in tickers:
        try:
            cap = yf.Ticker(symbol).info.get('marketCap', np.nan)
        except Exception as e:
            # Best effort: one failed lookup must not abort the whole download.
            print(f"Couldn't get market cap for {symbol}: {str(e)}")
            cap = np.nan
        caps_by_ticker[symbol] = cap

    return closes, pd.Series(caps_by_ticker)

# 2. CVaR Calculation
def calculate_cvar(returns, alpha=0.95):
    """Historical CVaR: the mean of all returns at or below the (1 - alpha) percentile.

    Args:
        returns: Array-like of returns that supports boolean-mask indexing and ``.mean()``.
        alpha: Confidence level; the tail cut-off is the ``100 * (1 - alpha)`` percentile.

    Returns:
        The average of the observations in the lower tail, cut-off value included.
    """
    tail_percent = 100 * (1 - alpha)
    cutoff = np.percentile(returns, tail_percent)
    in_tail = returns <= cutoff
    return returns[in_tail].mean()

# 3. Portfolio Optimization
def optimize_cvar_portfolio(returns, alpha=0.95, max_weight=0.050, min_weight=0.001):
    """Find long-only weights that minimise the sample CVaR of the portfolio loss.

    The objective is ``tau + mean(pos(loss - tau)) / (1 - alpha)`` with
    ``loss = -(returns @ weights)``, subject to the weights summing to 1 within
    +/- 0.01 and every weight lying in ``[min_weight, max_weight]``.

    Args:
        returns: DataFrame of asset returns, one column per asset. Rows containing
            NaN are dropped before optimising.
        alpha: CVaR confidence level.
        max_weight: Upper bound applied to every weight.
        min_weight: Lower bound applied to every weight.

    Returns:
        Array of weights clipped at zero and rescaled to sum to exactly 1, or ``None``
        when fewer than 60 clean rows remain or neither solver reaches an optimal
        (possibly inaccurate) status.
    """
    n_assets = returns.shape[1]
    clean = returns.dropna()

    if len(clean) < 60:
        print("Insufficient data for optimization")
        return None

    w = cp.Variable(n_assets)
    var_level = cp.Variable()
    loss = -(clean.values @ w)
    objective = var_level + (1/(1-alpha)) * cp.mean(cp.pos(loss - var_level))

    budget_and_bounds = [
        cp.sum(w) <= 1 + 1e-2,
        cp.sum(w) >= 1 - 1e-2,
        w >= min_weight,
        w <= max_weight
    ]

    problem = cp.Problem(cp.Minimize(objective), budget_and_bounds)

    def _solve_with(solver):
        """Solve with one backend; return normalised weights, or None if not optimal."""
        problem.solve(solver=solver)
        if problem.status not in [cp.OPTIMAL, cp.OPTIMAL_INACCURATE]:
            return None
        clipped = np.maximum(w.value, 0)
        return clipped / clipped.sum()

    # First choice: Clarabel
    try:
        solution = _solve_with(cp.CLARABEL)
        if solution is not None:
            return solution
    except Exception as e:
        print(f"Clarabel failed: {str(e)}")

    # Second choice: ECOS
    try:
        solution = _solve_with(cp.ECOS)
        if solution is not None:
            return solution
    except Exception as e:
        print(f"ECOS failed: {str(e)}")

    # Neither solver produced a usable solution.
    return None

# 4. Backtesting Engine
def _rebalance_trading_days(trading_days, rebalance_freq):
    """Return the set of trading days on which a rebalance is due.

    For every ``{rebalance_freq}-DEC`` period whose calendar end is not later than the
    last trading day, the rebalance day is the last trading day inside that period.
    A period end that falls on a weekend or holiday therefore still triggers a
    rebalance instead of being skipped.
    """
    if len(trading_days) == 0:
        return set()

    anchored = f'{rebalance_freq}-DEC'
    try:
        # Calendar end (midnight) of the period each trading day belongs to.
        calendar_end = trading_days.to_period(anchored).end_time.normalize()
    except (ValueError, TypeError):
        # Offset aliases with no Period equivalent: build the calendar grid and snap
        # each grid date back to the latest trading day on or before it.
        grid = pd.date_range(trading_days[0], trading_days[-1], freq=anchored)
        pos = trading_days.searchsorted(grid, side='right') - 1
        return set(trading_days[np.unique(pos[pos >= 0])])

    last_day = pd.Series(trading_days.values, index=calendar_end).groupby(level=0).max()
    # Only completed periods count; the trailing partial period has no period end yet.
    complete = last_day.index <= trading_days[-1]
    return set(last_day[complete])


def run_detailed_backtest(price_data, market_caps, rebalance_freq='Q', transaction_cost=0.001,
                          verbose=True):
    """Backtest cap-weighted, equal-weighted and CVaR-optimised portfolios.

    All three portfolios start at 1.0 at the close of the first day that has a return,
    so the first output row is the second return date. The cap-weighted and
    equal-weighted portfolios are never rebalanced and drift with prices. The CVaR
    portfolio holds its target weights constant between rebalances and is
    re-optimised on rebalance days from up to 252 trailing daily returns.

    Args:
        price_data: DataFrame of prices, one column per ticker, indexed by date.
            Gaps are forward- then backward-filled before returns are computed.
        market_caps: Market caps per ticker. When its index shares labels with the
            price columns it is aligned by label, otherwise it is used positionally.
            Missing caps receive an equal-weight share before renormalising.
        rebalance_freq: Period alias combined with ``-DEC`` (default quarterly).
        transaction_cost: Proportional cost per unit of turnover, charged on the CVaR
            portfolio on the day it rebalances.
        verbose: Print the per-day returns and the rebalance outcome messages.

    Returns:
        ``(results_df, weights_df)``. ``results_df`` has ``Date``, ``CW_Value``,
        ``EW_Value``, ``CVaR_Value`` and ``Rebalance_Flag``. ``weights_df`` has
        ``Date`` plus ``<ticker>_CW`` / ``<ticker>_CVaR`` columns; on a rebalance day
        the CVaR columns already show the new weights.
    """
    # Data preparation
    price_data = price_data.ffill().bfill()
    returns = price_data.pct_change().dropna()
    dates = returns.index
    tickers = price_data.columns.tolist()
    n_stocks = len(tickers)

    rebalance_dates = _rebalance_trading_days(dates, rebalance_freq)

    # Cap weights: align by ticker so the column order of the download cannot silently
    # attach one company's cap to another.
    caps = pd.Series(market_caps)
    if caps.index.isin(tickers).any():
        caps = caps.reindex(tickers)
    cw_weights = (caps / caps.sum()).fillna(1 / n_stocks).to_numpy(dtype=float)
    cw_weights = cw_weights / cw_weights.sum()  # exact sum to 1 after the fill

    ew_weights = np.ones(n_stocks) / n_stocks
    cvar_weights = ew_weights.copy()  # CVaR starts equal-weighted

    portfolio_values = {'CW': [1.0], 'EW': [1.0], 'CVaR': [1.0]}
    weight_history = []
    rebalance_flags = []

    for i in range(1, len(dates)):
        date = dates[i]
        # Take the return from `returns` itself: `dates` is its index, so row i is the
        # return realised on `date`. (price_data has one extra leading row, so indexing
        # prices with i would be one day stale.)
        day_returns = returns.iloc[i].to_numpy(dtype=float)
        growth = 1.0 + day_returns

        cw_return = float(np.dot(cw_weights, day_returns))
        ew_return = float(np.dot(ew_weights, day_returns))
        cvar_return = float(np.dot(cvar_weights, day_returns))

        if verbose:
            print(f"date: {date.strftime('%Y-%m-%d')} cw_return: {cw_return:.4f} ew_return: {ew_return:.4f} cvar_return: {cvar_return:.4f}")

        # Rebalance at the close: today's return was earned with the old weights and
        # the new weights first apply tomorrow, so the lookback may include today.
        rebalance_flag = 0
        cost_factor = 1.0
        if date in rebalance_dates:
            lookback_returns = returns.loc[:date].iloc[-252:]  # 1 year lookback

            if len(lookback_returns) > 60:  # Sufficient data
                new_weights = optimize_cvar_portfolio(lookback_returns)

                if new_weights is not None:
                    rebalance_flag = 1
                    turnover = np.sum(np.abs(new_weights - cvar_weights))
                    cost_factor = 1 - transaction_cost * turnover
                    cvar_weights = new_weights
                    if verbose:
                        print(f"Successful rebalance on {date.strftime('%Y-%m-%d')}")
                elif verbose:
                    print(f"Optimization failed on {date.strftime('%Y-%m-%d')} - maintaining current weights")

        # The cost is charged on the rebalance day itself, not written back into the
        # previous day's already-recorded value.
        portfolio_values['CW'].append(portfolio_values['CW'][-1] * (1 + cw_return))
        portfolio_values['EW'].append(portfolio_values['EW'][-1] * (1 + ew_return))
        portfolio_values['CVaR'].append(portfolio_values['CVaR'][-1] * (1 + cvar_return) * cost_factor)

        weight_record = {'Date': date}
        for j, ticker in enumerate(tickers):
            weight_record[f"{ticker}_CW"] = cw_weights[j]
            weight_record[f"{ticker}_CVaR"] = cvar_weights[j]
        weight_history.append(weight_record)
        rebalance_flags.append(rebalance_flag)

        # Let the buy-and-hold portfolios drift; plain arrays keep the integer
        # indexing above positional.
        cw_weights = cw_weights * growth
        cw_weights = cw_weights / cw_weights.sum()
        ew_weights = ew_weights * growth
        ew_weights = ew_weights / ew_weights.sum()

    results_df = pd.DataFrame({
        'Date': dates[1:],
        'CW_Value': portfolio_values['CW'][1:],
        'EW_Value': portfolio_values['EW'][1:],
        'CVaR_Value': portfolio_values['CVaR'][1:],
        'Rebalance_Flag': rebalance_flags
    })
    weights_df = pd.DataFrame(weight_history)

    return results_df, weights_df

# 5. Performance Metrics
def calculate_performance_metrics(returns, risk_free_rate=0.0):
    """Summarise a daily return series with annualised risk/return statistics.

    Annualisation assumes 252 periods per year. The annual return compounds the mean
    daily return; the Sortino denominator is the standard deviation of the strictly
    negative returns only.

    Args:
        returns: pandas Series of periodic (daily) returns.
        risk_free_rate: Annual rate subtracted in the Sharpe and Sortino numerators.

    Returns:
        Dict with keys 'Annual Return', 'Annual Volatility', 'Sharpe Ratio',
        'Max Drawdown', '95% CVaR' and 'Sortino Ratio', in that order.
    """
    annual_return = (1 + returns.mean())**252 - 1
    annual_vol = returns.std() * np.sqrt(252)

    # Drawdown of the compounded wealth curve relative to its running peak.
    wealth = (1 + returns).cumprod()
    running_peak = wealth.expanding().max()
    worst_drawdown = ((wealth - running_peak) / running_peak).min()

    losses_only = returns[returns < 0]
    downside_dev = losses_only.std() * np.sqrt(252)

    excess = annual_return - risk_free_rate
    return {
        'Annual Return': annual_return,
        'Annual Volatility': annual_vol,
        'Sharpe Ratio': excess / annual_vol,
        'Max Drawdown': worst_drawdown,
        '95% CVaR': calculate_cvar(returns),
        'Sortino Ratio': excess / downside_dev,
    }

# Main execution
if __name__ == "__main__":
    # Configuration
    start_date = '2010-01-01'
    end_date = '2024-12-31'
    tickers = [
        'AAPL', 'ABBV', 'ABT', 'ACN', 'ADBE', 'AMD', 'AMZN', 'AVGO', 'AXP', 'BAC',
        'BLK', 'BMY', 'BRK-B', 'C', 'CMCSA', 'COST', 'CRM', 'CSCO', 'CVS', 'CVX',
        'DE', 'DHR', 'DIS', 'FDX', 'GILD', 'GOOG', 'GS', 'HON', 'IBM', 'INTC',
        'ISRG', 'JNJ', 'JPM', 'KO', 'LMT', 'MA', 'MCD', 'META', 'MRK', 'MS',
        'MSFT', 'NFLX', 'NOW', 'NVDA', 'ORCL', 'PEP', 'PFE', 'PG', 'PLTR', 'PM',
        'PYPL', 'QCOM', 'SCHW', 'T', 'TMUS', 'TSLA', 'TXN', 'V', 'VZ', 'WMT'
    ]

    # Output locations, declared once so the closing summary always names the files
    # that were really written.
    csv_path = '/Volumes/workspace/mixture/etoro_pi/taskA_portfolio_weights_history_and_result.csv'
    plot_path = '/Volumes/workspace/mixture/etoro_pi/taskA_portfolio_comparison.png'
    metrics_path = '/Volumes/workspace/mixture/etoro_pi/taskA_performance_metrics.txt'

    # 1. Fetch data
    price_data, market_caps = fetch_stock_data(tickers, start_date, end_date)

    # 2. Run backtest
    results_df, weights_df = run_detailed_backtest(price_data, market_caps)

    # 3. Verify initial period (only meaningful if at least one rebalance succeeded)
    rebalance_rows = results_df.index[results_df['Rebalance_Flag'] == 1]
    first_rebalance = rebalance_rows[0] if len(rebalance_rows) > 0 else None
    if first_rebalance is None:
        print("No successful rebalance in the backtest - skipping the pre-rebalance comparison")
    else:
        initial_period = results_df.iloc[:first_rebalance]
        print(f"Portfolios identical before first rebalance? {np.allclose(initial_period['EW_Value'], initial_period['CVaR_Value'], rtol=1e-02, atol=1e-04)}")

    # 4. Export results
    combined_df = results_df.merge(weights_df, on='Date', how='left')
    combined_df['Date'] = combined_df['Date'].dt.strftime('%Y-%m-%d')
    combined_df.to_csv(csv_path, index=False)

    # 5. Plot results
    plt.figure(figsize=(14, 7))

    plt.plot(results_df['Date'], results_df['CW_Value'], label='Cap Weighted', linewidth=2, color='green')
    plt.plot(results_df['Date'], results_df['EW_Value'], label='Equal Weighted', linewidth=2, color='blue')
    plt.plot(results_df['Date'], results_df['CVaR_Value'], label='CVaR Optimized', linewidth=2, color='orange')

    # Mark rebalance dates
    rebalance_dates = results_df[results_df['Rebalance_Flag'] == 1]['Date']
    for date in rebalance_dates:
        plt.axvline(date, color='gray', linestyle='--', alpha=0.3)

    # Highlight first rebalance
    if first_rebalance is not None:
        plt.axvline(results_df.loc[first_rebalance, 'Date'], color='red', linestyle=':',
                    label='First Rebalance')

    plt.title('Portfolio Performance Comparison', fontsize=16)
    plt.xlabel('Date', fontsize=12)
    plt.ylabel('Portfolio Value ($1 Initial)', fontsize=12)
    plt.legend(fontsize=12)
    plt.grid(True, linestyle='--', alpha=0.7)

    # Format x-axis
    plt.gca().xaxis.set_major_locator(mdates.YearLocator())
    plt.gca().xaxis.set_major_formatter(mdates.DateFormatter('%Y'))
    plt.gcf().autofmt_xdate()

    plt.tight_layout()
    plt.savefig(plot_path, dpi=300)
    plt.show()

    # 6. Calculate and display metrics
    print("\nPerformance Metrics:")
    print("="*50)

    # Daily returns implied by each portfolio value series
    cw_returns = results_df['CW_Value'].pct_change().dropna()
    ew_returns = results_df['EW_Value'].pct_change().dropna()
    cvar_returns = results_df['CVaR_Value'].pct_change().dropna()

    cw_metrics = calculate_performance_metrics(cw_returns)
    ew_metrics = calculate_performance_metrics(ew_returns)
    cvar_metrics = calculate_performance_metrics(cvar_returns)

    # One table drives both the console report and the saved text file.
    metrics_by_strategy = {
        "Cap Weighted Strategy": cw_metrics,
        "Equal Weighted Strategy": ew_metrics,
        "CVaR Optimized Strategy": cvar_metrics,
    }

    for strategy_name, strategy_metrics in metrics_by_strategy.items():
        print(f"\n{strategy_name}:")
        for k, v in strategy_metrics.items():
            print(f"{k:>20}: {v:.4f}")

    # Save metrics (sections separated by one blank line, none before the first)
    with open(metrics_path, 'w') as f:
        for position, (strategy_name, strategy_metrics) in enumerate(metrics_by_strategy.items()):
            if position > 0:
                f.write("\n")
            f.write(f"{strategy_name}:\n")
            for k, v in strategy_metrics.items():
                f.write(f"{k:>20}: {v:.4f}\n")

    print("\nAnalysis complete. Files saved:")
    for saved_path in (csv_path, plot_path, metrics_path):
        print(f"- {saved_path}")

# task B


In [0]:
!pip install hmmlearn shap

In [0]:
# Import required libraries
import numpy as np
import pandas as pd
import yfinance as yf
import cvxpy as cp
from datetime import datetime, timedelta
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
from sklearn.preprocessing import StandardScaler
from hmmlearn import hmm
import requests
import json
from sklearn.ensemble import RandomForestClassifier
import shap
from tqdm import tqdm

# 1. Enhanced Data Collection Function   
def fetch_economic_data(start_date, end_date):
    """Download macro series from FRED and return them on a daily calendar grid.

    Each series in ``series_ids`` is requested from the FRED observations endpoint.
    A series that fails to download or parse is reported and skipped. If ``VIXCLS``
    could not be obtained from FRED, ``^VIX`` closes from Yahoo Finance are used.

    Args:
        start_date: First observation date requested (also the start of the grid).
        end_date: Last observation date requested (also the end of the grid).

    Returns:
        DataFrame indexed by every calendar day from ``start_date`` to ``end_date``
        with one column per series that was obtained. Values are forward-filled;
        remaining leading gaps are back-filled.

    NOTE(review): the final back-fill copies the first known value into earlier
    dates, which is look-ahead for any series that starts late — confirm acceptable.
    """
    import os

    # Prefer a key supplied through the environment. The literal fallback only keeps
    # existing runs working.
    # NOTE(review): a credential committed to a notebook should be rotated and the
    # fallback removed.
    fred_api_key = os.environ.get("FRED_API_KEY", "c4f4a36ec3edf3db66a7a28b639c5e9b")

    # Output column name -> FRED series id
    series_ids = {
        # Original series
        'T10Y2Y': 'T10Y2Y',       # 10-2 Year Treasury Yield Spread
        'USREC': 'USREC',          # US Recession Indicators
        'VIXCLS': 'VIXCLS',        # VIX Index (fallback to Yahoo)
        'DTWEXB': 'DTWEXB',        # Trade Weighted US Dollar Index
        'CPIAUCSL': 'CPIAUCSL',    # CPI All Items
        # New additions
        'GDPC1': 'GDPC1',          # Real GDP (Quarterly)
        'GDPC1_PCT': 'A191RL1Q225SBEA', # GDP growth % (Quarterly)
        'FEDFUNDS': 'FEDFUNDS',    # Federal Funds Rate
        'DFF': 'DFF',              # Daily Federal Funds Rate
    }

    fetched = []
    for name, series_id in series_ids.items():
        try:
            response = requests.get(
                "https://api.stlouisfed.org/fred/series/observations",
                params={
                    'series_id': series_id,
                    'api_key': fred_api_key,
                    'file_type': 'json',
                    'observation_start': start_date,
                    'observation_end': end_date,
                },
                timeout=10,
            )
            response.raise_for_status()
            observations = response.json().get('observations')

            if not observations:
                print(f"⚠️ No FRED data for {name} ({series_id})")
                continue

            obs = pd.DataFrame(observations)
            obs['date'] = pd.to_datetime(obs['date'])
            # FRED reports a missing observation as the string "."; coerce it to NaN
            # so one holiday gap does not make float conversion fail for the series.
            obs['value'] = pd.to_numeric(obs['value'], errors='coerce')
            series = obs.dropna(subset=['date', 'value']).set_index('date')['value'].rename(name)

            if series.empty:
                print(f"⚠️ No FRED data for {name} ({series_id})")
                continue

            # Spread the quarterly GDP series over the days of each quarter.
            if series_id in ['GDPC1', 'A191RL1Q225SBEA']:
                series = series.resample('D').ffill().rename(name)

            fetched.append(series)

        except Exception as e:
            print(f"🚨 Error fetching {name}: {str(e)}")

    # Build the frame once; sort so the forward-fill below runs in date order.
    econ_data = pd.concat(fetched, axis=1).sort_index() if fetched else pd.DataFrame()

    # Fallback for VIX if FRED fails
    if 'VIXCLS' not in econ_data.columns:
        print("Fetching VIX from Yahoo Finance...")
        try:
            vix = yf.download('^VIX', start=start_date, end=end_date)['Close']
            if isinstance(vix, pd.DataFrame):
                # Some yfinance versions return a one-column frame for a single ticker.
                vix = vix.iloc[:, 0]
            econ_data = pd.concat([econ_data, vix.rename('VIXCLS')], axis=1).sort_index()
        except Exception as e:
            print(f"Failed to fetch VIX: {str(e)}")

    # Derive GDP growth from the level only when the growth series is missing.
    # The level is constant within a quarter after the daily fill, so the change is
    # measured between distinct levels and then carried forward.
    if 'GDPC1' in econ_data.columns and 'GDPC1_PCT' not in econ_data.columns:
        level = econ_data['GDPC1'].dropna()
        level_changes = level[level != level.shift()]
        growth = level_changes.pct_change() * 100  # quarter-over-quarter, in percent
        econ_data['GDPC1_PCT'] = growth.reindex(econ_data.index).ffill()

    # Carry lower-frequency observations forward onto the days in between.
    econ_data = econ_data.ffill()

    # Put everything on the full calendar-day grid for the requested range.
    full_date_range = pd.date_range(start=start_date, end=end_date, freq='D')
    econ_data = econ_data.reindex(full_date_range)
    econ_data = econ_data.ffill().bfill()

    return econ_data



def fetch_stock_data(tickers, start_date, end_date):
    """Fetch closing prices and a market-cap Series for ``tickers`` from Yahoo Finance.

    Returns ``(price_data, market_caps)``: the ``'Close'`` slice of ``yf.download``
    and a ``pd.Series`` keyed by ticker holding ``Ticker.info['marketCap']``, with
    NaN for tickers whose lookup raises or whose info lacks that key.

    NOTE(review): this repeats the Task A definition of the same name; whichever
    cell ran last is the one in effect.
    """
    def _lookup_cap(symbol):
        # Best effort per ticker: report the failure and fall back to NaN.
        try:
            return yf.Ticker(symbol).info.get('marketCap', np.nan)
        except Exception as e:
            print(f"Couldn't get market cap for {symbol}: {str(e)}")
            return np.nan

    print(f"Downloading price data for {len(tickers)} stocks...")
    close_prices = yf.download(tickers, start=start_date, end=end_date)['Close']

    print("Fetching market cap data...")
    caps = {symbol: _lookup_cap(symbol) for symbol in tickers}

    return close_prices, pd.Series(caps)

# 2. Enhanced CVaR Calculation with Regime Adjustment
class RegimeCVaRModel:
    """Regime detector: a Gaussian HMM labels regimes, a random forest explains them.

    ``fit_regime_model`` standardises the macro features, fits the HMM, then trains a
    random forest to reproduce the HMM labels from the raw features so that feature
    importances and SHAP values are available.

    NOTE(review): nothing here orders the HMM states, so a state index (0, 1, 2) is
    only a label and can change between fits. Callers that read a meaning such as
    "high volatility" into a specific index should verify that mapping.
    """

    def __init__(self, n_regimes=3, random_state=42):
        """Create the unfitted scaler and HMM.

        Args:
            n_regimes: Number of hidden states.
            random_state: Seed passed to the HMM and to the random forest so that
                repeated runs produce the same regimes.
        """
        self.n_regimes = n_regimes
        self.random_state = random_state
        self.scaler = StandardScaler()
        self.hmm = hmm.GaussianHMM(n_components=n_regimes,
                                   covariance_type="diag",
                                   n_iter=1000,
                                   random_state=random_state)
        # Populated by fit_regime_model; None means "not fitted yet".
        self.regime_classifier = None
        self.feature_importance = None
        self.shap_explainer = None

    @staticmethod
    def _as_2d(econ_data):
        """Turn a single observation (Series or 1-D array) into a 1 x n_features array."""
        if len(econ_data.shape) == 1:
            return np.asarray(econ_data).reshape(1, -1)
        return econ_data

    def fit_regime_model(self, econ_data):
        """Fit scaler, HMM and explanatory classifier; return the regime of each row.

        Args:
            econ_data: DataFrame of macro features, one row per observation.

        Returns:
            Array of HMM state indices, one per row of ``econ_data``.
        """
        scaled_data = self.scaler.fit_transform(econ_data)
        self.hmm.fit(scaled_data)

        regimes = self.hmm.predict(scaled_data)
        # The forest is trained on the unscaled features to mimic the HMM labels.
        self.regime_classifier = RandomForestClassifier(n_estimators=100,
                                                        random_state=self.random_state)
        self.regime_classifier.fit(econ_data, regimes)

        self.feature_importance = pd.DataFrame({
            'feature': econ_data.columns,
            'importance': self.regime_classifier.feature_importances_
        }).sort_values('importance', ascending=False)

        self.shap_explainer = shap.TreeExplainer(self.regime_classifier)
        return regimes

    def get_current_regime(self, econ_data):
        """Predict the regime of the most recent observation.

        Args:
            econ_data: One observation (Series / 1-D array) or a 2-D history whose
                last row is the current observation.

        Returns:
            HMM state index of the last row.
        """
        scaled_data = self.scaler.transform(self._as_2d(econ_data))
        # Last element: for a history this is the latest state, not the first one.
        return self.hmm.predict(scaled_data)[-1]

    def explain_regime(self, econ_data):
        """Return SHAP values of the regime classifier for the given observation(s)."""
        return self.shap_explainer.shap_values(self._as_2d(econ_data))

def calculate_cvar(returns, alpha=0.95):
    """Average return in the lower tail: observations at or below the (1 - alpha) percentile."""
    threshold = np.percentile(returns, 100 * (1 - alpha))
    tail = returns[returns <= threshold]
    return tail.mean()


# 3. Enhanced Portfolio Optimization with Regime Adaptation
def optimize_cvar_portfolio(returns, regime=None, alpha=0.95, max_weight=0.050, min_weight=0.001):
    """Minimise sample CVaR with per-asset caps that depend on the market regime.

    The objective is ``tau + mean(pos(loss - tau)) / (1 - alpha)`` with
    ``loss = -(returns @ weights)``. Weights must sum to 1 within +/- 0.01 and lie
    between ``min_weight`` and the regime-scaled cap.

    Args:
        returns: DataFrame of asset returns, one column per asset. Rows containing
            NaN are dropped first.
        regime: ``None`` or ``1`` use ``max_weight`` unchanged with no extra
            constraint. ``0`` tightens the cap to 0.7 x ``max_weight`` and limits the
            L1 norm to 1.2. Any other value loosens the cap to 1.2 x ``max_weight``
            and limits the L1 norm to 1.3.
        alpha: CVaR confidence level.
        max_weight: Baseline upper bound per asset.
        min_weight: Lower bound per asset.

    Returns:
        Array of weights clipped at zero and rescaled to sum to 1, or ``None`` when
        fewer than 60 clean rows remain or no solver reaches an optimal status.
    """
    n = returns.shape[1]
    returns = returns.dropna()

    if len(returns) < 60:
        print("Insufficient data for optimization")
        return None

    # (cap multiplier, L1-norm limit). The default regime=None means "no regime
    # adaptation" and must not be treated as the low-volatility case.
    if regime is None or regime == 1:  # No adaptation / normal regime
        cap_scale, l1_limit = 1.0, None
    elif regime == 0:  # High volatility regime
        cap_scale, l1_limit = 0.7, 1.2
    else:  # Low volatility regime
        cap_scale, l1_limit = 1.2, 1.3

    weights = cp.Variable(n)
    tau = cp.Variable()
    loss = -(returns.values @ weights)
    cvar = tau + (1/(1-alpha)) * cp.mean(cp.pos(loss - tau))

    constraints = [
        cp.sum(weights) <= 1 + 1e-2,
        cp.sum(weights) >= 1 - 1e-2,
        weights >= min_weight,
        weights <= max_weight * cap_scale,
    ]
    if l1_limit is not None:
        constraints.append(cp.norm(weights, 1) <= l1_limit)

    prob = cp.Problem(cp.Minimize(cvar), constraints)

    # Try Clarabel first, then fall back to ECOS. A solver that raises is reported
    # and the next one is tried.
    for solver_label, solver_attr in (("Clarabel", "CLARABEL"), ("ECOS", "ECOS")):
        try:
            prob.solve(solver=getattr(cp, solver_attr))
            if prob.status in [cp.OPTIMAL, cp.OPTIMAL_INACCURATE]:
                optimized_weights = np.maximum(weights.value, 0)
                return optimized_weights / optimized_weights.sum()
        except Exception as e:
            print(f"{solver_label} failed: {str(e)}")

    return None

# 4. Enhanced Backtesting Engine with Walk-Forward Validation
def _rebalance_trading_days(trading_days, rebalance_freq):
    """Return the set of trading days on which a rebalance is due.

    For every ``{rebalance_freq}-DEC`` period whose calendar end is not later than the
    last trading day, the rebalance day is the last trading day inside that period.
    A period end that falls on a weekend or holiday therefore still triggers a
    rebalance instead of being skipped.
    """
    if len(trading_days) == 0:
        return set()

    anchored = f'{rebalance_freq}-DEC'
    try:
        # Calendar end (midnight) of the period each trading day belongs to.
        calendar_end = trading_days.to_period(anchored).end_time.normalize()
    except (ValueError, TypeError):
        # Offset aliases with no Period equivalent: build the calendar grid and snap
        # each grid date back to the latest trading day on or before it.
        grid = pd.date_range(trading_days[0], trading_days[-1], freq=anchored)
        pos = trading_days.searchsorted(grid, side='right') - 1
        return set(trading_days[np.unique(pos[pos >= 0])])

    last_day = pd.Series(trading_days.values, index=calendar_end).groupby(level=0).max()
    # Only completed periods count; the trailing partial period has no period end yet.
    complete = last_day.index <= trading_days[-1]
    return set(last_day[complete])


def run_enhanced_backtest(price_data, market_caps, econ_data, rebalance_freq='Q',
                          transaction_cost=0.001, test_start='2020-01-01'):
    """Backtest cap-weighted, equal-weighted, standard CVaR and regime-aware CVaR.

    Before ``test_start`` the regime model is refitted at every rebalance on the
    macro data available up to that day, and the enhanced portfolio simply mirrors
    the standard CVaR portfolio (same weights, same costs). From ``test_start`` on,
    the enhanced portfolio holds its own weights, optimised under the constraints of
    the regime predicted for the rebalance day.

    All portfolios start at 1.0 at the close of the first day that has a return, so
    the first output row is the second return date.

    Args:
        price_data: DataFrame of prices, one column per ticker, indexed by date.
        market_caps: Market caps per ticker. Aligned by label when its index shares
            labels with the price columns, otherwise used positionally.
        econ_data: Date-indexed DataFrame of macro features for the regime model.
        rebalance_freq: Period alias combined with ``-DEC`` (default quarterly).
        transaction_cost: Proportional cost per unit of turnover.
        test_start: First date on which regime-adapted weights are used.

    Returns:
        ``(results_df, weights_df, regime_model)``. ``results_df`` has ``Date``, the
        four ``*_Value`` columns, ``Rebalance_Flag`` and ``Regime`` (the regime used
        on enhanced rebalance days, NaN otherwise). ``weights_df`` has ``Date`` plus
        ``<ticker>_CW``, ``<ticker>_CVaR`` and ``<ticker>_Enhanced`` columns.
    """
    price_data = price_data.ffill().bfill()
    returns = price_data.pct_change().dropna()
    dates = returns.index
    tickers = price_data.columns.tolist()
    n_stocks = len(tickers)

    rebalance_dates = _rebalance_trading_days(dates, rebalance_freq)
    test_start = pd.to_datetime(test_start)

    # Initialize regime model
    regime_model = RegimeCVaRModel(n_regimes=3)

    # Cap weights aligned by ticker; missing caps receive an equal-weight share.
    caps = pd.Series(market_caps)
    if caps.index.isin(tickers).any():
        caps = caps.reindex(tickers)
    cw_weights = (caps / caps.sum()).fillna(1 / n_stocks).to_numpy(dtype=float)
    cw_weights = cw_weights / cw_weights.sum()

    ew_weights = np.ones(n_stocks) / n_stocks
    cvar_weights = ew_weights.copy()
    # The enhanced strategy holds its own weights; sharing one array with the
    # standard strategy would make the two value series indistinguishable.
    enhanced_weights = ew_weights.copy()

    portfolio_values = {
        'CW': [1.0],
        'EW': [1.0],
        'CVaR': [1.0],
        'Enhanced_CVaR': [1.0]
    }

    weight_history = []
    rebalance_flags = []
    regime_history = []

    # Main backtest loop
    for i in tqdm(range(1, len(dates)), desc="Running backtest"):
        date = dates[i]
        # Row i of `returns` is the return realised on `date` (price_data has one
        # extra leading row, so price rows must not be indexed with i).
        day_returns = returns.iloc[i].to_numpy(dtype=float)
        growth = 1.0 + day_returns

        cw_return = float(np.dot(cw_weights, day_returns))
        ew_return = float(np.dot(ew_weights, day_returns))
        cvar_return = float(np.dot(cvar_weights, day_returns))
        enhanced_cvar_return = float(np.dot(enhanced_weights, day_returns))

        rebalance_flag = 0
        regime_used = np.nan
        cvar_cost = 1.0
        enhanced_cost = 1.0

        # Rebalance at the close: today's return was earned with the old weights.
        if date in rebalance_dates:
            lookback_returns = returns.loc[:date].iloc[-252:]

            if len(lookback_returns) > 60:
                new_weights = optimize_cvar_portfolio(lookback_returns)

                if date >= test_start:
                    if regime_model.regime_classifier is None:
                        # No training-period rebalance happened; fit on what is
                        # known so far instead of predicting with an unfitted model.
                        regime_model.fit_regime_model(econ_data.loc[:date])

                    current_econ = econ_data.loc[:date].iloc[-1]
                    regime = regime_model.get_current_regime(current_econ)
                    candidate = optimize_cvar_portfolio(lookback_returns, regime=regime)

                    if candidate is not None:
                        turnover = np.sum(np.abs(candidate - enhanced_weights))
                        enhanced_cost = 1 - transaction_cost * turnover
                        enhanced_weights = candidate
                        rebalance_flag = 1
                        regime_used = regime
                else:
                    # Training period: refit the regime model on data up to today.
                    regime_model.fit_regime_model(econ_data.loc[:date])
                    if new_weights is not None:
                        turnover = np.sum(np.abs(new_weights - enhanced_weights))
                        enhanced_cost = 1 - transaction_cost * turnover
                        enhanced_weights = new_weights

                if new_weights is not None:
                    turnover = np.sum(np.abs(new_weights - cvar_weights))
                    cvar_cost = 1 - transaction_cost * turnover
                    cvar_weights = new_weights
                    rebalance_flag = 1

        portfolio_values['CW'].append(portfolio_values['CW'][-1] * (1 + cw_return))
        portfolio_values['EW'].append(portfolio_values['EW'][-1] * (1 + ew_return))
        portfolio_values['CVaR'].append(
            portfolio_values['CVaR'][-1] * (1 + cvar_return) * cvar_cost)
        portfolio_values['Enhanced_CVaR'].append(
            portfolio_values['Enhanced_CVaR'][-1] * (1 + enhanced_cvar_return) * enhanced_cost)

        weight_record = {'Date': date}
        for j, ticker in enumerate(tickers):
            weight_record[f"{ticker}_CW"] = cw_weights[j]
            weight_record[f"{ticker}_CVaR"] = cvar_weights[j]
            weight_record[f"{ticker}_Enhanced"] = enhanced_weights[j]
        weight_history.append(weight_record)
        rebalance_flags.append(rebalance_flag)
        regime_history.append(regime_used)

        # Buy-and-hold portfolios drift with prices; plain arrays keep the integer
        # indexing above positional.
        cw_weights = cw_weights * growth
        cw_weights = cw_weights / cw_weights.sum()
        ew_weights = ew_weights * growth
        ew_weights = ew_weights / ew_weights.sum()

    results_df = pd.DataFrame({
        'Date': dates[1:],
        'CW_Value': portfolio_values['CW'][1:],
        'EW_Value': portfolio_values['EW'][1:],
        'CVaR_Value': portfolio_values['CVaR'][1:],
        'Enhanced_CVaR_Value': portfolio_values['Enhanced_CVaR'][1:],
        'Rebalance_Flag': rebalance_flags,
        'Regime': regime_history
    })

    weights_df = pd.DataFrame(weight_history)

    return results_df, weights_df, regime_model

# 5. Performance Metrics (unchanged)
def calculate_performance_metrics(returns, risk_free_rate=0.0):
    """Compute annualised return, risk and tail statistics for a daily return Series.

    Uses 252 periods per year. Returns a dict keyed, in order, by 'Annual Return',
    'Annual Volatility', 'Sharpe Ratio', 'Max Drawdown', '95% CVaR' and
    'Sortino Ratio'. The Sortino denominator is the annualised standard deviation of
    the strictly negative returns.
    """
    trading_days = 252
    report = {}

    # Return and volatility, annualised
    mean_daily = returns.mean()
    report['Annual Return'] = (1 + mean_daily)**trading_days - 1
    report['Annual Volatility'] = returns.std() * np.sqrt(trading_days)

    excess_return = report['Annual Return'] - risk_free_rate
    report['Sharpe Ratio'] = excess_return / report['Annual Volatility']

    # Largest peak-to-trough loss of the compounded value curve
    equity_curve = (1 + returns).cumprod()
    high_water_mark = equity_curve.expanding().max()
    report['Max Drawdown'] = ((equity_curve - high_water_mark) / high_water_mark).min()

    # Tail risk
    report['95% CVaR'] = calculate_cvar(returns)

    # Downside-only risk adjustment
    negative_days = returns[returns < 0]
    report['Sortino Ratio'] = excess_return / (negative_days.std() * np.sqrt(trading_days))

    return report

# Main execution
if __name__ == "__main__":
    # Configuration
    start_date = '2010-01-01'
    end_date = '2024-12-31'
    tickers = [
        'AAPL', 'ABBV', 'ABT', 'ACN', 'ADBE', 'AMD', 'AMZN', 'AVGO', 'AXP', 'BAC',
        'BLK', 'BMY', 'BRK-B', 'C', 'CMCSA', 'COST', 'CRM', 'CSCO', 'CVS', 'CVX',
        'DE', 'DHR', 'DIS', 'FDX', 'GILD', 'GOOG', 'GS', 'HON', 'IBM', 'INTC',
        'ISRG', 'JNJ', 'JPM', 'KO', 'LMT', 'MA', 'MCD', 'META', 'MRK', 'MS',
        'MSFT', 'NFLX', 'NOW', 'NVDA', 'ORCL', 'PEP', 'PFE', 'PG', 'PLTR', 'PM',
        'PYPL', 'QCOM', 'SCHW', 'T', 'TMUS', 'TSLA', 'TXN', 'V', 'VZ', 'WMT'
    ]

    # Output locations, declared once so the closing summary always names the files
    # that were really written.
    csv_path = '/Volumes/workspace/mixture/etoro_pi/taskB_enhanced_portfolio_results.csv'
    plot_path = '/Volumes/workspace/mixture/etoro_pi/taskB_enhanced_portfolio_comparison.png'
    metrics_path = '/Volumes/workspace/mixture/etoro_pi/taskB_enhanced_performance_metrics.txt'

    # 1. Fetch data
    print("Fetching market data...")
    price_data, market_caps = fetch_stock_data(tickers, start_date, end_date)
    print("Fetching economic data...")
    econ_data = fetch_economic_data(start_date, end_date)

    # 2. Run enhanced backtest
    print("Running enhanced backtest...")
    results_df, weights_df, regime_model = run_enhanced_backtest(price_data, market_caps, econ_data)

    # 3. Verify initial period (only meaningful if at least one rebalance succeeded)
    rebalance_rows = results_df.index[results_df['Rebalance_Flag'] == 1]
    first_rebalance = rebalance_rows[0] if len(rebalance_rows) > 0 else None
    if first_rebalance is None:
        print("No successful rebalance in the backtest - skipping the pre-rebalance comparison")
    else:
        initial_period = results_df.iloc[:first_rebalance]
        print(f"Portfolios identical before first rebalance? {np.allclose(initial_period['EW_Value'], initial_period['CVaR_Value'], rtol=1e-02, atol=1e-04)}")

    # 4. Export results
    combined_df = results_df.merge(weights_df, on='Date', how='left')
    combined_df['Date'] = combined_df['Date'].dt.strftime('%Y-%m-%d')
    combined_df.to_csv(csv_path, index=False)

    # 5. Plot results
    plt.figure(figsize=(14, 7))

    plt.plot(results_df['Date'], results_df['CW_Value'], label='Cap Weighted', linewidth=2, color='green')
    plt.plot(results_df['Date'], results_df['EW_Value'], label='Equal Weighted', linewidth=2, color='blue')
    plt.plot(results_df['Date'], results_df['CVaR_Value'], label='Standard CVaR', linewidth=2, color='orange')
    plt.plot(results_df['Date'], results_df['Enhanced_CVaR_Value'], label='Enhanced CVaR', linewidth=2, color='red')

    # Mark test period start
    plt.axvline(pd.to_datetime('2020-01-01'), color='purple', linestyle='--',
                label='Test Period Start')

    plt.title('Enhanced Portfolio Performance Comparison', fontsize=16)
    plt.xlabel('Date', fontsize=12)
    plt.ylabel('Portfolio Value ($1 Initial)', fontsize=12)
    plt.legend(fontsize=12)
    plt.grid(True, linestyle='--', alpha=0.7)

    # Format x-axis
    plt.gca().xaxis.set_major_locator(mdates.YearLocator())
    plt.gca().xaxis.set_major_formatter(mdates.DateFormatter('%Y'))
    plt.gcf().autofmt_xdate()

    plt.tight_layout()
    plt.savefig(plot_path, dpi=300)
    plt.show()

    # 6. Calculate and display metrics for test period only
    test_results = results_df[results_df['Date'] >= '2020-01-01']

    print("\nPerformance Metrics (Jan 2020 - Dec 2024):")
    print("="*50)

    # Daily returns implied by each portfolio value series
    cw_returns = test_results['CW_Value'].pct_change().dropna()
    ew_returns = test_results['EW_Value'].pct_change().dropna()
    cvar_returns = test_results['CVaR_Value'].pct_change().dropna()
    enhanced_returns = test_results['Enhanced_CVaR_Value'].pct_change().dropna()

    cw_metrics = calculate_performance_metrics(cw_returns)
    ew_metrics = calculate_performance_metrics(ew_returns)
    cvar_metrics = calculate_performance_metrics(cvar_returns)
    enhanced_metrics = calculate_performance_metrics(enhanced_returns)

    # One table drives both the console report and the saved text file.
    metrics_by_strategy = {
        "Cap Weighted Strategy": cw_metrics,
        "Equal Weighted Strategy": ew_metrics,
        "Standard CVaR Strategy": cvar_metrics,
        "Enhanced CVaR Strategy": enhanced_metrics,
    }

    for strategy_name, strategy_metrics in metrics_by_strategy.items():
        print(f"\n{strategy_name}:")
        for k, v in strategy_metrics.items():
            print(f"{k:>20}: {v:.4f}")

    # 7. Regime model analysis
    # feature_importance stays None when the regime model was never fitted.
    feature_importance = regime_model.feature_importance
    print("\nRegime Model Feature Importance:")
    print(feature_importance)

    # Save metrics (sections separated by one blank line, none before the first)
    with open(metrics_path, 'w') as f:
        for position, (strategy_name, strategy_metrics) in enumerate(metrics_by_strategy.items()):
            if position > 0:
                f.write("\n")
            f.write(f"{strategy_name}:\n")
            for k, v in strategy_metrics.items():
                f.write(f"{k:>20}: {v:.4f}\n")

        f.write("\nRegime Model Feature Importance:\n")
        if feature_importance is None:
            f.write("not available (regime model was never fitted)")
        else:
            f.write(feature_importance.to_string())

    print("\nAnalysis complete. Files saved:")
    for saved_path in (csv_path, plot_path, metrics_path):
        print(f"- {saved_path}")

# Task C

In [0]:
# !pip install textblob hmmlearn shap feedparser pytrends nltk
# dbutils.library.restartPython()

In [0]:
# # Import required libraries
# import numpy as np
# import pandas as pd
# import yfinance as yf
# import cvxpy as cp
# from datetime import datetime, timedelta
# import matplotlib.pyplot as plt
# import matplotlib.dates as mdates
# from sklearn.preprocessing import StandardScaler
# from hmmlearn import hmm
# import requests
# import json
# from sklearn.ensemble import RandomForestClassifier
# import shap
# from tqdm import tqdm
# from textblob import TextBlob
# import feedparser
# from bs4 import BeautifulSoup
# import re
# from dateutil import parser as date_parser

# # 1. Enhanced Data Collection Function   
# def fetch_economic_data(start_date, end_date):
#     """Fetch expanded macroeconomic dataset with GDP growth and Fed rates (robust version)"""
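#     # SECURITY: a literal FRED API key used to be hard-coded on this line; revoke that key and supply a new one via the environment.
#     import os  # local import: `os` is not part of this cell's import list
#     fred_api_key = os.environ["FRED_API_KEY"]  # REQUIRED - get from FRED and export as FRED_API_KEY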
    
#     # Expanded series list including GDP and Fed rates
#     series_ids = {
#         'T10Y2Y': 'T10Y2Y',        # 10-2 Year Treasury Yield Spread
#         'USREC': 'USREC',          # US Recession Indicators
#         'VIXCLS': 'VIXCLS',        # VIX Index (fallback to Yahoo)
#         'DTWEXB': 'DTWEXB',        # Trade Weighted US Dollar Index
#         'CPIAUCSL': 'CPIAUCSL',    # CPI All Items
#         'GDPC1': 'GDPC1',          # Real GDP (Quarterly)
#         'GDPC1_PCT': 'A191RL1Q225SBEA',  # GDP growth % (Quarterly)
#         'FEDFUNDS': 'FEDFUNDS',    # Federal Funds Rate
#         'DFF': 'DFF',              # Daily Federal Funds Rate
#     }

#     econ_data = pd.DataFrame()

#     for name, series_id in series_ids.items():
#         url = f"https://api.stlouisfed.org/fred/series/observations?series_id={series_id}&api_key={fred_api_key}&file_type=json&observation_start={start_date}&observation_end={end_date}"
#         try:
#             response = requests.get(url, timeout=10)
#             response.raise_for_status()
#             data = response.json()

#             if not data.get('observations'):
#                 print(f"⚠️ No FRED data for {name} ({series_id})")
#                 continue

#             temp_df = pd.DataFrame(data['observations'])
#             temp_df['date'] = pd.to_datetime(temp_df['date'])
#             temp_df['value'] = pd.to_numeric(temp_df['value'], errors='coerce')
#             temp_df = temp_df.dropna(subset=['value'])
            
#             if series_id in ['GDPC1', 'A191RL1Q225SBEA']:
#                 temp_df = temp_df.set_index('date')['value'].resample('D').ffill().rename(name)
#             else:
#                 temp_df = temp_df.set_index('date')['value'].rename(name)

#             econ_data = pd.concat([econ_data, temp_df], axis=1)

#         except Exception as e:
#             print(f"🚨 Error fetching {name}: {str(e)}")

#     # Fallback for VIX if FRED fails
#     if 'VIXCLS' not in econ_data.columns:
#         print("Fetching VIX from Yahoo Finance...")
#         try:
#             vix = yf.download('^VIX', start=start_date, end=end_date)['Close'].rename('VIXCLS')
#             econ_data = pd.concat([econ_data, vix], axis=1)
#         except Exception as e:
#             print(f"Failed to fetch VIX: {str(e)}")

#     # Calculate GDP growth % if needed
#     if 'GDPC1' in econ_data.columns and 'GDPC1_PCT' not in econ_data.columns:
#         econ_data['GDPC1_PCT'] = econ_data['GDPC1'].pct_change() * 100

#     # Final processing and validation
#     econ_data = econ_data.ffill().bfill()
#     full_date_range = pd.date_range(start=start_date, end=end_date, freq='D')
#     econ_data = econ_data.reindex(full_date_range).ffill().bfill()
    
#     if econ_data.empty:
#         raise ValueError("No economic data was successfully fetched")
    
#     valid_columns = econ_data.columns[econ_data.notna().any()]
#     if len(valid_columns) < 3:
#         raise ValueError(f"Insufficient economic data. Only {len(valid_columns)} columns have data")
    
#     return econ_data[valid_columns]

# def fetch_stock_data(tickers, start_date, end_date):
#     """Fetch historical stock data and market caps via Yahoo Finance API"""
#     print(f"Downloading price data for {len(tickers)} stocks...")
#     price_data = yf.download(tickers, start=start_date, end=end_date)['Close']
    
#     print("Fetching market cap data...")
#     market_caps = {}
#     for ticker in tickers:
#         try:
#             stock = yf.Ticker(ticker)
#             market_cap = stock.info.get('marketCap', np.nan)
#             market_caps[ticker] = market_cap
#         except Exception as e:
#             print(f"Couldn't get market cap for {ticker}: {str(e)}")
#             market_caps[ticker] = np.nan
    
#     return price_data, pd.Series(market_caps)

# def clean_text(text):
#     """Clean text for sentiment analysis"""
#     text = BeautifulSoup(text, 'html.parser').get_text()
#     text = re.sub(r'[^a-zA-Z\s]', '', text)
#     return text.lower()

# def fetch_yahoo_news_sentiment(tickers, start_date, end_date):
#     """Fetch news sentiment from Yahoo Finance RSS feeds"""
#     base_url = "https://finance.yahoo.com/rss/headline?s="
#     sentiment_data = pd.DataFrame()
    
#     start_date = pd.to_datetime(start_date).date()
#     end_date = pd.to_datetime(end_date).date()
    
#     for ticker in tqdm(tickers, desc="Fetching Yahoo news sentiment"):
#         try:
#             url = f"{base_url}{ticker}"
#             feed = feedparser.parse(url)
            
#             sentiments = []
#             dates = []
            
#             for entry in feed.entries:
#                 try:
#                     pub_date = date_parser.parse(entry.published).date()
#                     if not (start_date <= pub_date <= end_date):
#                         continue
                    
#                     title = clean_text(entry.title)
#                     description = clean_text(entry.description) if hasattr(entry, 'description') else ""
#                     text = f"{title}. {description}"
                    
#                     blob = TextBlob(text)
#                     sentiment = blob.sentiment.polarity
                    
#                     sentiments.append(sentiment)
#                     dates.append(pub_date)
#                 except Exception as e:
#                     print(f"Error processing entry for {ticker}: {str(e)}")
#                     continue
            
#             if sentiments:
#                 temp_df = pd.DataFrame({'date': dates, ticker: sentiments})
#                 temp_df = temp_df.groupby('date').mean()
#                 sentiment_data = pd.concat([sentiment_data, temp_df], axis=1)
                
#         except Exception as e:
#             print(f"Error processing feed for {ticker}: {str(e)}")
#             continue
    
#     if not sentiment_data.empty:
#         full_date_range = pd.date_range(start=start_date, end=end_date, freq='D')
#         sentiment_data = sentiment_data.reindex(full_date_range)
#         sentiment_data = sentiment_data.ffill().bfill()
        
#         valid_columns = [col for col in sentiment_data.columns if col in tickers]
#         if valid_columns:
#             sentiment_data['MARKET_SENTIMENT'] = sentiment_data[valid_columns].mean(axis=1)
#         else:
#             sentiment_data['MARKET_SENTIMENT'] = np.nan
#     else:
#         full_date_range = pd.date_range(start=start_date, end=end_date, freq='D')
#         sentiment_data = pd.DataFrame(index=full_date_range)
#         sentiment_data['MARKET_SENTIMENT'] = np.nan
    
#     return sentiment_data

# # 2. Enhanced CVaR Calculation with Regime Adjustment
# class RegimeCVaRModel:
#     def __init__(self, n_regimes=3):
#         self.n_regimes = n_regimes
#         self.scaler = StandardScaler()
#         self.hmm = hmm.GaussianHMM(n_components=n_regimes, 
#                                  covariance_type="diag", 
#                                  n_iter=1000)
#         self.regime_classifier = None
#         self.feature_importance = None
#         self.shap_explainer = None
#         self.sentiment_data = None
    
#     def add_sentiment_data(self, sentiment_data):
#         """Add news sentiment data to the model"""
#         self.sentiment_data = sentiment_data
    
#     def fit_regime_model(self, econ_data, price_data=None):
#         """Fit HMM model to identify market regimes with optional sentiment data"""
#         if self.sentiment_data is not None and 'MARKET_SENTIMENT' in self.sentiment_data.columns:
#             market_sentiment = self.sentiment_data['MARKET_SENTIMENT'].copy()
#             combined_data = pd.concat([econ_data, market_sentiment], axis=1)
#         else:
#             combined_data = econ_data.copy()
        
#         combined_data = combined_data.ffill().bfill().dropna()
        
#         if len(combined_data) == 0:
#             print("Warning: No valid data remaining after cleaning. Using economic data only.")
#             combined_data = econ_data.ffill().bfill().dropna()
#             if len(combined_data) == 0:
#                 raise ValueError("No valid economic data available for regime modeling")
        
#         try:
#             scaled_data = self.scaler.fit_transform(combined_data)
#             self.hmm.fit(scaled_data)
            
#             regimes = self.hmm.predict(scaled_data)
#             self.regime_classifier = RandomForestClassifier(n_estimators=100, random_state=42)
#             self.regime_classifier.fit(combined_data, regimes)
            
#             self.feature_importance = pd.DataFrame({
#                 'feature': combined_data.columns,
#                 'importance': self.regime_classifier.feature_importances_
#             }).sort_values('importance', ascending=False)
            
#             self.shap_explainer = shap.TreeExplainer(self.regime_classifier)
#             return regimes
#         except Exception as e:
#             print(f"Error fitting regime model: {str(e)}")
#             raise
    
#     def get_current_regime(self, econ_data, current_sentiment=None):
#         """Predict current market regime with optional sentiment"""
#         if current_sentiment is not None:
#             if isinstance(current_sentiment, (float, int)):
#                 current_sentiment = {'MARKET_SENTIMENT': current_sentiment}
#             combined_data = pd.concat([econ_data, pd.Series(current_sentiment)], axis=1)
#         else:
#             combined_data = econ_data.copy()
            
#         combined_data = combined_data.ffill().bfill().dropna()
        
#         if combined_data.empty:
#             raise ValueError("No valid data available for predicting the current regime")
        
#         if len(combined_data.shape) == 1:
#             combined_data = combined_data.values.reshape(1, -1)
#         elif combined_data.shape[1] != self.scaler.n_features_in_:
#             # Ensure the input data has the same number of features as the scaler was trained on
#             missing_features = set(self.scaler.feature_names_in_) - set(combined_data.columns)
#             for feature in missing_features:
#                 combined_data[feature] = 0
#             combined_data = combined_data[self.scaler.feature_names_in_]
        
#         scaled_data = self.scaler.transform(combined_data)
#         return self.hmm.predict(scaled_data)[0]
    
#     def explain_regime(self, econ_data, current_sentiment=None):
#         """Generate SHAP values for regime explanation"""
#         if current_sentiment is not None:
#             if isinstance(current_sentiment, (float, int)):
#                 current_sentiment = {'MARKET_SENTIMENT': current_sentiment}
#             combined_data = pd.concat([econ_data, pd.Series(current_sentiment)], axis=1)
#         else:
#             combined_data = econ_data.copy()
            
#         if len(combined_data.shape) == 1:
#             combined_data = combined_data.values.reshape(1, -1)
#         return self.shap_explainer.shap_values(combined_data)

# def calculate_cvar(returns, alpha=0.95):
#     """Calculate Conditional Value-at-Risk (CVaR)"""
#     var = np.percentile(returns, 100*(1-alpha))
#     return returns[returns <= var].mean()

# # 3. Enhanced Portfolio Optimization with Regime Adaptation
# def optimize_cvar_portfolio(returns, regime=None, alpha=0.95, max_weight=0.050, min_weight=0.001):
#     """Robust optimization with regime adaptation"""
#     n = returns.shape[1]
#     returns = returns.dropna()
    
#     if len(returns) < 60:
#         print("Insufficient data for optimization")
#         return None
    
#     weights = cp.Variable(n)
#     tau = cp.Variable()
#     portfolio_returns = returns.values @ weights
#     loss = -portfolio_returns
#     cvar = tau + (1/(1-alpha)) * cp.mean(cp.pos(loss - tau))
    
#     # Adjust constraints based on regime
#     if regime == 0:  # High volatility regime
#         constraints = [
#             cp.sum(weights) <= 1 + 1e-2,
#             cp.sum(weights) >= 1 - 1e-2,
#             weights >= min_weight,
#             weights <= max_weight * 0.7,
#             cp.norm(weights, 1) <= 1.2
#         ]
#     elif regime == 1:  # Normal regime
#         constraints = [
#             cp.sum(weights) <= 1 + 1e-2,
#             cp.sum(weights) >= 1 - 1e-2,
#             weights >= min_weight,
#             weights <= max_weight
#         ]
#     else:  # Low volatility regime
#         constraints = [
#             cp.sum(weights) <= 1 + 1e-2,
#             cp.sum(weights) >= 1 - 1e-2,
#             weights >= min_weight,
#             weights <= max_weight * 1.2,
#             cp.norm(weights, 1) <= 1.3
#         ]
    
#     prob = cp.Problem(cp.Minimize(cvar), constraints)
    
#     try:
#         prob.solve(solver=cp.CLARABEL)
#         if prob.status in [cp.OPTIMAL, cp.OPTIMAL_INACCURATE]:
#             optimized_weights = np.maximum(weights.value, 0)
#             return optimized_weights / optimized_weights.sum()
#     except Exception as e:
#         print(f"Clarabel failed: {str(e)}")
    
#     try:
#         prob.solve(solver=cp.ECOS)
#         if prob.status in [cp.OPTIMAL, cp.OPTIMAL_INACCURATE]:
#             optimized_weights = np.maximum(weights.value, 0)
#             return optimized_weights / optimized_weights.sum()
#     except Exception as e:
#         print(f"ECOS failed: {str(e)}")
    
#     return None

# # 4. Enhanced Backtesting Engine with Walk-Forward Validation
# def run_enhanced_backtest(price_data, market_caps, econ_data, sentiment_data=None, rebalance_freq='Q', transaction_cost=0.001):
#     """Run backtest with regime adaptation"""
#     if sentiment_data is not None:
#         if sentiment_data.empty or ('MARKET_SENTIMENT' not in sentiment_data.columns):
#             print("Warning: No valid sentiment data available. Proceeding without sentiment analysis.")
#             sentiment_data = None
#         else:
#             sentiment_data = sentiment_data[['MARKET_SENTIMENT']].ffill().bfill()
    
#     price_data = price_data.ffill().bfill()
#     returns = price_data.pct_change().dropna()
#     dates = returns.index
#     n_stocks = len(price_data.columns)
#     tickers = price_data.columns.tolist()
    
#     rebalance_dates = pd.date_range(dates[0], dates[-1], freq=f'{rebalance_freq}-DEC')
    
#     regime_model = RegimeCVaRModel(n_regimes=3)
    
#     if sentiment_data is not None:
#         regime_model.add_sentiment_data(sentiment_data)
    
#     cw_weights = (market_caps / market_caps.sum()).fillna(1/n_stocks).values
#     cw_weights /= cw_weights.sum()
#     ew_weights = np.ones(n_stocks) / n_stocks
#     cvar_weights = ew_weights.copy()

#     portfolio_values = {
#         'CW': [1.0],
#         'EW': [1.0],
#         'CVaR': [1.0],
#         'Enhanced_CVaR': [1.0]
#     }
    
#     weight_history = []
#     rebalance_flags = []
#     regime_history = []
    
#     for i in tqdm(range(1, len(dates)), desc="Running backtest"):
#         date = dates[i]
#         current_prices = price_data.iloc[i]
#         prev_prices = price_data.iloc[i-1]
        
#         price_ratios = current_prices / prev_prices
#         cw_return = np.dot(cw_weights, price_ratios - 1)
#         ew_return = np.dot(ew_weights, price_ratios - 1)
#         cvar_return = np.dot(cvar_weights, price_ratios - 1)
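#         # NOTE(review): the enhanced strategy reuses the standard CVaR return and the same `cvar_weights` vector,
#         # so 'Enhanced_CVaR' can differ from 'CVaR' only through the transaction-cost deductions below.
#         # Give it its own weight vector and return before re-enabling this cell.
#         enhanced_cvar_return = cvar_return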
        
#         rebalance_flag = 0
#         if date in rebalance_dates:
#             lookback_returns = returns.loc[:date].iloc[-252:]
            
#             if len(lookback_returns) > 60:
#                 new_weights = optimize_cvar_portfolio(lookback_returns)
                
#                 if date >= pd.to_datetime('2020-01-01'):
#                     current_econ = econ_data.loc[:date].iloc[-1]
                    
#                     current_sentiment = None
#                     if sentiment_data is not None and date in sentiment_data.index:
#                         current_sentiment = sentiment_data.loc[date, 'MARKET_SENTIMENT']
                    
#                     regime = regime_model.get_current_regime(current_econ, current_sentiment)
#                     enhanced_weights = optimize_cvar_portfolio(lookback_returns, regime=regime)
                    
#                     if enhanced_weights is not None:
#                         turnover = np.sum(np.abs(enhanced_weights - cvar_weights))
#                         portfolio_values['Enhanced_CVaR'][-1] *= (1 - transaction_cost * turnover)
#                         cvar_weights = enhanced_weights
#                         rebalance_flag = 1
#                         regime_history.append(regime)
#                 else:
#                     train_econ = econ_data.loc[:date]
#                     regimes = regime_model.fit_regime_model(train_econ)
                
#                 if new_weights is not None:
#                     turnover = np.sum(np.abs(new_weights - cvar_weights))
#                     portfolio_values['CVaR'][-1] *= (1 - transaction_cost * turnover)
#                     cvar_weights = new_weights
#                     rebalance_flag = 1
        
#         portfolio_values['CW'].append(portfolio_values['CW'][-1] * (1 + cw_return))
#         portfolio_values['EW'].append(portfolio_values['EW'][-1] * (1 + ew_return))
#         portfolio_values['CVaR'].append(portfolio_values['CVaR'][-1] * (1 + cvar_return))
#         portfolio_values['Enhanced_CVaR'].append(portfolio_values['Enhanced_CVaR'][-1] * (1 + enhanced_cvar_return))
        
#         weight_record = {'Date': date}
#         for j, ticker in enumerate(tickers):
#             weight_record[f"{ticker}_CW"] = cw_weights[j]
#             weight_record[f"{ticker}_CVaR"] = cvar_weights[j]
#         weight_history.append(weight_record)
#         rebalance_flags.append(rebalance_flag)
        
#         cw_weights = (cw_weights * price_ratios)
#         cw_weights /= cw_weights.sum()
#         ew_weights = (ew_weights * price_ratios)
#         ew_weights /= ew_weights.sum()

#     results_df = pd.DataFrame({
#         'Date': dates[1:],
#         'CW_Value': portfolio_values['CW'][1:],
#         'EW_Value': portfolio_values['EW'][1:],
#         'CVaR_Value': portfolio_values['CVaR'][1:],
#         'Enhanced_CVaR_Value': portfolio_values['Enhanced_CVaR'][1:],
#         'Rebalance_Flag': rebalance_flags
#     })
    
#     weights_df = pd.DataFrame(weight_history)
    
#     return results_df, weights_df, regime_model

# # 5. Performance Metrics
# def calculate_performance_metrics(returns, risk_free_rate=0.0):
#     """Calculate comprehensive performance metrics"""
#     metrics = {}
    
#     metrics['Annual Return'] = (1 + returns.mean())**252 - 1
#     metrics['Annual Volatility'] = returns.std() * np.sqrt(252)
#     metrics['Sharpe Ratio'] = (metrics['Annual Return'] - risk_free_rate) / metrics['Annual Volatility']
    
#     cumulative = (1 + returns).cumprod()
#     peak = cumulative.expanding().max()
#     drawdown = (cumulative - peak) / peak
#     metrics['Max Drawdown'] = drawdown.min()
    
#     metrics['95% CVaR'] = calculate_cvar(returns)
    
#     downside_returns = returns[returns < 0]
#     downside_dev = downside_returns.std() * np.sqrt(252)
#     metrics['Sortino Ratio'] = (metrics['Annual Return'] - risk_free_rate) / downside_dev
    
#     return metrics

# # Main execution
# if __name__ == "__main__":
#     try:
#         # Configuration
#         start_date = '2010-01-01'
#         end_date = '2024-12-31'
#         tickers = [
#             'AAPL', 'ABBV', 'ABT', 'ACN', 'ADBE', 'AMD', 'AMZN', 'AVGO', 'AXP', 'BAC', 
#             'BLK', 'BMY', 'BRK-B', 'C', 'CMCSA', 'COST', 'CRM', 'CSCO', 'CVS', 'CVX', 
#             'DE', 'DHR', 'DIS', 'FDX', 'GILD', 'GOOG', 'GS', 'HON', 'IBM', 'INTC', 
#             'ISRG', 'JNJ', 'JPM', 'KO', 'LMT', 'MA', 'MCD', 'META', 'MRK', 'MS', 
#             'MSFT', 'NFLX', 'NOW', 'NVDA', 'ORCL', 'PEP', 'PFE', 'PG', 'PLTR', 'PM', 
#             'PYPL', 'QCOM', 'SCHW', 'T', 'TMUS', 'TSLA', 'TXN', 'V', 'VZ', 'WMT'
#         ]
        
#         # 1. Fetch data
#         print("Fetching market data...")
#         price_data, market_caps = fetch_stock_data(tickers, start_date, end_date)
#         print("Fetching economic data...")
#         econ_data = fetch_economic_data(start_date, end_date)
        
#         print("Fetching news sentiment from Yahoo RSS...")
#         try:
#             sentiment_data = fetch_yahoo_news_sentiment(tickers, start_date, end_date)
#             if sentiment_data.empty or 'MARKET_SENTIMENT' not in sentiment_data.columns:
#                 print("Warning: No valid sentiment data obtained. Continuing without sentiment analysis.")
#                 sentiment_data = None
#         except Exception as e:
#             print(f"Error fetching sentiment data: {str(e)}. Continuing without sentiment analysis.")
#             sentiment_data = None
        
#         # 2. Run enhanced backtest
#         print("Running enhanced backtest...")
#         results_df, weights_df, regime_model = run_enhanced_backtest(
#             price_data, market_caps, econ_data, sentiment_data
#         )
        
#         # 3. Verify initial period
#         first_rebalance = results_df[results_df['Rebalance_Flag'] == 1].index[0]
#         initial_period = results_df.iloc[:first_rebalance]
#         print(f"Portfolios identical before first rebalance? {np.allclose(initial_period['EW_Value'], initial_period['CVaR_Value'], rtol=1e-02, atol=1e-04)}")
        
#         # 4. Export results
#         combined_df = results_df.merge(weights_df, on='Date', how='left')
#         combined_df['Date'] = combined_df['Date'].dt.strftime('%Y-%m-%d')
#         combined_df.to_csv('/Volumes/workspace/mixture/etoro_pi/taskC_alpha_enhanced_results.csv', index=False)
        
#         # 5. Plot results
#         plt.figure(figsize=(14, 7))
#         plt.plot(results_df['Date'], results_df['CW_Value'], label='Cap Weighted', linewidth=2, color='green')
#         plt.plot(results_df['Date'], results_df['EW_Value'], label='Equal Weighted', linewidth=2, color='blue')
#         plt.plot(results_df['Date'], results_df['CVaR_Value'], label='Standard CVaR', linewidth=2, color='orange')
#         plt.plot(results_df['Date'], results_df['Enhanced_CVaR_Value'], label='Enhanced CVaR', linewidth=2, color='red')
#         plt.axvline(pd.to_datetime('2020-01-01'), color='purple', linestyle='--', label='Test Period Start')
        
#         plt.title('Enhanced Portfolio Performance Comparison', fontsize=16)
#         plt.xlabel('Date', fontsize=12)
#         plt.ylabel('Portfolio Value ($1 Initial)', fontsize=12)
#         plt.legend(fontsize=12)
#         plt.grid(True, linestyle='--', alpha=0.7)
        
#         plt.gca().xaxis.set_major_locator(mdates.YearLocator())
#         plt.gca().xaxis.set_major_formatter(mdates.DateFormatter('%Y'))
#         plt.gcf().autofmt_xdate()
        
#         plt.tight_layout()
#         plt.savefig('/Volumes/workspace/mixture/etoro_pi/taskC_portfolio_comparison.png', dpi=300)
#         plt.show()
        
#         # 6. Calculate and display metrics for test period only
#         test_results = results_df[results_df['Date'] >= '2020-01-01']
        
#         print("\nPerformance Metrics (Jan 2020 - Dec 2024):")
#         print("="*50)
        
#         cw_returns = test_results['CW_Value'].pct_change().dropna()
#         ew_returns = test_results['EW_Value'].pct_change().dropna()
#         cvar_returns = test_results['CVaR_Value'].pct_change().dropna()
#         enhanced_returns = test_results['Enhanced_CVaR_Value'].pct_change().dropna()
        
#         print("\nCap Weighted Strategy:")
#         cw_metrics = calculate_performance_metrics(cw_returns)
#         for k, v in cw_metrics.items():
#             print(f"{k:>20}: {v:.4f}")
        
#         print("\nEqual Weighted Strategy:")
#         ew_metrics = calculate_performance_metrics(ew_returns)
#         for k, v in ew_metrics.items():
#             print(f"{k:>20}: {v:.4f}")
        
#         print("\nStandard CVaR Strategy:")
#         cvar_metrics = calculate_performance_metrics(cvar_returns)
#         for k, v in cvar_metrics.items():
#             print(f"{k:>20}: {v:.4f}")
        
#         print("\nEnhanced CVaR Strategy:")
#         enhanced_metrics = calculate_performance_metrics(enhanced_returns)
#         for k, v in enhanced_metrics.items():
#             print(f"{k:>20}: {v:.4f}")
        
#         # 7. Save regime model analysis
#         print("\nRegime Model Feature Importance:")
#         print(regime_model.feature_importance)
        
#         with open('/Volumes/workspace/mixture/etoro_pi/taskC_performance_metrics.txt', 'w') as f:
#             f.write("Cap Weighted Strategy:\n")
#             for k, v in cw_metrics.items():
#                 f.write(f"{k:>20}: {v:.4f}\n")
            
#             f.write("\nEqual Weighted Strategy:\n")
#             for k, v in ew_metrics.items():
#                 f.write(f"{k:>20}: {v:.4f}\n")
            
#             f.write("\nStandard CVaR Strategy:\n")
#             for k, v in cvar_metrics.items():
#                 f.write(f"{k:>20}: {v:.4f}\n")
            
#             f.write("\nEnhanced CVaR Strategy:\n")
#             for k, v in enhanced_metrics.items():
#                 f.write(f"{k:>20}: {v:.4f}\n")
            
#             f.write("\nRegime Model Feature Importance:\n")
#             f.write(regime_model.feature_importance.to_string())
        
#         print("\nAnalysis complete. Files saved:")
#         print("- taskC_alpha_enhanced_results.csv")
#         print("- taskC_portfolio_comparison.png")
#         print("- taskC_performance_metrics.txt")
        
#     except Exception as e:
#         print(f"Fatal error in main execution: {str(e)}")
#         raise