In [None]:
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Regime-Switching Factor Strategy using Alpha Vantage API\n",
    "\n",
    "This notebook demonstrates a complete regime-switching investment strategy that:\n",
    "1. Fetches stock data from the Alpha Vantage API\n",
    "2. Engineers technical and fundamental features\n",
    "3. Detects market regimes using Hidden Markov Models\n",
    "4. Implements regime-specific investment strategies\n",
    "5. Backtests the strategy with comprehensive performance analysis\n",
    "\n",
    "**Author**: Quant Finance Team  \n",
    "**Date**: July 2025  \n",
    "**API Key**: *redacted — credentials must not be published in the notebook*  "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Import required libraries\n",
    "import pandas as pd\n",
    "import numpy as np\n",
    "import matplotlib.pyplot as plt\n",
    "import seaborn as sns\n",
    "from datetime import datetime, timedelta\n",
    "import warnings\n",
    "import os\n",
    "import sys\n",
    "\n",
    "# Add src directory to path\n",
    "sys.path.append('../src')\n",
    "\n",
    "# Import our custom modules\n",
    "from data_fetch import AlphaVantageDataFetcher\n",
    "from feature_engineering import FeatureEngineer\n",
    "from regime_detection import RegimeDetector\n",
    "from strategy import RegimeSwitchingStrategy\n",
    "from backtest import RegimeBacktester, run_simple_backtest\n",
    "\n",
    "# Plot styling, colour palette, then blanket warning suppression\n",
    "plt.style.use('seaborn-v0_8')\n",
    "sns.set_palette(\"husl\")\n",
    "warnings.filterwarnings('ignore')\n",
    "\n",
    "# Banner: title line followed by a 50-character rule\n",
    "banner_title = \"📊 Regime-Switching Factor Strategy Analysis\"\n",
    "print(banner_title, \"=\" * 50, sep=\"\\n\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 1. Configuration & Setup"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Configuration\n",
    "# The Alpha Vantage key is read from the environment so that no credential is\n",
    "# stored in the notebook source, its saved outputs, or version control.\n",
    "API_KEY = os.environ.get(\"ALPHAVANTAGE_API_KEY\", \"\").strip()\n",
    "if not API_KEY:\n",
    "    raise RuntimeError(\n",
    "        \"Set the ALPHAVANTAGE_API_KEY environment variable before running this notebook.\"\n",
    "    )\n",
    "TICKERS = ['AAPL', 'MSFT', 'AMZN']\n",
    "START_DATE = '2020-01-01'\n",
    "END_DATE = '2024-12-31'\n",
    "\n",
    "# Create directories if they don't exist\n",
    "for output_dir in ('../data', '../visuals', '../report'):\n",
    "    os.makedirs(output_dir, exist_ok=True)\n",
    "\n",
    "print(\"Analysis Configuration:\")\n",
    "print(f\"• Tickers: {TICKERS}\")\n",
    "print(f\"• Period: {START_DATE} to {END_DATE}\")\n",
    "# Show only a short prefix so the key cannot be recovered from saved outputs\n",
    "print(f\"• API Key: {API_KEY[:4]}...\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 2. Data Fetching & Loading"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Initialize data fetcher\n",
    "data_fetcher = AlphaVantageDataFetcher(api_key=API_KEY)\n",
    "\n",
    "# Check if data already exists\n",
    "data_file = '../data/stock_data.pkl'\n",
    "if os.path.exists(data_file):\n",
    "    print(\"📁 Loading existing data...\")\n",
    "    # NOTE: the cache path is fixed and not keyed on TICKERS; delete the file\n",
    "    # after changing the ticker list to force a fresh download.\n",
    "    stock_data = pd.read_pickle(data_file)\n",
    "else:\n",
    "    print(\"🔄 Fetching fresh data from Alpha Vantage...\")\n",
    "    stock_data = data_fetcher.fetch_multiple_stocks(TICKERS)\n",
    "    \n",
    "    # Save for future use (the full download is cached, before any windowing)\n",
    "    stock_data.to_pickle(data_file)\n",
    "    print(f\"💾 Data saved to {data_file}\")\n",
    "\n",
    "# Apply the configured analysis window. START_DATE / END_DATE were previously\n",
    "# only printed, so the analysis silently ran on whatever range was downloaded.\n",
    "# The slice is skipped when the index is not datetime-based, and the full\n",
    "# history is kept if the window would leave no rows.\n",
    "if isinstance(stock_data.index, pd.DatetimeIndex):\n",
    "    windowed_data = stock_data.sort_index().loc[START_DATE:END_DATE]\n",
    "    if windowed_data.empty:\n",
    "        print(f\"⚠️ No rows between {START_DATE} and {END_DATE}; keeping the full history.\")\n",
    "    else:\n",
    "        stock_data = windowed_data\n",
    "\n",
    "# Display data info\n",
    "print(f\"\\n📈 Stock Data Summary:\")\n",
    "print(f\"• Shape: {stock_data.shape}\")\n",
    "print(f\"• Date range: {stock_data.index.min()} to {stock_data.index.max()}\")\n",
    "print(f\"• Columns: {list(stock_data.columns)}\")\n",
    "\n",
    "# Show sample data\n",
    "stock_data.tail()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 3. Feature Engineering"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Initialize feature engineer\n",
    "feature_engineer = FeatureEngineer()\n",
    "\n",
    "# Generate features\n",
    "print(\"🔧 Engineering features...\")\n",
    "features = feature_engineer.create_features(stock_data)\n",
    "\n",
    "# Display feature summary\n",
    "print(f\"\\n🎯 Generated Features:\")\n",
    "print(f\"• Shape: {features.shape}\")\n",
    "print(f\"• Features: {list(features.columns)}\")\n",
    "\n",
    "# Show feature correlation\n",
    "plt.figure(figsize=(12, 8))\n",
    "correlation_matrix = features.corr()\n",
    "sns.heatmap(correlation_matrix, annot=True, cmap='coolwarm', center=0,\n",
    "            square=True, fmt='.2f')\n",
    "plt.title('Feature Correlation Matrix')\n",
    "plt.tight_layout()\n",
    "plt.savefig('../visuals/feature_correlation.png', dpi=300, bbox_inches='tight')\n",
    "plt.show()\n",
    "\n",
    "features.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 4. Regime Detection"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Initialize regime detector\n",
    "regime_detector = RegimeDetector(n_regimes=3)\n",
    "\n",
    "# Fit the model\n",
    "print(\"🎭 Detecting market regimes...\")\n",
    "regimes = regime_detector.fit_predict(features)\n",
    "\n",
    "# Display regime summary\n",
    "regime_counts = pd.Series(regimes).value_counts().sort_index()\n",
    "print(f\"\\n📊 Regime Distribution:\")\n",
    "for regime, count in regime_counts.items():\n",
    "    pct = count / len(regimes) * 100\n",
    "    print(f\"• Regime {regime}: {count} days ({pct:.1f}%)\")\n",
    "\n",
    "# Create regime timeline visualization\n",
    "fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(15, 10), sharex=True)\n",
    "\n",
    "# Plot regime timeline\n",
    "regime_series = pd.Series(regimes, index=features.index)\n",
    "colors = ['red', 'orange', 'green']\n",
    "# Walk the regimes in sorted order so a label keeps the same colour on every run,\n",
    "# and shade with `where=` over the full index: filling over only the masked dates\n",
    "# would bridge the gaps between separate spells of the same regime.\n",
    "for i, regime in enumerate(sorted(regime_series.unique())):\n",
    "    mask = (regime_series == regime).to_numpy()\n",
    "    ax1.fill_between(regime_series.index, 0, 1, where=mask,\n",
    "                     alpha=0.7, color=colors[i % len(colors)], label=f'Regime {regime}')\n",
    "\n",
    "ax1.set_ylabel('Regime State')\n",
    "ax1.set_title('Market Regime Timeline')\n",
    "ax1.legend()\n",
    "ax1.grid(True, alpha=0.3)\n",
    "\n",
    "# Plot market performance overlay\n",
    "market_return = stock_data.mean(axis=1).pct_change().cumsum()\n",
    "ax2.plot(market_return.index, market_return.values, 'black', linewidth=2, label='Market Return')\n",
    "ax2.set_ylabel('Cumulative Return')\n",
    "ax2.set_xlabel('Date')\n",
    "ax2.set_title('Market Performance vs Regime Timeline')\n",
    "ax2.legend()\n",
    "ax2.grid(True, alpha=0.3)\n",
    "\n",
    "plt.tight_layout()\n",
    "plt.savefig('../visuals/regime_timeline.png', dpi=300, bbox_inches='tight')\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 5. Strategy Implementation"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Initialize strategy\n",
    "strategy = RegimeSwitchingStrategy()\n",
    "\n",
    "# Generate trading signals\n",
    "print(\"⚡ Generating trading signals...\")\n",
    "signals = strategy.generate_signals(stock_data, regimes)\n",
    "\n",
    "# Display signal summary\n",
    "print(f\"\\n📈 Trading Signals Summary:\")\n",
    "print(f\"• Shape: {signals.shape}\")\n",
    "print(f\"• Assets: {list(signals.columns)}\")\n",
    "print(f\"• Date range: {signals.index.min()} to {signals.index.max()}\")\n",
    "\n",
    "# Show position allocation by regime\n",
    "signals_with_regimes = signals.copy()\n",
    "signals_with_regimes['regime'] = pd.Series(regimes, index=features.index)\n",
    "\n",
    "# Calculate average position by regime\n",
    "regime_positions = signals_with_regimes.groupby('regime')[TICKERS].mean()\n",
    "print(f\"\\n🎯 Average Position by Regime:\")\n",
    "print(regime_positions.round(3))\n",
    "\n",
    "# Visualize position allocation: one panel per regime actually present,\n",
    "# rather than a fixed three panels\n",
    "n_panels = len(regime_positions.index)\n",
    "fig, axes = plt.subplots(1, n_panels, figsize=(6 * n_panels, 6), squeeze=False)\n",
    "for ax, regime in zip(axes[0], regime_positions.index):\n",
    "    regime_positions.loc[regime].plot(kind='bar', ax=ax,\n",
    "                                     title=f'Regime {regime} - Avg Positions')\n",
    "    ax.set_ylabel('Position Weight')\n",
    "    ax.tick_params(axis='x', rotation=45)\n",
    "    ax.grid(True, alpha=0.3)\n",
    "\n",
    "plt.tight_layout()\n",
    "plt.savefig('../visuals/regime_positions.png', dpi=300, bbox_inches='tight')\n",
    "plt.show()\n",
    "\n",
    "signals.tail()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 6. Backtesting & Performance Analysis"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Run comprehensive backtest\n",
    "print(\"🚀 Running backtest simulation...\")\n",
    "backtest_results = run_simple_backtest(\n",
    "    price_data=stock_data,\n",
    "    strategy_signals=signals,\n",
    "    regime_periods=pd.Series(regimes, index=features.index)\n",
    ")\n",
    "\n",
    "# Extract results\n",
    "portfolio_returns = backtest_results['portfolio_returns']\n",
    "portfolio_values = backtest_results['portfolio_values']\n",
    "performance_metrics = backtest_results['metrics']\n",
    "regime_performance = backtest_results['regime_performance']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Display regime-specific performance\n",
    "print(\"\\n🎭 Performance by Regime:\")\n",
    "print(\"=\"*50)\n",
    "for regime, metrics in regime_performance.items():\n",
    "    print(f\"\\n{regime}:\")\n",
    "    for metric, value in metrics.items():\n",
    "        print(f\"  {metric}: {value}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Save results for reporting\n",
    "# (this cell and the ones below read the backtest outputs, so they must run\n",
    "# after the backtest cell above)\n",
    "results_summary = {\n",
    "    'strategy_metrics': performance_metrics,\n",
    "    'regime_performance': regime_performance,\n",
    "    'portfolio_values': portfolio_values,\n",
    "    'portfolio_returns': portfolio_returns,\n",
    "    'regimes': regimes,\n",
    "    'signals': signals\n",
    "}\n",
    "\n",
    "# Save to pickle for later use\n",
    "import pickle\n",
    "with open('../data/backtest_results.pkl', 'wb') as f:\n",
    "    pickle.dump(results_summary, f)\n",
    "\n",
    "print(\"\\n💾 Results saved to '../data/backtest_results.pkl'\")\n",
    "print(\"\\n🎉 Analysis Complete! Check the '../visuals' folder for generated plots.\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "## 8. Risk Analysis & Additional Insights\n",
    "\n",
    "# Monte Carlo simulation for risk assessment\n",
    "print(\"\\n🎲 Monte Carlo Risk Assessment:\")\n",
    "print(\"=\"*50)\n",
    "\n",
    "def monte_carlo_simulation(returns, num_simulations=1000, days_ahead=252,\n",
    "                           start_value=None, seed=None):\n",
    "    \"\"\"Simulate terminal portfolio values from i.i.d. normal daily returns.\n",
    "\n",
    "    Args:\n",
    "        returns: Historical daily portfolio returns; their mean and standard\n",
    "            deviation parameterise the sampling distribution.\n",
    "        num_simulations: Number of independent paths to draw.\n",
    "        days_ahead: Number of daily steps compounded in each path.\n",
    "        start_value: Value every path starts from. Defaults to the last entry\n",
    "            of the notebook-level `portfolio_values` series.\n",
    "        seed: Optional seed for NumPy's random generator, for reproducible draws.\n",
    "\n",
    "    Returns:\n",
    "        NumPy array of length `num_simulations` holding each path's final value.\n",
    "    \"\"\"\n",
    "    if start_value is None:\n",
    "        start_value = portfolio_values.iloc[-1]\n",
    "\n",
    "    rng = np.random.default_rng(seed)\n",
    "    # One vectorised draw replaces num_simulations * days_ahead scalar draws\n",
    "    daily_returns = rng.normal(returns.mean(), returns.std(),\n",
    "                               size=(num_simulations, days_ahead))\n",
    "    # Compound each path: start * prod(1 + r_t)\n",
    "    return start_value * np.prod(1.0 + daily_returns, axis=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Run simulation (seeded so the reported figures are reproducible)\n",
    "MC_SEED = 42\n",
    "simulated_outcomes = monte_carlo_simulation(portfolio_returns, seed=MC_SEED)\n",
    "\n",
    "# Calculate risk metrics\n",
    "var_95 = np.percentile(simulated_outcomes, 5)\n",
    "var_99 = np.percentile(simulated_outcomes, 1)\n",
    "expected_value = np.mean(simulated_outcomes)\n",
    "\n",
    "print(f\"1-Year Outlook (Monte Carlo with 1000 simulations):\")\n",
    "print(f\"Expected Portfolio Value: ${expected_value:,.0f}\")\n",
    "print(f\"95% VaR: ${var_95:,.0f}\")\n",
    "print(f\"99% VaR: ${var_99:,.0f}\")\n",
    "print(f\"Probability of Loss: {(simulated_outcomes < portfolio_values.iloc[-1]).mean():.1%}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Plot Monte Carlo results\n",
    "plt.figure(figsize=(10, 6))\n",
    "plt.hist(simulated_outcomes, bins=50, alpha=0.7, color='skyblue', edgecolor='black')\n",
    "plt.axvline(expected_value, color='green', linestyle='--', linewidth=2, label=f'Expected: ${expected_value:,.0f}')\n",
    "plt.axvline(var_95, color='orange', linestyle='--', linewidth=2, label=f'95% VaR: ${var_95:,.0f}')\n",
    "plt.axvline(var_99, color='red', linestyle='--', linewidth=2, label=f'99% VaR: ${var_99:,.0f}')\n",
    "plt.title('Monte Carlo Simulation: 1-Year Portfolio Value Distribution', fontsize=14, fontweight='bold')\n",
    "plt.xlabel('Portfolio Value ($)')\n",
    "plt.ylabel('Frequency')\n",
    "plt.legend()\n",
    "plt.grid(True, alpha=0.3)\n",
    "plt.tight_layout()\n",
    "plt.savefig('../visuals/monte_carlo_simulation.png', dpi=300, bbox_inches='tight')\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "## 9. Final Summary & Recommendations\n",
    "\n",
    "print(\"\\n📋 FINAL STRATEGY SUMMARY\")\n",
    "print(\"=\"*60)\n",
    "print(\"🎯 Strategy Performance:\")\n",
    "for key, value in performance_metrics.items():\n",
    "    if key != 'Raw Values':\n",
    "        print(f\"   • {key}: {value}\")\n",
    "\n",
    "print(\"\\n🎭 Regime Insights:\")\n",
    "regime_summary = pd.DataFrame({\n",
    "    # Use the actual regime labels; a positional range mislabels rows when a\n",
    "    # regime never occurs\n",
    "    'Regime': regime_counts.index,\n",
    "    'Duration (Days)': regime_counts.values,\n",
    "    'Percentage': [f\"{x/len(regimes)*100:.1f}%\" for x in regime_counts.values]\n",
    "})\n",
    "print(regime_summary.to_string(index=False))\n",
    "\n",
    "print(\"\\n💡 Key Findings:\")\n",
    "print(\"   • Strategy successfully adapts to different market regimes\")\n",
    "print(\"   • Risk-adjusted returns demonstrate regime-switching effectiveness\")\n",
    "print(\"   • Position allocation varies significantly across regimes\")\n",
    "print(\"   • Monte Carlo analysis provides forward-looking risk assessment\")\n",
    "\n",
    "print(\"\\n📁 Generated Files:\")\n",
    "print(\"   • ../data/stock_data.pkl - Raw stock data\")\n",
    "print(\"   • ../data/backtest_results.pkl - Complete results\")\n",
    "print(\"   • ../visuals/*.png - All visualization outputs\")\n",
    "\n",
    "print(\"\\n✅ Analysis completed successfully!\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 7. Performance Visualization"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Create comprehensive performance dashboard\n",
    "fig, axes = plt.subplots(2, 2, figsize=(16, 12))\n",
    "\n",
    "# 1. Portfolio Equity Curve\n",
    "portfolio_values.plot(ax=axes[0,0], linewidth=2, color='blue')\n",
    "axes[0,0].set_title('Portfolio Equity Curve', fontsize=14, fontweight='bold')\n",
    "axes[0,0].set_ylabel('Portfolio Value ($)')\n",
    "axes[0,0].grid(True, alpha=0.3)\n",
    "axes[0,0].yaxis.set_major_formatter(plt.FuncFormatter(lambda x, p: f'${x:,.0f}'))\n",
    "\n",
    "# 2. Rolling Returns\n",
    "rolling_returns = portfolio_returns.rolling(window=30).mean() * 252\n",
    "rolling_returns.plot(ax=axes[0,1], linewidth=2, color='green')\n",
    "axes[0,1].set_title('30-Day Rolling Annualized Returns', fontsize=14, fontweight='bold')\n",
    "axes[0,1].set_ylabel('Annualized Return')\n",
    "axes[0,1].axhline(y=0, color='black', linestyle='--', alpha=0.5)\n",
    "axes[0,1].grid(True, alpha=0.3)\n",
    "axes[0,1].yaxis.set_major_formatter(plt.FuncFormatter(lambda x, p: f'{x:.1%}'))\n",
    "\n",
    "# 3. Drawdown Analysis\n",
    "backtester = RegimeBacktester()\n",
    "backtester.portfolio_values = portfolio_values\n",
    "drawdowns = backtester.get_drawdown_series()\n",
    "drawdowns.plot(ax=axes[1,0], linewidth=2, color='red', alpha=0.7)\n",
    "axes[1,0].fill_between(drawdowns.index, drawdowns.values, 0, alpha=0.3, color='red')\n",
    "axes[1,0].set_title('Portfolio Drawdowns', fontsize=14, fontweight='bold')\n",
    "axes[1,0].set_ylabel('Drawdown')\n",
    "axes[1,0].grid(True, alpha=0.3)\n",
    "axes[1,0].yaxis.set_major_formatter(plt.FuncFormatter(lambda x, p: f'{x:.1%}'))\n",
    "\n",
    "# 4. Return Distribution\n",
    "portfolio_returns.hist(bins=50, ax=axes[1,1], alpha=0.7, color='purple')\n",
    "axes[1,1].axvline(portfolio_returns.mean(), color