In [3]:
import sys
from pathlib import Path
import pandas as pd
import numpy as np
import pickle
import time
import matplotlib.pyplot as plt
import seaborn as sns

sys.path.insert(0, str(Path.cwd().parent))
from src.garch_models import (
    fit_garch_frequentist, fit_garch_bayesian, fit_garch_hierarchical,
    forecast_garch_rolling
)
from src.results_manager import ResultsManager

# ---------------------------------------------------------------------------
# Configuration: paths, experiment settings, plotting defaults, data loading
# ---------------------------------------------------------------------------
DATA_DIR = Path('../data/processed')
FIG_DIR = Path('../figures')
# parents=True keeps the cell re-runnable if FIG_DIR is later pointed at a
# nested location whose parent directories do not exist yet.
FIG_DIR.mkdir(parents=True, exist_ok=True)

TICKERS = ['AAPL', 'MSFT', 'GOOGL', 'AMZN', 'TSLA', 'META', 'NVDA']  # 7 stocks
PRIOR_SETS = ['medium']   # prior-set names passed to fit_garch_bayesian(prior_set=...)
HORIZONS = [1, 5, 22]     # forecast horizons, in steps
N_SAMPLES = 1000          # forwarded as n_samples to the Bayesian / hierarchical fits
N_BURNIN = 200            # forwarded as n_burnin to the Bayesian / hierarchical fits
SEED = 42

# The Bayesian fits are sampling-based, so fix the seed to make a
# Restart & Run All reproducible.
# NOTE(review): this only takes effect if src.garch_models draws from NumPy's
# global RNG - TODO confirm, or pass a seed to the samplers explicitly.
np.random.seed(SEED)

# Plotting style
sns.set_style("whitegrid")
plt.rcParams['figure.figsize'] = (14, 8)

# Load data
# SECURITY: pickle.load can execute arbitrary code embedded in the file; only
# load a train/test split that was produced by this project's own pipeline.
# The loaded object is indexed below as data['train'][ticker] and
# data['test'][ticker].
with open(DATA_DIR / 'train_test_split.pkl', 'rb') as f:
    data = pickle.load(f)

results_mgr = ResultsManager()

print(f"Stocks: {TICKERS}")
print(f"Prior sets: {PRIOR_SETS}")
print(f"Horizons: {HORIZONS}")


# ---------------------------------------------------------------------------
# Frequentist GARCH(1,1): one fit per ticker, then rolling forecasts
# ---------------------------------------------------------------------------
print("GARCH(1,1) FREQUENTIST (MLE)")
freq_results = {}
for ticker in TICKERS:
    # Pull the train and test series for this ticker as plain arrays.
    train, test = (data[split][ticker].values for split in ('train', 'test'))

    print(f"\n{ticker}...", end=" ")

    # Fit on the training window; the result is read below via the keys
    # 'alpha', 'beta' and 'omega'.
    result = fit_garch_frequentist(train)
    freq_results[ticker] = result

    # Score the fitted parameters at every horizon listed in HORIZONS.
    garch_params = (result['alpha'], result['beta'], result['omega'])
    forecasts = forecast_garch_rolling(train, test, *garch_params, HORIZONS)

    # `result` is the same object that was stored in freq_results above, so
    # attaching the forecasts here updates the stored entry as well, and the
    # forecasts are part of what gets saved.
    result['forecasts'] = forecasts
    results_mgr.save_garch(ticker, 'freq', result)

    print(f"α={result['alpha']:.4f}, β={result['beta']:.4f}, "
          f"MSE[h=1]={forecasts['h_1']['mse_volatility']:.6f}")



# ---------------------------------------------------------------------------
# Bayesian GARCH(1,1): one fit per (ticker, prior set), then rolling forecasts
# ---------------------------------------------------------------------------
print("GARCH(1,1) BAYESIAN (Gibbs)")
bayes_results = {}
for ticker in TICKERS:
    # Start a fresh per-ticker mapping: prior-set name -> fit result.
    bayes_results[ticker] = {}
    train, test = (data[split][ticker].values for split in ('train', 'test'))

    for prior_set in PRIOR_SETS:
        print(f"\n{ticker} + {prior_set}...", end=" ")

        # Sample under the named prior set.
        result = fit_garch_bayesian(
            train,
            prior_set=prior_set,
            n_samples=N_SAMPLES,
            n_burnin=N_BURNIN,
        )
        bayes_results[ticker][prior_set] = result

        # Forecast with the '*_mean' entries of the fit result as plug-in
        # parameter values.
        posterior_means = (
            result['alpha_mean'],
            result['beta_mean'],
            result['omega_mean'],
        )
        forecasts = forecast_garch_rolling(train, test, *posterior_means, HORIZONS)

        # `result` is the object already stored in bayes_results, so this
        # attaches the forecasts to the stored entry before it is saved.
        result['forecasts'] = forecasts
        results_mgr.save_garch(ticker, 'bayes', result, prior_set=prior_set)

        print(f"α={result['alpha_mean']:.4f}±{result['alpha_std']:.4f}, "
              f"β={result['beta_mean']:.4f}±{result['beta_std']:.4f}, "
              f"Time={result['runtime']:.1f}s, MSE[h=1]={forecasts['h_1']['mse_volatility']:.6f}")



# ---------------------------------------------------------------------------
# Hierarchical GARCH(1,1): one joint fit across all tickers, then per-ticker
# rolling forecasts using each ticker's '*_mean' parameter entries.
# ---------------------------------------------------------------------------
print("GARCH(1,1) HIERARCHICAL (All 7 stocks together)")
# Prepare data dict: ticker -> training series
data_dict = {ticker: data['train'][ticker].values for ticker in TICKERS}

print("Fitting hierarchical model...", end=" ")
hier_result = fit_garch_hierarchical(data_dict, n_samples=N_SAMPLES, n_burnin=N_BURNIN)
print(f"Done! Time={hier_result['meta']['runtime']:.1f}s")

# Per-ticker Bayesian fit used as the "shrunk from" reference in the log line.
# 'medium' is preferred (it is what this printout has always compared against);
# fall back to the first configured prior set so that changing PRIOR_SETS does
# not turn this purely informational print into a KeyError.
baseline_prior = 'medium' if 'medium' in PRIOR_SETS else PRIOR_SETS[0]

# Forecast each ticker
hier_forecasts = {}
for ticker in TICKERS:
    test = data['test'][ticker].values
    train = data['train'][ticker].values

    alpha_hier = hier_result[ticker]['alpha_mean']
    beta_hier = hier_result[ticker]['beta_mean']
    omega_hier = hier_result[ticker]['omega_mean']

    forecasts = forecast_garch_rolling(train, test,
                                       alpha_hier, beta_hier, omega_hier,
                                       HORIZONS)
    hier_forecasts[ticker] = forecasts

    baseline = bayes_results[ticker][baseline_prior]
    # The value printed is the h=1 volatility MSE, the same metric the
    # frequentist and Bayesian sections report, so the label must say h=1
    # (it previously said h=22 while reading forecasts['h_1']).
    print(f"  {ticker}: α={alpha_hier:.4f}, β={beta_hier:.4f}, "
          f"(shrunk from α={baseline['alpha_mean']:.4f}, "
          f"β={baseline['beta_mean']:.4f}), "
          f"MSE[h=1]={forecasts['h_1']['mse_volatility']:.6f}")

# Left as the cell's last expression so the notebook displays its return value.
results_mgr.save_hierarchical(model_type='garch', results=hier_result)

Stocks: ['AAPL', 'MSFT', 'GOOGL', 'AMZN', 'TSLA', 'META', 'NVDA']
Prior sets: ['medium']
Horizons: [1, 5, 22]
GARCH(1,1) FREQUENTIST (MLE)

AAPL... Saved GARCH: AAPL_garch_freq.pkl
α=0.3322, β=0.1000, MSE[h=22]=0.000478

MSFT... Saved GARCH: MSFT_garch_freq.pkl
α=0.3322, β=0.1000, MSE[h=22]=0.000235

GOOGL... Saved GARCH: GOOGL_garch_freq.pkl
α=0.3322, β=0.1000, MSE[h=22]=0.000425

AMZN... Saved GARCH: AMZN_garch_freq.pkl
α=0.3322, β=0.6575, MSE[h=22]=0.000521

TSLA... Saved GARCH: TSLA_garch_freq.pkl
α=0.0001, β=0.1000, MSE[h=22]=0.001771

META... Saved GARCH: META_garch_freq.pkl
α=0.3322, β=0.1000, MSE[h=22]=0.000635

NVDA... Saved GARCH: NVDA_garch_freq.pkl
α=0.0001, β=0.1000, MSE[h=22]=0.000929
GARCH(1,1) BAYESIAN (Gibbs)

AAPL + medium... Saved GARCH: AAPL_garch_bayes_medium.pkl
α=0.0655±0.0400, β=0.8502±0.0627, Time=35.4s, MSE[h=22]=0.000478

MSFT + medium... Saved GARCH: MSFT_garch_bayes_medium.pkl
α=0.0707±0.0419, β=0.8416±0.0686, Time=35.3s, MSE[h=22]=0.000235

GOOGL + medium.

WindowsPath('../results/hierarchical/hierarchical_garch.pkl')

In [19]:
# ---------------------------------------------------------------------------
# Comparison table: one row per (stock, model[, prior]) with return-MSE and
# volatility-MSE at every forecast horizon.
# ---------------------------------------------------------------------------
print("COMPREHENSIVE COMPARISON")
# Derive the reported horizons from HORIZONS (the list the forecasts were
# produced for) instead of repeating the literal, so the table cannot drift
# out of sync with the forecasts.
vol_horizons = list(HORIZONS)
return_horizons = list(HORIZONS)


def build_comparison_row(stock, model, prior, alpha, beta, forecasts):
    """Return one comparison-table record.

    Parameters
    ----------
    stock, model, prior : str
        Identifying labels written to the 'Stock', 'Model', 'Prior' columns.
    alpha, beta : float
        Parameter values written to the 'Alpha' and 'Beta' columns.
    forecasts : mapping
        Output of forecast_garch_rolling; read as
        forecasts[f'h_{h}']['mse_return'] and
        forecasts[f'h_{h}']['mse_volatility'].

    Returns
    -------
    dict
        Keys in display order: labels, parameters, 'h={h}_MSE' for each entry
        of return_horizons, then 'h={h}_volMSE' for each entry of vol_horizons.
    """
    row = {'Stock': stock, 'Model': model, 'Prior': prior,
           'Alpha': alpha, 'Beta': beta}
    for h in return_horizons:
        row[f'h={h}_MSE'] = forecasts[f'h_{h}']['mse_return']
    for h in vol_horizons:
        row[f'h={h}_volMSE'] = forecasts[f'h_{h}']['mse_volatility']
    return row


# Create comparison table
comparison_data = []

for ticker in TICKERS:
    # GARCH-Frequentist
    freq_fit = freq_results[ticker]
    comparison_data.append(build_comparison_row(
        ticker, 'GARCH-Freq', 'N/A',
        freq_fit['alpha'], freq_fit['beta'], freq_fit['forecasts'],
    ))

    # GARCH-Bayesian (for each prior set)
    for prior_set in PRIOR_SETS:
        bayes_fit = bayes_results[ticker][prior_set]
        comparison_data.append(build_comparison_row(
            ticker, 'GARCH-Bayes', prior_set,
            bayes_fit['alpha_mean'], bayes_fit['beta_mean'], bayes_fit['forecasts'],
        ))

    # GARCH-Hierarchical (forecasts live in hier_forecasts, not in hier_result)
    hier_fit = hier_result[ticker]
    comparison_data.append(build_comparison_row(
        ticker, 'GARCH-Hier', 'Hier',
        hier_fit['alpha_mean'], hier_fit['beta_mean'], hier_forecasts[ticker],
    ))

comp_df = pd.DataFrame(comparison_data)

# NOTE(review): in the recorded output the return-MSE columns are identical
# across the three models for a given stock; only the volatility MSE
# separates them. Worth confirming that this is expected.
report_columns = (
    ['Stock', 'Model', 'Prior', 'Alpha', 'Beta']
    + [f'h={h}_MSE' for h in return_horizons]
    + [f'h={h}_volMSE' for h in vol_horizons]
)
print(comp_df[report_columns].to_string(index=False))

comp_df.to_csv(FIG_DIR / 'garch_comparison_full.csv', index=False)


# ---------------------------------------------------------------------------
# Figure 1: grouped bar chart of volatility MSE per stock and model, one panel
# per horizon, log-scaled y axis.
# ---------------------------------------------------------------------------
print("\nPLOTTING Volatility MSE Bar Charts")

num_vol_plots = len(vol_horizons)

# At most three panels per row. The max(1, ...) guards keep the grid valid
# (and avoid a division by zero) if vol_horizons is ever empty.
n_cols = max(1, min(num_vol_plots, 3))
n_rows = max(1, int(np.ceil(num_vol_plots / n_cols)))

# squeeze=False always returns a 2-D array of Axes, so .flatten() also works
# for a 1x1 grid (plt.subplots otherwise returns a bare Axes in that case).
fig1, axes1 = plt.subplots(n_rows, n_cols,
                           figsize=(n_cols * 5.5, n_rows * 4.5),
                           constrained_layout=False, squeeze=False)
axes1 = axes1.flatten()

model_colors = ['#FF6B6B', '#4ECDC4', '#45B7D1']
# Colour by model name rather than by column position: pivot_table orders the
# model columns alphabetically, which would otherwise give Freq and Bayes
# different colours here than in the winners chart.
model_color_map = dict(zip(['GARCH-Freq', 'GARCH-Bayes', 'GARCH-Hier'], model_colors))

# n_rows * n_cols >= num_vol_plots by construction, so zip never drops a horizon.
for ax, h in zip(axes1, vol_horizons):
    col_name = f'h={h}_volMSE'

    if col_name not in comp_df.columns:
        ax.set_title(f'Column "{col_name}" not found', fontsize=10)
        ax.axis('off')
        continue

    pivot_data = comp_df.pivot_table(values=col_name, index='Stock',
                                     columns='Model', aggfunc='first')
    if pivot_data.empty:
        ax.set_title(f'No data for h={h}', fontsize=10)
        ax.axis('off')
        continue

    bar_colors = [model_color_map.get(model, '#999999') for model in pivot_data.columns]
    pivot_data.plot(kind='bar', ax=ax, width=0.8, color=bar_colors, logy=True)
    ax.set_title(f'Volatility MSE (h={h} steps) - LOG SCALE', fontsize=10, fontweight='bold')
    ax.set_xlabel('Stock', fontsize=9)
    ax.set_ylabel('Vol MSE (log)', fontsize=9)
    ax.legend(title='Model', fontsize=8, title_fontsize=9)
    ax.grid(axis='y', alpha=0.3)
    ax.tick_params(axis='x', rotation=45, labelsize=8)
    ax.tick_params(axis='y', labelsize=8)

# Remove any unused subplots (grid cells beyond the last horizon)
for spare_ax in axes1[num_vol_plots:]:
    fig1.delaxes(spare_ax)

fig1.suptitle('GARCH Model Comparison: Volatility Forecasting', fontsize=14, fontweight='bold')
# rect leaves headroom for the suptitle.
fig1.tight_layout(rect=[0, 0.03, 1, 0.95])
fig1.savefig(FIG_DIR / 'garch_mse_comparison.png', dpi=300, bbox_inches='tight')
print(f"Saved: {FIG_DIR / 'garch_mse_comparison.png'}")
plt.close(fig1)


# ---------------------------------------------------------------------------
# Figure 2: how often each model attains the lowest volatility MSE, counted
# over every (stock, horizon) pair.
# ---------------------------------------------------------------------------
# Pre-seeded with zeros so a model that never wins still gets a bar.
vol_winners = {'GARCH-Freq': 0, 'GARCH-Bayes': 0, 'GARCH-Hier': 0}

for stock in TICKERS:
    stock_df = comp_df[comp_df['Stock'] == stock]

    # Same horizons as the comparison table, rather than a second literal list.
    for h in vol_horizons:
        col = f'h={h}_volMSE'

        if col not in stock_df.columns or stock_df[col].isnull().all():
            print(f"Warning: Column '{col}' not found or all NaN for stock {stock}. Skipping.")
            continue

        # Best (lowest) value per model first, so several rows for one model
        # (e.g. several prior sets) count as a single contender.
        min_mse_per_model = stock_df.groupby('Model')[col].min()
        overall_best_model = min_mse_per_model.idxmin()
        vol_winners[overall_best_model] = vol_winners.get(overall_best_model, 0) + 1

colors_winner = ['#FF6B6B', '#4ECDC4', '#45B7D1']
# Tie each colour to a model name so an unexpected extra model cannot shift
# the colours of the known ones.
winner_color_map = dict(zip(['GARCH-Freq', 'GARCH-Bayes', 'GARCH-Hier'], colors_winner))

actual_winners_models = list(vol_winners)
actual_winners_counts = [vol_winners[m] for m in actual_winners_models]
winner_bar_colors = [winner_color_map.get(m, '#999999') for m in actual_winners_models]

fig2, ax = plt.subplots(figsize=(10, 6))
ax.bar(actual_winners_models, actual_winners_counts,
       color=winner_bar_colors, alpha=0.8, edgecolor='black')
ax.set_title('Volatility MSE Winners Across Horizons (Lower = Better)', fontweight='bold')
ax.set_ylabel('Number of Wins')
ax.set_xlabel('Model')
ax.tick_params(axis='x', rotation=0)
ax.grid(axis='y', alpha=0.3)

fig2.tight_layout()
fig2.savefig(FIG_DIR / 'garch_winners_volatility_mse.png', dpi=300, bbox_inches='tight')
print(f"Saved: {FIG_DIR / 'garch_winners_volatility_mse.png'}")
plt.close(fig2)

COMPREHENSIVE COMPARISON
Stock       Model  Prior    Alpha     Beta  h=1_MSE  h=5_MSE  h=22_MSE   h=1_volMSE   h=5_volMSE  h=22_volMSE
 AAPL  GARCH-Freq    N/A 0.332214 0.100000 0.000455 0.000459  0.000478 1.718662e+03 1.717445e+03 1.717086e+03
 AAPL GARCH-Bayes medium 0.065491 0.850244 0.000455 0.000459  0.000478 1.335745e+00 1.302053e+00 1.292027e+00
 AAPL  GARCH-Hier   Hier 0.100209 0.808743 0.000455 0.000459  0.000478 1.364526e+02 1.361083e+02 1.360059e+02
 MSFT  GARCH-Freq    N/A 0.332214 0.100000 0.000246 0.000245  0.000235 1.448364e+03 1.447477e+03 1.447337e+03
 MSFT GARCH-Bayes medium 0.070687 0.841620 0.000246 0.000245  0.000235 1.317873e+00 1.291252e+00 1.286982e+00
 MSFT  GARCH-Hier   Hier 0.100116 0.808050 0.000246 0.000245  0.000235 1.359211e+02 1.356480e+02 1.356039e+02
GOOGL  GARCH-Freq    N/A 0.332215 0.100000 0.000435 0.000436  0.000425 2.735879e+03 2.734125e+03 2.733867e+03
GOOGL GARCH-Bayes medium 0.067098 0.845771 0.000435 0.000436  0.000425 1.293495e+00 1.255649e+0