# Hybrid GARCH-Machine Learning Model for Stock Volatility Prediction

This notebook demonstrates the usage of our hybrid model that combines GARCH and machine learning approaches for enhanced stock volatility prediction.

In [None]:
import sys
from datetime import datetime, timedelta
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

# Extend the import path before the project-local imports below
sys.path.append('..')

from src.data.data_loader import StockDataLoader
from src.models.hybrid_model import HybridModel

%matplotlib inline
# Matplotlib 3.6 renamed the bundled seaborn style sheets to 'seaborn-v0_8*' and
# later releases dropped the bare 'seaborn' name, so pick whichever name this
# installation actually provides instead of failing on newer Matplotlib.
plt.style.use('seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn')

## 1. Data Collection

Let's start by collecting historical data for some major tech stocks.

In [None]:
# Ticker universe and sampling window: fixed start, end is the date at run time
symbols = ['AAPL', 'MSFT', 'GOOGL', 'AMZN', 'META']
start_date = '2010-01-01'
end_date = datetime.now().date().isoformat()

# Build the loader, then unpack the two splits returned by get_clean_data()
loader = StockDataLoader(
    symbols=symbols,
    start_date=start_date,
    end_date=end_date,
)
train_data, val_data = loader.get_clean_data()

# Report the size of each split
print(
    f"Training data shape: {train_data.shape}",
    f"Validation data shape: {val_data.shape}",
    sep="\n",
)

## 2. Data Visualization

Let's visualize the historical realized-volatility patterns of each symbol in the training set.

In [None]:
# One line per ticker on a shared axis, drawn from the training split
fig, ax = plt.subplots(figsize=(15, 8))
for symbol in symbols:
    symbol_rows = train_data.loc[train_data['Symbol'] == symbol]
    ax.plot(symbol_rows.index, symbol_rows['RealizedVol'], label=symbol, alpha=0.7)

ax.set_title('Historical Realized Volatility')
ax.set_xlabel('Date')
ax.set_ylabel('Annualized Volatility')
ax.legend()
ax.grid(True)
plt.show()

## 3. Model Training

Now let's train our hybrid model using both GARCH and machine learning components.

In [None]:
# Hybrid configuration: GARCH with p=1, q=1 alongside a Random Forest ('rf') ML
# component, with the two sets of predictions combined by weighted averaging
model = HybridModel(ml_model_type='rf', garch_p=1, garch_q=1, ensemble_method='weighted')

# fit() hands back the performance metrics, iterated below as name -> value pairs
metrics = model.fit(train_data)

print("\nModel Performance Metrics:")
for metric_name, metric_value in metrics.items():
    print(f"{metric_name}: {metric_value:.4f}")

## 4. Model Evaluation

Let's evaluate our model's performance on the validation set.

In [None]:
# Run the fitted model over the validation split
val_predictions = model.predict(val_data)

# Pair each observed volatility with the model's prediction
results = pd.DataFrame({
    'Actual': val_data['RealizedVol'],
    'Predicted': val_predictions,
})

# Predicted-vs-actual scatter with a dashed red y = x reference line spanning
# the observed range of actual values
actual_lo, actual_hi = results['Actual'].min(), results['Actual'].max()
fig, ax = plt.subplots(figsize=(10, 6))
ax.scatter(results['Actual'], results['Predicted'], alpha=0.5)
ax.plot([actual_lo, actual_hi], [actual_lo, actual_hi], 'r--', lw=2)
ax.set_xlabel('Actual Volatility')
ax.set_ylabel('Predicted Volatility')
ax.set_title('Actual vs Predicted Volatility')
ax.grid(True)
plt.show()

# Calculate and display error metrics
# Score only rows where both the actual and the predicted value are present.
# pandas reductions skip NaN independently for each expression, so without this
# step the terms of the metrics below could be computed over different row
# subsets (e.g. the R² mean over all actuals, the residuals over complete pairs).
scored = results[['Actual', 'Predicted']].dropna()
n_dropped = len(results) - len(scored)

residuals = scored['Actual'] - scored['Predicted']
mse = np.mean(residuals**2)
rmse = np.sqrt(mse)
mae = np.mean(np.abs(residuals))

ss_res = np.sum(residuals**2)
ss_tot = np.sum((scored['Actual'] - scored['Actual'].mean())**2)
# R² is undefined when the actual series has no variance (or no rows remain);
# report NaN rather than dividing by zero.
r2 = 1 - ss_res / ss_tot if ss_tot > 0 else float('nan')

print("\nValidation Set Metrics:")
if n_dropped:
    print(f"Note: excluded {n_dropped} row(s) with a missing actual or predicted value.")
print(f"MSE: {mse:.6f}")
print(f"RMSE: {rmse:.6f}")
print(f"MAE: {mae:.6f}")
print(f"R²: {r2:.6f}")

## 5. Feature Importance Analysis

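Let's analyze which features are most important for our predictions. Feature importances are shown only when the ML component's model type is `rf` or `gb`; for any other type the cell below produces no output.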

In [None]:
# Importances are only read when the ML component's type is 'rf' or 'gb'
ml_component = model.ml_model
if ml_component.model_type in ('rf', 'gb'):
    # Pair each feature column with its importance, most important first
    feature_importance = (
        pd.DataFrame({
            'Feature': ml_component.feature_columns,
            'Importance': ml_component.model.feature_importances_,
        })
        .sort_values('Importance', ascending=False)
    )

    # Horizontal bar chart of the ten highest-ranked features
    top_features = feature_importance.head(10)
    fig, ax = plt.subplots(figsize=(12, 6))
    sns.barplot(x='Importance', y='Feature', data=top_features, ax=ax)
    ax.set_title('Top 10 Most Important Features')
    ax.set_xlabel('Feature Importance')
    fig.tight_layout()
    plt.show()

## 6. Save the Model

Finally, let's save our trained model for future use.

In [None]:
model_dir = '../models/saved_models/hybrid_model'
# Make sure the destination's parent folder exists so that saving does not
# depend on a pre-created directory tree (e.g. on a fresh checkout). Only the
# parent is created: it is not visible from this notebook whether save_model
# treats the path as a file or as a directory, and creating the parent is safe
# in either case.
Path(model_dir).parent.mkdir(parents=True, exist_ok=True)
model.save_model(model_dir)
print(f"Model saved to {model_dir}")