# EGARCH-SVM Hybrid Model

In [1]:
import numpy as np
import pandas as pd
from arch import arch_model
from sklearn.svm import SVR
from sklearn.preprocessing import StandardScaler

In [2]:
# The dataset contains 2019 observations and 9 columns.
file_path = "..\data\ALGO_daily.csv"
df = pd.read_csv(file_path)
df.set_index(df.columns[0], inplace=True)
df.head()

Unnamed: 0_level_0,open,high,low,close,return,return2,RV,lnRV,lnRV_lag1,lnRV_5D_lag1,lnRV_7D_lag1,lnRV_22D_lag1,lnRV_30D_lag1,lnRV_60D_lag1,lnRV_lag3,lnRV_5D_lag3,lnRV_7D_lag3,lnRV_22D_lag3,lnRV_30D_lag3,lnRV_60D_lag3
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
2019-06-23,2.19,2.234,1.8,1.934,-0.125224,0.015681,0.020962,-3.865058,,,,,,,,,,,,
2019-06-24,1.932,2.02,1.301,1.416,-0.311754,0.097191,0.025926,-3.652501,-3.865058,,,,,,,,,,,
2019-06-25,1.417,1.92,1.314,1.54,0.083946,0.007047,0.042346,-3.161881,-3.652501,,,,,,,,,,,
2019-06-26,1.538,1.875,1.457,1.595,0.035091,0.001231,0.035729,-3.331801,-3.161881,,,,,,-3.865058,,,,,
2019-06-27,1.595,1.597,1.371,1.419,-0.116921,0.013671,0.019125,-3.956779,-3.331801,,,,,,-3.652501,,,,,


In [3]:
# Train-Test Split (80% train, 20% test)
train_size = int(0.8 * len(df))
test_size = len(df) - train_size
r_train, r_test = df['return'][:train_size], df['return'][train_size:]
RV_train, RV_test = df['RV'][:train_size], df['RV'][train_size:]

print("Training Set:", r_train.shape, RV_train.shape)
print("Test Set:", r_test.shape, RV_test.shape)
print(r_train.index[0], r_test.index[0])

Training Set: (1662,) (1662,)
Test Set: (416,) (416,)
2019-06-23 2024-01-10


In [None]:
def garch_svr_hybrid(log_returns):
    """
    Parameters:
    log_returns (array-like): Daily log return series r₁,..rₙ for the asset
    
    Returns:
    tuple: (forecasted_epsilon_squared, garch_forecast, svr_forecast)
    """
    
    # Step 1: Estimate GARCH parameters with ML method
    # Using GARCH(1,1) model
    garch_model = arch_model(log_returns, vol='Garch', p=1, q=1, rescale=False)
    garch_results = garch_model.fit(disp='off')
    
    # Step 2: Compute in-sample conditional variances σ²₁,..,σ²ₙ
    conditional_variances = garch_results.conditional_volatility**2
    
    # Step 3: Compute sequence M₁,..,Mₙ where Mₜ = ε²ₜ - σ²ₜ
    residuals = garch_results.resid
    epsilon_squared = residuals**2
    M_sequence = epsilon_squared - conditional_variances
    
    # Prepare data matrix for SVR training
    X = np.column_stack([
        conditional_variances[:-1],  # σ²ₜ₋₁
        M_sequence[:-1],             # Mₜ₋₁
        epsilon_squared[:-1]         # ε²ₜ₋₁
    ])
    y = M_sequence[1:]               # Mₜ (target)
    
    # Standardize features
    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X)
    
    # Step 4: Train SVR on the data matrix
    svr_model = SVR(kernel='rbf', C=1.0, epsilon=0.1)
    svr_model.fit(X_scaled, y)
    
    # Prepare the most recent data point for forecasting
    last_X = np.array([
        conditional_variances.iloc[-1],   # σ²ₙ
        M_sequence.iloc[-1],              # Mₙ
        epsilon_squared.iloc[-1]          # ε²ₙ
    ]).reshape(1, -1)
    last_X_scaled = scaler.transform(last_X)
    
    # Step 5: Compute one-day-ahead forecast of Mₙ₊₁ with SVR
    M_forecast = svr_model.predict(last_X_scaled)[0]
    
    # Step 6: Compute one-day-ahead forecast of σₙ₊₁ using GARCH model
    garch_forecast = garch_results.forecast(horizon=1)
    sigma_forecast = garch_forecast.variance.values[-1, 0]
    
    # Step 7: Compute forecasted value of ε²ₙ₊₁ = Mₙ₊₁ + σ²ₙ₊₁
    epsilon_squared_forecast = M_forecast + sigma_forecast
    
    return epsilon_squared_forecast, sigma_forecast, M_forecast


In [13]:
# Run the hybrid model
forecast, sigma_fc, M_fc = garch_svr_hybrid(r_train)

print(f"Forecasted ε²ₙ₊₁: {forecast:.6f}")
print(f"GARCH forecast σ²ₙ₊₁: {sigma_fc:.6f}")
print(f"SVR forecast Mₙ₊₁: {M_fc:.6f}")

Forecasted ε²ₙ₊₁: 0.091059
GARCH forecast σ²ₙ₊₁: 0.003721
SVR forecast Mₙ₊₁: 0.087338


In [29]:
predicted_volatility = []
for i in range(len(r_test)):
    log_returns = pd.concat([r_train, r_test[:i]])
    _, sigma_fc, _ = garch_svr_hybrid(log_returns)
    predicted_volatility.append(sigma_fc)

In [30]:
predicted_volatility = pd.DataFrame(predicted_volatility, index = r_test.index)

In [31]:
predicted_volatility

Unnamed: 0_level_0,0
timestamp,Unnamed: 1_level_1
2024-01-10,0.003721
2024-01-11,0.003977
2024-01-12,0.003559
2024-01-13,0.004015
2024-01-14,0.003250
...,...
2025-02-24,0.001613
2025-02-25,0.003663
2025-02-26,0.003309
2025-02-27,0.002992
