# Deep Learning vs Statistical Models for VaR Prediction
## MENA Region Stock Market Analysis

This notebook implements and compares deep learning models (ANN, LSTM, CNN) with statistical models (ARIMA, SARIMA) for predicting Value-at-Risk in MENA stock indices.

In [132]:
# Import Required Libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error
import warnings
warnings.filterwarnings('ignore')

# TensorFlow/Keras imports
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping

# Statsmodels for ARIMA/SARIMA
from statsmodels.tsa.arima.model import ARIMA
from statsmodels.tsa.statespace.sarimax import SARIMAX

print("Libraries imported successfully!")

Libraries imported successfully!


In [133]:
# ==================== DATA LOADING & PREPROCESSING ====================

class DataPreprocessor:
    def __init__(self, lookback=60):
        self.lookback = lookback
        self.scaler = MinMaxScaler(feature_range=(0, 1))
        self.data_stats = {}
    
    def load_data(self, filepath):
        """Load CSV data and handle formatting"""
        df = pd.read_csv(filepath)
        df['Date'] = pd.to_datetime(df['Date'])
        df = df.sort_values('Date').reset_index(drop=True)
        df['Price'] = pd.to_numeric(df['Price'].str.replace(',', ''), errors='coerce')
        return df
    
    def handle_missing_values(self, df, method='forward_fill'):
        """
        Methods:
        - 'forward_fill': Use last known price (default, best for market holidays/weekends)
        - 'interpolate': Linear interpolation between values
        - 'drop': Remove rows with missing values
        """
        df = df.copy()
        missing_before = df['Price'].isna().sum()
        
        if missing_before == 0:
            print(f"✓ No missing values found")
            return df
        
        print(f"⚠ Missing values found: {missing_before} ({missing_before/len(df)*100:.2f}%)")
        
        if method == 'forward_fill':
            # Forward fill with backward fill for any remaining NaNs at the start
            df['Price'] = df['Price'].fillna(method='ffill').fillna(method='bfill')
            print(f"✓ Applied forward fill (best for market data)")
        elif method == 'interpolate':
            # Linear interpolation - good when prices jump unexpectedly
            df['Price'] = df['Price'].interpolate(method='linear')
            print(f"✓ Applied linear interpolation")
        elif method == 'drop':
            # Drop rows with missing prices
            df = df.dropna(subset=['Price']).reset_index(drop=True)
            print(f"✓ Dropped {missing_before} rows with missing prices")
        
        remaining_missing = df['Price'].isna().sum()
        if remaining_missing > 0:
            df = df.dropna(subset=['Price']).reset_index(drop=True)
            print(f"✓ Dropped remaining {remaining_missing} missing values")
        
        return df
    
    def detect_outliers(self, df, window=20, std_threshold=3):
        """Detect and report potential outliers using rolling statistics"""
        df = df.copy()
        df['Returns'] = df['Price'].pct_change() * 100
        
        # Calculate rolling statistics
        rolling_mean = df['Returns'].rolling(window=window).mean()
        rolling_std = df['Returns'].rolling(window=window).std()
        
        # Identify outliers (beyond 3 standard deviations)
        outlier_threshold_upper = rolling_mean + (std_threshold * rolling_std)
        outlier_threshold_lower = rolling_mean - (std_threshold * rolling_std)
        
        outliers = (df['Returns'] > outlier_threshold_upper) | (df['Returns'] < outlier_threshold_lower)
        num_outliers = outliers.sum()
        
        if num_outliers > 0:
            print(f"⚠ Detected {num_outliers} potential outliers ({num_outliers/len(df)*100:.2f}%)")
            outlier_dates = df[outliers]['Date'].dt.strftime('%Y-%m-%d').tolist()
            print(f"  Outlier dates: {', '.join(outlier_dates[:5])}" + 
                  (f" ... and {num_outliers-5} more" if num_outliers > 5 else ""))
        else:
            print(f"✓ No significant outliers detected")
        
        return df
    
    def calculate_returns(self, prices):
        """Calculate logarithmic returns"""
        returns = np.log(prices / prices.shift(1)).dropna()
        return returns
    
    def create_sequences(self, data, lookback):
        """Create sequences for LSTM"""
        X, y = [], []
        for i in range(len(data) - lookback):
            X.append(data[i:i + lookback])
            y.append(data[i + lookback])
        return np.array(X), np.array(y)
    
    def preprocess_for_deeplearning(self, df):
        """Preprocess data for deep learning models"""
        prices = df['Price'].values.reshape(-1, 1)
        returns = self.calculate_returns(df['Price']).values.reshape(-1, 1)
        
        # Normalize returns
        returns_scaled = self.scaler.fit_transform(returns)
        
        # Create sequences
        X, y = self.create_sequences(returns_scaled, self.lookback)
        
        return X, y, returns_scaled
    
    def split_train_test(self, X, y, test_size=0.2):
        """Split data into train and test sets"""
        split_idx = int(len(X) * (1 - test_size))
        X_train, X_test = X[:split_idx], X[split_idx:]
        y_train, y_test = y[:split_idx], y[split_idx:]
        return X_train, X_test, y_train, y_test
    
    def get_data_summary(self, df, name=""):
        """Get comprehensive data summary"""
        print(f"\n{'='*60}")
        print(f"DATA SUMMARY: {name}")
        print(f"{'='*60}")
        print(f"Date Range: {df['Date'].min().date()} to {df['Date'].max().date()}")
        print(f"Total Records: {len(df)}")
        print(f"Missing Values: {df['Price'].isna().sum()}")
        print(f"\nPrice Statistics:")
        print(f"  Min:    ${df['Price'].min():.2f}")
        print(f"  Max:    ${df['Price'].max():.2f}")
        print(f"  Mean:   ${df['Price'].mean():.2f}")
        print(f"  Median: ${df['Price'].median():.2f}")
        print(f"  Std:    ${df['Price'].std():.2f}")
        
        returns = self.calculate_returns(df['Price'])
        print(f"\nReturn Statistics (Log Returns):")
        print(f"  Mean:   {returns.mean()*100:.4f}%")
        print(f"  Std:    {returns.std()*100:.4f}%")
        print(f"  Min:    {returns.min()*100:.4f}%")
        print(f"  Max:    {returns.max()*100:.4f}%")
        print(f"  Skew:   {returns.skew():.4f}")
        print(f"  Kurt:   {returns.kurtosis():.4f}")
        print(f"{'='*60}\n")

print("✓ DataPreprocessor class created!")

✓ DataPreprocessor class created!


In [134]:
# ==================== MODEL EVALUATOR ====================

class ModelEvaluator:
    """Evaluate model performance with comprehensive metrics"""
    
    @staticmethod
    def calculate_metrics(y_true, y_pred):
        """Calculate evaluation metrics"""
        mae = mean_absolute_error(y_true, y_pred)
        rmse = np.sqrt(mean_squared_error(y_true, y_pred))
        mape = np.mean(np.abs((y_true - y_pred) / y_true)) * 100
        
        return {'MAE': mae, 'RMSE': rmse, 'MAPE': mape}
    
    @staticmethod
    def calculate_var(returns, confidence_level=0.95):
        """Calculate VaR using historical simulation"""
        var = np.percentile(returns, (1 - confidence_level) * 100)
        return var
    
    @staticmethod
    def calculate_cvar(returns, confidence_level=0.95):
        """Calculate Conditional Value at Risk (Expected Shortfall)"""
        var = np.percentile(returns, (1 - confidence_level) * 100)
        cvar = returns[returns <= var].mean()
        return cvar
    
    @staticmethod
    def confusion_matrix_var(returns, predicted_var, confidence_level=0.95, threshold=None):
        """
        Create confusion matrix for VaR predictions
        Compares actual vs predicted risk events
        """
        if threshold is None:
            threshold = np.percentile(returns, (1 - confidence_level) * 100)
        
        # Actual events: returns below threshold
        actual_events = (returns < threshold).astype(int)
        # Predicted events: returns below predicted VaR
        predicted_events = (returns < predicted_var).astype(int)
        
        # Confusion matrix
        TP = np.sum((actual_events == 1) & (predicted_events == 1))
        TN = np.sum((actual_events == 0) & (predicted_events == 0))
        FP = np.sum((actual_events == 0) & (predicted_events == 1))
        FN = np.sum((actual_events == 1) & (predicted_events == 0))
        
        # Calculate metrics
        accuracy = (TP + TN) / (TP + TN + FP + FN) if (TP + TN + FP + FN) > 0 else 0
        precision = TP / (TP + FP) if (TP + FP) > 0 else 0
        recall = TP / (TP + FN) if (TP + FN) > 0 else 0
        f1 = 2 * (precision * recall) / (precision + recall) if (precision + recall) > 0 else 0
        
        return {
            'TP': TP, 'TN': TN, 'FP': FP, 'FN': FN,
            'accuracy': accuracy,
            'precision': precision,
            'recall': recall,
            'f1_score': f1
        }
    
    @staticmethod
    def plot_confusion_matrix(cm_dict, model_name, ax=None):
        """Plot confusion matrix for VaR predictions"""
        if ax is None:
            fig, ax = plt.subplots(figsize=(6, 5))
        
        cm = np.array([[cm_dict['TN'], cm_dict['FP']], 
                       [cm_dict['FN'], cm_dict['TP']]])
        
        sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', 
                    xticklabels=['No Risk', 'Risk'], 
                    yticklabels=['No Risk', 'Risk'],
                    cbar=False, ax=ax, annot_kws={'size': 12, 'weight': 'bold'})
        
        ax.set_ylabel('Actual', fontsize=11, fontweight='bold')
        ax.set_xlabel('Predicted', fontsize=11, fontweight='bold')
        ax.set_title(f'{model_name} - VaR Confusion Matrix\n(Confidence: 95%)', 
                     fontsize=12, fontweight='bold')
        
        # Add metrics text
        metrics_text = f"Accuracy: {cm_dict['accuracy']:.3f}\nPrecision: {cm_dict['precision']:.3f}\n"
        metrics_text += f"Recall: {cm_dict['recall']:.3f}\nF1: {cm_dict['f1_score']:.3f}"
        ax.text(1.4, 0.5, metrics_text, transform=ax.transAxes, 
                fontsize=10, verticalalignment='center',
                bbox=dict(boxstyle='round', facecolor='lightblue', alpha=0.5))

print("✓ ModelEvaluator class created with VaR and confusion matrix!")

✓ ModelEvaluator class created with VaR and confusion matrix!


In [135]:
# ==================== ARTIFICIAL NEURAL NETWORK ====================

class ANNModel:
    """Artificial Neural Network for return prediction"""
    
    def __init__(self, input_shape):
        self.model = Sequential([
            Dense(128, activation='relu', input_shape=(input_shape,)),
            Dropout(0.2),
            Dense(64, activation='relu'),
            Dropout(0.2),
            Dense(32, activation='relu'),
            Dropout(0.1),
            Dense(1, activation='linear')
        ])
        self.model.compile(optimizer=Adam(learning_rate=0.001), loss='mse', metrics=['mae'])
        self.history = None
        self.training_time = 0
    
    def train(self, X_train, y_train, epochs=50, batch_size=32, validation_split=0.1):
        """Train the ANN model"""
        import time
        start_time = time.time()
        
        early_stop = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
        self.history = self.model.fit(
            X_train, y_train,
            epochs=epochs,
            batch_size=batch_size,
            validation_split=validation_split,
            callbacks=[early_stop],
            verbose=0
        )
        
        self.training_time = time.time() - start_time
        return self.history
    
    def predict(self, X):
        """Make predictions"""
        return self.model.predict(X, verbose=0)
    
    def get_summary(self):
        """Get model summary"""
        return self.model.summary()

print("✓ ANNModel class created!")

✓ ANNModel class created!


In [136]:
# ==================== LSTM NEURAL NETWORK ====================

class LSTMModel:
    """Long Short-Term Memory Network for sequential prediction"""
    
    def __init__(self, input_shape):
        self.model = Sequential([
            LSTM(100, return_sequences=True, input_shape=(input_shape[0], input_shape[1])),
            Dropout(0.2),
            LSTM(50, return_sequences=False),
            Dropout(0.2),
            Dense(25, activation='relu'),
            Dense(1, activation='linear')
        ])
        self.model.compile(optimizer=Adam(learning_rate=0.001), loss='mse', metrics=['mae'])
        self.history = None
        self.training_time = 0
    
    def train(self, X_train, y_train, epochs=50, batch_size=32, validation_split=0.1):
        """Train the LSTM model"""
        import time
        start_time = time.time()
        
        early_stop = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
        self.history = self.model.fit(
            X_train, y_train,
            epochs=epochs,
            batch_size=batch_size,
            validation_split=validation_split,
            callbacks=[early_stop],
            verbose=0
        )
        
        self.training_time = time.time() - start_time
        return self.history
    
    def predict(self, X):
        """Make predictions"""
        return self.model.predict(X, verbose=0)
    
    def get_summary(self):
        """Get model summary"""
        return self.model.summary()

print("✓ LSTMModel class created!")

✓ LSTMModel class created!


In [137]:
# ==================== CONVOLUTIONAL NEURAL NETWORK ====================

class CNNModel:
    """Convolutional Neural Network adapted for time series"""
    
    def __init__(self, input_shape):
        # Reshape input for CNN: (samples, timesteps, features, 1)
        self.model = Sequential([
            # First Conv Block
            keras.layers.Conv1D(32, 3, padding='same', activation='relu', input_shape=(input_shape[0], input_shape[1])),
            keras.layers.Conv1D(32, 3, activation='relu'),
            keras.layers.MaxPooling1D(2),
            Dropout(0.25),
            
            # Second Conv Block
            keras.layers.Conv1D(64, 3, padding='same', activation='relu'),
            keras.layers.Conv1D(64, 3, activation='relu'),
            keras.layers.MaxPooling1D(2),
            Dropout(0.25),
            
            # Third Conv Block
            keras.layers.Conv1D(128, 3, padding='same', activation='relu'),
            keras.layers.Conv1D(128, 3, activation='relu'),
            keras.layers.MaxPooling1D(2),
            Dropout(0.25),
            
            # Dense layers
            keras.layers.Flatten(),
            Dense(256, activation='relu'),
            Dropout(0.5),
            Dense(128, activation='relu'),
            Dropout(0.3),
            Dense(1, activation='linear')
        ])
        
        self.model.compile(optimizer=Adam(learning_rate=0.001), loss='mse', metrics=['mae'])
        self.history = None
        self.training_time = 0
    
    def train(self, X_train, y_train, epochs=50, batch_size=32, validation_split=0.1):
        """Train the CNN model"""
        import time
        start_time = time.time()
        
        early_stop = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
        self.history = self.model.fit(
            X_train, y_train,
            epochs=epochs,
            batch_size=batch_size,
            validation_split=validation_split,
            callbacks=[early_stop],
            verbose=0
        )
        
        self.training_time = time.time() - start_time
        return self.history
    
    def predict(self, X):
        """Make predictions"""
        return self.model.predict(X, verbose=0)
    
    def get_summary(self):
        """Get model summary"""
        return self.model.summary()

print("✓ CNNModel class created!")

✓ CNNModel class created!


In [138]:
# ==================== ARIMA MODEL ====================

class ARIMAModel:
    """ARIMA model for time series forecasting"""
    
    def __init__(self, order=(5, 1, 2)):
        self.order = order
        self.model = None
        self.fitted_model = None
        self.training_time = 0
        self.predictions = []
    
    def fit(self, returns):
        """Fit ARIMA model"""
        import time
        start_time = time.time()
        
        self.model = ARIMA(returns, order=self.order)
        self.fitted_model = self.model.fit()
        
        self.training_time = time.time() - start_time
        return self.fitted_model
    
    def predict(self, steps=1):
        """Make predictions"""
        forecast = self.fitted_model.get_forecast(steps=steps)
        return forecast.predicted_mean.values
    
    def get_summary(self):
        """Get model summary"""
        if self.fitted_model:
            return self.fitted_model.summary()
        return "Model not fitted yet"

print("✓ ARIMAModel class created!")

✓ ARIMAModel class created!


In [139]:
# ==================== SARIMA MODEL ====================

class SARIMAModel:
    """Seasonal ARIMA model for seasonal time series"""
    
    def __init__(self, order=(1, 1, 1), seasonal_order=(1, 1, 1, 12)):
        self.order = order
        self.seasonal_order = seasonal_order
        self.model = None
        self.fitted_model = None
        self.training_time = 0
        self.predictions = []
    
    def fit(self, returns):
        """Fit SARIMA model"""
        import time
        start_time = time.time()
        
        self.model = SARIMAX(returns, order=self.order, seasonal_order=self.seasonal_order)
        self.fitted_model = self.model.fit(disp=False)
        
        self.training_time = time.time() - start_time
        return self.fitted_model
    
    def predict(self, steps=1):
        """Make predictions"""
        forecast = self.fitted_model.get_forecast(steps=steps)
        return forecast.predicted_mean.values
    
    def get_summary(self):
        """Get model summary"""
        if self.fitted_model:
            return self.fitted_model.summary()
        return "Model not fitted yet"

print("✓ SARIMAModel class created!")

✓ SARIMAModel class created!


In [140]:
# ==================== MAIN EXECUTION SETUP ====================

print("=" * 80)
print("VaR PREDICTION: DEEP LEARNING VS STATISTICAL MODELS")
print("MENA Region Stock Market Indices")
print("=" * 80)

# Configuration
indices = ['Tunindex', 'ADI', 'MASI', 'TASI']
lookback = 60
epochs = 50

# Paths
data_path = r'C:\Users\sfaxi\Desktop\Deep Learning\data'

results = {}

print(f"\nConfiguration:")
print(f"  Indices: {indices}")
print(f"  Lookback: {lookback}")
print(f"  Epochs: {epochs}")

VaR PREDICTION: DEEP LEARNING VS STATISTICAL MODELS
MENA Region Stock Market Indices

Configuration:
  Indices: ['Tunindex', 'ADI', 'MASI', 'TASI']
  Lookback: 60
  Epochs: 50


In [141]:
# ==================== DATA LOADING & INITIAL CLEANING ====================

preprocessor = DataPreprocessor(lookback=60)

datasets = {
    'ADI': 'data/ADI.csv',
    'CAC40': 'data/CAC40.csv',
    'MASI': 'data/MASI.csv',
    'S&P500': 'data/S&P500.csv',
    'TASI': 'data/TASI.csv',
    'Tunindex': 'data/Tunindex.csv'
}

all_data = {}
print("\n" + "="*60)
print("LOADING & CLEANING DATA")
print("="*60)

for name, filepath in datasets.items():
    try:
        print(f"\nProcessing {name}...")
        df = preprocessor.load_data(filepath)
        print(f"  Loaded {len(df)} records")
        
        df = preprocessor.handle_missing_values(df, method='forward_fill')
        df = preprocessor.detect_outliers(df, window=20, std_threshold=3)
        preprocessor.get_data_summary(df, name)
        
        all_data[name] = df
        
    except Exception as e:
        print(f"  ✗ Error processing {name}: {str(e)}")

print(f"\n✓ Successfully loaded and cleaned {len(all_data)} datasets")


LOADING & CLEANING DATA

Processing ADI...
  Loaded 2585 records
✓ No missing values found
⚠ Detected 13 potential outliers (0.50%)
  Outlier dates: 2005-03-02, 2007-10-21, 2008-01-22, 2009-11-30, 2011-01-30 ... and 8 more

DATA SUMMARY: ADI
Date Range: 2005-01-03 to 2014-12-31
Total Records: 2585
Missing Values: 0

Price Statistics:
  Min:    $2136.64
  Max:    $6237.98
  Mean:   $3588.18
  Median: $3298.11
  Std:    $1031.20

Return Statistics (Log Returns):
  Mean:   0.0131%
  Std:    1.2701%
  Min:    -8.6793%
  Max:    7.6295%
  Skew:   -0.0614
  Kurt:   6.9896


Processing CAC40...
  Loaded 2560 records
✓ No missing values found
⚠ Detected 9 potential outliers (0.35%)
  Outlier dates: 2007-02-27, 2008-01-21, 2008-09-19, 2010-05-10, 2012-03-06 ... and 4 more

DATA SUMMARY: CAC40
Date Range: 2005-01-03 to 2014-12-31
Total Records: 2560
Missing Values: 0

Price Statistics:
  Min:    $2519.29
  Max:    $6168.15
  Mean:   $4174.61
  Median: $4038.35
  Std:    $797.36

Return Statisti

In [142]:
# ==================== PROCESS ALL INDICES ====================

print(f"\n{'=' * 80}")
print(f"PROCESSING ALL INDICES")
print(f"{'=' * 80}\n")

# Loop through all indices
for index in indices:
    try:
        print(f"\n{'=' * 80}")
        print(f"Processing: {index}")
        print(f"{'=' * 80}\n")
        
        preprocessor = DataPreprocessor(lookback=lookback)
        df = preprocessor.load_data(rf"{data_path}\{index}.csv")
        
        print(f"Data shape: {df.shape}")
        print(f"Date range: {df['Date'].min()} to {df['Date'].max()}\n")
        
        X, y, returns_scaled = preprocessor.preprocess_for_deeplearning(df)
        X_train, X_test, y_train, y_test = preprocessor.split_train_test(X, y)
        
        print(f"Training set size: {X_train.shape[0]}")
        print(f"Test set size: {X_test.shape[0]}\n")
        
        # ==================== TRAIN ANN MODEL ====================
        print("Training ANN Model...")
        ann_model = ANNModel(input_shape=X_train.shape[1] * X_train.shape[2])
        X_train_flat = X_train.reshape(X_train.shape[0], -1)
        X_test_flat = X_test.reshape(X_test.shape[0], -1)
        
        history_ann = ann_model.train(X_train_flat, y_train, epochs=epochs)
        y_pred_ann = ann_model.predict(X_test_flat).flatten()
        metrics_ann = ModelEvaluator.calculate_metrics(y_test, y_pred_ann)
        
        print(f"  Accuracy: {(1 - metrics_ann['MAPE']/100):.4f} | MAE: {metrics_ann['MAE']:.8f}\n")
        
        # ==================== TRAIN LSTM MODEL ====================
        print("Training LSTM Model...")
        lstm_model = LSTMModel(input_shape=(X_train.shape[1], X_train.shape[2]))
        history_lstm = lstm_model.train(X_train, y_train, epochs=epochs)
        y_pred_lstm = lstm_model.predict(X_test).flatten()
        metrics_lstm = ModelEvaluator.calculate_metrics(y_test, y_pred_lstm)
        
        print(f"  Accuracy: {(1 - metrics_lstm['MAPE']/100):.4f} | MAE: {metrics_lstm['MAE']:.8f}\n")
        
        # ==================== TRAIN CNN MODEL ====================
        print("Training CNN Model...")
        cnn_model = CNNModel(input_shape=(X_train.shape[1], X_train.shape[2]))
        history_cnn = cnn_model.train(X_train, y_train, epochs=epochs)
        y_pred_cnn = cnn_model.predict(X_test).flatten()
        metrics_cnn = ModelEvaluator.calculate_metrics(y_test, y_pred_cnn)
        
        print(f"  Accuracy: {(1 - metrics_cnn['MAPE']/100):.4f} | MAE: {metrics_cnn['MAE']:.8f}\n")
        
        # ==================== TRAIN ARIMA MODEL ====================
        print("Training ARIMA Model...")
        returns = preprocessor.calculate_returns(df['Price'])
        arima_model = ARIMAModel(order=(5, 1, 2))
        arima_model.fit(returns)
        
        y_pred_arima = np.array([arima_model.predict(steps=1)[0] for _ in range(len(y_test))])
        y_pred_arima_denorm = preprocessor.scaler.inverse_transform(y_pred_arima.reshape(-1, 1)).flatten()
        y_test_denorm = preprocessor.scaler.inverse_transform(y_test.reshape(-1, 1)).flatten()
        metrics_arima = ModelEvaluator.calculate_metrics(y_test_denorm, y_pred_arima_denorm)
        
        print(f"  Accuracy: {(1 - metrics_arima['MAPE']/100):.4f} | MAE: {metrics_arima['MAE']:.8f}\n")
        
        # ==================== TRAIN SARIMA MODEL ====================
        print("Training SARIMA Model...")
        try:
            sarima_model = SARIMAModel(order=(1, 1, 1), seasonal_order=(1, 1, 1, 12))
            sarima_model.fit(returns)
            
            y_pred_sarima = np.array([sarima_model.predict(steps=1)[0] for _ in range(len(y_test))])
            y_pred_sarima_denorm = preprocessor.scaler.inverse_transform(y_pred_sarima.reshape(-1, 1)).flatten()
            metrics_sarima = ModelEvaluator.calculate_metrics(y_test_denorm, y_pred_sarima_denorm)
            
            print(f"  Accuracy: {(1 - metrics_sarima['MAPE']/100):.4f} | MAE: {metrics_sarima['MAE']:.8f}\n")
        except Exception as e:
            print(f"  ⚠ SARIMA failed: {str(e)}\n")
            metrics_sarima = metrics_arima.copy()
        
        # ==================== CALCULATE VaR ====================
        returns_denorm = preprocessor.scaler.inverse_transform(returns.values.reshape(-1, 1)).flatten()
        
        n_bootstrap = 1000
        var_95_ann_bs = [np.percentile(np.random.choice(y_pred_ann, len(y_pred_ann), replace=True), 5) for _ in range(n_bootstrap)]
        var_99_ann_bs = [np.percentile(np.random.choice(y_pred_ann, len(y_pred_ann), replace=True), 1) for _ in range(n_bootstrap)]
        var_95_lstm_bs = [np.percentile(np.random.choice(y_pred_lstm, len(y_pred_lstm), replace=True), 5) for _ in range(n_bootstrap)]
        var_99_lstm_bs = [np.percentile(np.random.choice(y_pred_lstm, len(y_pred_lstm), replace=True), 1) for _ in range(n_bootstrap)]
        var_95_cnn_bs = [np.percentile(np.random.choice(y_pred_cnn, len(y_pred_cnn), replace=True), 5) for _ in range(n_bootstrap)]
        var_99_cnn_bs = [np.percentile(np.random.choice(y_pred_cnn, len(y_pred_cnn), replace=True), 1) for _ in range(n_bootstrap)]
        var_95_arima_bs = [np.percentile(np.random.choice(y_pred_arima_denorm, len(y_pred_arima_denorm), replace=True), 5) for _ in range(n_bootstrap)]
        var_99_arima_bs = [np.percentile(np.random.choice(y_pred_arima_denorm, len(y_pred_arima_denorm), replace=True), 1) for _ in range(n_bootstrap)]
        
        var_95_ann = np.mean(var_95_ann_bs)
        var_99_ann = np.mean(var_99_ann_bs)
        var_95_lstm = np.mean(var_95_lstm_bs)
        var_99_lstm = np.mean(var_99_lstm_bs)
        var_95_cnn = np.mean(var_95_cnn_bs)
        var_99_cnn = np.mean(var_99_cnn_bs)
        var_95_arima = np.mean(var_95_arima_bs)
        var_99_arima = np.mean(var_99_arima_bs)
        
        # Create and store results
        performance_summary = {
            'Model': ['ANN', 'LSTM', 'CNN', 'ARIMA', 'SARIMA'],
            'Accuracy': [
                1 - metrics_ann['MAPE']/100,
                1 - metrics_lstm['MAPE']/100,
                1 - metrics_cnn['MAPE']/100,
                1 - metrics_arima['MAPE']/100,
                1 - metrics_sarima['MAPE']/100
            ],
            'MAE': [metrics_ann['MAE'], metrics_lstm['MAE'], metrics_cnn['MAE'], metrics_arima['MAE'], metrics_sarima['MAE']],
            'RMSE': [metrics_ann['RMSE'], metrics_lstm['RMSE'], metrics_cnn['RMSE'], metrics_arima['RMSE'], metrics_sarima['RMSE']],
            'MAPE (%)': [metrics_ann['MAPE'], metrics_lstm['MAPE'], metrics_cnn['MAPE'], metrics_arima['MAPE'], metrics_sarima['MAPE']],
            'VaR 95%': [var_95_ann, var_95_lstm, var_95_cnn, var_95_arima, var_95_arima],
            'VaR 99%': [var_99_ann, var_99_lstm, var_99_cnn, var_99_arima, var_99_arima],
            'Type': ['Deep Learning', 'Deep Learning', 'Deep Learning', 'Statistical', 'Statistical']
        }
        
        perf_df = pd.DataFrame(performance_summary)
        best_model_idx = perf_df['MAE'].idxmin()
        
        print(f"\n{'='*80}")
        print(f"BEST MODEL FOR {index}: {perf_df.loc[best_model_idx, 'Model']}")
        print(f"{'='*80}")
        print(f"Accuracy: {perf_df.loc[best_model_idx, 'Accuracy']:.4f} | MAE: {perf_df.loc[best_model_idx, 'MAE']:.8f}\n")
        
        # Store results
        results[index] = perf_df.to_dict('list')
        results[index]['Index'] = index
        
    except Exception as e:
        print(f"✗ Error processing {index}: {str(e)}\n")

print(f"\n{'=' * 80}")
print(f"✓ All indices processed successfully")
print(f"{'=' * 80}\n")


PROCESSING ALL INDICES


Processing: Tunindex

Data shape: (2471, 7)
Date range: 2005-01-03 00:00:00 to 2014-12-31 00:00:00

Training set size: 1928
Test set size: 482

Training ANN Model...
  Accuracy: 0.9379 | MAE: 0.03152391

Training LSTM Model...
  Accuracy: 0.9360 | MAE: 0.03207921

Training CNN Model...
  Accuracy: 0.9381 | MAE: 0.03156533

Training ARIMA Model...
  Accuracy: -184.4014 | MAE: 0.05017853

Training SARIMA Model...
  Accuracy: -184.8416 | MAE: 0.05029732


BEST MODEL FOR Tunindex: ANN
Accuracy: 0.9379 | MAE: 0.03152391


Processing: ADI

Data shape: (2585, 7)
Date range: 2005-01-03 00:00:00 to 2014-12-31 00:00:00

Training set size: 2019
Test set size: 505

Training ANN Model...
  Accuracy: 0.9004 | MAE: 0.04715889

Training LSTM Model...
  Accuracy: 0.9001 | MAE: 0.04790468

Training CNN Model...
  Accuracy: 0.9005 | MAE: 0.04790593

Training ARIMA Model...
  Accuracy: -222.2122 | MAE: 0.08694276

Training SARIMA Model...
  Accuracy: -223.7510 | MAE: 0.08753517



In [143]:
# ==================== FINAL RESULTS TABLE - ALL INDICES ====================

print("\n" + "=" * 160)
print("FINAL RESULTS - ALL INDICES & BEST MODELS")
print("=" * 160 + "\n")

# Create summary table for all indices
summary_data = []

for idx, result in results.items():
    mae_values = result['MAE']
    best_idx = mae_values.index(min(mae_values))
    best_model = result['Model'][best_idx]
    best_accuracy = result['Accuracy'][best_idx]
    best_mae = result['MAE'][best_idx]
    best_var_95 = result['VaR 95%'][best_idx]
    best_var_99 = result['VaR 99%'][best_idx]
    
    summary_data.append({
        'Index': idx,
        'Best Model': best_model,
        'Accuracy': best_accuracy,
        'MAE': best_mae,
        'VaR 95%': best_var_95,
        'VaR 99%': best_var_99
    })

summary_df = pd.DataFrame(summary_data)

print(f"{'Index':<15} {'Best Model':<15} {'Accuracy':<12} {'MAE':<15} {'VaR 95%':<15} {'VaR 99%':<15}")
print("─" * 160)

for _, row in summary_df.iterrows():
    print(f"{row['Index']:<15} {row['Best Model']:<15} {row['Accuracy']:<12.4f} {row['MAE']:<15.8f} {row['VaR 95%']:<15.8f} {row['VaR 99%']:<15.8f}")

print("\n" + "=" * 160)
print("KEY FINDINGS\n")

overall_best_idx = summary_df['MAE'].idxmin()
print(f"✓ Best Overall: {summary_df.loc[overall_best_idx, 'Index']} - {summary_df.loc[overall_best_idx, 'Best Model']}")
print(f"  Accuracy: {summary_df.loc[overall_best_idx, 'Accuracy']:.4f}\n")

print("✓ Best Model Per Index:")
for _, row in summary_df.iterrows():
    print(f"  • {row['Index']}: {row['Best Model']} (Accuracy: {row['Accuracy']:.4f})")

print("\n" + "=" * 160 + "\n")


FINAL RESULTS - ALL INDICES & BEST MODELS

Index           Best Model      Accuracy     MAE             VaR 95%         VaR 99%        
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
Tunindex        ANN             0.9379       0.03152391      0.54492307      0.54243565     
ADI             ANN             0.9004       0.04715889      0.52467543      0.52062058     
MASI            CNN             0.9232       0.04022718      0.52172631      0.52171826     
TASI            CNN             0.9348       0.02938790      0.52349389      0.52348596     

KEY FINDINGS

✓ Best Overall: TASI - CNN
  Accuracy: 0.9348

✓ Best Model Per Index:
  • Tunindex: ANN (Accuracy: 0.9379)
  • ADI: ANN (Accuracy: 0.9004)
  • MASI: CNN (Accuracy: 0.9232)
  • TASI: CNN (Accuracy: 0.9348)


