In [45]:
import numpy as np
from scipy.stats import norm

def newey_west_bandwidth(T):
    """
    Calculate the Newey-West bandwidth for HAC variance estimation.
    
    Parameters:
        T (int): Number of observations.
    
    Returns:
        int: Optimal lag length.
    """
    return int(np.floor(4 * (T / 100) ** (2 / 9)))

def diebold_mariano_test(actual, forecast1, forecast2, loss="squared", significance_level=0.05):
    """
    Perform the Diebold-Mariano test for predictive accuracy.
    
    Parameters:
        actual (array-like): Actual observed values.
        forecast1 (array-like): Forecasted values from model 1.
        forecast2 (array-like): Forecasted values from model 2.
        loss (str): Loss function, either "squared" or "absolute".
        significance_level (float): Significance level for the hypothesis test.
    
    Returns:
        dict: Dictionary containing test statistic, p-value, and decision.
    """
    # Ensure inputs are numpy arrays
    actual, forecast1, forecast2 = np.array(actual), np.array(forecast1), np.array(forecast2)
    
    # Compute forecast errors
    e1 = actual - forecast1
    e2 = actual - forecast2
    
    # Compute loss differentials
    if loss == "squared":
        d = (e1 ** 2) - (e2 ** 2)
    elif loss == "absolute":
        d = np.abs(e1) - np.abs(e2)
    else:
        raise ValueError("Loss function must be 'squared' or 'absolute'")
    
    # Mean of the loss differential
    d_mean = np.mean(d)
    
    # Determine the Newey-West optimal lag
    T = len(d)
    optimal_lag = newey_west_bandwidth(T)
    
    # HAC variance estimation using Bartlett weights
    gamma = [np.sum(d[:T - lag] * d[lag:]) / T for lag in range(optimal_lag + 1)]
    hac_var = gamma[0] + 2 * sum((1 - lag / (optimal_lag + 1)) * gamma[lag] for lag in range(1, optimal_lag + 1))
    
    # Diebold-Mariano statistic
    dm_stat = d_mean / np.sqrt(hac_var / T)
    
    # p-value
    p_value = 2 * (1 - norm.cdf(np.abs(dm_stat)))  # Two-sided test
    
    # Decision
    reject_null = p_value < significance_level
    
    return {
        "DM_statistic": dm_stat,
        "p_value": p_value,
        "optimal_lag": optimal_lag,
        "reject_null": reject_null,
        "decision": "Reject H0" if reject_null else "Fail to reject H0"
    }



In [48]:
# Actual values and forecasts
np.random.seed(1234)
Horizon = 100
actual = np.random.normal(0, 1, Horizon)
forecast1 = np.random.normal(0, 1, Horizon)
forecast2 = np.random.normal(0, 1, Horizon)

# Perform Diebold-Mariano test
result = diebold_mariano_test(actual, forecast1, forecast2, loss="squared", significance_level=0.05)

# Display results
print("Diebold-Mariano Test Results:")
print(f"DM Statistic: {result['DM_statistic']:.4f}")
print(f"P-Value: {result['p_value']:.4f}")
print(f"Decision: {result['decision']}")


Diebold-Mariano Test Results:
DM Statistic: -0.8304
P-Value: 0.4063
Decision: Fail to reject H0
