The goal of this notebook is to carry out the Diebold-Mariano Equal Predictive Ability test.

# Import packages

In [1]:
import numpy as np
import pandas as pd
import statsmodels.formula.api as smf

import scipy.stats

# Custom functions

In [2]:
def newey_west_var_est(loss_differential):
    df = pd.DataFrame({'loss':loss_differential})

    reg = smf.ols('loss ~ 1',data=df).fit(cov_type='HAC', cov_kwds={'maxlags': 1})

    return reg.bse[0]

In [3]:
def squared_error(x, y):
    return (x - y)**2

In [4]:
def dm_epa_test_stat(loss_differential, ld_var):
    return np.mean(loss_differential) / np.sqrt(ld_var / len(loss_differential))

In [5]:
def critical_value(significance_level, n):
    return scipy.stats.t.ppf(q=1-significance_level, df=n-1)

In [6]:
def p_value(test_stat, n):
    return scipy.stats.t.sf(np.abs(test_stat), n-1) * 2

In [7]:
def dm_epa_test(full_model_predictions, control_model_predictions, y_true, alpha):

    n = len(full_model_predictions)
    
    full_model_loss = np.array([squared_error(full_model_predictions[i], y_true[i]) for i in range(n)])
    control_model_loss = np.array([squared_error(control_model_predictions[i], y_true[i]) for i in range(n)])
    loss_differential = full_model_loss - control_model_loss

    ld_var = newey_west_var_est(loss_differential)

    test_stat = dm_epa_test_stat(loss_differential, ld_var)

    c_val = critical_value(alpha, n-1)

    p_val = p_value(test_stat, n)

    return test_stat > c_val, p_val

# Constants / Parameters

In [8]:
n = 10 # Number of periods
alpha = 0.05 # Significance level for the test

# Generate dummy data

In [9]:
full_model_predictions = np.random.normal(size=n)
control_model_predictions = np.random.normal(size=n)
y_true = np.random.normal(size=n)

# Carry out the test

In [10]:
dm_epa_test(full_model_predictions, control_model_predictions, y_true, alpha)

(True, 0.003923164683899836)

# Archive

In [10]:
full_model_loss = np.array([squared_error(full_model_predictions[i], y_true[i]) for i in range(n)])
control_model_loss = np.array([squared_error(control_model_predictions[i], y_true[i]) for i in range(n)])
loss_differential = full_model_loss - control_model_loss

In [11]:
loss_differential

array([ 1.63076361, -0.37546618,  4.13710595, -0.93489127, -1.95766985,
        1.649415  ,  0.29445781,  2.50587887,  2.29383309,  1.30011835])

In [15]:
df = pd.DataFrame({'loss':loss_differential})

reg = smf.ols('loss ~ 1',data=df).fit(cov_type='HAC', cov_kwds={'maxlags': 1})

In [17]:
reg.summary()



0,1,2,3
Dep. Variable:,loss,R-squared:,0.0
Model:,OLS,Adj. R-squared:,0.0
Method:,Least Squares,F-statistic:,
Date:,"Tue, 03 Jan 2023",Prob (F-statistic):,
Time:,22:45:25,Log-Likelihood:,-19.606
No. Observations:,10,AIC:,41.21
Df Residuals:,9,BIC:,41.51
Df Model:,0,,
Covariance Type:,HAC,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,1.0544,0.479,2.202,0.028,0.116,1.993

0,1,2,3
Omnibus:,0.021,Durbin-Watson:,2.435
Prob(Omnibus):,0.99,Jarque-Bera (JB):,0.22
Skew:,-0.076,Prob(JB):,0.896
Kurtosis:,2.289,Cond. No.,1.0


In [21]:
ld_var = reg.bse[0]

In [22]:
test_stat = np.mean(loss_differential) / np.sqrt(ld_var / len(loss_differential))

In [27]:
test_stat > critical_value(alpha, n)

True