The goal of this notebook is to carry out the Clark-West Equal Predictive Ability test.

# Import packages

In [None]:
import numpy as np
import pandas as pd
import statsmodels.formula.api as smf

import scipy.stats

# Custom functions

In [None]:
def squared_error(x, y):
    """
    Return the squared difference between x and y, i.e. (x - y) squared.

    Accepts anything that supports subtraction and the power operator.
    """
    difference = x - y
    return difference ** 2

In [None]:
def f(true_values, model1_predictions, model2_predictions):
    """
    Return the adjusted squared-error loss differential between two models.

    The result is e1 - (e2 - adj), where e1 and e2 are the squared errors of
    model 1 and model 2 against true_values, and adj is the squared
    difference between the two models' predictions.
    """
    model1_loss = squared_error(true_values, model1_predictions)
    model2_loss = squared_error(true_values, model2_predictions)
    adjustment = squared_error(model1_predictions, model2_predictions)

    adjusted_model2_loss = model2_loss - adjustment
    return model1_loss - adjusted_model2_loss

In [None]:
def clark_west_epa_test_stat(true_values, model1_predictions, model2_predictions, maxlags=1):
    """
    Return the t-statistic of the mean adjusted loss differential.

    The adjusted loss differential f(...) is regressed on a constant only,
    with a HAC covariance estimate, and the statistic is the intercept
    divided by its standard error.

    true_values, model1_predictions, model2_predictions: equally long series
        that are forwarded unchanged to f().
    maxlags: number of lags used by the HAC covariance estimator
        (defaults to 1, the value that was previously hard-coded).
    """
    df = pd.DataFrame({'loss': f(true_values, model1_predictions, model2_predictions)})

    reg = smf.ols('loss ~ 1', data=df).fit(cov_type='HAC', cov_kwds={'maxlags': maxlags})

    # params and bse are Series labelled by term name; use .iloc for
    # positional access because integer keys on a label-indexed Series are
    # deprecated in recent pandas releases.
    return reg.params.iloc[0] / reg.bse.iloc[0]

In [None]:
def critical_value(significance_level, n):
    """
    Return the Student-t critical value for a 2-sided hypothesis test.

    The quantile is taken at 1 - significance_level / 2, using n - 1 degrees
    of freedom.
    """
    upper_quantile = 1 - significance_level / 2
    degrees_of_freedom = n - 1
    return scipy.stats.t.ppf(upper_quantile, degrees_of_freedom)

In [None]:
def p_value(test_stat, n):
    """
    Return the p-value of test_stat for a 2-sided hypothesis test.

    Uses the Student-t survival function with n - 1 degrees of freedom,
    evaluated at the absolute value of the statistic and doubled.
    """
    degrees_of_freedom = n - 1
    one_tail_probability = scipy.stats.t.sf(np.abs(test_stat), degrees_of_freedom)
    return one_tail_probability * 2

In [None]:
def clark_west_epa_test(full_model_predictions, control_model_predictions, y_true, alpha):
    """
    Run the Clark-West test of the full model against the control model.

    full_model_predictions, control_model_predictions, y_true: equally long
        series of predictions and realised values.
    alpha: significance level used for the critical value.

    Returns a tuple (reject, p_val): reject is True when the test statistic
    exceeds the critical value, and p_val is the p-value of the statistic.
    """
    n = len(full_model_predictions)

    # The control model is passed as model 1 and the full model as model 2.
    test_stat = clark_west_epa_test_stat(y_true, control_model_predictions, full_model_predictions)

    # critical_value() converts the sample size to n - 1 degrees of freedom
    # itself. Passing n - 1 here subtracted one twice and made the critical
    # value disagree with the p-value below, which uses n - 1 degrees.
    c_val = critical_value(alpha, n)

    p_val = p_value(test_stat, n)

    # NOTE(review): the rejection rule is one-sided (test_stat > c_val) while
    # the critical value and the p-value are both 2-sided; confirm which
    # alternative hypothesis is intended.
    return test_stat > c_val, p_val

# Constants / Parameters

In [None]:
# Number of periods in each generated series.
n = 10
# Significance level for the test.
alpha = 0.05

# Generate dummy data

In [None]:
# Three series of n standard-normal draws each; this is placeholder data
# that only serves to run the test end to end.
full_model_predictions = np.random.normal(loc=0.0, scale=1.0, size=n)
control_model_predictions = np.random.normal(loc=0.0, scale=1.0, size=n)
y_true = np.random.normal(loc=0.0, scale=1.0, size=n)

# Carry out the test

In [None]:
# Returns the rejection decision together with the p-value.
clark_west_epa_test(
    full_model_predictions=full_model_predictions,
    control_model_predictions=control_model_predictions,
    y_true=y_true,
    alpha=alpha,
)

# Archive

In [None]:
# The original call went to `clark_west_epa`, which is not defined in this
# notebook and raised a NameError. Fit the intercept-only HAC regression on
# the adjusted loss differential directly, keeping the same argument order
# (truth, control model, full model).
reg = smf.ols(
    'loss ~ 1',
    data=pd.DataFrame({'loss': f(y_true, control_model_predictions, full_model_predictions)}),
).fit(cov_type='HAC', cov_kwds={'maxlags': 1})

# t-statistic of the intercept; .iloc replaces the deprecated positional
# integer key on a label-indexed Series.
reg.params.iloc[0] / reg.bse.iloc[0]

reg.summary()

In [None]:
# Quick check of the p-value function on a large sample size.
p_value(test_stat=1.96, n=10000)

In [None]:
# squared_error is plain arithmetic, so it applies element-wise to whole
# arrays. This replaces the per-index loops and removes the dependence on
# the global n matching the series length.
full_model_loss = squared_error(np.asarray(full_model_predictions), np.asarray(y_true))
control_model_loss = squared_error(np.asarray(control_model_predictions), np.asarray(y_true))

# Unadjusted loss differential: full model loss minus control model loss.
loss_differential = full_model_loss - control_model_loss

In [None]:
# Display the loss differential computed in the previous cell.
loss_differential

In [None]:
# Regress the loss differential on a constant only, so the intercept is its
# mean, and use a HAC covariance estimate with one lag.
df = pd.DataFrame(data={'loss': loss_differential})

reg = smf.ols(formula='loss ~ 1', data=df).fit(
    cov_type='HAC',
    cov_kwds={'maxlags': 1},
)

In [None]:
# Show the fitted regression summary table.
reg.summary()

In [None]:
# NOTE: despite the name, this is the HAC standard error of the intercept
# (reg.bse), not a variance. .iloc replaces the deprecated positional integer
# key on a label-indexed Series.
ld_var = reg.bse.iloc[0]

In [None]:
# ld_var holds reg.bse for the intercept, which is already the standard
# error of the mean loss differential. The previous form took
# sqrt(ld_var / n), treating it as a variance and scaling by n a second time.
test_stat = np.mean(loss_differential) / ld_var

In [None]:
# True when the statistic exceeds the critical value at level alpha.
test_stat > critical_value(significance_level=alpha, n=n)