In [1]:
import numpy as np
from pathlib import Path

from mcnemar import mcnemar_test, statsmodels_mcnemar

In [2]:
# Directory (relative to the current working directory) from which the sample
# prediction and ground-truth CSV files are read in the next cell.
preds_dir = Path("./sample_predictions/")

In [3]:
# Read both models' predictions and the ground-truth labels from `preds_dir`.
# Every file is parsed by `np.genfromtxt` with a newline delimiter, in the
# order: model 1, model 2, ground truth.
preds1, preds2, gts = (
    np.genfromtxt(preds_dir / csv_name, delimiter="\n")
    for csv_name in (
        "model_1_predictions.csv",
        "model_2_predictions.csv",
        "ground_truth_labels.csv",
    )
)

In [4]:
# Reference result: statsmodels' exact McNemar test on the two prediction sets.
p_statsmodels_exact = statsmodels_mcnemar(preds1, preds2, gts)
print(f"{p_statsmodels_exact:.8f}")

0.02430651


In [5]:
# Exact McNemar test from this project's own implementation; printed with the
# same precision as the statsmodels result so the two can be compared directly.
p_exact = mcnemar_test(preds1, preds2, gts, 'exact')
print(f"{p_exact:.8f}")

0.02430651


In [6]:
# Mid-p variant of the McNemar test from this project's own implementation.
p_mid_p = mcnemar_test(preds1, preds2, gts, 'mid-p')
print(f"{p_mid_p:.8f}")

0.01667385


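The mid-p McNemar test's output above can be verified against MATLAB: `testcholdout`, which performs the mid-p McNemar test by default, returns the same p-value for the same inputs, as the following MATLAB session shows.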

```
>> preds1 = csvread("sample_predictions/model_1_predictions.csv");
>> preds2 = csvread("sample_predictions/model_2_predictions.csv");
>> gts = csvread("sample_predictions/ground_truth_labels.csv");

>> [h, p, e1, e2] = testcholdout(preds1, preds2, gts);
>> fprintf("%.8e", p);
1.66738478e-02
```