-
Notifications
You must be signed in to change notification settings - Fork 851
/
mcnemar.py
111 lines (89 loc) · 3.61 KB
/
mcnemar.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
# Sebastian Raschka 2014-2017
# mlxtend Machine Learning Library Extensions
#
# Author: Sebastian Raschka <sebastianraschka.com>
#
# License: BSD 3 clause
import numpy as np
from scipy.stats import chisqprob
from scipy.stats import binom
def mcnemar_table(y_target, y_model1, y_model2):
    """
    Compute a 2x2 contingency table for McNemar's test.

    Parameters
    -----------
    y_target : array-like, shape=[n_samples]
        True class labels as 1D NumPy array.
    y_model1 : array-like, shape=[n_samples]
        Predicted class labels from model 1 as 1D NumPy array.
    y_model2 : array-like, shape=[n_samples]
        Predicted class labels from model 2 as 1D NumPy array.

    Returns
    ----------
    tb : array-like, shape=[2, 2]
        2x2 contingency table with the following contents:
        a: tb[0, 0]: # of samples that both models predicted correctly
        b: tb[0, 1]: # of samples that model 1 got right and model 2 got wrong
        c: tb[1, 0]: # of samples that model 2 got right and model 1 got wrong
        d: tb[1, 1]: # of samples that both models predicted incorrectly

    Raises
    ----------
    ValueError
        If any input is not 1-dimensional, or if the inputs do not all
        contain the same number of elements.

    """
    # Accept any array-like (lists, tuples, ...), not only ndarrays.
    y_target = np.asarray(y_target)
    y_model1 = np.asarray(y_model1)
    y_model2 = np.asarray(y_model2)

    for ary in (y_target, y_model1, y_model2):
        if ary.ndim != 1:
            raise ValueError('One or more input arrays are not 1-dimensional.')

    if y_target.shape[0] != y_model1.shape[0]:
        raise ValueError('y_target and y_model1 contain a different number'
                         ' of elements.')

    if y_target.shape[0] != y_model2.shape[0]:
        raise ValueError('y_target and y_model2 contain a different number'
                         ' of elements.')

    # Boolean masks marking the samples each model classified correctly.
    m1_correct = y_target == y_model1
    m2_correct = y_target == y_model2

    # Each cell is addressed by an explicit mask combination so that the
    # layout cannot drift away from the one documented above.
    tb = np.zeros((2, 2), dtype=int)
    tb[0, 0] = np.sum(m1_correct & m2_correct)
    tb[0, 1] = np.sum(m1_correct & ~m2_correct)
    tb[1, 0] = np.sum(~m1_correct & m2_correct)
    tb[1, 1] = np.sum(~m1_correct & ~m2_correct)
    return tb
def mcnemar(ary, corrected=True, exact=False):
    """
    McNemar test for paired nominal data

    Parameters
    -----------
    ary : array-like, shape=[2, 2]
        2 x 2 contingency table (as returned by evaluate.mcnemar_table),
        where
        a: ary[0, 0]: # of samples that both models predicted correctly
        b: ary[0, 1]: # of samples that model 1 got right and model 2 got wrong
        c: ary[1, 0]: # of samples that model 2 got right and model 1 got wrong
        d: ary[1, 1]: # of samples that both models predicted incorrectly
    corrected : bool (default: True)
        Uses Edward's continuity correction for chi-squared if `True`.
        Ignored if `exact=True`.
    exact : bool, (default: False)
        If `True`, uses an exact two-sided binomial test comparing b to
        a binomial distribution with n = b + c and p = 0.5.
        It is highly recommended to use `exact=True` for sample sizes < 25
        since the test statistic is then not well-approximated
        by the chi-squared distribution!

    Returns
    -----------
    chi2, p : float or None, float
        Returns the chi-squared value and the p-value;
        if `exact=True` (default: `False`), `chi2` is `None`.
        If there are no discordant pairs (b + c == 0), the statistic is
        undefined; by convention `p` is 1.0 and `chi2` is 0.0
        (or `None` if `exact=True`).

    Raises
    -----------
    ValueError
        If `ary` does not have shape (2, 2).

    """
    # Function-local import: the survival function of the chi-squared
    # distribution stands in for `scipy.stats.chisqprob`, which is no
    # longer available in SciPy >= 1.0. The alias avoids clashing with
    # the local `chi2` statistic below.
    from scipy.stats import chi2 as chi2_dist

    ary = np.asarray(ary)
    if ary.shape != (2, 2):
        raise ValueError('Input array must be a 2x2 array.')

    # Only the discordant cells enter the test.
    b = ary[0, 1]
    c = ary[1, 0]
    n = b + c

    # No discordant pairs: nothing distinguishes the two models. Return
    # early instead of dividing by zero (which would yield inf/nan and a
    # misleading p-value).
    if n == 0:
        return (None if exact else 0.0), 1.0

    if exact:
        # Two-sided exact test: double the tail probability of the smaller
        # discordant count, capped at 1. This is symmetric in b and c.
        p = min(1.0, 2.0 * float(binom.cdf(min(b, c), n, 0.5)))
        return None, p

    diff = abs(b - c)
    if corrected:
        chi2 = (diff - 1.0)**2 / float(n)
    else:
        chi2 = diff**2 / float(n)
    chi2 = float(chi2)

    # P(X >= chi2) for a chi-squared distribution with 1 degree of freedom.
    p = float(chi2_dist.sf(chi2, 1))
    return chi2, p