-
Notifications
You must be signed in to change notification settings - Fork 2.8k
/
_adnorm.py
139 lines (120 loc) · 4.2 KB
/
_adnorm.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
# -*- coding: utf-8 -*-
"""
Created on Sun Sep 25 21:23:38 2011
Author: Josef Perktold and Scipy developers
License : BSD-3
"""
import numpy as np
from scipy import stats
from statsmodels.tools.validation import array_like, bool_like, int_like
def anderson_statistic(x, dist='norm', fit=True, params=(), axis=0):
"""
Calculate the Anderson-Darling a2 statistic.
Parameters
----------
x : array_like
The data to test.
dist : {'norm', callable}
The assumed distribution under the null of test statistic.
fit : bool
If True, then the distribution parameters are estimated.
Currently only for 1d data x, except in case dist='norm'.
params : tuple
The optional distribution parameters if fit is False.
axis : int
If dist is 'norm' or fit is False, then data can be an n-dimensional
and axis specifies the axis of a variable.
Returns
-------
{float, ndarray}
The Anderson-Darling statistic.
"""
x = array_like(x, 'x', ndim=None)
fit = bool_like(fit, 'fit')
axis = int_like(axis, 'axis')
y = np.sort(x, axis=axis)
nobs = y.shape[axis]
if fit:
if dist == 'norm':
xbar = np.expand_dims(np.mean(x, axis=axis), axis)
s = np.expand_dims(np.std(x, ddof=1, axis=axis), axis)
w = (y - xbar) / s
z = stats.norm.cdf(w)
# print z
elif callable(dist):
params = dist.fit(x)
# print params
z = dist.cdf(y, *params)
print(z)
else:
raise ValueError("dist must be 'norm' or a Callable")
else:
if callable(dist):
z = dist.cdf(y, *params)
else:
raise ValueError('if fit is false, then dist must be callable')
i = np.arange(1, nobs + 1)
sl1 = [None] * x.ndim
sl1[axis] = slice(None)
sl1 = tuple(sl1)
sl2 = [slice(None)] * x.ndim
sl2[axis] = slice(None, None, -1)
sl2 = tuple(sl2)
s = np.sum((2 * i[sl1] - 1.0) / nobs * (np.log(z) + np.log1p(-z[sl2])),
axis=axis)
a2 = -nobs - s
return a2
def normal_ad(x, axis=0):
"""
Anderson-Darling test for normal distribution unknown mean and variance.
Parameters
----------
x : array_like
The data array.
axis : int
The axis to perform the test along.
Returns
-------
ad2 : float
Anderson Darling test statistic.
pval : float
The pvalue for hypothesis that the data comes from a normal
distribution with unknown mean and variance.
See Also
--------
statsmodels.stats.diagnostic.anderson_statistic
The Anderson-Darling a2 statistic.
statsmodels.stats.diagnostic.kstest_fit
Kolmogorov-Smirnov test with estimated parameters for Normal or
Exponential distributions.
"""
ad2 = anderson_statistic(x, dist='norm', fit=True, axis=axis)
n = x.shape[axis]
ad2a = ad2 * (1 + 0.75 / n + 2.25 / n ** 2)
if np.size(ad2a) == 1:
if (ad2a >= 0.00 and ad2a < 0.200):
pval = 1 - np.exp(-13.436 + 101.14 * ad2a - 223.73 * ad2a ** 2)
elif ad2a < 0.340:
pval = 1 - np.exp(-8.318 + 42.796 * ad2a - 59.938 * ad2a ** 2)
elif ad2a < 0.600:
pval = np.exp(0.9177 - 4.279 * ad2a - 1.38 * ad2a ** 2)
elif ad2a <= 13:
pval = np.exp(1.2937 - 5.709 * ad2a + 0.0186 * ad2a ** 2)
else:
pval = 0.0 # is < 4.9542108058458799e-31
else:
bounds = np.array([0.0, 0.200, 0.340, 0.600])
pval0 = lambda ad2a: np.nan * np.ones_like(ad2a)
pval1 = lambda ad2a: 1 - np.exp(
-13.436 + 101.14 * ad2a - 223.73 * ad2a ** 2)
pval2 = lambda ad2a: 1 - np.exp(
-8.318 + 42.796 * ad2a - 59.938 * ad2a ** 2)
pval3 = lambda ad2a: np.exp(0.9177 - 4.279 * ad2a - 1.38 * ad2a ** 2)
pval4 = lambda ad2a: np.exp(1.2937 - 5.709 * ad2a + 0.0186 * ad2a ** 2)
pvalli = [pval0, pval1, pval2, pval3, pval4]
idx = np.searchsorted(bounds, ad2a, side='right')
pval = np.nan * np.ones_like(ad2a)
for i in range(5):
mask = (idx == i)
pval[mask] = pvalli[i](ad2a[mask])
return ad2, pval