diff --git a/statsmodels/robust/_qn.pyx b/statsmodels/robust/_qn.pyx
index 2bb1118a1f5..dcef2e15ebc 100644
--- a/statsmodels/robust/_qn.pyx
+++ b/statsmodels/robust/_qn.pyx
@@ -1,71 +1,65 @@
 #!python
 #cython: wraparound=False, boundscheck=False, cdivision=True
-cimport cython
 import numpy as np
-cimport numpy as np
-DTYPE_INT = np.int
-DTYPE_DOUBLE = np.float64
-ctypedef np.float64_t DTYPE_DOUBLE_t
-ctypedef np.int_t DTYPE_INT_t
-
-
-def _high_weighted_median(np.ndarray[DTYPE_DOUBLE_t] a, np.ndarray[DTYPE_INT_t] weights):
+def _high_weighted_median(double[::1] a, int[::1] weights):
     """
     Computes a weighted high median of a. This is defined as the
     smallest a[j] such that the sum over all a[i]<=a[j] is strictly
     greater than half the total sum of the weights
     """
     cdef:
-        DTYPE_INT_t n = a.shape[0]
-        np.ndarray[DTYPE_DOUBLE_t] sorted_a = np.zeros((n,), dtype=DTYPE_DOUBLE)
-        np.ndarray[DTYPE_DOUBLE_t] a_cand = np.zeros((n,), dtype=DTYPE_DOUBLE)
-        np.ndarray[DTYPE_INT_t] weights_cand = np.zeros((n,), dtype=DTYPE_INT)
-        Py_ssize_t i= 0
-        DTYPE_INT_t kcand = 0
-        DTYPE_INT_t wleft, wright, wmid, wtot, wrest = 0
-        DTYPE_DOUBLE_t trial = 0
-    wtot = np.sum(weights)
+        double[::1] a_cp = np.copy(a)
+        int[::1] weights_cp = np.copy(weights)
+        int n = a_cp.shape[0]
+        double[::1] sorted_a = np.zeros((n,), dtype=np.double)
+        double[::1] a_cand = np.zeros((n,), dtype=np.double)
+        int[::1] weights_cand = np.zeros((n,), dtype=np.intc)
+        int kcand = 0
+        int wleft, wright, wmid, wtot, wrest = 0
+        double trial = 0
+    wtot = np.sum(weights_cp)
     while True:
         wleft = 0
         wmid = 0
         wright = 0
         for i in range(n):
-            sorted_a[i] = a[i]
-        sorted_a.partition(n//2)
+            sorted_a[i] = a_cp[i]
+        sorted_a = np.partition(sorted_a, kth=n//2)
         trial = sorted_a[n//2]
         for i in range(n):
-            if a[i] < trial:
-                wleft = wleft + weights[i]
-            elif a[i] > trial:
-                wright = wright + weights[i]
+            if a_cp[i] < trial:
+                wleft = wleft + weights_cp[i]
+            elif a_cp[i] > trial:
+                wright = wright + weights_cp[i]
             else:
-                wmid = wmid + weights[i]
+                wmid = wmid + weights_cp[i]
         kcand = 0
         if 2 * (wrest + wleft) > wtot:
             for i in range(n):
-                if a[i] < trial:
-                    a_cand[kcand] = a[i]
-                    weights_cand[kcand] = weights[i]
+                if a_cp[i] < trial:
+                    a_cand[kcand] = a_cp[i]
+                    weights_cand[kcand] = weights_cp[i]
                     kcand = kcand + 1
         elif 2 * (wrest + wleft + wmid) <= wtot:
             for i in range(n):
-                if a[i] > trial:
-                    a_cand[kcand] = a[i]
-                    weights_cand[kcand] = weights[i]
+                if a_cp[i] > trial:
+                    a_cand[kcand] = a_cp[i]
+                    weights_cand[kcand] = weights_cp[i]
                     kcand = kcand + 1
             wrest = wrest + wleft + wmid
         else:
-            return trial
+            break
         n = kcand
         for i in range(n):
-            a[i] = a_cand[i]
-            weights[i] = weights_cand[i]
+            a_cp[i] = a_cand[i]
+            weights_cp[i] = weights_cand[i]
+    return trial
 
 
-def _qn(np.ndarray[DTYPE_DOUBLE_t] a, DTYPE_DOUBLE_t c):
+def _qn(double[:] a, double c):
     """
     Computes the Qn robust estimator of scale, a more efficient alternative
     to the MAD. The implementation follows the algorithm described in Croux
@@ -85,22 +79,22 @@ def _qn(np.ndarray[DTYPE_DOUBLE_t] a, DTYPE_DOUBLE_t c):
         The Qn robust estimator of scale
     """
     cdef:
-        DTYPE_INT_t n = a.shape[0]
-        DTYPE_INT_t h = n/2 + 1
-        DTYPE_INT_t k = h * (h - 1) / 2
-        DTYPE_INT_t n_left = n * (n + 1) / 2
-        DTYPE_INT_t n_right = n * n
-        DTYPE_INT_t k_new = k + n_left
+        int n = a.shape[0]
+        int h = n/2 + 1
+        int k = h * (h - 1) / 2
+        int n_left = n * (n + 1) / 2
+        int n_right = n * n
+        int k_new = k + n_left
         Py_ssize_t i, j, jh, l = 0
-        DTYPE_INT_t sump, sumq = 0
-        DTYPE_DOUBLE_t trial, output = 0
-        np.ndarray[DTYPE_DOUBLE_t] a_sorted = np.sort(a)
-        np.ndarray[DTYPE_INT_t] left = np.array([n - i + 1 for i in range(0, n)], dtype=DTYPE_INT)
-        np.ndarray[DTYPE_INT_t] right = np.array([n if i <= h else n - (i - h) for i in range(0, n)], dtype=DTYPE_INT)
-        np.ndarray[DTYPE_INT_t] weights = np.zeros((n,), dtype=DTYPE_INT)
-        np.ndarray[DTYPE_DOUBLE_t] work = np.zeros((n,), dtype=DTYPE_DOUBLE)
-        np.ndarray[DTYPE_INT_t] p = np.zeros((n,), dtype=DTYPE_INT)
-        np.ndarray[DTYPE_INT_t] q = np.zeros((n,), dtype=DTYPE_INT)
+        int sump, sumq = 0
+        double trial, output = 0
+        double[::1] a_sorted = np.sort(a)
+        int[::1] left = np.array([n - i + 1 for i in range(0, n)], dtype=np.intc)
+        int[::1] right = np.array([n if i <= h else n - (i - h) for i in range(0, n)], dtype=np.intc)
+        int[::1] weights = np.zeros((n,), dtype=np.intc)
+        double[::1] work = np.zeros((n,), dtype=np.double)
+        int[::1] p = np.zeros((n,), dtype=np.intc)
+        int[::1] q = np.zeros((n,), dtype=np.intc)
     while n_right - n_left > n:
         j = 0
         for i in range(1, n):
@@ -121,7 +115,7 @@ def _qn(np.ndarray[DTYPE_DOUBLE_t] a, DTYPE_DOUBLE_t c):
                 j = j - 1
             q[i] = j
         sump = np.sum(p)
-        sumq = np.sum(q - 1)
+        sumq = np.sum(q) - n
         if k_new <= sump:
             right = np.copy(p)
             n_right = sump
diff --git a/statsmodels/robust/scale.py b/statsmodels/robust/scale.py
index 21faaba2d22..807b94cdab0 100644
--- a/statsmodels/robust/scale.py
+++ b/statsmodels/robust/scale.py
@@ -87,8 +87,10 @@ def iqr(a, c=Gaussian.ppf(3/4) - Gaussian.ppf(1/4), axis=0):
 
 def qn_scale(a, c=1 / (np.sqrt(2) * Gaussian.ppf(5 / 8)), axis=0):
     """
-    Computes the Qn robust estimator of scale, a more efficient alternative
-    to the MAD. The Qn estimator of the array a of length n is defined as
+    Computes the Qn robust estimator of scale
+
+    The Qn scale estimator is a more efficient alternative to the MAD.
+    The Qn scale estimator of an array a of length n is defined as
     c * {abs(a[i] - a[j]): i<j}_(k), for k equal to [n/2] + 1 choose 2. Thus,
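Reviewer note (not part of the patch): the `np.sum(q - 1)` -> `np.sum(q) - n` change is forced by the memoryview conversion, since typed memoryviews, unlike ndarrays, do not support elementwise arithmetic; the two expressions are algebraically identical for an n-element array. Likewise, the new `a_cp`/`weights_cp` copies make `_high_weighted_median` side-effect free, where the old ndarray version mutated its arguments in place during the candidate recursion. For anyone who wants to sanity-check the rewrite without compiling the extension, below is a minimal pure-Python sketch of the same weighted-high-median recursion, verified against a brute-force reading of the docstring's definition. The helper name `high_weighted_median_py` and the test data are illustrative only, not part of statsmodels.

import numpy as np


def high_weighted_median_py(a, weights):
    # Smallest a[j] such that the summed weight of all a[i] <= a[j] is
    # strictly greater than half the total weight; mirrors the candidate
    # recursion in _high_weighted_median above.
    a = np.asarray(a, dtype=float)
    weights = np.asarray(weights, dtype=np.intc)
    wtot = int(weights.sum())
    wrest = 0
    while True:
        n = a.shape[0]
        trial = np.partition(a, n // 2)[n // 2]
        wleft = int(weights[a < trial].sum())
        wmid = int(weights[a == trial].sum())
        if 2 * (wrest + wleft) > wtot:
            keep = a < trial       # high median is among the smaller candidates
        elif 2 * (wrest + wleft + wmid) <= wtot:
            keep = a > trial       # high median is among the larger candidates
            wrest += wleft + wmid  # weight now known to lie at or below trial
        else:
            return trial           # trial satisfies the defining inequality
        a, weights = a[keep], weights[keep]


rng = np.random.default_rng(12345)
a = rng.normal(size=101)                           # distinct with probability one
w = rng.integers(1, 10, size=101).astype(np.intc)

# Brute-force check of the definition (assumes the values in a are distinct):
# sort a, accumulate the weights, and take the first value whose cumulative
# weight strictly exceeds half the total.
order = np.argsort(a)
csum = np.cumsum(w[order])
expected = a[order][np.searchsorted(csum, w.sum() / 2, side="right")]
assert high_weighted_median_py(a, w) == expected

# The public estimator touched by the scale.py hunk can then be exercised
# directly, e.g.:
#   from statsmodels.robust.scale import qn_scale
#   qn_scale(rng.normal(size=1000))  # roughly 1.0 for standard normal data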