Skip to content

Commit

Permalink
ENH: turn quicksort into introsort
Browse files Browse the repository at this point in the history
Introsort is regular quicksort but changing to a heapsort when not
enough progress is made. This retains the good quicksort performance
while changing the worst case runtime from O(N^2) to O(N*log(N))
  • Loading branch information
juliantaylor committed Jul 21, 2016
1 parent f3c994a commit c3cea45
Show file tree
Hide file tree
Showing 4 changed files with 63 additions and 0 deletions.
6 changes: 6 additions & 0 deletions doc/release/1.12.0-notes.rst
Original file line number Diff line number Diff line change
Expand Up @@ -266,6 +266,12 @@ numpy.sctypes now includes bytes on Python3 too
Previously, it included str (bytes) and unicode on Python2, but only str
(unicode) on Python3.

quicksort has been changed to an introsort
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
The quicksort kind of ``np.sort`` and ``np.argsort`` is now an introsort which
is regular quicksort but changing to a heapsort when not enough progress is
made. This retains the good quicksort performance while changing the worst case
runtime from ``O(N^2)`` to ``O(N*log(N))``.

Deprecations
============
Expand Down
6 changes: 6 additions & 0 deletions numpy/core/fromnumeric.py
Original file line number Diff line number Diff line change
Expand Up @@ -776,6 +776,12 @@ def sort(a, axis=-1, kind='quicksort', order=None):
placements are sorted according to the non-nan part if it exists.
Non-nan values are sorted as before.
.. versionadded:: 1.12.0
quicksort has been changed to an introsort which will switch
heapsort when it does not make enough progress. This makes its
worst case O(n*log(n)).
Examples
--------
>>> a = np.array([[1,4],[3,1]])
Expand Down
36 changes: 36 additions & 0 deletions numpy/core/src/npysort/quicksort.c.src
Original file line number Diff line number Diff line change
Expand Up @@ -69,8 +69,13 @@ quicksort_@suff@(void *start, npy_intp num, void *NOT_USED)
@type@ *stack[PYA_QS_STACK];
@type@ **sptr = stack;
@type@ *pm, *pi, *pj, *pk;
npy_intp depth_limit = npy_get_msb(num) * 2;

for (;;) {
if (depth_limit-- < 0) {
heapsort_@suff@(pl, pr - pl + 1, NULL);
goto stack_pop;
}
while ((pr - pl) > SMALL_QUICKSORT) {
/* quicksort partition */
pm = pl + ((pr - pl) >> 1);
Expand Down Expand Up @@ -114,6 +119,7 @@ quicksort_@suff@(void *start, npy_intp num, void *NOT_USED)
}
*pj = vp;
}
stack_pop:
if (sptr == stack) {
break;
}
Expand All @@ -135,9 +141,14 @@ aquicksort_@suff@(void *vv, npy_intp* tosort, npy_intp num, void *NOT_USED)
npy_intp *stack[PYA_QS_STACK];
npy_intp **sptr = stack;
npy_intp *pm, *pi, *pj, *pk, vi;
npy_intp depth_limit = npy_get_msb(num) * 2;

for (;;) {
while ((pr - pl) > SMALL_QUICKSORT) {
if (depth_limit-- < 0) {
aheapsort_@suff@(vv, pl, pr - pl + 1, NULL);
goto stack_pop;
}
/* quicksort partition */
pm = pl + ((pr - pl) >> 1);
if (@TYPE@_LT(v[*pm],v[*pl])) INTP_SWAP(*pm, *pl);
Expand Down Expand Up @@ -181,6 +192,7 @@ aquicksort_@suff@(void *vv, npy_intp* tosort, npy_intp num, void *NOT_USED)
}
*pj = vi;
}
stack_pop:
if (sptr == stack) {
break;
}
Expand Down Expand Up @@ -217,12 +229,17 @@ quicksort_@suff@(void *start, npy_intp num, void *varr)
@type@ *pl = start;
@type@ *pr = pl + (num - 1)*len;
@type@ *stack[PYA_QS_STACK], **sptr = stack, *pm, *pi, *pj, *pk;
npy_intp depth_limit = npy_get_msb(num) * 2;

if (vp == NULL) {
return -NPY_ENOMEM;
}

for (;;) {
if (depth_limit-- < 0) {
heapsort_@suff@(pl, (pr - pl) / len + 1, varr);
goto stack_pop;
}
while ((size_t)(pr - pl) > SMALL_QUICKSORT*len) {
/* quicksort partition */
pm = pl + (((pr - pl)/len) >> 1)*len;
Expand Down Expand Up @@ -268,6 +285,7 @@ quicksort_@suff@(void *start, npy_intp num, void *varr)
}
@TYPE@_COPY(pj, vp, len);
}
stack_pop:
if (sptr == stack) {
break;
}
Expand All @@ -292,8 +310,13 @@ aquicksort_@suff@(void *vv, npy_intp* tosort, npy_intp num, void *varr)
npy_intp *stack[PYA_QS_STACK];
npy_intp **sptr=stack;
npy_intp *pm, *pi, *pj, *pk, vi;
npy_intp depth_limit = npy_get_msb(num) * 2;

for (;;) {
if (depth_limit-- < 0) {
aheapsort_@suff@(vv, pl, pr - pl + 1, varr);
goto stack_pop;
}
while ((pr - pl) > SMALL_QUICKSORT) {
/* quicksort partition */
pm = pl + ((pr - pl) >> 1);
Expand Down Expand Up @@ -338,6 +361,7 @@ aquicksort_@suff@(void *vv, npy_intp* tosort, npy_intp num, void *varr)
}
*pj = vi;
}
stack_pop:
if (sptr == stack) {
break;
}
Expand Down Expand Up @@ -370,12 +394,17 @@ npy_quicksort(void *start, npy_intp num, void *varr)
char *stack[PYA_QS_STACK];
char **sptr = stack;
char *pm, *pi, *pj, *pk;
npy_intp depth_limit = npy_get_msb(num) * 2;

if (vp == NULL) {
return -NPY_ENOMEM;
}

for (;;) {
if (depth_limit-- < 0) {
npy_heapsort(pl, (pr - pl) / elsize + 1, varr);
goto stack_pop;
}
while(pr - pl > SMALL_QUICKSORT*elsize) {
/* quicksort partition */
pm = pl + (((pr - pl) / elsize) >> 1) * elsize;
Expand Down Expand Up @@ -431,6 +460,7 @@ npy_quicksort(void *start, npy_intp num, void *varr)
}
GENERIC_COPY(pj, vp, elsize);
}
stack_pop:
if (sptr == stack) {
break;
}
Expand All @@ -456,8 +486,13 @@ npy_aquicksort(void *vv, npy_intp* tosort, npy_intp num, void *varr)
npy_intp *stack[PYA_QS_STACK];
npy_intp **sptr = stack;
npy_intp *pm, *pi, *pj, *pk, vi;
npy_intp depth_limit = npy_get_msb(num) * 2;

for (;;) {
if (depth_limit-- < 0) {
npy_aheapsort(vv, pl, pr - pl + 1, varr);
goto stack_pop;
}
while ((pr - pl) > SMALL_QUICKSORT) {
/* quicksort partition */
pm = pl + ((pr - pl) >> 1);
Expand Down Expand Up @@ -512,6 +547,7 @@ npy_aquicksort(void *vv, npy_intp* tosort, npy_intp num, void *varr)
}
*pj = vi;
}
stack_pop:
if (sptr == stack) {
break;
}
Expand Down
15 changes: 15 additions & 0 deletions numpy/core/tests/test_multiarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -1349,6 +1349,21 @@ def test_sort(self):
msg = 'test empty array sort with axis=None'
assert_equal(np.sort(a, axis=None), a.ravel(), msg)

def test_sort_degraded(self):
# test degraded dataset would take minutes to run with normal qsort
d = np.arange(1000000)
do = d.copy()
x = d
# create a median of 3 killer where each median is the sorted second
# last element of the quicksort partition
while x.size > 3:
mid = x.size // 2
x[mid], x[-2] = x[-2], x[mid]
x = x[:-2]

assert_equal(np.sort(d), do)
assert_equal(d[np.argsort(d)], do)

def test_copy(self):
def assert_fortran(arr):
assert_(arr.flags.fortran)
Expand Down

0 comments on commit c3cea45

Please sign in to comment.