-
-
Notifications
You must be signed in to change notification settings - Fork 18.8k
Closed
Labels
Milestone
Description
See #50
Using a min-heap
and max-heap
to implement the rolling_min
and rolling_max
functions seems to be about 30% faster than the current implementation.
Something sort of like this:
from heapq import heappush, heapify
from numpy import empty, nan
cimport numpy as np
cdef extern from "math.h":
bint isnan(double x)
cdef np.float64_t NaN = nan
def rolling_min(np.ndarray[np.float64_t] arg, int window):
cdef Py_ssize_t N = len(arg)
cdef np.ndarray[np.double_t] output = empty(N, dtype=float)
cdef list min_heap = []
for i from 0 <= i < N:
x = arg[i]
if isnan(x):
min_heap = []
output[i] = x
else:
heappush(min_heap, arg[i])
if len(min_heap) < window:
output[i] = NaN
else:
output[i] = min_heap[0]
x = arg[i+1-window]
min_heap.remove(x)
heapify(min_heap)
return output
def rolling_max(np.ndarray[np.float64_t] arg, int window):
cdef Py_ssize_t N = len(arg)
cdef np.ndarray[np.double_t] output = empty(N, dtype=float)
cdef list min_heap = []
for i from 0 <= i < N:
x = arg[i]
if isnan(x):
min_heap = []
output[i] = x
else:
heappush(min_heap, -arg[i])
if len(min_heap) < window:
output[i] = NaN
else:
output[i] = -min_heap[0]
x = -arg[i+1-window]
min_heap.remove(x)
heapify(min_heap)
return output