Skip to content

Faster rolling_min/rolling_max #1504

@mrjbq7

Description

@mrjbq7

See #50

Using a min-heap and max-heap to implement the rolling_min and rolling_max functions seems to be about 30% faster than the current implementation.

Something sort of like this:

from heapq import heappush, heapify
from numpy import empty, nan
cimport numpy as np

cdef extern from "math.h":
    bint isnan(double x)

cdef np.float64_t NaN = nan

def rolling_min(np.ndarray[np.float64_t] arg, int window):

    cdef Py_ssize_t N = len(arg)
    cdef np.ndarray[np.double_t] output = empty(N, dtype=float)
    cdef list min_heap = []

    for i from 0 <= i < N:
        x = arg[i]
        if isnan(x):
            min_heap = []
            output[i] = x
        else:
            heappush(min_heap, arg[i])
            if len(min_heap) < window:
                output[i] = NaN
            else:
                output[i] = min_heap[0]
                x = arg[i+1-window]
                min_heap.remove(x)
                heapify(min_heap)
    return output

def rolling_max(np.ndarray[np.float64_t] arg, int window):

    cdef Py_ssize_t N = len(arg)
    cdef np.ndarray[np.double_t] output = empty(N, dtype=float)
    cdef list min_heap = []

    for i from 0 <= i < N:
        x = arg[i]
        if isnan(x):
            min_heap = []
            output[i] = x
        else:
            heappush(min_heap, -arg[i])
            if len(min_heap) < window:
                output[i] = NaN
            else:
                output[i] = -min_heap[0]
                x = -arg[i+1-window]
                min_heap.remove(x)
                heapify(min_heap)
    return output

Metadata

Metadata

Assignees

No one assigned

    Type

    No type

    Projects

    No projects

    Milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions