@@ -70,22 +70,28 @@ cdef bint is_monotonic_increasing_start_end_bounds(
7070# Rolling sum
7171
7272
73- cdef inline float64_t calc_sum(int64_t minp, int64_t nobs, float64_t sum_x) nogil:
73+ cdef inline float64_t calc_sum(int64_t minp, int64_t nobs, float64_t sum_x,
74+ int64_t num_consecutive_same_value, float64_t prev_value
75+ ) nogil:
7476 cdef:
7577 float64_t result
7678
7779 if nobs == 0 == minp:
7880 result = 0
7981 elif nobs >= minp:
80- result = sum_x
82+ if num_consecutive_same_value >= nobs:
83+ result = prev_value * nobs
84+ else :
85+ result = sum_x
8186 else :
8287 result = NaN
8388
8489 return result
8590
8691
8792cdef inline void add_sum(float64_t val, int64_t * nobs, float64_t * sum_x,
88- float64_t * compensation) nogil:
93+ float64_t * compensation, int64_t * num_consecutive_same_value,
94+ float64_t * prev_value) nogil:
8995 """ add a value from the sum calc using Kahan summation """
9096
9197 cdef:
@@ -99,6 +105,14 @@ cdef inline void add_sum(float64_t val, int64_t *nobs, float64_t *sum_x,
99105 compensation[0 ] = t - sum_x[0 ] - y
100106 sum_x[0 ] = t
101107
108+ # GH#42064, record num of same values to remove floating point artifacts
109+ if val == prev_value[0 ]:
110+ num_consecutive_same_value[0 ] += 1
111+ else :
112+ # reset to 1 (include current value itself)
113+ num_consecutive_same_value[0 ] = 1
114+ prev_value[0 ] = val
115+
102116
103117cdef inline void remove_sum(float64_t val, int64_t * nobs, float64_t * sum_x,
104118 float64_t * compensation) nogil:
@@ -120,8 +134,8 @@ def roll_sum(const float64_t[:] values, ndarray[int64_t] start,
120134 ndarray[int64_t] end , int64_t minp ) -> np.ndarray:
121135 cdef:
122136 Py_ssize_t i , j
123- float64_t sum_x , compensation_add , compensation_remove
124- int64_t s , e
137+ float64_t sum_x , compensation_add , compensation_remove , prev_value
138+ int64_t s , e , num_consecutive_same_value
125139 int64_t nobs = 0 , N = len (start)
126140 ndarray[float64_t] output
127141 bint is_monotonic_increasing_bounds
@@ -140,11 +154,13 @@ def roll_sum(const float64_t[:] values, ndarray[int64_t] start,
140154 if i == 0 or not is_monotonic_increasing_bounds or s >= end[i - 1 ]:
141155
142156 # setup
143-
157+ prev_value = values[s]
158+ num_consecutive_same_value = 0
144159 sum_x = compensation_add = compensation_remove = 0
145160 nobs = 0
146161 for j in range (s, e):
147- add_sum(values[j], & nobs, & sum_x, & compensation_add)
162+ add_sum(values[j], & nobs, & sum_x, & compensation_add,
163+ & num_consecutive_same_value, & prev_value)
148164
149165 else :
150166
@@ -154,9 +170,10 @@ def roll_sum(const float64_t[:] values, ndarray[int64_t] start,
154170
155171 # calculate adds
156172 for j in range (end[i - 1 ], e):
157- add_sum(values[j], & nobs, & sum_x, & compensation_add)
173+ add_sum(values[j], & nobs, & sum_x, & compensation_add,
174+ & num_consecutive_same_value, & prev_value)
158175
159- output[i] = calc_sum(minp, nobs, sum_x)
176+ output[i] = calc_sum(minp, nobs, sum_x, num_consecutive_same_value, prev_value )
160177
161178 if not is_monotonic_increasing_bounds:
162179 nobs = 0
@@ -170,14 +187,17 @@ def roll_sum(const float64_t[:] values, ndarray[int64_t] start,
170187# Rolling mean
171188
172189
173- cdef inline float64_t calc_mean(int64_t minp, Py_ssize_t nobs,
174- Py_ssize_t neg_ct, float64_t sum_x) nogil:
190+ cdef inline float64_t calc_mean(int64_t minp, Py_ssize_t nobs, Py_ssize_t neg_ct,
191+ float64_t sum_x, int64_t num_consecutive_same_value,
192+ float64_t prev_value) nogil:
175193 cdef:
176194 float64_t result
177195
178196 if nobs >= minp and nobs > 0 :
179197 result = sum_x / < float64_t> nobs
180- if neg_ct == 0 and result < 0 :
198+ if num_consecutive_same_value >= nobs:
199+ result = prev_value
200+ elif neg_ct == 0 and result < 0 :
181201 # all positive
182202 result = 0
183203 elif neg_ct == nobs and result > 0 :
@@ -191,7 +211,8 @@ cdef inline float64_t calc_mean(int64_t minp, Py_ssize_t nobs,
191211
192212
193213cdef inline void add_mean(float64_t val, Py_ssize_t * nobs, float64_t * sum_x,
194- Py_ssize_t * neg_ct, float64_t * compensation) nogil:
214+ Py_ssize_t * neg_ct, float64_t * compensation,
215+ int64_t * num_consecutive_same_value, float64_t * prev_value) nogil:
195216 """ add a value from the mean calc using Kahan summation """
196217 cdef:
197218 float64_t y, t
@@ -206,6 +227,14 @@ cdef inline void add_mean(float64_t val, Py_ssize_t *nobs, float64_t *sum_x,
206227 if signbit(val):
207228 neg_ct[0 ] = neg_ct[0 ] + 1
208229
230+ # GH#42064, record num of same values to remove floating point artifacts
231+ if val == prev_value[0 ]:
232+ num_consecutive_same_value[0 ] += 1
233+ else :
234+ # reset to 1 (include current value itself)
235+ num_consecutive_same_value[0 ] = 1
236+ prev_value[0 ] = val
237+
209238
210239cdef inline void remove_mean(float64_t val, Py_ssize_t * nobs, float64_t * sum_x,
211240 Py_ssize_t * neg_ct, float64_t * compensation) nogil:
@@ -226,8 +255,8 @@ cdef inline void remove_mean(float64_t val, Py_ssize_t *nobs, float64_t *sum_x,
226255def roll_mean (const float64_t[:] values , ndarray[int64_t] start ,
227256 ndarray[int64_t] end , int64_t minp ) -> np.ndarray:
228257 cdef:
229- float64_t val , compensation_add , compensation_remove , sum_x
230- int64_t s , e
258+ float64_t val , compensation_add , compensation_remove , sum_x , prev_value
259+ int64_t s , e , num_consecutive_same_value
231260 Py_ssize_t nobs , i , j , neg_ct , N = len (start)
232261 ndarray[float64_t] output
233262 bint is_monotonic_increasing_bounds
@@ -245,12 +274,15 @@ def roll_mean(const float64_t[:] values, ndarray[int64_t] start,
245274
246275 if i == 0 or not is_monotonic_increasing_bounds or s >= end[i - 1 ]:
247276
277+ # setup
248278 compensation_add = compensation_remove = sum_x = 0
249279 nobs = neg_ct = 0
250- # setup
280+ prev_value = values[s]
281+ num_consecutive_same_value = 0
251282 for j in range (s, e):
252283 val = values[j]
253- add_mean(val, & nobs, & sum_x, & neg_ct, & compensation_add)
284+ add_mean(val, & nobs, & sum_x, & neg_ct, & compensation_add,
285+ & num_consecutive_same_value, & prev_value)
254286
255287 else :
256288
@@ -262,9 +294,10 @@ def roll_mean(const float64_t[:] values, ndarray[int64_t] start,
262294 # calculate adds
263295 for j in range (end[i - 1 ], e):
264296 val = values[j]
265- add_mean(val, & nobs, & sum_x, & neg_ct, & compensation_add)
297+ add_mean(val, & nobs, & sum_x, & neg_ct, & compensation_add,
298+ & num_consecutive_same_value, & prev_value)
266299
267- output[i] = calc_mean(minp, nobs, neg_ct, sum_x)
300+ output[i] = calc_mean(minp, nobs, neg_ct, sum_x, num_consecutive_same_value, prev_value )
268301
269302 if not is_monotonic_increasing_bounds:
270303 nobs = 0
0 commit comments