Skip to content

Commit

Permalink
fix centered rolling window methods
Browse files Browse the repository at this point in the history
  • Loading branch information
marcvanheerden authored and ritchie46 committed Feb 2, 2022
1 parent 3936272 commit 65c1982
Show file tree
Hide file tree
Showing 4 changed files with 89 additions and 31 deletions.
5 changes: 3 additions & 2 deletions polars/polars-arrow/src/kernels/rolling/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,10 @@ fn det_offsets(i: Idx, window_size: WindowSize, _len: Len) -> (usize, usize) {
(i.saturating_sub(window_size - 1), i + 1)
}
/// Returns the `(start, end)` offsets of a centered rolling window around index `i`.
///
/// The start is clamped at zero via `saturating_sub`; the end is capped at `len`.
/// The end is presumably exclusive, mirroring the `i + 1` returned by `det_offsets` —
/// TODO confirm against the callers.
///
/// NOTE(review): this span is taken from a rendered diff without +/- markers, so the
/// tuple below lists four expressions. The `window_size / 2` pair looks like the
/// removed code and the `right_window` pair like its replacement — confirm against
/// the repository before treating this as compilable source.
fn det_offsets_center(i: Idx, window_size: WindowSize, len: Len) -> (usize, usize) {
// Half of the window, rounded up; the remaining `window_size - right_window`
// elements are taken from the left of `i`.
let right_window = (window_size + 1) / 2;
(
// Pair based on `window_size / 2` on both sides. Ignoring clamping, for
// `window_size = 3` this gives `(i - 1, i + 1)`.
i.saturating_sub(window_size / 2),
std::cmp::min(len, i + window_size / 2),
// Pair based on `right_window`. Ignoring clamping, for `window_size = 3`
// this gives `(i - 1, i + 2)`.
i.saturating_sub(window_size - right_window),
std::cmp::min(len, i + right_window),
)
}

Expand Down
36 changes: 21 additions & 15 deletions polars/polars-arrow/src/kernels/rolling/no_nulls.rs
Original file line number Diff line number Diff line change
Expand Up @@ -157,17 +157,21 @@ where

/// Computes the variance of `vals` with an `n - 1` denominator (Bessel's correction).
///
/// A single-element slice makes the `- T::one()` denominator zero, so the division
/// is by zero in that case.
///
/// NOTE(review): this span is taken from a rendered diff without +/- markers and
/// interleaves two implementations: a two-pass one (mean first, then the sum of squared
/// deviations divided by `len - 1`) and a one-pass one (running `sum`, `sum_of_squares`
/// and `count`). The `std::iter::Sum` bound, the `len`/first `mean` bindings, the
/// `v`-based accumulation and the `sum / (len - T::one())` tail appear to be the removed
/// code — confirm against the repository before treating this as compilable source.
pub(crate) fn compute_var<T>(vals: &[T]) -> T
where
T: Float + std::iter::Sum,
T: Float + std::ops::AddAssign + std::fmt::Debug,
{
// Two-pass variant: element count converted to `T`, then the mean of all values.
let len = T::from(vals.len()).unwrap();
let mean = vals.iter().copied().sum::<T>() / len;

// One-pass variant: running accumulators, all starting at zero.
let mut count = T::zero();
let mut sum = T::zero();
let mut sum_of_squares = T::zero();

for &val in vals {
// Two-pass variant: accumulate the squared deviation from the mean.
let v = val - mean;
sum = sum + v * v
// One-pass variant: accumulate the value, its square, and the element count.
sum += val;
sum_of_squares += val * val;
count += T::one();
}
// Two-pass variant: sum of squared deviations over `len - 1`.
sum / (len - T::one())

// One-pass variant: mean of squares minus the squared mean, rescaled by
// `count / (count - 1)`.
let mean = sum / count;
// apply Bessel's correction
((sum_of_squares / count) - mean * mean) / (count - T::one()) * count
}

fn compute_var_weights<T>(vals: &[T], weights: &[T]) -> T
Expand All @@ -176,17 +180,19 @@ where
{
let weighted_iter = vals.iter().zip(weights).map(|(x, y)| *x * *y);

let mut x = T::zero();
let mut xsquare = T::zero();
let mut len = T::zero();
let mut count = T::zero();
let mut sum = T::zero();
let mut sum_of_squares = T::zero();

for val in weighted_iter {
x += val;
xsquare += val * val;
len += T::one();
sum += val;
sum_of_squares += val * val;
count += T::one();
}

((xsquare / len) - (x / len) * (x / len)) / (len - T::one()) * len
let mean = sum / count;
// apply Bessel's correction
((sum_of_squares / count) - mean * mean) / (count - T::one()) * count
}

pub(crate) fn compute_mean<T>(values: &[T]) -> T
Expand Down Expand Up @@ -542,7 +548,7 @@ pub fn rolling_var<T>(
weights: Option<&[f64]>,
) -> ArrayRef
where
T: NativeType + Float + std::iter::Sum,
T: NativeType + Float + std::ops::AddAssign,
{
match (center, weights) {
(true, None) => rolling_apply(
Expand Down
53 changes: 51 additions & 2 deletions polars/polars-core/src/chunked_array/ops/rolling_window.rs
Original file line number Diff line number Diff line change
Expand Up @@ -280,9 +280,10 @@ mod inner_mod {
/// utility
fn window_edges(idx: usize, len: usize, window_size: usize, center: bool) -> (usize, usize) {
let (start, end) = if center {
let right_window = (window_size + 1) / 2;
(
idx.saturating_sub(window_size / 2),
std::cmp::min(len, idx + window_size / 2),
idx.saturating_sub(window_size - right_window),
std::cmp::min(len, idx + right_window),
)
} else {
(idx.saturating_sub(window_size - 1), idx + 1)
Expand Down Expand Up @@ -626,6 +627,29 @@ mod test {
]
);

// check centered rolling window
let a = ca
.rolling_mean(RollingOptions {
window_size: 3,
min_periods: 1,
center: true,
weights: None,
})
.unwrap();
let a = a.f64().unwrap();
assert_eq!(
Vec::from(a),
&[
Some(0.5),
Some(1.0),
Some(1.5),
Some(2.0),
Some(5.0),
Some(5.5),
Some(5.5)
]
);

// integers
let ca = Int32Chunked::new_from_slice("", &[1, 8, 6, 2, 16, 10]);
let out = ca
Expand Down Expand Up @@ -741,6 +765,31 @@ mod test {
Vec::from(out),
&[None, None, Some(17), Some(10), Some(20), Some(34),]
);

// check centered rolling window
let out = ca
.rolling_var(RollingOptions {
window_size: 4,
min_periods: 3,
center: true,
weights: None,
})
.unwrap()
.round(2)
.unwrap();
let out = out.f64().unwrap();

assert_eq!(
Vec::from(out),
&[
None,
Some(17.33),
Some(11.58),
Some(21.58),
Some(24.67),
Some(34.33)
]
);
}

#[test]
Expand Down
26 changes: 14 additions & 12 deletions py-polars/tests/test_lazy.py
Original file line number Diff line number Diff line change
Expand Up @@ -632,17 +632,11 @@ def test_rolling(fruits_cars: pl.DataFrame) -> None:
pl.col("A").rolling_sum(3, min_periods=1).alias("4"),
pl.col("A").rolling_sum(3).alias("4b"),
# below we use .round purely for the ability to do .frame_equal()
pl.col("A").rolling_std(3, min_periods=1).round(decimals=4).alias("std"),
pl.col("A").rolling_std(3).alias("std2"),
pl.col("A").rolling_var(3, min_periods=1).round(decimals=4).alias("var"),
pl.col("A").rolling_var(3).alias("var2"),
pl.col("A").rolling_std(3).round(1).alias("std"),
pl.col("A").rolling_var(3).round(1).alias("var"),
]
)

# TODO: rolling_std & rolling_var return NaN instead of null if they can't compute
out[0, "std"] = None
out[0, "var"] = None

assert out.frame_equal(
pl.DataFrame(
{
Expand All @@ -654,14 +648,22 @@ def test_rolling(fruits_cars: pl.DataFrame) -> None:
"3b": [None, None, 3, 4, 5],
"4": [1, 3, 6, 9, 12],
"4b": [None, None, 6, 9, 12],
"std": [None, 0.7071, 1, 1, 1],
"std2": [None, None, 1, 1, 1],
"var": [None, 0.5, 1, 1, 1],
"var2": [None, None, 1, 1, 1],
"std": [None, None, 1.0, 1.0, 1.0],
"var": [None, None, 1.0, 1.0, 1.0],
}
)
)

out_nan = df.select(
[
pl.col("A").rolling_std(3, min_periods=1).round(decimals=4).alias("std"),
pl.col("A").rolling_var(3, min_periods=1).round(decimals=1).alias("var"),
]
)

assert out_nan[0, "std"] != out_nan[0, "std"] # true if value is NaN
assert out_nan[0, "var"] != out_nan[0, "var"] # true if value is NaN


def test_rolling_apply() -> None:
s = pl.Series("A", [1.0, 2.0, 9.0, 2.0, 13.0], dtype=pl.Float64)
Expand Down

0 comments on commit 65c1982

Please sign in to comment.