Skip to content

Commit

Permalink
python: More rolling_window functions
Browse files Browse the repository at this point in the history
This adds:

* rolling_std
* rolling_var
* rolling_quantile
* rolling_median
* rolling_skew
  • Loading branch information
ritchie46 committed Sep 18, 2021
1 parent 4e1013d commit 1b89323
Show file tree
Hide file tree
Showing 8 changed files with 276 additions and 2 deletions.
40 changes: 40 additions & 0 deletions polars/polars-core/src/chunked_array/ops/rolling_window.rs
Original file line number Diff line number Diff line change
Expand Up @@ -444,6 +444,46 @@ where
}
}

impl<T> ChunkedArray<T>
where
T: PolarsFloatType,
{
pub fn rolling_apply_float<F>(&self, window_size: usize, f: F) -> Result<Self>
where
F: Fn(&ChunkedArray<T>) -> Option<T::Native>,
T::Native: Zero,
{
let ca = self.rechunk();
let arr = ca.downcast_iter().next().unwrap();

let arr_container = ChunkedArray::<T>::new_from_slice("", &[T::Native::zero()]);
let array_ptr = &arr_container.chunks()[0];
let ptr = Arc::as_ptr(array_ptr) as *mut dyn Array as *mut PrimitiveArray<T::Native>;

let mut builder = PrimitiveChunkedBuilder::<T>::new(self.name(), self.len());
for _ in 0..window_size - 1 {
builder.append_null();
}

for offset in 0..self.len() + 1 - window_size {
let arr_window = arr.slice(offset, window_size);

// Safety.
// ptr is not dropped as we are in scope
// We are also the only owner of the contents of the Arc
// we do this to reduce heap allocs.
unsafe {
*ptr = arr_window;
}

let out = f(&arr_container);
builder.append_option(out);
}

Ok(builder.finish())
}
}

impl ChunkRollApply for ListChunked {}
impl ChunkRollApply for Utf8Chunked {}
impl ChunkRollApply for BooleanChunked {}
Expand Down
41 changes: 40 additions & 1 deletion polars/polars-lazy/src/dsl.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1491,10 +1491,49 @@ impl Expr {
self,
window_size: usize,
f: Arc<dyn Fn(&Series) -> Series + Send + Sync>,
output_type: GetOutput,
) -> Expr {
self.apply(
move |s| s.rolling_apply(window_size, f.as_ref()),
GetOutput::same_type(),
output_type,
)
}

#[cfg_attr(docsrs, doc(cfg(feature = "rolling_window")))]
#[cfg(feature = "rolling_window")]
/// Apply a custom function over a rolling/ moving window of the array.
/// Prefer this over rolling_apply in case of floating point numbers as this is faster.
/// This has quite some dynamic dispatch, so prefer rolling_min, max, mean, sum over this.
pub fn rolling_apply_float<F>(self, window_size: usize, f: F) -> Expr
where
F: 'static + Fn(&Float64Chunked) -> Option<f64> + Send + Sync + Copy,
{
self.apply(
move |s| {
let out = match s.dtype() {
DataType::Float64 => s
.f64()
.unwrap()
.rolling_apply_float(window_size, f)
.map(|ca| ca.into_series()),
_ => s
.cast::<Float64Type>()?
.f64()
.unwrap()
.rolling_apply_float(window_size, f)
.map(|ca| ca.into_series()),
}?;
if let DataType::Float32 = s.dtype() {
out.cast_with_dtype(&DataType::Float32)
} else {
Ok(out)
}
},
GetOutput::map_field(|field| match field.data_type() {
DataType::Float64 => field.clone(),
DataType::Float32 => Field::new(field.name(), DataType::Float32),
_ => Field::new(field.name(), DataType::Float64),
}),
)
}

Expand Down
5 changes: 5 additions & 0 deletions py-polars/docs/source/reference/expression.rst
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,11 @@ Computations
Expr.rolling_mean
Expr.rolling_sum
Expr.rolling_apply
Expr.rolling_std
Expr.rolling_var
Expr.rolling_median
Expr.rolling_quantile
Expr.rolling_skew
Expr.hash
Expr.abs
Expr.rank
Expand Down
5 changes: 5 additions & 0 deletions py-polars/docs/source/reference/series.rst
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,11 @@ Computations
Series.rolling_mean
Series.rolling_sum
Series.rolling_apply
Series.rolling_std
Series.rolling_var
Series.rolling_median
Series.rolling_quantile
Series.rolling_skew
Series.hash
Series.peak_max
Series.peak_min
Expand Down
66 changes: 66 additions & 0 deletions py-polars/polars/eager/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -2468,6 +2468,72 @@ def rolling_apply(
pl.col(self.name).rolling_apply(window_size, function) # type: ignore
)[self.name]

def rolling_std(self, window_size: int) -> "Series":
"""
Compute a rolling std dev
Parameters
----------
window_size
Size of the rolling window
"""
return self.to_frame().select(
pl.col(self.name).rolling_std(window_size) # type: ignore
)[self.name]

def rolling_var(self, window_size: int) -> "Series":
"""
Compute a rolling variance
Parameters
----------
window_size
Size of the rolling window
"""
return self.to_frame().select(
pl.col(self.name).rolling_var(window_size) # type: ignore
)[self.name]

def rolling_median(self, window_size: int) -> "Series":
"""
Compute a rolling median
Parameters
----------
window_size
Size of the rolling window
"""
return self.to_frame().select(
pl.col(self.name).rolling_median(window_size) # type: ignore
)[self.name]

def rolling_quantile(self, window_size: int, quantile: float) -> "Series":
"""
Compute a rolling quantile
Parameters
----------
window_size
Size of the rolling window
quantile
quantile to compute
"""
return self.to_frame().select(
pl.col(self.name).rolling_quantile(window_size, quantile) # type: ignore
)[self.name]

def rolling_skew(self, window_size: int, bias: bool = True) -> "Series":
"""
Compute a rolling skew
window_size
Size of the rolling window
bias
If False, then the calculations are corrected for statistical bias.
"""
return self.to_frame().select(
pl.col(self.name).rolling_skew(window_size, bias) # type: ignore
)[self.name]

def abs(self) -> "Series":
"""
Take absolute values
Expand Down
56 changes: 56 additions & 0 deletions py-polars/polars/lazy/expr.py
Original file line number Diff line number Diff line change
Expand Up @@ -1402,6 +1402,62 @@ def rolling_apply(
"""
return wrap_expr(self._pyexpr.rolling_apply(window_size, function))

def rolling_std(self, window_size: int) -> "Expr":
"""
Compute a rolling std dev
Parameters
----------
window_size
Size of the rolling window
"""
return wrap_expr(self._pyexpr.rolling_std(window_size))

def rolling_var(self, window_size: int) -> "Expr":
"""
Compute a rolling variance
Parameters
----------
window_size
Size of the rolling window
"""
return wrap_expr(self._pyexpr.rolling_var(window_size))

def rolling_median(self, window_size: int) -> "Expr":
"""
Compute a rolling median
Parameters
----------
window_size
Size of the rolling window
"""
return wrap_expr(self._pyexpr.rolling_median(window_size))

def rolling_quantile(self, window_size: int, quantile: float) -> "Expr":
"""
Compute a rolling quantile
Parameters
----------
window_size
Size of the rolling window
quantile
quantile to compute
"""
return wrap_expr(self._pyexpr.rolling_quantile(window_size, quantile))

def rolling_skew(self, window_size: int, bias: bool = True) -> "Expr":
"""
Compute a rolling skew
window_size
Size of the rolling window
bias
If False, then the calculations are corrected for statistical bias.
"""
return wrap_expr(self._pyexpr.rolling_skew(window_size, bias))

def abs(self) -> "Expr":
"""
Take absolute values
Expand Down
60 changes: 59 additions & 1 deletion py-polars/src/lazy/dsl.rs
Original file line number Diff line number Diff line change
Expand Up @@ -613,7 +613,7 @@ impl PyExpr {
};
self.clone()
.inner
.rolling_apply(window_size, Arc::new(function))
.rolling_apply(window_size, Arc::new(function), GetOutput::same_type())
.into()
}

Expand Down Expand Up @@ -764,6 +764,64 @@ impl PyExpr {
.into()
}

pub fn rolling_std(
&self,
window_size: usize,
) -> Self {
self.inner
.clone()
.rolling_apply_float(window_size, |ca| ca.std()
)
.into()
}

pub fn rolling_var(
&self,
window_size: usize,
) -> Self {
self.inner
.clone()
.rolling_apply_float(window_size, |ca| ca.var()
)
.into()
}

pub fn rolling_median(
&self,
window_size: usize,
) -> Self {
self.inner
.clone()
.rolling_apply_float(window_size, |ca| ChunkAgg::median(ca)
)
.into()
}

pub fn rolling_quantile(
&self,
window_size: usize,
quantile: f64
) -> Self {
self.inner
.clone()
.rolling_apply_float(window_size, move |ca| ChunkAgg::quantile(ca, quantile).unwrap()
)
.into()
}

pub fn rolling_skew(
&self,
window_size: usize,
bias: bool
) -> Self {
self.inner
.clone()
.rolling_apply_float(window_size, move |ca| ca.clone().into_series().skew(bias).unwrap()
)
.into()
}


fn lst_max(&self) -> Self {
self.inner
.clone()
Expand Down
5 changes: 5 additions & 0 deletions py-polars/tests/test_series.py
Original file line number Diff line number Diff line change
Expand Up @@ -281,6 +281,11 @@ def test_rolling():
assert a.rolling_min(2).to_list() == [None, 1, 2, 2, 1]
assert a.rolling_max(2).to_list() == [None, 2, 3, 3, 2]
assert a.rolling_sum(2).to_list() == [None, 3, 5, 5, 3]
assert np.isclose(a.rolling_std(2).to_list()[1], 0.7071067811865476)
assert np.isclose(a.rolling_var(2).to_list()[1], 0.5)
assert a.rolling_median(4).to_list() == [None, None, None, 2, 2]
assert a.rolling_quantile(3, 0.5).to_list() == [None, None, 2, 2, 2]
assert a.rolling_skew(4).null_count() == 3


def test_object():
Expand Down

0 comments on commit 1b89323

Please sign in to comment.