Skip to content

Commit

Permalink
fix(rust, python): check null values in asof_join + groupby (#5756)
Browse files Browse the repository at this point in the history
  • Loading branch information
ritchie46 committed Dec 9, 2022
1 parent 3b7c0e5 commit 73e8274
Show file tree
Hide file tree
Showing 8 changed files with 53 additions and 20 deletions.
17 changes: 8 additions & 9 deletions polars/polars-core/src/frame/asof_join/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,17 +29,21 @@ pub struct AsOfOptions {

fn check_asof_columns(a: &Series, b: &Series) -> PolarsResult<()> {
if a.dtype() != b.dtype() {
return Err(PolarsError::ComputeError(
Err(PolarsError::ComputeError(
format!(
"keys used in asof-join must have equal dtypes. We got: left: {:?}\tright: {:?}",
a.dtype(),
b.dtype()
)
.into(),
));
))
} else if a.null_count() > 0 || b.null_count() > 0 {
Err(PolarsError::ComputeError(
"asof join must not have null values in 'on' arguments".into(),
))
} else {
Ok(())
}

Ok(())
}

#[derive(Clone, Copy, Debug, PartialEq, Eq)]
Expand Down Expand Up @@ -70,11 +74,6 @@ where
) -> PolarsResult<Vec<Option<IdxSize>>> {
let other = self.unpack_series_matching_type(other)?;

if self.null_count() > 0 || other.null_count() > 0 {
return Err(PolarsError::ComputeError(
"asof join must not have null values in 'on' arguments".into(),
));
}
// cont_slice requires a single chunk
let ca = self.rechunk();
let other = other.rechunk();
Expand Down
2 changes: 1 addition & 1 deletion polars/polars-time/src/windows/groupby.rs
Original file line number Diff line number Diff line change
Expand Up @@ -366,7 +366,7 @@ fn partially_check_sorted(time: &[i64]) {
assert!(time[..std::cmp::min(time.len(), 10)].windows(2).filter_map(|w| match w[0].cmp(&w[1]) {
Ordering::Equal => None,
t => Some(t)
}).all_equal(), "subslice check showed that the values in `groupby_rolling` were not sorted. Please ensure the index column is sorted.");
}).all_equal(), "Subslice check showed that the values in `groupby_rolling` were not sorted. Please ensure the index column is sorted.");
}
}

Expand Down
11 changes: 7 additions & 4 deletions py-polars/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,9 @@ top_k = ["polars/top_k"]
propagate_nans = ["polars/propagate_nans"]
sql = ["polars/sql"]
build_info = ["dep:pyo3-built", "dep:built"]
performant = ["polars/performant"]
timezones = ["polars/timezones"]
cse = ["polars/cse"]

all = [
"json",
Expand All @@ -82,19 +85,21 @@ all = [
"lazy_regex",
"csv-file",
"extract_jsonpath",
"polars/timezones",
"timezones",
"object",
"pivot",
"top_k",
"build_info",
# we need to add this, as maturin fails if we don't
# remove this once polars-algo is released
"polars/algo",
"polars/cse",
"cse",
"propagate_nans",
"polars/groupby_list",
"sql",
"polars/dtype-binary",
"streaming",
"performant",
]

# we cannot conditionally activate simd
Expand All @@ -104,7 +109,6 @@ all = [
default = [
"all",
"nightly",
"streaming",
]

[dependencies.polars]
Expand All @@ -118,7 +122,6 @@ features = [
"temporal",
"random",
"fmt",
"performant",
"dtype-full",
"rows",
"private",
Expand Down
1 change: 1 addition & 0 deletions py-polars/src/dataframe.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ use polars_core::frame::*;
use polars_core::prelude::QuantileInterpolOptions;
use polars_core::utils::arrow::compute::cast::CastOptions;
use polars_core::utils::try_get_supertype;
#[cfg(feature = "pivot")]
use polars_lazy::frame::pivot::{pivot, pivot_stable};
use pyo3::prelude::*;
use pyo3::types::{PyDict, PyList, PyTuple};
Expand Down
9 changes: 7 additions & 2 deletions py-polars/src/lazy/dataframe.rs
Original file line number Diff line number Diff line change
Expand Up @@ -353,14 +353,19 @@ impl PyLazyFrame {
streaming: bool,
) -> PyLazyFrame {
let ldf = self.ldf.clone();
let ldf = ldf
let mut ldf = ldf
.with_type_coercion(type_coercion)
.with_predicate_pushdown(predicate_pushdown)
.with_simplify_expr(simplify_expr)
.with_slice_pushdown(slice_pushdown)
.with_common_subplan_elimination(cse)
.with_streaming(streaming)
.with_projection_pushdown(projection_pushdown);

#[cfg(feature = "cse")]
{
ldf = ldf.with_common_subplan_elimination(cse);
}

ldf.into()
}

Expand Down
22 changes: 20 additions & 2 deletions py-polars/src/lazy/dsl.rs
Original file line number Diff line number Diff line change
Expand Up @@ -79,12 +79,20 @@ impl PyExpr {

pub fn __getstate__(&self, py: Python) -> PyResult<PyObject> {
// Used in pickle/pickling
let s = serde_json::to_string(&self.inner).unwrap();
Ok(PyBytes::new(py, s.as_bytes()).to_object(py))
#[cfg(feature = "json")]
{
let s = serde_json::to_string(&self.inner).unwrap();
Ok(PyBytes::new(py, s.as_bytes()).to_object(py))
}
#[cfg(not(feature = "json"))]
{
panic!("activate 'json' feature")
}
}

pub fn __setstate__(&mut self, py: Python, state: PyObject) -> PyResult<()> {
// Used in pickle/pickling
#[cfg(feature = "json")]
match state.extract::<&PyBytes>(py) {
Ok(s) => {
// Safety
Expand All @@ -100,6 +108,11 @@ impl PyExpr {
}
Err(e) => Err(e),
}

#[cfg(not(feature = "json"))]
{
panic!("activate 'json' feature")
}
}

pub fn alias(&self, name: &str) -> PyExpr {
Expand Down Expand Up @@ -651,6 +664,7 @@ impl PyExpr {
.into()
}

#[cfg(feature = "lazy_regex")]
pub fn str_replace(&self, pat: PyExpr, val: PyExpr, literal: bool) -> PyExpr {
self.inner
.clone()
Expand All @@ -659,6 +673,7 @@ impl PyExpr {
.into()
}

#[cfg(feature = "lazy_regex")]
pub fn str_replace_all(&self, pat: PyExpr, val: PyExpr, literal: bool) -> PyExpr {
self.inner
.clone()
Expand Down Expand Up @@ -924,6 +939,7 @@ impl PyExpr {
self.inner.clone().dt().with_time_unit(tu.0).into()
}

#[cfg(feature = "timezones")]
pub fn dt_with_time_zone(&self, tz: Option<TimeZone>) -> PyExpr {
self.inner.clone().dt().with_time_zone(tz).into()
}
Expand All @@ -932,10 +948,12 @@ impl PyExpr {
self.inner.clone().dt().cast_time_unit(tu.0).into()
}

#[cfg(feature = "timezones")]
pub fn dt_cast_time_zone(&self, tz: String) -> PyExpr {
self.inner.clone().dt().cast_time_zone(tz).into()
}

#[cfg(feature = "timezones")]
pub fn dt_tz_localize(&self, tz: String) -> PyExpr {
self.inner.clone().dt().tz_localize(tz).into()
}
Expand Down
9 changes: 8 additions & 1 deletion py-polars/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -189,7 +189,14 @@ fn spearman_rank_corr(
ddof: u8,
propagate_nans: bool,
) -> dsl::PyExpr {
polars::lazy::dsl::spearman_rank_corr(a.inner, b.inner, ddof, propagate_nans).into()
#[cfg(feature = "propagate_nans")]
{
polars::lazy::dsl::spearman_rank_corr(a.inner, b.inner, ddof, propagate_nans).into()
}
#[cfg(not(feature = "propagate_nans"))]
{
panic!("activate 'propagate_nans'")
}
}

#[pyfunction]
Expand Down
2 changes: 1 addition & 1 deletion py-polars/tests/unit/test_errors.py
Original file line number Diff line number Diff line change
Expand Up @@ -218,7 +218,7 @@ def test_err_asof_join_null_values() -> None:
}
)
with pytest.raises(
pl.ComputeError, match="Keys are not allowed to have null values in asof join."
pl.ComputeError, match=".sof join must not have null values in 'on' argument"
):
(
df_coor.sort("timestamp").join_asof(
Expand Down

0 comments on commit 73e8274

Please sign in to comment.