Skip to content

Commit

Permalink
fix(rust, python): block is_null predicate in asof join (#5358)
Browse files Browse the repository at this point in the history
  • Loading branch information
ritchie46 committed Oct 27, 2022
1 parent 861e3e3 commit e97088a
Show file tree
Hide file tree
Showing 3 changed files with 57 additions and 3 deletions.
1 change: 0 additions & 1 deletion polars/polars-core/src/frame/hash_join/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,6 @@ pub enum JoinType {
Inner,
Outer,
#[cfg(feature = "asof_join")]
#[cfg_attr(feature = "serde", serde(skip))]
AsOf(AsOfOptions),
Cross,
#[cfg(feature = "semi_anti_join")]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,20 @@ use crate::utils::{aexprs_to_schema, check_input_node, has_aexpr};
#[derive(Default)]
pub struct PredicatePushDown {}

/// Reports whether a join of kind `how` is treated as one that may
/// introduce null values into its output.
///
/// `Left`, `Outer` and `Cross` always count; `AsOf` counts as well when the
/// `asof_join` feature is compiled in. Every other join type yields `false`.
fn join_produces_null(how: &JoinType) -> bool {
    match how {
        JoinType::Left | JoinType::Outer | JoinType::Cross => true,
        // The `AsOf` variant only exists when the feature is enabled, so the
        // arm has to be gated the same way.
        #[cfg(feature = "asof_join")]
        JoinType::AsOf(_) => true,
        _ => false,
    }
}

impl PredicatePushDown {
fn optional_apply_predicate(
&self,
Expand Down Expand Up @@ -427,7 +441,7 @@ impl PredicatePushDown {
// join might create null values.
|| has_aexpr(predicate, expr_arena, checks_nulls)
// only these join types produce null values
&& matches!(&options.how, JoinType::Left | JoinType::Outer | JoinType::Cross){
&& join_produces_null(&options.how) {
local_predicates.push(predicate);
continue;
}
Expand Down
43 changes: 42 additions & 1 deletion py-polars/tests/unit/test_predicates.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from datetime import date, timedelta
from datetime import date, datetime, timedelta

import polars as pl

Expand Down Expand Up @@ -38,3 +38,44 @@ def test_when_then_implicit_none() -> None:
"literal": ["Foo", "Foo", "Foo", None, None, None],
"bar": ["Foo", "Foo", "Foo", None, None, None],
}


def test_predicate_null_block_asof_join() -> None:
    """An ``is_not_null`` filter applied after ``join_asof`` keeps only matched rows.

    The left frame has ids 1..4 while the right frame only has ids 1..3, so
    the row with id 4 has no asof match and is expected to be dropped by the
    filter on ``value``.
    """
    left_timestamps = [datetime(2022, 1, 1, 10, minute) for minute in range(4)]
    left = pl.DataFrame(
        {
            "id": [1, 2, 3, 4],
            "timestamp": left_timestamps,
        }
    ).lazy()

    right_timestamps = [
        datetime(2022, 1, 1, 9, 59, 50),
        datetime(2022, 1, 1, 10, 0, 50),
        datetime(2022, 1, 1, 10, 1, 50),
    ] + [datetime(2022, 1, 1, 8, 0, 0)] * 3
    right = pl.DataFrame(
        {
            "id": [1, 2, 3, 1, 2, 3],
            "timestamp": right_timestamps,
            "value": ["a", "b", "c", "a", "b", "c"],
        }
    ).lazy()

    joined = left.join_asof(right, by="id", on="timestamp")
    only_matched = joined.filter(pl.col("value").is_not_null())
    result = only_matched.collect().to_dict(False)

    expected = {
        "id": [1, 2, 3],
        "timestamp": [datetime(2022, 1, 1, 10, minute) for minute in range(3)],
        "value": ["a", "b", "c"],
    }
    assert result == expected

0 comments on commit e97088a

Please sign in to comment.