Skip to content

Commit

Permalink
feat(rust, python): error on invalid asof join inputs (#5100)
Browse files: browse the repository at this point in the history
  • Loading branch information
ritchie46 committed Oct 5, 2022
1 parent 5aebd7a commit 4034d91
Show file tree
Hide file tree
Showing 2 changed files with 47 additions and 10 deletions.
21 changes: 12 additions & 9 deletions polars/polars-core/src/frame/asof_join/groups.rs
Original file line number Diff line number Diff line change
Expand Up @@ -144,7 +144,7 @@ fn asof_join_by_numeric<T, S>(
left_asof: &ChunkedArray<T>,
right_asof: &ChunkedArray<T>,
tolerance: Option<AnyValue<'static>>,
) -> Vec<Option<IdxSize>>
) -> PolarsResult<Vec<Option<IdxSize>>>
where
T: PolarsNumericType,
S: PolarsNumericType,
Expand All @@ -162,10 +162,13 @@ where
None => (join_asof_backward_with_indirection, T::Native::zero()),
};
let left_asof = left_asof.rechunk();
let left_asof = left_asof.cont_slice().unwrap();
let err = |_: PolarsError| {
PolarsError::ComputeError("Keys are not allowed to have null values in asof join.".into())
};
let left_asof = left_asof.cont_slice().map_err(err)?;

let right_asof = right_asof.rechunk();
let right_asof = right_asof.cont_slice().unwrap();
let right_asof = right_asof.cont_slice().map_err(err)?;

let n_threads = POOL.current_num_threads();
let splitted_left = split_ca(by_left, n_threads).unwrap();
Expand Down Expand Up @@ -197,7 +200,7 @@ where
debug_assert!(n_tables.is_power_of_two());

// next we probe the right relation
POOL.install(|| {
Ok(POOL.install(|| {
vals_left
.into_par_iter()
.zip(offsets)
Expand Down Expand Up @@ -245,7 +248,7 @@ where
})
.flatten()
.collect()
})
}))
}

fn asof_join_by_utf8<T>(
Expand Down Expand Up @@ -515,13 +518,13 @@ impl DataFrame {
let right_by = right_by_s.bit_repr_large();
asof_join_by_numeric(
&left_by, &right_by, left_asof, right_asof, tolerance,
)
)?
} else {
let left_by = left_by_s.bit_repr_small();
let right_by = right_by_s.bit_repr_small();
asof_join_by_numeric(
&left_by, &right_by, left_asof, right_asof, tolerance,
)
)?
}
}
}
Expand Down Expand Up @@ -561,13 +564,13 @@ impl DataFrame {
let right_by = right_by_s.bit_repr_large();
asof_join_by_numeric(
&left_by, &right_by, left_asof, right_asof, tolerance,
)
)?
} else {
let left_by = left_by_s.bit_repr_small();
let right_by = right_by_s.bit_repr_small();
asof_join_by_numeric(
&left_by, &right_by, left_asof, right_asof, tolerance,
)
)?
}
}
}
Expand Down
36 changes: 35 additions & 1 deletion py-polars/tests/unit/test_errors.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

import io
import typing
from datetime import date
from datetime import date, datetime, timedelta

import numpy as np
import pytest
Expand Down Expand Up @@ -195,3 +195,37 @@ def test_filter_not_of_type_bool() -> None:
pl.ComputeError, match="Filter predicate must be of type Boolean, got"
):
df.filter(pl.col("json_val").str.json_path_match("$.a"))


def test_err_asof_join_null_values() -> None:
    """``join_asof`` must raise ``ComputeError`` when join keys contain nulls.

    The right-hand frame carries a row that is entirely ``None`` (including
    the ``vessel_id`` and ``voyage_start`` columns used for the join), and the
    join is expected to fail with an explicit error message.
    """
    rows_per_vessel = 5
    first_ts = datetime(2021, 9, 30)
    # One timestamp per hour; the same series is reused for both vessels.
    hourly = [first_ts + timedelta(hours=offset) for offset in range(rows_per_vessel)]

    coordinates = pl.DataFrame(
        {
            "vessel_id": [1] * rows_per_vessel + [2] * rows_per_vessel,
            "timestamp": hourly + hourly,
        }
    )

    voyages = pl.DataFrame(
        {
            "vessel_id": [1, None],
            "voyage_id": [1, None],
            "voyage_start": [datetime(2022, 1, 1), None],
            "voyage_end": [datetime(2022, 1, 20), None],
        }
    )

    expected_msg = "Keys are not allowed to have null values in asof join."
    with pytest.raises(pl.ComputeError, match=expected_msg):
        # Sorting happens inside the context manager, exactly as before, so
        # any error raised along the whole chain is seen by ``pytest.raises``.
        left = coordinates.sort("timestamp")
        right = voyages.sort("voyage_start")
        left.join_asof(
            right,
            left_on="timestamp",
            right_on="voyage_start",
            by="vessel_id",
            strategy="backward",
        )

0 comments on commit 4034d91

Please sign in to comment.