Skip to content

Commit

Permalink
perf(rust): do not rechunk left joins (#5066)
Browse files Browse the repository at this point in the history
  • Loading branch information
ritchie46 committed Oct 1, 2022
1 parent 66c98b9 commit 649648e
Show file tree
Hide file tree
Showing 2 changed files with 12 additions and 4 deletions.
12 changes: 9 additions & 3 deletions polars/polars-core/src/frame/hash_join/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -355,12 +355,18 @@ impl DataFrame {

#[cfg(feature = "chunked_ids")]
{
if _check_rechunk {
// a left join creates chunked-ids;
// the other join types do not yet.
// TODO! change this to other join types once they support chunked-id joins
if _check_rechunk
&& !(matches!(how, JoinType::Left)
|| std::env::var("POLARS_NO_CHUNKED_JOIN").is_ok())
{
let mut left = Cow::Borrowed(self);
let mut right = Cow::Borrowed(other);
if self.should_rechunk() {
if _verbose {
eprintln!("join triggered a rechunk of the left dataframe: {} columns are affected", self.width());
eprintln!("{:?} join triggered a rechunk of the left dataframe: {} columns are affected", how, self.width());
}

let mut tmp_left = self.clone();
Expand All @@ -369,7 +375,7 @@ impl DataFrame {
}
if other.should_rechunk() {
if _verbose {
eprintln!("join triggered a rechunk of the right dataframe: {} columns are affected", other.width());
eprintln!("{:?} join triggered a rechunk of the right dataframe: {} columns are affected", how, other.width());
}
let mut tmp_right = other.clone();
tmp_right.as_single_chunk_par();
Expand Down
4 changes: 3 additions & 1 deletion polars/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -358,7 +358,9 @@
//! * `POLARS_ALLOW_EXTENSION` -> allows for `[ObjectChunked<T>]` to be used in arrow, opening up possibilities like using
//! `T` in complex lazy expressions. However, this requires `unsafe` code to allow it.
//! * `POLARS_NO_PARQUET_STATISTICS` -> if set, statistics in parquet files are ignored.
//! * `POLARS_PANIC_ON_ERR` -> panic instead of returning an Error..
//! * `POLARS_PANIC_ON_ERR` -> panic instead of returning an Error.
//! * `POLARS_NO_CHUNKED_JOIN` -> Force rechunk before joins.
//!
//!
//! ## User Guide
//! If you want to read more, [check the User Guide](https://pola-rs.github.io/polars-book/).
Expand Down

0 comments on commit 649648e

Please sign in to comment.