Skip to content

Commit

Permalink
ensure main lazyframe gets file cache opt state (#3981)
Browse files Browse the repository at this point in the history
  • Loading branch information
ritchie46 committed Jul 11, 2022
1 parent 5f5acb2 commit cfb8984
Show file tree
Hide file tree
Showing 3 changed files with 22 additions and 4 deletions.
4 changes: 3 additions & 1 deletion polars/polars-lazy/src/dsl/functions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -526,11 +526,13 @@ pub fn concat<L: AsRef<[LazyFrame]>>(inputs: L, rechunk: bool) -> Result<LazyFra
.get_mut(0)
.ok_or_else(|| PolarsError::ComputeError("empty container given".into()))?,
);
let opt_state = lf.opt_state;
let mut opt_state = lf.opt_state;
let mut lps = Vec::with_capacity(inputs.len());
lps.push(lf.logical_plan);

for lf in &mut inputs[1..] {
// ensure we enable file caching if any lf has it enabled
opt_state.file_caching |= lf.opt_state.file_caching;
let lp = std::mem::take(&mut lf.logical_plan);
lps.push(lp)
}
Expand Down
13 changes: 10 additions & 3 deletions polars/polars-lazy/src/frame/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -939,12 +939,15 @@ impl LazyFrame {
/// }
/// ```
pub fn join<E: AsRef<[Expr]>>(
self,
mut self,
other: LazyFrame,
left_on: E,
right_on: E,
how: JoinType,
) -> LazyFrame {
// if any of the nodes reads from files we must activate this this plan as well.
self.opt_state.file_caching |= other.opt_state.file_caching;

let left_on = left_on.as_ref().to_vec();
let right_on = right_on.as_ref().to_vec();
self.join_builder()
Expand Down Expand Up @@ -1432,7 +1435,11 @@ impl JoinBuilder {

/// Finish builder
pub fn finish(self) -> LazyFrame {
let opt_state = self.lf.opt_state;
let mut opt_state = self.lf.opt_state;
let other = self.other.expect("with not set");

// if any of the nodes reads from files we must activate this this plan as well.
opt_state.file_caching |= other.opt_state.file_caching;

let suffix = match self.suffix {
None => Cow::Borrowed("_right"),
Expand All @@ -1443,7 +1450,7 @@ impl JoinBuilder {
.lf
.get_plan_builder()
.join(
self.other.expect("with not set").logical_plan,
other.logical_plan,
self.left_on,
self.right_on,
JoinOptions {
Expand Down
9 changes: 9 additions & 0 deletions py-polars/tests/io/test_parquet.py
Original file line number Diff line number Diff line change
Expand Up @@ -176,3 +176,12 @@ def test_chunked_round_trip() -> None:
df.write_parquet(f)
f.seek(0)
assert pl.read_parquet(f).frame_equal(df)


def test_lazy_self_join_file_cache_prop_3979(io_test_dir: str) -> None:
path = os.path.join(io_test_dir, "small.parquet")
a = pl.scan_parquet(path)
b = pl.DataFrame({"a": [1]}).lazy()

assert a.join(b, how="cross").collect().shape == (3, 17)
assert b.join(a, how="cross").collect().shape == (3, 17)

0 comments on commit cfb8984

Please sign in to comment.