Skip to content

Commit

Permalink
pushdown slice to sort nodes (#3159)
Browse files Browse the repository at this point in the history
  • Loading branch information
ritchie46 committed Apr 16, 2022
1 parent 527c80a commit 32c3ee1
Show file tree
Hide file tree
Showing 7 changed files with 57 additions and 8 deletions.
12 changes: 9 additions & 3 deletions polars/polars-core/src/frame/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1683,7 +1683,7 @@ impl DataFrame {
self.rechunk();
let by_column = self.select_series(by_column)?;
let reverse = reverse.into_vec();
self.columns = self.sort_impl(by_column, reverse, false)?.columns;
self.columns = self.sort_impl(by_column, reverse, false, None)?.columns;
Ok(self)
}

Expand All @@ -1694,6 +1694,7 @@ impl DataFrame {
by_column: Vec<Series>,
reverse: Vec<bool>,
nulls_last: bool,
slice: Option<(i64, usize)>,
) -> Result<Self> {
// note that the by_column argument also contains evaluated expression from polars-lazy
// that may not even be present in this dataframe.
Expand All @@ -1702,7 +1703,7 @@ impl DataFrame {
// as expressions are not present (they are renamed to _POLARS_SORT_COLUMN_i.
let first_reverse = reverse[0];
let first_by_column = by_column[0].name().to_string();
let take = match by_column.len() {
let mut take = match by_column.len() {
1 => {
let s = &by_column[0];
s.argsort(SortOptions {
Expand All @@ -1722,6 +1723,11 @@ impl DataFrame {
}
}
};

if let Some((offset, len)) = slice {
take = take.slice(offset, len);
}

// Safety:
// the created indices are in bounds
let mut df = if std::env::var("POLARS_VERT_PAR").is_ok() {
Expand Down Expand Up @@ -1773,7 +1779,7 @@ impl DataFrame {
let by_column = vec![df.column(by_column)?.clone()];
let reverse = vec![options.descending];
df.columns = df
.sort_impl(by_column, reverse, options.nulls_last)?
.sort_impl(by_column, reverse, options.nulls_last, None)?
.columns;
Ok(df)
}
Expand Down
1 change: 1 addition & 0 deletions polars/polars-lazy/src/logical_plan/builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -370,6 +370,7 @@ impl LogicalPlanBuilder {
args: SortArguments {
reverse,
nulls_last: null_last,
slice: None,
},
}
.into()
Expand Down
12 changes: 12 additions & 0 deletions polars/polars-lazy/src/logical_plan/optimizer/slice_pushdown.rs
Original file line number Diff line number Diff line change
Expand Up @@ -224,7 +224,19 @@ impl SlicePushDown {
maintain_order,
options
})
}
(Sort {input, by_column, mut args}, Some(state)) => {
// first restart optimization in inputs and get the updated LP
let input_lp = lp_arena.take(input);
let input_lp = self.pushdown(input_lp, None, lp_arena, expr_arena)?;
let input= lp_arena.add(input_lp);

args.slice = Some((state.offset, state.len as usize));
Ok(Sort {
input,
by_column,
args
})
}
(Slice {
input,
Expand Down
1 change: 1 addition & 0 deletions polars/polars-lazy/src/logical_plan/options.rs
Original file line number Diff line number Diff line change
Expand Up @@ -137,4 +137,5 @@ pub struct SortArguments {
pub(crate) reverse: Vec<bool>,
// Can only be true in case of a single column.
pub(crate) nulls_last: bool,
pub(crate) slice: Option<(i64, usize)>,
}
1 change: 1 addition & 0 deletions polars/polars-lazy/src/physical_plan/executors/sort.rs
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ impl Executor for SortExec {
by_columns,
std::mem::take(&mut self.args.reverse),
self.args.nulls_last,
self.args.slice,
)
}
}
36 changes: 32 additions & 4 deletions polars/polars-lazy/src/tests/optimization_checks.rs
Original file line number Diff line number Diff line change
Expand Up @@ -175,11 +175,12 @@ pub fn test_slice_pushdown_join() -> Result<()> {

let (mut expr_arena, mut lp_arena) = get_arenas();
let lp = q.clone().optimize(&mut lp_arena, &mut expr_arena).unwrap();
assert!((&lp_arena).iter(lp).any(|(_, lp)| {
assert!((&lp_arena).iter(lp).all(|(_, lp)| {
use ALogicalPlan::*;
match lp {
Join { options, .. } => options.slice == Some((1, 3)),
_ => false,
Slice { .. } => false,
_ => true,
}
}));
let out = q.collect()?;
Expand All @@ -203,11 +204,12 @@ pub fn test_slice_pushdown_groupby() -> Result<()> {

let (mut expr_arena, mut lp_arena) = get_arenas();
let lp = q.clone().optimize(&mut lp_arena, &mut expr_arena).unwrap();
assert!((&lp_arena).iter(lp).any(|(_, lp)| {
assert!((&lp_arena).iter(lp).all(|(_, lp)| {
use ALogicalPlan::*;
match lp {
Aggregate { options, .. } => options.slice == Some((1, 3)),
_ => false,
Slice { .. } => false,
_ => true,
}
}));
let out = q.collect()?;
Expand All @@ -216,6 +218,32 @@ pub fn test_slice_pushdown_groupby() -> Result<()> {
Ok(())
}

#[test]
pub fn test_slice_pushdown_sort() -> Result<()> {
let _guard = SINGLE_LOCK.lock().unwrap();
let q = scan_foods_parquet(false).limit(100);

let q = q.sort("category", SortOptions::default()).slice(1, 3);

// test if optimization continued beyond the sort node
assert!(slice_at_scan(q.clone()));

let (mut expr_arena, mut lp_arena) = get_arenas();
let lp = q.clone().optimize(&mut lp_arena, &mut expr_arena).unwrap();
assert!((&lp_arena).iter(lp).all(|(_, lp)| {
use ALogicalPlan::*;
match lp {
Sort { args, .. } => args.slice == Some((1, 3)),
Slice { .. } => false,
_ => true,
}
}));
let out = q.collect()?;
assert_eq!(out.shape(), (3, 4));

Ok(())
}

#[test]
pub fn test_predicate_block_cast() -> Result<()> {
let df = df![
Expand Down
2 changes: 1 addition & 1 deletion py-polars/src/dataframe.rs
Original file line number Diff line number Diff line change
Expand Up @@ -684,7 +684,7 @@ impl PyDataFrame {
Ok(df.into())
}

pub fn rechunk(&mut self) -> Self {
pub fn rechunk(&self) -> Self {
self.df.agg_chunks().into()
}

Expand Down

0 comments on commit 32c3ee1

Please sign in to comment.