Skip to content

Commit

Permalink
fix sorted window expression (#4349)
Browse files Browse the repository at this point in the history
  • Loading branch information
ritchie46 committed Aug 9, 2022
1 parent c03fbd3 commit c1e8174
Show file tree
Hide file tree
Showing 3 changed files with 22 additions and 6 deletions.
10 changes: 5 additions & 5 deletions polars/polars-lazy/src/physical_plan/expressions/window.rs
Original file line number Diff line number Diff line change
Expand Up @@ -349,10 +349,7 @@ impl PhysicalExpr for WindowExpr {
// The example below shows the naive version without group tuple mapping

// columns
// a
// b
// a
// a
// a b a a
//
// agg list
// [0, 2, 3]
Expand All @@ -368,6 +365,9 @@ impl PhysicalExpr for WindowExpr {
//
// take by argsorted indexes and voila groups mapped
// [0, 1, 2, 3]

// TODO!
// investigate if sorted arrays can be returned directly
let out_column = ac.aggregated();
let mut original_idx = Vec::with_capacity(out_column.len());
match gb.get_groups() {
Expand All @@ -378,7 +378,7 @@ impl PhysicalExpr for WindowExpr {
}
GroupsProxy::Slice { groups, .. } => {
for g in groups {
original_idx.extend(g[0]..g[0] + 1)
original_idx.extend(g[0]..g[0] + g[1])
}
}
};
Expand Down
2 changes: 1 addition & 1 deletion polars/polars-utils/src/sort.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ use rayon::{prelude::*, ThreadPool};
/// The second argsort sorts indices from `0` to `len` so can be just assigned to the
/// new index location.
///
/// Besides that we know that all indices are unique ang thus not alias so we can parallelize.
/// Besides that, we know that all indices are unique and thus do not alias, so we can parallelize.
///
/// This sort does not sort in place and will allocate.
///
Expand Down
16 changes: 16 additions & 0 deletions py-polars/tests/test_window.py
Original file line number Diff line number Diff line change
Expand Up @@ -212,3 +212,19 @@ def test_window_functions_list_types() -> None:
"col_list": [[1], [1], [2], [2]],
"list_shifted": [[[], [1]], [[], [1]], [[], [2]], [[], [2]]],
}


def test_sorted_window_expression() -> None:
    """Check that a window expression over ``b`` gives the same frame whether
    the data is sorted by ``b`` after or before the expression is evaluated."""
    n_rows = 10
    frame = pl.DataFrame(
        {
            "a": np.random.randint(10, size=n_rows),
            "b": np.random.randint(10, size=n_rows),
        }
    )
    computed = (pl.col("a") + pl.col("b")).over("b").alias("computed")

    # evaluate the window on the unsorted frame, then sort the result
    windowed_then_sorted = frame.with_column(computed).sort("b")

    # sort explicitly first, then evaluate the same window expression
    sorted_then_windowed = frame.sort("b").with_column(computed)

    assert windowed_then_sorted.frame_equal(sorted_then_windowed)

0 comments on commit c1e8174

Please sign in to comment.