Skip to content

Commit

Permalink
fix(rust, python): don't set sorted flag in argsort (#5410)
Browse files Browse the repository at this point in the history
  • Loading branch information
ritchie46 committed Nov 3, 2022
1 parent d82ef3f commit 6669a84
Show file tree
Hide file tree
Showing 2 changed files with 36 additions and 3 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,6 @@ pub(crate) fn argsort_multiple_impl<T: PartialOrd + Send + IsFloat + Copy>(
}
});
let ca: NoNull<IdxCa> = vals.into_iter().map(|(idx, _v)| idx).collect_trusted();
-let mut ca = ca.into_inner();
-ca.set_sorted(reverse[0]);
-Ok(ca)
+// Don't set to sorted. Argsort indices are not sorted.
+Ok(ca.into_inner())
}
34 changes: 34 additions & 0 deletions py-polars/tests/unit/test_sort.py
Original file line number Diff line number Diff line change
Expand Up @@ -288,3 +288,37 @@ def test_explicit_list_agg_sort_in_groupby() -> None:
.sort("A")
.frame_equal(df.groupby("A").agg(pl.col("B").sort(reverse=True)).sort("A"))
)


def test_sorted_join_query_5406() -> None:
    """Check a sort -> groupby/sort_by -> left-join pipeline end to end.

    The frame is sorted by ("Datetime", "RowId"), then reduced to one row per
    ("Datetime", "Group") by taking the first row when ordered by "Value"
    descending. That reduced frame is left-joined back onto the sorted frame on
    "RowId"; rows that were not picked for their group have no match and must
    yield ``None`` in "Value_right".
    """
    timestamps = [
        "2022-11-02 08:00:00",
        "2022-11-02 08:00:00",
        "2022-11-02 08:01:00",
        "2022-11-02 07:59:00",
        "2022-11-02 08:02:00",
        "2022-11-02 08:02:00",
    ]
    frame = pl.DataFrame(
        {
            "Datetime": timestamps,
            "Group": ["A", "A", "A", "B", "B", "B"],
            "Value": [1, 2, 1, 1, 2, 1],
        }
    )
    # Parse the timestamp strings, then attach a row counter used as join key.
    parsed = pl.col("Datetime").str.strptime(pl.Datetime, "%Y-%m-%d %H:%M:%S")
    frame = frame.with_column(parsed).with_row_count("RowId")

    ordered = frame.sort(by=["Datetime", "RowId"])

    # Per group: every column reordered by "Value" descending, keep the first.
    top_by_value = pl.all().sort_by("Value", reverse=True).first()
    per_group = ordered.groupby(["Datetime", "Group"]).agg([top_by_value])
    per_group = per_group.sort(["Datetime", "RowId"])

    joined = ordered.join(per_group, on="RowId", how="left")
    out = joined.select(pl.exclude(["Datetime_right", "Group_right"]))

    expected = [1, None, 2, 1, 2, None]
    assert out["Value_right"].to_list() == expected

0 comments on commit 6669a84

Please sign in to comment.