Skip to content

Commit

Permalink
fix sorted unique (#3837)
Browse files Browse the repository at this point in the history
  • Loading branch information
ritchie46 committed Jun 28, 2022
1 parent dc51044 commit 8adb1f6
Show file tree
Hide file tree
Showing 2 changed files with 24 additions and 21 deletions.
35 changes: 14 additions & 21 deletions polars/polars-core/src/frame/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2894,37 +2894,30 @@ impl DataFrame {
Some(s) => s.iter().map(|s| &**s).collect(),
None => self.get_column_names(),
};
let gb = self.groupby(names)?;
let groups = gb.get_groups().unwrap_idx();

let finish_maintain_order = |mut groups: Vec<IdxSize>| {
groups.sort_unstable();
let ca = IdxCa::from_vec("", groups);
unsafe { self.take_unchecked(&ca) }
};

let df = match (keep, maintain_order) {
let columns = match (keep, maintain_order) {
(First, true) => {
let iter = groups.iter().map(|g| g.0);
let groups = iter.collect_trusted::<Vec<_>>();
finish_maintain_order(groups)
let gb = self.groupby_stable(names)?;
let groups = gb.get_groups();
self.apply_columns_par(&|s| unsafe { s.agg_first(groups) })
}
(Last, true) => {
let iter = groups.iter().map(|g| g.1[g.1.len() - 1]);
let groups = iter.collect_trusted::<Vec<_>>();
finish_maintain_order(groups)
let gb = self.groupby_stable(names)?;
let groups = gb.get_groups();
self.apply_columns_par(&|s| unsafe { s.agg_last(groups) })
}
(First, false) => {
let iter = groups.iter().map(|g| g.0 as usize);
unsafe { self.take_iter_unchecked(iter) }
let gb = self.groupby(names)?;
let groups = gb.get_groups();
self.apply_columns_par(&|s| unsafe { s.agg_first(groups) })
}
(Last, false) => {
let iter = groups.iter().map(|g| g.1[g.1.len() - 1] as usize);
unsafe { self.take_iter_unchecked(iter) }
let gb = self.groupby(names)?;
let groups = gb.get_groups();
self.apply_columns_par(&|s| unsafe { s.agg_last(groups) })
}
};

Ok(df)
Ok(DataFrame::new_no_checks(columns))
}

/// Get a mask of all the unique rows in the `DataFrame`.
Expand Down
10 changes: 10 additions & 0 deletions py-polars/tests/test_datelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -1194,3 +1194,13 @@ def test_date_offset() -> None:
datetime(2017, 11, 1, 0, 0),
datetime(2018, 11, 1, 0, 0),
]


def test_sorted_unique() -> None:
    """Check `unique()` on a frame that was sorted on its only column.

    Builds a single-column `Date` frame with the two dates in descending
    order, sorts it by that column, de-duplicates it, and asserts that the
    resulting dict holds both dates in ascending order.
    """
    unsorted_dates = pl.Series(
        "dt", [date(2015, 6, 24), date(2015, 6, 23)], dtype=pl.Date
    )
    frame = pl.DataFrame([unsorted_dates])

    # Sort first, then de-duplicate: this is the combination under test.
    deduplicated = frame.sort("dt").unique()

    expected = {"dt": [date(2015, 6, 23), date(2015, 6, 24)]}
    assert deduplicated.to_dict(False) == expected

0 comments on commit 8adb1f6

Please sign in to comment.