Skip to content

Commit

Permalink
allow nulls last in sort by expressions (#4030)
Browse files Browse the repository at this point in the history
  • Loading branch information
ritchie46 committed Jul 15, 2022
1 parent fd77f1f commit 4537237
Show file tree
Hide file tree
Showing 5 changed files with 25 additions and 11 deletions.
5 changes: 3 additions & 2 deletions polars/polars-lazy/src/frame/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -291,13 +291,14 @@ impl LazyFrame {
/// /// Sort DataFrame by 'sepal.width' column
/// fn example(df: DataFrame) -> LazyFrame {
/// df.lazy()
/// .sort_by_exprs(vec![col("sepal.width")], vec![false])
/// .sort_by_exprs(vec![col("sepal.width")], vec![false], false)
/// }
/// ```
pub fn sort_by_exprs<E: AsRef<[Expr]>, B: AsRef<[bool]>>(
self,
by_exprs: E,
reverse: B,
nulls_last: bool,
) -> Self {
let by_exprs = by_exprs.as_ref().to_vec();
let reverse = reverse.as_ref().to_vec();
Expand All @@ -307,7 +308,7 @@ impl LazyFrame {
let opt_state = self.get_opt_state();
let lp = self
.get_plan_builder()
.sort(by_exprs, reverse, false)
.sort(by_exprs, reverse, nulls_last)
.build();
Self::from_logical_plan(lp, opt_state)
}
Expand Down
1 change: 1 addition & 0 deletions polars/polars-lazy/src/tests/tpch.rs
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,7 @@ fn test_q2() -> Result<()> {
.sort_by_exprs(
[cols(["s_acctbal", "n_name", "s_name", "p_partkey"])],
[true, false, false, false],
false,
)
.limit(100);

Expand Down
6 changes: 1 addition & 5 deletions py-polars/polars/internals/lazy_frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -560,17 +560,13 @@ def sort(
nulls_last
Place null values last. Can only be used if sorted by a single column.
"""
if nulls_last and not isinstance(by, str):
raise ValueError(
"nulls_last can only be combined with 'by' argument of type str"
)
if type(by) is str:
return self._from_pyldf(self._ldf.sort(by, reverse, nulls_last))
if type(reverse) is bool:
reverse = [reverse]

by = pli.selection_to_pyexpr_list(by)
return self._from_pyldf(self._ldf.sort_by_exprs(by, reverse))
return self._from_pyldf(self._ldf.sort_by_exprs(by, reverse, nulls_last))

def collect(
self,
Expand Down
9 changes: 7 additions & 2 deletions py-polars/src/lazy/dataframe.rs
Original file line number Diff line number Diff line change
Expand Up @@ -325,10 +325,15 @@ impl PyLazyFrame {
.into()
}

pub fn sort_by_exprs(&self, by_column: Vec<PyExpr>, reverse: Vec<bool>) -> PyLazyFrame {
pub fn sort_by_exprs(
&self,
by_column: Vec<PyExpr>,
reverse: Vec<bool>,
nulls_last: bool,
) -> PyLazyFrame {
let ldf = self.ldf.clone();
let exprs = py_exprs_to_exprs(by_column);
ldf.sort_by_exprs(exprs, reverse).into()
ldf.sort_by_exprs(exprs, reverse, nulls_last).into()
}
pub fn cache(&self) -> PyLazyFrame {
let ldf = self.ldf.clone();
Expand Down
15 changes: 13 additions & 2 deletions py-polars/tests/test_sort.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,8 +94,6 @@ def test_argsort_nulls() -> None:
None,
None,
]
with pytest.raises(ValueError):
a.to_frame().sort(by=["a", "b"], nulls_last=True)


def test_argsort_window_functions() -> None:
Expand All @@ -120,3 +118,16 @@ def test_sort_nans_3740() -> None:
}
)
assert df.sort("val")["key"].to_list() == [2, 4, 1, 5, 3]


def test_sort_by_exps_nulls_last() -> None:
df = pl.DataFrame(
{
"a": [1, 3, -2, None, 1],
}
).with_row_count()

assert df.sort(pl.col("a") ** 2, nulls_last=True).to_dict(False) == {
"row_nr": [0, 4, 2, 1, 3],
"a": [1, 1, -2, 3, None],
}

0 comments on commit 4537237

Please sign in to comment.