Skip to content

Commit

Permalink
make sort by multiple columns parallel (#3549)
Browse files Browse the repository at this point in the history
  • Loading branch information
ritchie46 committed Jun 1, 2022
1 parent ed825ab commit 9654f7d
Show file tree
Hide file tree
Showing 7 changed files with 40 additions and 15 deletions.
2 changes: 2 additions & 0 deletions polars/polars-arrow/src/data_types.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@ impl IsFloat for u8 {}
// Empty impl bodies: each of these types takes whatever default method
// implementations the `IsFloat` trait declares (trait definition not shown here).
impl IsFloat for u16 {}
impl IsFloat for u32 {}
impl IsFloat for u64 {}
// String slices get the trait defaults as well, so they satisfy `IsFloat` bounds.
impl IsFloat for &str {}
// NOTE(review): this wrapper impl is empty too, so it does not forward to the inner
// `T`'s methods. Presumably an `Option<f64>` holding NaN is therefore not reported
// as a float/NaN — confirm against callers that rely on `is_float()`/`is_nan()`.
impl<T: IsFloat> IsFloat for Option<T> {}

macro_rules! impl_is_float {
($tp:ty) => {
Expand Down
1 change: 1 addition & 0 deletions polars/polars-core/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ default = ["docs", "temporal", "private"]
lazy = ["sort_multiple"]

# ~40% faster collect, needed until the `TrustedLen` iterator trait stabilizes
# more fast paths
performant = []

# extra utilities for Utf8Chunked
Expand Down
30 changes: 25 additions & 5 deletions polars/polars-core/src/chunked_array/ops/sort/argsort_multiple.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
use super::*;
use polars_arrow::data_types::IsFloat;

pub(crate) fn args_validate<T: PolarsDataType>(
ca: &ChunkedArray<T>,
Expand All @@ -23,24 +24,43 @@ pub(crate) fn args_validate<T: PolarsDataType>(
Ok(())
}

pub(crate) fn argsort_multiple_impl<T: PartialOrd>(
mut vals: Vec<(IdxSize, Option<T>)>,
/// Compare two values of the same type, giving float NaNs a fixed place in the order.
///
/// When `T::is_float()` is true, NaN compares equal to NaN and greater than every
/// non-NaN value; all other pairs use `partial_cmp`.
///
/// For every other type the result of `partial_cmp` is used directly. If the values
/// turn out to be incomparable, they are treated as `Ordering::Equal` instead of being
/// unwrapped unchecked: the bounds only require `PartialOrd`, so `partial_cmp` may
/// return `None` (e.g. a wrapper type whose `IsFloat` impl does not report an inner
/// float), and an unchecked unwrap of `None` is undefined behaviour.
fn sort_cmp<T: PartialOrd + IsFloat + Copy>(a: &T, b: &T) -> Ordering {
    if T::is_float() {
        match (a.is_nan(), b.is_nan()) {
            // SAFETY: both operands were just checked to be non-NaN. This assumes
            // `is_float()` is only true for float types, whose non-NaN values are
            // always comparable.
            (false, false) => unsafe { a.partial_cmp(b).unwrap_unchecked() },
            (true, true) => Ordering::Equal,
            (true, false) => Ordering::Greater,
            (false, true) => Ordering::Less,
        }
    } else {
        // Nothing on this path proves comparability, so stay in safe code and fall
        // back to `Equal` for incomparable values.
        a.partial_cmp(b).unwrap_or(Ordering::Equal)
    }
}

pub(crate) fn argsort_multiple_impl<T: PartialOrd + Send + IsFloat + Copy>(
mut vals: Vec<(IdxSize, T)>,
other: &[Series],
reverse: &[bool],
) -> Result<IdxCa> {
assert_eq!(reverse.len() - 1, other.len());
let compare_inner: Vec<_> = other
.iter()
.map(|s| s.into_partial_ord_inner())
.collect_trusted();

vals.sort_by(
|tpl_a, tpl_b| match (reverse[0], sort_with_nulls(&tpl_a.1, &tpl_b.1)) {
let first_reverse = reverse[0];
vals.par_sort_by(
|tpl_a, tpl_b| match (first_reverse, sort_cmp(&tpl_a.1, &tpl_b.1)) {
// if ordering is equal, we check the other arrays until we find a non-equal ordering
// if we have exhausted all arrays, we keep the equal ordering.
(_, Ordering::Equal) => {
let idx_a = tpl_a.0 as usize;
let idx_b = tpl_b.0 as usize;
ordering_other_columns(&compare_inner, &reverse[1..], idx_a, idx_b)
unsafe {
ordering_other_columns(&compare_inner, reverse.get_unchecked(1..), idx_a, idx_b)
}
}
(true, Ordering::Less) => Ordering::Greater,
(true, Ordering::Greater) => Ordering::Less,
Expand Down
10 changes: 10 additions & 0 deletions polars/polars-core/src/chunked_array/ops/sort/categorical.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,16 @@
use super::*;
use crate::utils::NoNull;

/// Order two optional values so that `None` sorts before any `Some`.
///
/// Two present values are compared with `partial_cmp`; this panics if they are
/// incomparable. For the reversed ordering, call with the arguments swapped.
fn sort_with_nulls<T: PartialOrd>(a: &Option<T>, b: &Option<T>) -> Ordering {
    if let (Some(lhs), Some(rhs)) = (a, b) {
        return lhs.partial_cmp(rhs).unwrap();
    }
    // At least one side is null: `false < true`, so a missing value sorts first
    // and two missing values tie.
    a.is_some().cmp(&b.is_some())
}

/// Default sorting nulls
pub fn order_default_null<T: PartialOrd>(a: &Option<T>, b: &Option<T>) -> Ordering {
sort_with_nulls(a, b)
Expand Down
10 changes: 0 additions & 10 deletions polars/polars-core/src/chunked_array/ops/sort/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -45,16 +45,6 @@ fn order_reverse_flt<T: Float>(a: &T, b: &T) -> Ordering {
order_default_flt(b, a)
}

/// Null-aware comparison: a missing value is smaller than any present value.
///
/// Present values are compared via `partial_cmp` (panics when incomparable).
/// Swap the arguments to get the reversed ordering.
fn sort_with_nulls<T: PartialOrd>(a: &Option<T>, b: &Option<T>) -> Ordering {
    match (a.as_ref(), b.as_ref()) {
        (None, None) => Ordering::Equal,
        (None, _) => Ordering::Less,
        (_, None) => Ordering::Greater,
        (Some(lhs), Some(rhs)) => lhs.partial_cmp(rhs).unwrap(),
    }
}

fn sort_branch<T, Fd, Fr>(
slice: &mut [T],
reverse: bool,
Expand Down
1 change: 1 addition & 0 deletions polars/polars-core/src/datatypes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -601,6 +601,7 @@ impl PartialOrd for AnyValue<'_> {
(Int64(l), Int64(r)) => l.partial_cmp(r),
(Float32(l), Float32(r)) => l.partial_cmp(r),
(Float64(l), Float64(r)) => l.partial_cmp(r),
(Utf8(l), Utf8(r)) => l.partial_cmp(r),
_ => None,
}
}
Expand Down
1 change: 1 addition & 0 deletions py-polars/Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 9654f7d

Please sign in to comment.