Skip to content

Commit

Permalink
expose nulls_last argument for argsort/DataFrame.sort (#2878)
Browse files Browse the repository at this point in the history
* expose nulls_last argument for argsort/DataFrame.sort

* less compiled code sort
  • Loading branch information
ritchie46 committed Mar 12, 2022
1 parent 64be773 commit bc6632c
Show file tree
Hide file tree
Showing 45 changed files with 584 additions and 368 deletions.
2 changes: 2 additions & 0 deletions polars/polars-core/src/chunked_array/ops/aggregate.rs
Original file line number Diff line number Diff line change
Expand Up @@ -163,6 +163,7 @@ fn linear_interpol<T: Float>(bounds: &[Option<T>], idx: i64, float_idx: f64) ->
impl<T> ChunkQuantile<f64> for ChunkedArray<T>
where
T: PolarsIntegerType,
T::Native: Ord,
<T::Native as Simd>::Simd: Add<Output = <T::Native as Simd>::Simd>
+ compute::aggregate::Sum<T::Native>
+ compute::aggregate::SimdOrd<T::Native>,
Expand Down Expand Up @@ -623,6 +624,7 @@ macro_rules! impl_quantile_as_series {
impl<T> QuantileAggSeries for ChunkedArray<T>
where
T: PolarsIntegerType,
T::Native: Ord,
<T::Native as Simd>::Simd: Add<Output = <T::Native as Simd>::Simd>
+ compute::aggregate::Sum<T::Native>
+ compute::aggregate::SimdOrd<T::Native>,
Expand Down
2 changes: 1 addition & 1 deletion polars/polars-core/src/chunked_array/ops/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -517,7 +517,7 @@ pub trait ChunkSort<T> {
fn sort(&self, reverse: bool) -> ChunkedArray<T>;

/// Retrieve the indexes needed to sort this array.
fn argsort(&self, reverse: bool) -> IdxCa;
fn argsort(&self, options: SortOptions) -> IdxCa;

/// Retrieve the indexes needed to sort this and the other arrays.
fn argsort_multiple(&self, _other: &[Series], _reverse: &[bool]) -> Result<IdxCa> {
Expand Down
75 changes: 75 additions & 0 deletions polars/polars-core/src/chunked_array/ops/sort/argsort.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
use super::*;

/// Ascending comparator for `(index, value)` pairs.
///
/// Only the value (second tuple field) takes part in the comparison; the
/// index is ignored.
///
/// # Panics
///
/// Panics if the two values are incomparable, i.e. `partial_cmp` yields `None`.
fn default_order<T>(a: &(IdxSize, T), b: &(IdxSize, T)) -> Ordering
where
    T: PartialOrd,
{
    let (_, lhs) = a;
    let (_, rhs) = b;
    lhs.partial_cmp(rhs).unwrap()
}

/// Descending comparator for `(index, value)` pairs.
///
/// Compares the value of `b` against the value of `a`, so larger values
/// order first; the index is ignored.
///
/// # Panics
///
/// Panics if the two values are incomparable, i.e. `partial_cmp` yields `None`.
fn reverse_order<T>(a: &(IdxSize, T), b: &(IdxSize, T)) -> Ordering
where
    T: PartialOrd,
{
    let (_, lhs) = a;
    let (_, rhs) = b;
    rhs.partial_cmp(lhs).unwrap()
}

/// Compute the indices that sort the values yielded by `iters`.
///
/// Every item is numbered in encounter order across all inner iterators.
/// Non-null values are sorted through `argsort_branch` (using `default_order`
/// or `reverse_order`, presumably selected by `options.descending`); indices
/// of nulls are gathered separately and placed as follows:
///
/// * `options.descending || options.nulls_last`: after the sorted indices,
///   in reversed encounter order;
/// * otherwise: before the sorted indices, in encounter order.
///
/// The result is a single-chunk `IdxCa` called `name`.
///
/// `null_count` and `len` are used to pre-size the buffers. The null indices
/// are written with `push_unchecked`, so `null_count` must not be smaller than
/// the number of `None` items actually yielded.
pub(super) fn argsort<I, J, K>(
    name: &str,
    iters: I,
    options: SortOptions,
    null_count: usize,
    len: usize,
) -> IdxCa
where
    I: IntoIterator<Item = J>,
    J: IntoIterator<Item = Option<K>>,
    K: PartialOrd + Send + Sync,
{
    let descending = options.descending;
    // Nulls trail the valid values for a descending sort as well as when
    // the caller explicitly asks for it.
    let nulls_at_end = descending || options.nulls_last;

    let mut valid = Vec::with_capacity(len - null_count);

    // When the nulls come first, this buffer is reused as the output and must
    // be able to hold every index; otherwise it only ever holds the nulls.
    let null_capacity = if nulls_at_end { null_count } else { len };
    let mut null_indices = Vec::with_capacity(null_capacity);

    let mut next_idx: IdxSize = 0;
    for chunk in iters {
        for item in chunk {
            let i = next_idx;
            next_idx += 1;
            match item {
                Some(v) => valid.push((i, v)),
                None => {
                    // SAFETY: `null_indices` was allocated with room for at
                    // least `null_count` entries, which the caller guarantees
                    // covers every null.
                    unsafe { null_indices.push_unchecked(i) };
                }
            }
        }
    }

    argsort_branch(valid.as_mut_slice(), descending, default_order, reverse_order);

    let sorted = valid.into_iter().map(|(i, _)| i);
    let idx = if nulls_at_end {
        let mut out = Vec::with_capacity(len);
        out.extend(sorted.chain(null_indices.into_iter().rev()));
        out
    } else {
        let start = null_indices.as_ptr() as usize;
        null_indices.extend(sorted);
        // The buffer was sized for `len` entries up front, so appending the
        // sorted indices must not have reallocated it.
        debug_assert_eq!(null_indices.as_ptr() as usize, start);
        null_indices
    };

    let arr = IdxArr::from_data_default(Buffer::from(idx), None);
    IdxCa::from_chunks(name, vec![Arc::new(arr)])
}
48 changes: 24 additions & 24 deletions polars/polars-core/src/chunked_array/ops/sort/categorical.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,16 @@
use super::*;
use crate::utils::NoNull;

/// Compare two optional values in the default order.
///
/// Forwards `a` and `b` unchanged to `sort_with_nulls`, which decides how
/// `None` is ordered relative to `Some`.
pub fn order_default_null<T>(a: &Option<T>, b: &Option<T>) -> Ordering
where
    T: PartialOrd,
{
    sort_with_nulls(a, b)
}

/// Compare two optional values in the reversed order.
///
/// Forwards to `sort_with_nulls` with the operands swapped (`b` first, then
/// `a`), which inverts the ordering produced by `order_default_null`.
pub fn order_reverse_null<T>(a: &Option<T>, b: &Option<T>) -> Ordering
where
    T: PartialOrd,
{
    sort_with_nulls(b, a)
}

impl CategoricalChunked {
#[must_use]
pub fn sort_with(&self, options: SortOptions) -> CategoricalChunked {
Expand Down Expand Up @@ -63,31 +73,18 @@ impl CategoricalChunked {
}

/// Retrieve the indexes needed to sort this array.
pub fn argsort(&self, reverse: bool) -> IdxCa {
pub fn argsort(&self, options: SortOptions) -> IdxCa {
if self.use_lexical_sort() {
let mut count: IdxSize = 0;
// safety: we know the iterators len
let mut vals = self
.iter_str()
.map(|s| {
let i = count;
count += 1;
(i, s)
})
.collect_trusted::<Vec<_>>();

argsort_branch(
vals.as_mut_slice(),
reverse,
|(_, a), (_, b)| order_default_null(a, b),
|(_, a), (_, b)| order_reverse_null(a, b),
);
let ca: NoNull<IdxCa> = vals.into_iter().map(|(idx, _v)| idx).collect_trusted();
let mut ca = ca.into_inner();
ca.rename(self.name());
ca
let iters = [self.iter_str()];
argsort::argsort(
self.name(),
iters,
options,
self.logical().null_count(),
self.len(),
)
} else {
self.logical().argsort(reverse)
self.logical().argsort(options)
}
}

Expand Down Expand Up @@ -140,7 +137,10 @@ mod test {
let out = ca.sort(false);
assert_order(&out, init);

let out = ca_lexical.argsort(false);
let out = ca_lexical.argsort(SortOptions {
descending: false,
..Default::default()
});
assert_eq!(out.into_no_null_iter().collect::<Vec<_>>(), &[2, 1, 0, 3]);
}

Expand Down

0 comments on commit bc6632c

Please sign in to comment.