Skip to content

Commit

Permalink
perf: Use row-encoding for multiple key group by (#15392)
Browse files Browse the repository at this point in the history
  • Loading branch information
ritchie46 committed Mar 30, 2024
1 parent b808bc0 commit c39ccae
Show file tree
Hide file tree
Showing 11 changed files with 210 additions and 374 deletions.
15 changes: 15 additions & 0 deletions crates/polars-core/src/chunked_array/object/mod.rs
Expand Up @@ -33,6 +33,14 @@ pub trait PolarsObjectSafe: Any + Debug + Send + Sync + Display {
fn as_any(&self) -> &dyn Any;

fn to_boxed(&self) -> Box<dyn PolarsObjectSafe>;

fn equal(&self, other: &dyn PolarsObjectSafe) -> bool;
}

/// Lets two `&dyn PolarsObjectSafe` references be compared with `==` by forwarding to
/// [`PolarsObjectSafe::equal`].
impl PartialEq for &dyn PolarsObjectSafe {
    fn eq(&self, other: &Self) -> bool {
        // Peel off the outer reference on both sides so the trait method is invoked
        // directly on the trait objects.
        let lhs = *self;
        let rhs = *other;
        lhs.equal(rhs)
    }
}

/// Values need to implement this so that they can be stored into a Series and DataFrame
Expand All @@ -55,6 +63,13 @@ impl<T: PolarsObject> PolarsObjectSafe for T {
/// Clones this value and returns the copy boxed as a [`PolarsObjectSafe`] trait object.
fn to_boxed(&self) -> Box<dyn PolarsObjectSafe> {
    let duplicate = self.clone();
    Box::new(duplicate)
}

/// Returns `true` only when `other` holds the same concrete type `T` as `self` and the two
/// values compare equal with `==`; an `other` of any different concrete type yields `false`.
fn equal(&self, other: &dyn PolarsObjectSafe) -> bool {
    match other.as_any().downcast_ref::<T>() {
        // Same concrete type: defer to the `==` comparison on the values.
        Some(rhs) => self == rhs,
        // The downcast failed, so `other` is some other type and cannot be equal.
        None => false,
    }
}
}

/// Alias for [`std::slice::Iter`], i.e. an iterator yielding `&'a T` items from a slice.
// NOTE(review): presumably used to walk the values of an object array — confirm against callers.
pub type ObjectValueIter<'a, T> = std::slice::Iter<'a, T>;
Expand Down
Expand Up @@ -3,7 +3,6 @@ use polars_row::{convert_columns, RowsEncoded, SortField};
use polars_utils::iter::EnumerateIdxTrait;

use super::*;
#[cfg(feature = "dtype-struct")]
use crate::utils::_split_offsets;

pub(crate) fn args_validate<T: PolarsDataType>(
Expand Down Expand Up @@ -88,8 +87,7 @@ pub fn _get_rows_encoded_compat_array(by: &Series) -> PolarsResult<ArrayRef> {
Ok(out)
}

#[cfg(feature = "dtype-struct")]
pub(crate) fn encode_rows_vertical(by: &[Series]) -> PolarsResult<BinaryOffsetChunked> {
pub(crate) fn encode_rows_vertical_par_default(by: &[Series]) -> PolarsResult<BinaryOffsetChunked> {
let n_threads = POOL.current_num_threads();
let len = by[0].len();
let splits = _split_offsets(len, n_threads);
Expand All @@ -108,6 +106,12 @@ pub(crate) fn encode_rows_vertical(by: &[Series]) -> PolarsResult<BinaryOffsetCh
Ok(BinaryOffsetChunked::from_chunk_iter("", chunks?))
}

/// Row-encodes the `by` columns with a single call to `_get_rows_encoded` and wraps the
/// resulting array in an unnamed `BinaryOffsetChunked` built via `with_chunk`.
///
/// Every column is passed with its `descending` flag set to `false`. Any error returned by
/// `_get_rows_encoded` is propagated to the caller.
pub(crate) fn encode_rows_default(by: &[Series]) -> PolarsResult<BinaryOffsetChunked> {
    // One `false` per key column.
    let all_ascending = vec![false; by.len()];
    // NOTE(review): the meaning of the trailing `false` is not visible here (presumably a
    // nulls-last switch) — confirm against `_get_rows_encoded`.
    let encoded = _get_rows_encoded(by, &all_ascending, false)?;
    let array = encoded.into_array();
    Ok(BinaryOffsetChunked::with_chunk("", array))
}

pub fn _get_rows_encoded(
by: &[Series],
descending: &[bool],
Expand Down
4 changes: 1 addition & 3 deletions crates/polars-core/src/chunked_array/ops/sort/mod.rs
Expand Up @@ -15,9 +15,7 @@ use rayon::prelude::*;
pub use slice::*;

use crate::prelude::compare_inner::TotalOrdInner;
#[cfg(feature = "dtype-struct")]
use crate::prelude::sort::arg_sort_multiple::_get_rows_encoded_ca;
use crate::prelude::sort::arg_sort_multiple::{arg_sort_multiple_impl, args_validate};
use crate::prelude::sort::arg_sort_multiple::*;
use crate::prelude::*;
use crate::series::IsSorted;
use crate::utils::NoNull;
Expand Down
2 changes: 2 additions & 0 deletions crates/polars-core/src/datatypes/any_value.rs
Expand Up @@ -946,6 +946,8 @@ impl AnyValue<'_> {
// 1.2 at scale 1, and 1.20 at scale 2, are not equal.
*v_l == *v_r && *scale_l == *scale_r
},
#[cfg(feature = "object")]
(Object(l), Object(r)) => l == r,
_ => false,
}
}
Expand Down
15 changes: 15 additions & 0 deletions crates/polars-core/src/datatypes/dtype.rs
Expand Up @@ -241,6 +241,21 @@ impl DataType {
matches!(self, DataType::Binary)
}

/// Returns `true` if this data type is the `Object` variant.
///
/// When the crate is compiled without the `object` feature this always returns `false`.
pub fn is_object(&self) -> bool {
    // Exactly one of the two bindings below survives conditional compilation.
    #[cfg(feature = "object")]
    let is_obj = matches!(self, DataType::Object(..));
    #[cfg(not(feature = "object"))]
    let is_obj = false;
    is_obj
}

pub fn is_null(&self) -> bool {
matches!(self, DataType::Null)
}

pub fn contains_views(&self) -> bool {
use DataType::*;
match self {
Expand Down

0 comments on commit c39ccae

Please sign in to comment.