Skip to content

Commit

Permalink
perf[rust]: cast to physical before iter in hash list (#4735)
Browse files (browse the repository at this point in the history)
  • Loading branch information
ritchie46 committed Sep 5, 2022
1 parent 154e1b8 commit c8cfc99
Show file tree
Hide file tree
Showing 5 changed files with 25 additions and 22 deletions.
2 changes: 1 addition & 1 deletion polars/polars-core/src/chunked_array/cast.rs
Original file line number Diff line number Diff line change
Expand Up @@ -168,7 +168,7 @@ impl ChunkCast for ListChunked {
} else {
self.clone()
};
- ca.with_inner_type(*child_type.clone());
+ ca.set_inner_dtype(*child_type.clone());
Ok(ca.into_series())
}
_ => Err(PolarsError::ComputeError("Cannot cast list type".into())),
Expand Down
4 changes: 2 additions & 2 deletions polars/polars-core/src/chunked_array/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -610,8 +610,8 @@ impl ListChunked {
}
}

- pub(crate) fn with_inner_type(&mut self, dtype: DataType) {
- debug_assert_eq!(dtype.to_physical(), self.inner_dtype().to_physical());
+ pub fn set_inner_dtype(&mut self, dtype: DataType) {
+ assert_eq!(dtype.to_physical(), self.inner_dtype().to_physical());
let field = Arc::make_mut(&mut self.field);
field.coerce(DataType::List(Box::new(dtype)));
}
Expand Down
2 changes: 1 addition & 1 deletion polars/polars-core/src/series/ops/to_list.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ fn reshape_fast_path(name: &str, s: &Series) -> Series {
};

let mut ca = ListChunked::from_chunks(name, chunks);
- ca.with_inner_type(s.dtype().clone());
+ ca.set_inner_dtype(s.dtype().clone());
ca.set_fast_explode();
ca.into_series()
}
Expand Down
35 changes: 19 additions & 16 deletions polars/polars-ops/src/chunked_array/list/hash.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ use polars_core::export::_boost_hash_combine;
use polars_core::export::ahash::{self, CallHasher};
use polars_core::export::rayon::prelude::*;
use polars_core::utils::NoNull;
+ use polars_core::POOL;

use super::*;

Expand Down Expand Up @@ -41,7 +42,7 @@ where
hash_agg
}

- pub(crate) fn hash(ca: &ListChunked, build_hasher: ahash::RandomState) -> UInt64Chunked {
+ pub(crate) fn hash(ca: &mut ListChunked, build_hasher: ahash::RandomState) -> UInt64Chunked {
if !ca.inner_dtype().to_physical().is_numeric() {
panic!(
"Hashing a list with a non-numeric inner type not supported. Got dtype: {:?}",
Expand All @@ -52,22 +53,24 @@ pub(crate) fn hash(ca: &ListChunked, build_hasher: ahash::RandomState) -> UInt64
// just some large prime
let null_hash = 1969099309u64;

- let out: NoNull<UInt64Chunked> = ca
- .par_iter()
- .map(|opt_s: Option<Series>| match opt_s {
- None => null_hash,
- Some(s) => {
- let s = s.to_physical_repr();
- if s.bit_repr_is_large() {
- let ca = s.bit_repr_large();
- hash_agg(&ca, &build_hasher)
- } else {
- let ca = s.bit_repr_small();
- hash_agg(&ca, &build_hasher)
+ ca.set_inner_dtype(ca.inner_dtype().to_physical());
+
+ let out: NoNull<UInt64Chunked> = POOL.install(|| {
+ ca.par_iter()
+ .map(|opt_s: Option<Series>| match opt_s {
+ None => null_hash,
+ Some(s) => {
+ if s.bit_repr_is_large() {
+ let ca = s.bit_repr_large();
+ hash_agg(&ca, &build_hasher)
+ } else {
+ let ca = s.bit_repr_small();
+ hash_agg(&ca, &build_hasher)
+ }
}
- }
- })
- .collect();
+ })
+ .collect()
+ });

let mut out = out.into_inner();
out.rename(ca.name());
Expand Down
4 changes: 2 additions & 2 deletions polars/polars-ops/src/series/ops/various.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,8 @@ pub trait SeriesMethods: SeriesSealed {
let s = self.as_series().to_physical_repr();
match s.dtype() {
DataType::List(_) => {
- let ca = s.list().unwrap();
- crate::chunked_array::hash::hash(ca, build_hasher)
+ let mut ca = s.list().unwrap().clone();
+ crate::chunked_array::hash::hash(&mut ca, build_hasher)
}
_ => UInt64Chunked::from_vec(s.name(), s.0.vec_hash(build_hasher)),
}
Expand Down

0 comments on commit c8cfc99

Please sign in to comment.