Reduce compiler bloat (#2866)
* remove some generic code causing bloat and minor fixes to hashing collect

* remove hash_join_inner from series trait

* remove left join from series trait

* remove outer join from series trait
ritchie46 committed Mar 10, 2022
1 parent 36f2c62 commit 161c233
Showing 14 changed files with 838 additions and 1,005 deletions.
93 changes: 12 additions & 81 deletions polars/polars-core/src/chunked_array/upstream_traits.rs
@@ -215,90 +215,21 @@ where
         let s: &Series = owned_s.borrow();
         let capacity = get_iter_capacity(&it);
         let estimated_s_size = std::cmp::min(s.len(), 1 << 18);
-        // use specialized builder for most common types
-        match s.dtype() {
-            DataType::UInt32 => primitive_series_collect(
-                nulls_so_far,
-                it,
-                s,
-                &mut ListPrimitiveChunkedBuilder::<u32>::new(
-                    "collected",
-                    capacity,
-                    capacity * estimated_s_size,
-                    s.dtype().clone(),
-                ),
-            ),
-            DataType::Int32 => primitive_series_collect(
-                nulls_so_far,
-                it,
-                s,
-                &mut ListPrimitiveChunkedBuilder::<i32>::new(
-                    "collected",
-                    capacity,
-                    capacity * estimated_s_size,
-                    s.dtype().clone(),
-                ),
-            ),
-            DataType::UInt64 => primitive_series_collect(
-                nulls_so_far,
-                it,
-                s,
-                &mut ListPrimitiveChunkedBuilder::<u64>::new(
-                    "collected",
-                    capacity,
-                    capacity * estimated_s_size,
-                    s.dtype().clone(),
-                ),
-            ),
-            DataType::Int64 => primitive_series_collect(
-                nulls_so_far,
-                it,
-                s,
-                &mut ListPrimitiveChunkedBuilder::<i64>::new(
-                    "collected",
-                    capacity,
-                    capacity * estimated_s_size,
-                    s.dtype().clone(),
-                ),
-            ),
-            DataType::Float32 => primitive_series_collect(
-                nulls_so_far,
-                it,
-                s,
-                &mut ListPrimitiveChunkedBuilder::<f32>::new(
-                    "collected",
-                    capacity,
-                    capacity * estimated_s_size,
-                    s.dtype().clone(),
-                ),
-            ),
-            DataType::Float64 => primitive_series_collect(
-                nulls_so_far,
-                it,
-                s,
-                &mut ListPrimitiveChunkedBuilder::<f64>::new(
-                    "collected",
-                    capacity,
-                    capacity * estimated_s_size,
-                    s.dtype().clone(),
-                ),
-            ),
+        // we first used specialized builders for most common types, but this explodes bloat
+        // because every iterator collect is a unique instantiation of this function
+        let mut builder = match s.dtype() {
             #[cfg(feature = "object")]
             DataType::Object(_) => {
-                let mut builder =
-                    s.get_list_builder("collected", capacity * estimated_s_size, capacity);
-                primitive_series_collect(nulls_so_far, it, s, &mut builder)
-            }
-            _ => {
-                let mut builder = get_list_builder(
-                    s.dtype(),
-                    capacity * estimated_s_size,
-                    capacity,
-                    "collected",
-                );
-                primitive_series_collect(nulls_so_far, it, s, &mut builder)
+                s.get_list_builder("collected", capacity * estimated_s_size, capacity)
             }
-        }
+            _ => get_list_builder(
+                s.dtype(),
+                capacity * estimated_s_size,
+                capacity,
+                "collected",
+            ),
+        };
+        primitive_series_collect(nulls_so_far, it, s, &mut builder)
     }
 }
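
Note: the hunk above trades per-dtype monomorphized builders for one builder chosen at runtime. A rough, self-contained sketch of why that shrinks the binary; the names here (Builder, VecBuilder, collect_generic, collect_dyn) are illustrative only and are not the polars API:

// Hypothetical types, only to illustrate the bloat trade-off.
trait Builder {
    fn push(&mut self, v: i64);
}

struct VecBuilder(Vec<i64>);

impl Builder for VecBuilder {
    fn push(&mut self, v: i64) {
        self.0.push(v);
    }
}

// Monomorphized: the compiler emits one copy of this loop per concrete `B`.
fn collect_generic<B: Builder>(it: impl Iterator<Item = i64>, builder: &mut B) {
    for v in it {
        builder.push(v);
    }
}

// Dynamically dispatched: a single copy of the loop serves every builder
// chosen at runtime, at the cost of a virtual call per element.
fn collect_dyn(it: &mut dyn Iterator<Item = i64>, builder: &mut dyn Builder) {
    for v in it {
        builder.push(v);
    }
}

fn main() {
    let mut b = VecBuilder(Vec::new());
    collect_generic(0..3i64, &mut b);
    collect_dyn(&mut (3..6i64), &mut b);
    assert_eq!(b.0, vec![0, 1, 2, 3, 4, 5]);
}

In the commit itself the function stays generic over the input iterator; only the builder is selected at runtime via get_list_builder, which removes the per-dtype copies that the deleted match arms forced on every collect call site.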

20 changes: 16 additions & 4 deletions polars/polars-core/src/frame/groupby/hashing.rs
@@ -13,15 +13,27 @@ use hashbrown::{hash_map::RawEntryMut, HashMap};
 use rayon::prelude::*;
 use std::hash::{BuildHasher, Hash};
 
-fn finish_group_order(out: Vec<Vec<IdxItem>>, sorted: bool) -> GroupsProxy {
+fn finish_group_order(mut out: Vec<Vec<IdxItem>>, sorted: bool) -> GroupsProxy {
     if sorted {
-        let mut out = out.into_iter().flatten().collect::<Vec<_>>();
+        // we can just take the first value, no need to flatten
+        let mut out = if out.len() == 1 {
+            out.pop().unwrap()
+        } else {
+            // flattens
+            out.into_iter().flatten().collect::<Vec<_>>()
+        };
         out.sort_unstable_by_key(|g| g.0);
         let mut idx = GroupsIdx::from_iter(out.into_iter());
         idx.sorted = true;
         GroupsProxy::Idx(idx)
     } else {
-        GroupsProxy::Idx(GroupsIdx::from(out))
+        // we can just take the first value, no need to flatten
+        if out.len() == 1 {
+            GroupsProxy::Idx(GroupsIdx::from(out.pop().unwrap()))
+        } else {
+            // flattens
+            GroupsProxy::Idx(GroupsIdx::from(out))
+        }
     }
 }
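
Note: the new branches above add a fast path for the single-partition case (e.g. a groupby run on one thread produces exactly one inner Vec), which previously went through flatten even though there was nothing to merge. A minimal standalone sketch of the same idea, with a hypothetical helper name:

// Hypothetical helper illustrating the fast path: with one partition the
// inner Vec can be moved out as-is; flattening is only needed when several
// per-thread partitions have to be merged (and it copies every element once).
fn merge_partitions<T>(mut parts: Vec<Vec<T>>) -> Vec<T> {
    if parts.len() == 1 {
        parts.pop().unwrap()
    } else {
        parts.into_iter().flatten().collect()
    }
}

fn main() {
    assert_eq!(merge_partitions(vec![vec![1, 2, 3]]), vec![1, 2, 3]);
    assert_eq!(merge_partitions(vec![vec![1], vec![2, 3]]), vec![1, 2, 3]);
}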

@@ -61,7 +73,7 @@ where
         idx.sorted = true;
         GroupsProxy::Idx(idx)
     } else {
-        GroupsProxy::Idx(hash_tbl.into_iter().map(|(_k, v)| v).collect())
+        GroupsProxy::Idx(hash_tbl.into_values().collect())
     }
 }
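
Note: the last hunk swaps a map over (key, value) tuples for into_values(), which yields owned values directly. A quick equivalence check using the standard library's HashMap (the code in the diff uses hashbrown's map, which, as the hunk shows, exposes the same method):

use std::collections::HashMap;

fn main() {
    let tbl: HashMap<u32, Vec<u32>> = HashMap::from([(1, vec![10]), (2, vec![20, 21])]);

    // Old spelling: iterate pairs and drop the key.
    let mut a: Vec<Vec<u32>> = tbl.clone().into_iter().map(|(_k, v)| v).collect();
    // New spelling: take ownership of the values directly.
    let mut b: Vec<Vec<u32>> = tbl.into_values().collect();

    // Hash map iteration order is unspecified, so sort before comparing.
    a.sort();
    b.sort();
    assert_eq!(a, b);
}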
