Skip to content

Commit

Permalink
Groups slice (#2431)
Browse files Browse the repository at this point in the history
  • Loading branch information
ritchie46 committed Jan 21, 2022
1 parent b338ee8 commit 3609a78
Show file tree
Hide file tree
Showing 60 changed files with 1,690 additions and 1,158 deletions.
3 changes: 2 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -8,5 +8,6 @@ members = [
"polars/polars-utils",
]

# [patch.crates-io]
[patch.crates-io]
packed_simd_2 = { git = "https://github.com/hkratz/packed_simd", branch = "remove_llvm_asm" }
# packed_simd_2 = { git = "https://github.com/rust-lang/packed_simd", rev = "e57c7ba11386147e6d2cbad7c88f376aab4bdc86" }
28 changes: 19 additions & 9 deletions polars/polars-core/src/chunked_array/builder/list.rs
Original file line number Diff line number Diff line change
Expand Up @@ -209,6 +209,13 @@ impl ListUtf8ChunkedBuilder {
unsafe { values.extend_trusted_len_unchecked(iter) };
self.builder.try_push_valid().unwrap();
}

/// Appends every value of `ca` to the inner values builder and then calls
/// `try_push_valid` on the list builder (presumably closing `ca` off as one
/// new list entry — confirm against the builder's API).
///
/// An empty `ca` clears `fast_explode`, mirroring
/// `ListBooleanChunkedBuilder::append`, so callers that use this method
/// directly instead of going through `append_series` cannot leave the flag
/// set after pushing an empty sublist.
///
/// # Panics
/// Panics if `try_extend` or `try_push_valid` returns an error.
#[inline]
pub(crate) fn append(&mut self, ca: &Utf8Chunked) {
    // Clearing the flag is always the conservative choice: it is only ever
    // set to `false` here, never back to `true`.
    if ca.is_empty() {
        self.fast_explode = false;
    }
    let value_builder = self.builder.mut_values();
    value_builder.try_extend(ca).unwrap();
    self.builder.try_push_valid().unwrap();
}
}

impl ListBuilderTrait for ListUtf8ChunkedBuilder {
Expand All @@ -233,9 +240,7 @@ impl ListBuilderTrait for ListUtf8ChunkedBuilder {
self.fast_explode = false;
}
let ca = s.utf8().unwrap();
let value_builder = self.builder.mut_values();
value_builder.try_extend(ca).unwrap();
self.builder.try_push_valid().unwrap();
self.append(ca)
}

fn finish(&mut self) -> ListChunked {
Expand Down Expand Up @@ -274,6 +279,16 @@ impl ListBooleanChunkedBuilder {
unsafe { values.extend_trusted_len_unchecked(iter) };
self.builder.try_push_valid().unwrap();
}

/// Extends the inner values builder with all values of `ca`, then calls
/// `try_push_valid` on the list builder.
///
/// `fast_explode` is cleared when `ca` is empty and left untouched otherwise.
///
/// # Panics
/// Panics if `try_push_valid` returns an error.
#[inline]
pub(crate) fn append(&mut self, ca: &BooleanChunked) {
    // Stays as it was for non-empty input; forced to `false` for empty input.
    self.fast_explode &= !ca.is_empty();
    self.builder.mut_values().extend(ca);
    self.builder.try_push_valid().unwrap();
}
}

impl ListBuilderTrait for ListBooleanChunkedBuilder {
Expand All @@ -295,12 +310,7 @@ impl ListBuilderTrait for ListBooleanChunkedBuilder {
#[inline]
fn append_series(&mut self, s: &Series) {
let ca = s.bool().unwrap();
if ca.is_empty() {
self.fast_explode = false;
}
let value_builder = self.builder.mut_values();
value_builder.extend(ca);
self.builder.try_push_valid().unwrap();
self.append(ca)
}

fn finish(&mut self) -> ListChunked {
Expand Down
2 changes: 1 addition & 1 deletion polars/polars-core/src/chunked_array/cast.rs
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ impl ChunkCast for Utf8Chunked {
DataType::Categorical => {
let iter = self.into_iter();
let mut builder = CategoricalChunkedBuilder::new(self.name(), self.len());
builder.from_iter(iter);
builder.drain_iter(iter);
let ca = builder.finish();
Ok(ca.into_series())
}
Expand Down
6 changes: 3 additions & 3 deletions polars/polars-core/src/chunked_array/categorical/builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -133,7 +133,7 @@ impl CategoricalChunkedBuilder {
}
impl CategoricalChunkedBuilder {
/// Appends all the values in a single lock of the global string cache.
pub fn from_iter<'a, I>(&mut self, i: I)
pub fn drain_iter<'a, I>(&mut self, i: I)
where
I: IntoIterator<Item = Option<&'a str>>,
{
Expand Down Expand Up @@ -260,8 +260,8 @@ mod test {
// does not interfere with the index mapping
let mut builder1 = CategoricalChunkedBuilder::new("foo", 10);
let mut builder2 = CategoricalChunkedBuilder::new("foo", 10);
builder1.from_iter(vec![None, Some("hello"), Some("vietnam")]);
builder2.from_iter(vec![Some("hello"), None, Some("world")].into_iter());
builder1.drain_iter(vec![None, Some("hello"), Some("vietnam")]);
builder2.drain_iter(vec![Some("hello"), None, Some("world")].into_iter());

let s = builder1.finish().into_series();
assert_eq!(s.str_value(0), "null");
Expand Down
4 changes: 2 additions & 2 deletions polars/polars-core/src/chunked_array/object/extension/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -190,7 +190,7 @@ mod test {
let values = &[Some(foo1), None, Some(foo2), None];
let ca = ObjectChunked::new("", values);

let groups = vec![(0u32, vec![0u32, 1]), (2, vec![2]), (3, vec![3])];
let groups = GroupsProxy::Idx(vec![(0u32, vec![0u32, 1]), (2, vec![2]), (3, vec![3])]);
let out = ca.agg_list(&groups).unwrap();
assert!(matches!(out.dtype(), DataType::List(_)));
assert_eq!(out.len(), groups.len());
Expand All @@ -214,7 +214,7 @@ mod test {
let ca = ObjectChunked::new("", values);

let groups = vec![(0u32, vec![0u32, 1]), (2, vec![2]), (3, vec![3])];
let out = ca.agg_list(&groups).unwrap();
let out = ca.agg_list(&GroupsProxy::Idx(groups)).unwrap();
let a = out.explode().unwrap();

let ca_foo = a.as_any().downcast_ref::<ObjectChunked<Foo>>().unwrap();
Expand Down
2 changes: 1 addition & 1 deletion polars/polars-core/src/chunked_array/ops/full.rs
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ impl ChunkFullNull for CategoricalChunked {
use crate::chunked_array::categorical::CategoricalChunkedBuilder;
let mut builder = CategoricalChunkedBuilder::new(name, length);
let iter = (0..length).map(|_| None);
builder.from_iter(iter);
builder.drain_iter(iter);
builder.finish()
}
}
Expand Down
16 changes: 9 additions & 7 deletions polars/polars-core/src/chunked_array/ops/unique/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ use crate::chunked_array::categorical::RevMapping;
#[cfg(feature = "object")]
use crate::chunked_array::object::ObjectType;
use crate::datatypes::PlHashSet;
use crate::frame::groupby::{GroupTuples, IntoGroupTuples};
use crate::frame::groupby::{GroupsProxy, IntoGroupsProxy};
use crate::prelude::*;
use crate::utils::NoNull;
use rayon::prelude::*;
Expand Down Expand Up @@ -50,13 +50,14 @@ pub(crate) fn is_unique_helper2(
}

pub(crate) fn is_unique_helper(
groups: GroupTuples,
groups: GroupsProxy,
len: u32,
unique_val: bool,
duplicated_val: bool,
) -> BooleanChunked {
debug_assert_ne!(unique_val, duplicated_val);
let idx = groups
.into_idx()
.into_iter()
.filter_map(|(first, g)| if g.len() == 1 { Some(first) } else { None })
.collect::<Vec<_>>();
Expand Down Expand Up @@ -128,12 +129,12 @@ where
#[allow(clippy::needless_collect)]
fn mode<T>(ca: &ChunkedArray<T>) -> ChunkedArray<T>
where
ChunkedArray<T>: IntoGroupTuples + ChunkTake,
ChunkedArray<T>: IntoGroupsProxy + ChunkTake,
{
if ca.is_empty() {
return ca.clone();
}
let mut groups = ca.group_tuples(true);
let mut groups = ca.group_tuples(true).into_idx();
groups.sort_unstable_by_key(|k| k.1.len());
let first = &groups[0];

Expand Down Expand Up @@ -168,7 +169,7 @@ macro_rules! arg_unique_ca {

macro_rules! impl_value_counts {
($self:expr) => {{
let group_tuples = $self.group_tuples(true);
let group_tuples = $self.group_tuples(true).into_idx();
let values =
unsafe { $self.take_unchecked(group_tuples.iter().map(|t| t.0 as usize).into()) };
let mut counts: NoNull<UInt32Chunked> = group_tuples
Expand Down Expand Up @@ -208,6 +209,7 @@ where
is_unique_duplicated!(self, true)
}

// TODO! implement on series. Not worth the compile times here.
/// Returns the result of expanding `impl_value_counts!` on `self`.
///
/// The whole computation lives in that macro; this method only forwards
/// `self` to it and returns the resulting `Result<DataFrame>`.
fn value_counts(&self) -> Result<DataFrame> {
impl_value_counts!(self)
}
Expand Down Expand Up @@ -350,7 +352,7 @@ fn sort_columns(mut columns: Vec<Series>) -> Vec<Series> {

impl ToDummies<Utf8Type> for Utf8Chunked {
fn to_dummies(&self) -> Result<DataFrame> {
let groups = self.group_tuples(true);
let groups = self.group_tuples(true).into_idx();
let col_name = self.name();
let taker = self.take_rand();

Expand All @@ -376,7 +378,7 @@ where
ChunkedArray<T>: ChunkOps + ChunkCompare<T::Native> + ChunkUnique<T>,
{
fn to_dummies(&self) -> Result<DataFrame> {
let groups = self.group_tuples(true);
let groups = self.group_tuples(true).into_idx();
let col_name = self.name();
let taker = self.take_rand();

Expand Down

0 comments on commit 3609a78

Please sign in to comment.