Skip to content

Commit

Permalink
struct add chunk and impl reverse (#3445)
Browse files Browse the repository at this point in the history
  • Loading branch information
ritchie46 committed May 20, 2022
1 parent 24cc2ef commit ed25e0c
Show file tree
Hide file tree
Showing 4 changed files with 67 additions and 10 deletions.
45 changes: 38 additions & 7 deletions polars/polars-core/src/chunked_array/logical/struct_/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,7 @@ use crate::datatypes::*;
/// A struct column: one `Series` per struct field, plus Arrow arrays for the struct as a whole.
///
/// NOTE(review): this listing is a diff with the +/- markers stripped, so both the
/// `arrow_array` field and the `chunks` field appear below. Presumably `arrow_array`
/// (with its comment) is the removed version and `chunks` is its replacement — confirm
/// against the real file.
pub struct StructChunked {
// One `Series` per struct field.
fields: Vec<Series>,
// Field descriptor of the struct column itself (presumably its name and dtype — confirm).
field: Field,
// needed by iterators
arrow_array: ArrayRef,
// Struct-level Arrow arrays, one entry per chunk; rebuilt from `fields` by `update_chunks`.
chunks: Vec<ArrayRef>,
}

fn fields_to_struct_array(fields: &[Series]) -> (ArrayRef, Vec<Series>) {
Expand All @@ -39,13 +38,45 @@ impl StructChunked {
}

/// Returns a reference to a single struct-level Arrow array.
///
/// NOTE(review): two candidate bodies appear because this listing is a diff with the
/// +/- markers stripped. Presumably `&self.arrow_array` is the removed line and
/// `&self.chunks[0]` is the added one — confirm against the real file.
///
/// With the `chunks[0]` body only the first chunk is exposed, and the index
/// expression panics when `chunks` is empty.
pub(crate) fn arrow_array(&self) -> &ArrayRef {
&self.arrow_array
&self.chunks[0]
}

/// Returns the struct-level Arrow arrays stored in `self.chunks`, without copying.
pub(crate) fn chunks(&self) -> &Vec<ArrayRef> {
&self.chunks
}

/// Rechunks the field `Series` and refreshes the struct-level Arrow data.
///
/// NOTE(review): this listing is a diff with the +/- markers stripped, so two
/// implementations are interleaved below. Presumably the first three statements
/// (based on `fields_to_struct_array`) are the removed version and the last two are
/// the replacement — confirm against the real file.
pub fn rechunk(&mut self) {
// Variant 1: rebuild one struct array and the field list via `fields_to_struct_array`.
let (arrow_array, fields) = fields_to_struct_array(&self.fields);
self.arrow_array = arrow_array;
self.fields = fields;
// Variant 2: replace every field with the result of its own `rechunk()` ...
self.fields = self.fields.iter().map(|s| s.rechunk()).collect();
// ... then rebuild the struct-level chunks starting from index 0 (i.e. all of them).
self.update_chunks(0);
}

/// Rebuilds the struct-level Arrow chunks from the current field `Series`.
///
/// Should be called after append or extend.
///
/// Entries before `offset` are kept as they are; one new `StructArray` is built for
/// every chunk index in `offset..n_chunks`, where `n_chunks` is the chunk count of the
/// *first* field. Every field is indexed with that same chunk index, and the arrays
/// are created without a validity bitmap (`None`). Afterwards `self.chunks` holds at
/// most `n_chunks` entries.
///
/// # Panics
///
/// Indexing `fields[0]` panics when the struct has no fields.
pub(crate) fn update_chunks(&mut self, offset: usize) {
    // Borrow the fields separately so `self.chunks` can be mutated alongside.
    let fields = &self.fields;
    let arrow_fields: Vec<_> = fields.iter().map(|s| s.field().to_arrow()).collect();
    let n_chunks = fields[0].chunks().len();

    // Drop everything from `offset` onwards, then append the freshly built arrays.
    self.chunks.truncate(offset);
    self.chunks.extend((offset..n_chunks).map(|chunk_idx| {
        let values: Vec<_> = fields.iter().map(|s| s.to_arrow(chunk_idx)).collect();
        Arc::new(StructArray::new(
            ArrowDataType::Struct(arrow_fields.clone()),
            values,
            None,
        )) as ArrayRef
    }));
    // Covers the case where `offset` is larger than `n_chunks`.
    self.chunks.truncate(n_chunks);
}

/// Does not check the lengths of the fields
Expand All @@ -62,7 +93,7 @@ impl StructChunked {
Self {
fields,
field,
arrow_array,
chunks: vec![arrow_array],
}
}

Expand Down
12 changes: 12 additions & 0 deletions polars/polars-core/src/series/implementations/struct_.rs
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,11 @@ impl SeriesTrait for SeriesWrap<StructChunked> {
s.chunk_lengths()
}

/// Underlying chunks.
///
/// Delegates to `chunks()` on the wrapped `StructChunked` (`self.0`).
fn chunks(&self) -> &Vec<ArrayRef> {
self.0.chunks()
}

/// Number of chunks in this Series
fn n_chunks(&self) -> usize {
let s = self.0.fields().first().unwrap();
Expand All @@ -105,10 +110,12 @@ impl SeriesTrait for SeriesWrap<StructChunked> {
/// Appends the fields of `other` to the fields of this struct column, pairwise.
///
/// `other` must be a struct `Series`; otherwise the error from `struct_()` is returned.
/// Fields are paired positionally with `zip`, so pairing stops at the shorter field list.
/// The first failing field append aborts the call with that error; in that case the
/// struct-level chunks are not refreshed.
#[doc(hidden)]
fn append(&mut self, other: &Series) -> Result<()> {
    let rhs_struct = other.struct_()?;
    // Chunks that already exist stay untouched; only entries from here on are rebuilt.
    let first_new_chunk = self.chunks().len();

    self.0
        .fields_mut()
        .iter_mut()
        .zip(rhs_struct.fields())
        .try_for_each(|(lhs, rhs)| lhs.append(rhs).map(|_| ()))?;

    self.0.update_chunks(first_new_chunk);
    Ok(())
}

Expand All @@ -119,6 +126,7 @@ impl SeriesTrait for SeriesWrap<StructChunked> {
for (lhs, rhs) in self.0.fields_mut().iter_mut().zip(other.fields()) {
lhs.extend(rhs)?;
}
self.0.update_chunks(0);
Ok(())
}

Expand Down Expand Up @@ -296,6 +304,10 @@ impl SeriesTrait for SeriesWrap<StructChunked> {
is_not_null.reduce(|lhs, rhs| lhs.bitand(rhs)).unwrap()
}

/// Returns a reversed copy of this column.
///
/// Runs `reverse` on the fields through `apply_fields` and converts the result
/// into a `Series`.
fn reverse(&self) -> Series {
    let reversed = self.0.apply_fields(|field| field.reverse());
    reversed.into_series()
}

/// Returns a copy of this column shifted by `periods`.
///
/// Runs `shift(periods)` on the fields through `apply_fields` and converts the
/// result into a `Series`.
fn shift(&self, periods: i64) -> Series {
    let shifted = self.0.apply_fields(|field| field.shift(periods));
    shifted.into_series()
}
Expand Down
4 changes: 1 addition & 3 deletions polars/polars-core/src/series/series_trait.rs
Original file line number Diff line number Diff line change
Expand Up @@ -293,9 +293,7 @@ pub trait SeriesTrait:
}

/// Underlying chunks.
///
/// NOTE(review): this listing is a diff with the +/- markers stripped. Presumably the
/// default body that calls `invalid_operation_panic!` is the removed version and the
/// body-less declaration is its replacement, which would make `chunks` a required
/// method for every implementor — confirm against the real file.
fn chunks(&self) -> &Vec<ArrayRef> {
invalid_operation_panic!(self)
}
fn chunks(&self) -> &Vec<ArrayRef>;

/// Number of chunks in this Series
fn n_chunks(&self) -> usize {
Expand Down
16 changes: 16 additions & 0 deletions py-polars/tests/test_struct.py
Original file line number Diff line number Diff line change
Expand Up @@ -381,3 +381,19 @@ def test_struct_concat_list() -> None:
[{"a": 1, "b": 2}, {"a": 3, "b": 4}, {"a": 6, "b": 7}, {"a": 8, "b": 9}],
[{"a": 1, "b": 2}, {"a": 6, "b": 7}],
]


def test_struct_arr_reverse() -> None:
    """Check that `arr.reverse()` on a list-of-structs column flips each row's element order."""
    frame = pl.DataFrame(
        {
            "list_struct": [
                [{"a": 1, "b": 2}, {"a": 3, "b": 4}, {"a": 5, "b": 6}],
                [{"a": 30, "b": 40}, {"a": 10, "b": 20}, {"a": 50, "b": 60}],
            ],
        }
    )
    reversed_frame = frame.with_columns([pl.col("list_struct").arr.reverse()])

    # Each inner list is reversed on its own; the order of the rows is unchanged.
    expected = {
        "list_struct": [
            [{"a": 5, "b": 6}, {"a": 3, "b": 4}, {"a": 1, "b": 2}],
            [{"a": 50, "b": 60}, {"a": 10, "b": 20}, {"a": 30, "b": 40}],
        ]
    }
    assert reversed_frame.to_dict(False) == expected

0 comments on commit ed25e0c

Please sign in to comment.