Skip to content

Commit

Permalink
Improving array refs for to_list (#3231)
Browse files Browse the repository at this point in the history
  • Loading branch information
cjermain committed Apr 27, 2022
1 parent 6c6d7ad commit 0230a2f
Show file tree
Hide file tree
Showing 5 changed files with 88 additions and 82 deletions.
2 changes: 1 addition & 1 deletion polars/polars-core/src/chunked_array/list/iterator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ impl ListChunked {
))
};

let ptr = &series_container.chunks()[0] as *const ArrayRef as *mut ArrayRef;
let ptr = series_container.array_ref(0) as *const ArrayRef as *mut ArrayRef;

AmortizedListIter {
len: self.len(),
Expand Down
14 changes: 1 addition & 13 deletions polars/polars-core/src/chunked_array/logical/struct_/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,23 +18,11 @@ pub struct StructChunked {
arrow_array: ArrayRef,
}

/// Produces the Arrow [`ArrayRef`](arrow::array::ArrayRef) for `series`.
///
/// A `Struct` series hands back a clone of the `arrow_array` held by its
/// `StructChunked`; every other dtype is converted through `to_arrow(0)`.
fn array_ref_for_series(series: &Series) -> ArrayRef {
    if let DataType::Struct(_) = series.dtype() {
        // The dtype was just checked, so the downcast is expected to succeed.
        let struct_ca = series.struct_().unwrap();
        return struct_ca.arrow_array.clone();
    }
    series.to_arrow(0)
}

/// Builds one Arrow `StructArray` out of the given field series.
///
/// Each field is rechunked first, then its Arrow field description
/// (`field().to_arrow()`) and its chunk 0 (`to_arrow(0)`) are collected and
/// handed to `StructArray::new`.
///
/// Returns the struct array wrapped in an `Arc`, together with the rechunked
/// field series.
fn fields_to_struct_array(fields: &[Series]) -> (ArrayRef, Vec<Series>) {
    let fields = fields.iter().map(|s| s.rechunk()).collect::<Vec<_>>();

    let new_fields = fields.iter().map(|s| s.field().to_arrow()).collect();
    // `field_arrays` is computed exactly once; an earlier, immediately-shadowed
    // binding cloned every field array only to discard the result.
    let field_arrays = fields.iter().map(|s| s.to_arrow(0)).collect::<Vec<_>>();
    // NOTE(review): the trailing `None` is presumably the validity bitmap — confirm.
    let arr = StructArray::new(ArrowDataType::Struct(new_fields), field_arrays, None);
    (Arc::new(arr), fields)
}
Expand Down
19 changes: 14 additions & 5 deletions polars/polars-core/src/series/into.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,18 @@ use crate::prelude::*;
use polars_arrow::compute::cast::cast;

impl Series {
/// Borrows the Arrow [`ArrayRef`] backing this `Series`.
///
/// With the `dtype-struct` feature enabled, a `Struct` series returns the
/// `arrow_array()` of its struct chunked array and `chunk_idx` is not
/// consulted. Every other dtype returns the chunk stored at `chunk_idx`.
///
/// # Panics
///
/// Indexes `chunks()` directly, so an out-of-range `chunk_idx` on a
/// non-struct series is expected to panic.
pub fn array_ref(&self, chunk_idx: usize) -> &ArrayRef {
    match self.dtype() {
        #[cfg(feature = "dtype-struct")]
        DataType::Struct(_) => self.struct_().unwrap().arrow_array(),
        _ => &self.chunks()[chunk_idx],
    }
}

/// Convert a chunk in the Series to the correct Arrow type.
/// This conversion is needed because polars doesn't use a
/// 1 on 1 mapping for logical/ categoricals, etc.
Expand Down Expand Up @@ -45,11 +57,8 @@ impl Series {
Arc::from(arr)
}
#[cfg(feature = "dtype-struct")]
DataType::Struct(_) => {
let ca = self.struct_().unwrap();
ca.arrow_array().clone()
}
_ => self.chunks()[chunk_idx].clone(),
DataType::Struct(_) => self.array_ref(chunk_idx).clone(),
_ => self.array_ref(chunk_idx).clone(),
}
}
}
126 changes: 63 additions & 63 deletions py-polars/src/series.rs
Original file line number Diff line number Diff line change
Expand Up @@ -673,74 +673,74 @@ impl PySeries {

let series = &self.series;

let primitive_to_list = |dt: &DataType, series: &Series| match dt {
DataType::Boolean => PyList::new(python, series.bool().unwrap()),
DataType::Utf8 => PyList::new(python, series.utf8().unwrap()),
DataType::UInt8 => PyList::new(python, series.u8().unwrap()),
DataType::UInt16 => PyList::new(python, series.u16().unwrap()),
DataType::UInt32 => PyList::new(python, series.u32().unwrap()),
DataType::UInt64 => PyList::new(python, series.u64().unwrap()),
DataType::Int8 => PyList::new(python, series.i8().unwrap()),
DataType::Int16 => PyList::new(python, series.i16().unwrap()),
DataType::Int32 => PyList::new(python, series.i32().unwrap()),
DataType::Int64 => PyList::new(python, series.i64().unwrap()),
DataType::Float32 => PyList::new(python, series.f32().unwrap()),
DataType::Float64 => PyList::new(python, series.f64().unwrap()),
dt => panic!("to_list() not implemented for {:?}", dt),
};

let pylist = match series.dtype() {
DataType::Categorical(_) => {
PyList::new(python, series.categorical().unwrap().iter_str())
}
DataType::Object(_) => {
let v = PyList::empty(python);
for i in 0..series.len() {
let obj: Option<&ObjectValue> = self.series.get_object(i).map(|any| any.into());
let val = obj.to_object(python);
fn to_list_recursive(python: Python, series: &Series) -> PyObject {
let pylist = match series.dtype() {
DataType::Boolean => PyList::new(python, series.bool().unwrap()),
DataType::UInt8 => PyList::new(python, series.u8().unwrap()),
DataType::UInt16 => PyList::new(python, series.u16().unwrap()),
DataType::UInt32 => PyList::new(python, series.u32().unwrap()),
DataType::UInt64 => PyList::new(python, series.u64().unwrap()),
DataType::Int8 => PyList::new(python, series.i8().unwrap()),
DataType::Int16 => PyList::new(python, series.i16().unwrap()),
DataType::Int32 => PyList::new(python, series.i32().unwrap()),
DataType::Int64 => PyList::new(python, series.i64().unwrap()),
DataType::Float32 => PyList::new(python, series.f32().unwrap()),
DataType::Float64 => PyList::new(python, series.f64().unwrap()),
DataType::Categorical(_) => {
PyList::new(python, series.categorical().unwrap().iter_str())
}
DataType::Object(_) => {
let v = PyList::empty(python);
for i in 0..series.len() {
let obj: Option<&ObjectValue> = series.get_object(i).map(|any| any.into());
let val = obj.to_object(python);

v.append(val).unwrap();
v.append(val).unwrap();
}
v
}
v
}
DataType::List(inner_dtype) => {
let v = PyList::empty(python);
let ca = series.list().unwrap();
for opt_s in ca.amortized_iter() {
match opt_s {
None => {
v.append(python.None()).unwrap();
}
Some(s) => {
let pylst = primitive_to_list(inner_dtype, s.as_ref());
v.append(pylst).unwrap();
DataType::List(_) => {
let v = PyList::empty(python);
let ca = series.list().unwrap();
for opt_s in ca.amortized_iter() {
match opt_s {
None => {
v.append(python.None()).unwrap();
}
Some(s) => {
let pylst = to_list_recursive(python, s.as_ref());
v.append(pylst).unwrap();
}
}
}
v
}
v
}
DataType::Date => {
let ca = series.date().unwrap();
return Wrap(ca).to_object(python);
}
DataType::Datetime(_, _) => {
let ca = series.datetime().unwrap();
return Wrap(ca).to_object(python);
}
DataType::Utf8 => {
let ca = series.utf8().unwrap();
return Wrap(ca).to_object(python);
}
DataType::Struct(_) => {
let ca = series.struct_().unwrap();
return Wrap(ca).to_object(python);
}
DataType::Duration(_) => {
let ca = series.duration().unwrap();
return Wrap(ca).to_object(python);
}
dt => primitive_to_list(dt, series),
};
DataType::Date => {
let ca = series.date().unwrap();
return Wrap(ca).to_object(python);
}
DataType::Datetime(_, _) => {
let ca = series.datetime().unwrap();
return Wrap(ca).to_object(python);
}
DataType::Utf8 => {
let ca = series.utf8().unwrap();
return Wrap(ca).to_object(python);
}
DataType::Struct(_) => {
let ca = series.struct_().unwrap();
return Wrap(ca).to_object(python);
}
DataType::Duration(_) => {
let ca = series.duration().unwrap();
return Wrap(ca).to_object(python);
}
dt => panic!("to_list() not implemented for {:?}", dt),
};
pylist.to_object(python)
}

let pylist = to_list_recursive(python, series);
pylist.to_object(python)
}

Expand Down
9 changes: 9 additions & 0 deletions py-polars/tests/test_df.py
Original file line number Diff line number Diff line change
Expand Up @@ -2081,3 +2081,12 @@ def test_partition_by() -> None:
{"foo": ["B", "B"], "N": [2, 4], "bar": ["m", "m"]},
{"foo": ["C"], "N": [2], "bar": ["l"]},
]


@typing.no_type_check
def test_list_of_list_of_struct() -> None:
    """Check that a list-of-list-of-struct column loaded from pyarrow converts
    back to Python objects via both ``rows()`` and ``to_dicts()``."""
    expected = [{"list_of_list_of_struct": [[{"a": 1}, {"a": 2}]]}]
    # Build the frame from an Arrow table so the nested struct arrays come from Arrow.
    arrow_table = pa.Table.from_pylist(expected)
    frame = pl.from_arrow(arrow_table)
    assert frame.rows() == [([[{"a": 1}, {"a": 2}]],)]
    assert frame.to_dicts() == expected

0 comments on commit 0230a2f

Please sign in to comment.