Skip to content

Commit

Permalink
refactor(rust): shrink anyvalue size (#5770)
Browse files Browse the repository at this point in the history
  • Loading branch information
ritchie46 committed Dec 10, 2022
1 parent 24e696a commit e172db7
Show file tree
Hide file tree
Showing 9 changed files with 80 additions and 45 deletions.
8 changes: 7 additions & 1 deletion polars/polars-core/src/chunked_array/logical/struct_/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -192,7 +192,13 @@ impl LogicalType for StructChunked {
/// Gets AnyValue from LogicalType
fn get_any_value(&self, i: usize) -> AnyValue<'_> {
if let DataType::Struct(flds) = self.dtype() {
AnyValue::Struct(self.fields.iter().map(|s| s.get(i)).collect(), flds)
// safety: we already have a single chunk and we are
// guarded by the type system.
unsafe {
let arr = &**self.chunks.get_unchecked(0);
let arr = &*(arr as *const dyn Array as *const StructArray);
AnyValue::Struct(i, arr, flds)
}
} else {
unreachable!()
}
Expand Down
31 changes: 24 additions & 7 deletions polars/polars-core/src/chunked_array/ops/any_value.rs
Original file line number Diff line number Diff line change
Expand Up @@ -73,13 +73,7 @@ pub(crate) unsafe fn arr_to_any_value<'a>(
#[cfg(feature = "dtype-struct")]
DataType::Struct(flds) => {
let arr = &*(arr as *const dyn Array as *const StructArray);
let vals = arr
.values()
.iter()
.zip(flds)
.map(|(arr, fld)| arr_to_any_value(&**arr, idx, fld.data_type()))
.collect();
AnyValue::Struct(vals, flds)
AnyValue::Struct(idx, arr, flds)
}
#[cfg(feature = "dtype-datetime")]
DataType::Datetime(tu, tz) => {
Expand Down Expand Up @@ -111,6 +105,29 @@ pub(crate) unsafe fn arr_to_any_value<'a>(
}
}

#[cfg(feature = "dtype-struct")]
impl<'a> AnyValue<'a> {
    /// Lazily yields the field values of a borrowed struct value.
    ///
    /// `AnyValue::Struct` only stores a row index, a reference to the
    /// `StructArray` and the field slice. This walks the child arrays
    /// returned by `values()`, pairs each one with its `Field`, and converts
    /// the element at the stored row index into an `AnyValue` using the
    /// field's data type. Nothing is collected here; callers that need a
    /// `Vec` can use `_materialize_struct_av`.
    ///
    /// # Panics
    ///
    /// Hits `unreachable!()` when `self` is any variant other than
    /// `AnyValue::Struct` (this includes `AnyValue::StructOwned`).
    pub fn _iter_struct_av(&self) -> impl Iterator<Item = AnyValue> {
        // Copy the three components out of the variant so the iterator below
        // does not have to be built inside the match arm.
        let (row, struct_arr, fields) = match self {
            AnyValue::Struct(idx, arr, flds) => (*idx, *arr, *flds),
            _ => unreachable!(),
        };

        struct_arr
            .values()
            .iter()
            .zip(fields)
            .map(move |(child, field)| {
                // SAFETY: `arr_to_any_value` is an unsafe fn; this call relies
                // on `row` being a valid index into every child array and on
                // `field` describing that child's type. Presumably both are
                // guaranteed by whoever constructed the `AnyValue::Struct`
                // -- NOTE(review): confirm against the constructors.
                unsafe { arr_to_any_value(&**child, row, field.data_type()) }
            })
    }

    /// Appends the field values of a borrowed struct value to `buf`.
    ///
    /// Existing contents of `buf` are kept; the values produced by
    /// `_iter_struct_av` are pushed after them.
    ///
    /// # Panics
    ///
    /// Panics under the same condition as `_iter_struct_av`: `self` must be
    /// `AnyValue::Struct`.
    pub fn _materialize_struct_av(&'a self, buf: &mut Vec<AnyValue<'a>>) {
        buf.extend(self._iter_struct_av());
    }
}

macro_rules! get_any_value_unchecked {
($self:ident, $index:expr) => {{
let (chunk_idx, idx) = $self.index_to_chunked_index($index);
Expand Down
5 changes: 3 additions & 2 deletions polars/polars-core/src/chunked_array/ops/is_in.rs
Original file line number Diff line number Diff line change
Expand Up @@ -359,8 +359,9 @@ impl IsIn for StructChunked {
let mut ca: BooleanChunked = if self.len() == 1 && other.len() != 1 {
let mut value = vec![];
let left = self.clone().into_series();
if let AnyValue::Struct(val, _) = left.get(0) {
value = val
let av = left.get(0);
if let AnyValue::Struct(_, _, _) = av {
av._materialize_struct_av(&mut value);
}
other
.list()?
Expand Down
47 changes: 25 additions & 22 deletions polars/polars-core/src/datatypes/any_value.rs
Original file line number Diff line number Diff line change
Expand Up @@ -52,10 +52,11 @@ pub enum AnyValue<'a> {
/// Can be used to fmt and implements Any, so can be downcasted to the proper value type.
Object(&'a dyn PolarsObjectSafe),
#[cfg(feature = "dtype-struct")]
Struct(Vec<AnyValue<'a>>, &'a [Field]),
// 3 pointers and thus not larger than string/vec
Struct(usize, &'a StructArray, &'a [Field]),
#[cfg(feature = "dtype-struct")]
StructOwned(Box<(Vec<AnyValue<'a>>, Vec<Field>)>),
/// A UTF8 encoded string type.
/// An UTF8 encoded string type.
Utf8Owned(smartstring::alias::String),
#[cfg(feature = "dtype-binary")]
Binary(&'a [u8]),
Expand Down Expand Up @@ -345,7 +346,9 @@ impl<'a> AnyValue<'a> {
Categorical(_, _) => DataType::Categorical(None),
List(s) => DataType::List(Box::new(s.dtype().clone())),
#[cfg(feature = "dtype-struct")]
Struct(_, field) => DataType::Struct(field.to_vec()),
Struct(_, _, fields) => DataType::Struct(fields.to_vec()),
#[cfg(feature = "dtype-struct")]
StructOwned(payload) => DataType::Struct(payload.1.clone()),
#[cfg(feature = "dtype-binary")]
Binary(_) => DataType::Binary,
_ => unimplemented!(),
Expand Down Expand Up @@ -520,29 +523,29 @@ impl<'a> AnyValue<'a> {
pub fn into_static(self) -> PolarsResult<AnyValue<'static>> {
use AnyValue::*;
let av = match self {
Null => AnyValue::Null,
Int8(v) => AnyValue::Int8(v),
Int16(v) => AnyValue::Int16(v),
Int32(v) => AnyValue::Int32(v),
Int64(v) => AnyValue::Int64(v),
UInt8(v) => AnyValue::UInt8(v),
UInt16(v) => AnyValue::UInt16(v),
UInt32(v) => AnyValue::UInt32(v),
UInt64(v) => AnyValue::UInt64(v),
Boolean(v) => AnyValue::Boolean(v),
Float32(v) => AnyValue::Float32(v),
Float64(v) => AnyValue::Float64(v),
Null => Null,
Int8(v) => Int8(v),
Int16(v) => Int16(v),
Int32(v) => Int32(v),
Int64(v) => Int64(v),
UInt8(v) => UInt8(v),
UInt16(v) => UInt16(v),
UInt32(v) => UInt32(v),
UInt64(v) => UInt64(v),
Boolean(v) => Boolean(v),
Float32(v) => Float32(v),
Float64(v) => Float64(v),
#[cfg(feature = "dtype-date")]
Date(v) => AnyValue::Date(v),
Date(v) => Date(v),
#[cfg(feature = "dtype-time")]
Time(v) => AnyValue::Time(v),
List(v) => AnyValue::List(v),
Utf8(v) => AnyValue::Utf8Owned(v.into()),
Utf8Owned(v) => AnyValue::Utf8Owned(v),
Time(v) => Time(v),
List(v) => List(v),
Utf8(v) => Utf8Owned(v.into()),
Utf8Owned(v) => Utf8Owned(v),
#[cfg(feature = "dtype-binary")]
Binary(v) => AnyValue::BinaryOwned(v.to_vec()),
Binary(v) => BinaryOwned(v.to_vec()),
#[cfg(feature = "dtype-binary")]
BinaryOwned(v) => AnyValue::BinaryOwned(v),
BinaryOwned(v) => BinaryOwned(v),
dt => {
return Err(PolarsError::ComputeError(
format!("cannot get static AnyValue from {}", dt).into(),
Expand Down
6 changes: 5 additions & 1 deletion polars/polars-core/src/fmt.rs
Original file line number Diff line number Diff line change
Expand Up @@ -741,7 +741,11 @@ impl Display for AnyValue<'_> {
#[cfg(feature = "object")]
AnyValue::Object(v) => write!(f, "{}", v),
#[cfg(feature = "dtype-struct")]
AnyValue::Struct(vals, _) => fmt_struct(f, vals),
av @ AnyValue::Struct(_, _, _) => {
let mut avs = vec![];
av._materialize_struct_av(&mut avs);
fmt_struct(f, &avs)
}
#[cfg(feature = "dtype-struct")]
AnyValue::StructOwned(payload) => fmt_struct(f, &payload.0),
}
Expand Down
8 changes: 4 additions & 4 deletions polars/polars-core/src/frame/row.rs
Original file line number Diff line number Diff line change
Expand Up @@ -254,7 +254,7 @@ fn is_nested_null(av: &AnyValue) -> bool {
AnyValue::Null => true,
AnyValue::List(s) => s.null_count() == s.len(),
#[cfg(feature = "dtype-struct")]
AnyValue::Struct(avs, _) => avs.iter().all(is_nested_null),
AnyValue::Struct(_, _, _) => av._iter_struct_av().all(|av| is_nested_null(&av)),
_ => false,
}
}
Expand All @@ -264,10 +264,10 @@ fn infer_dtype_dynamic(av: &AnyValue) -> DataType {
match av {
AnyValue::List(s) if s.null_count() == s.len() => DataType::List(Box::new(DataType::Null)),
#[cfg(feature = "dtype-struct")]
AnyValue::Struct(avs, _) => DataType::Struct(
avs.iter()
AnyValue::Struct(_, _, _) => DataType::Struct(
av._iter_struct_av()
.map(|av| {
let dtype = infer_dtype_dynamic(av);
let dtype = infer_dtype_dynamic(&av);
Field::new("", dtype)
})
.collect(),
Expand Down
2 changes: 1 addition & 1 deletion polars/polars-core/src/series/any_value.rs
Original file line number Diff line number Diff line change
Expand Up @@ -229,7 +229,7 @@ impl<'a> From<&AnyValue<'a>> for DataType {
#[cfg(feature = "dtype-struct")]
StructOwned(payload) => DataType::Struct(payload.1.to_vec()),
#[cfg(feature = "dtype-struct")]
Struct(_, fields) => DataType::Struct(fields.to_vec()),
Struct(_, _, flds) => DataType::Struct(flds.to_vec()),
#[cfg(feature = "dtype-duration")]
Duration(_, tu) => DataType::Duration(*tu),
UInt8(_) => DataType::UInt8,
Expand Down
4 changes: 2 additions & 2 deletions py-polars/src/apply/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,8 @@ fn iterator_to_struct<'a>(
capacity: usize,
) -> PyResult<PySeries> {
let (vals, flds) = match &first_value {
AnyValue::Struct(vals, flds) => (&**vals, *flds),
AnyValue::StructOwned(payload) => (&*payload.0, &*payload.1),
av @ AnyValue::Struct(_, _, flds) => (av._iter_struct_av().collect::<Vec<_>>(), &**flds),
AnyValue::StructOwned(payload) => (payload.0.clone(), &*payload.1),
_ => {
return Err(crate::error::ComputeError::new_err(format!(
"expected struct got {:?}",
Expand Down
14 changes: 9 additions & 5 deletions py-polars/src/conversion.rs
Original file line number Diff line number Diff line change
Expand Up @@ -171,7 +171,11 @@ impl<'a> FromPyObject<'a> for Wrap<NullValues> {
}
}

fn struct_dict(py: Python, vals: Vec<AnyValue>, flds: &[Field]) -> PyObject {
fn struct_dict<'a>(
py: Python,
vals: impl Iterator<Item = AnyValue<'a>>,
flds: &[Field],
) -> PyObject {
let dict = PyDict::new(py);
for (fld, val) in flds.iter().zip(vals) {
dict.set_item(fld.name(), Wrap(val)).unwrap()
Expand Down Expand Up @@ -239,8 +243,8 @@ impl IntoPy<PyObject> for Wrap<AnyValue<'_>> {
convert.call1((v,)).unwrap().into_py(py)
}
AnyValue::List(v) => PySeries::new(v).to_list(),
AnyValue::Struct(vals, flds) => struct_dict(py, vals, flds),
AnyValue::StructOwned(payload) => struct_dict(py, payload.0, &payload.1),
ref av @ AnyValue::Struct(_, _, flds) => struct_dict(py, av._iter_struct_av(), flds),
AnyValue::StructOwned(payload) => struct_dict(py, payload.0.into_iter(), &payload.1),
#[cfg(feature = "object")]
AnyValue::Object(v) => {
let s = format!("{}", v);
Expand Down Expand Up @@ -435,8 +439,8 @@ impl ToPyObject for Wrap<&StructChunked> {
// make series::iter() accept a chunk index.
let s = s.rechunk();
let iter = s.iter().map(|av| {
if let AnyValue::Struct(vals, flds) = av {
struct_dict(py, vals, flds)
if let AnyValue::Struct(_, _, flds) = av {
struct_dict(py, av._iter_struct_av(), flds)
} else {
unreachable!()
}
Expand Down

0 comments on commit e172db7

Please sign in to comment.