Skip to content

Commit

Permalink
apply to struct (#2843)
Browse files Browse the repository at this point in the history
  • Loading branch information
ritchie46 committed Mar 7, 2022
1 parent 5bb514a commit 96980ff
Show file tree
Hide file tree
Showing 13 changed files with 565 additions and 190 deletions.
2 changes: 2 additions & 0 deletions polars/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,8 @@ dot_diagram = ["polars-lazy/dot_diagram"]
dataframe_arithmetic = ["polars-core/dataframe_arithmetic"]
product = ["polars-core/product"]

series_from_anyvalue = ["polars-core/series_from_anyvalue"]

test = [
"lazy",
"private",
Expand Down
1 change: 1 addition & 0 deletions polars/polars-core/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ temporal = ["regex", "chrono"]
random = ["rand", "rand_distr"]
default = ["docs", "temporal", "performant", "private"]
lazy = ["sort_multiple"]
series_from_anyvalue = []

# ~40% faster collect, needed until trustedlength iter stabilizes
performant = []
Expand Down
42 changes: 24 additions & 18 deletions polars/polars-core/src/datatypes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -253,6 +253,8 @@ pub enum AnyValue<'a> {
Object(&'a dyn PolarsObjectSafe),
#[cfg(feature = "dtype-struct")]
Struct(Vec<AnyValue<'a>>),
/// A UTF8 encoded string type.
Utf8Owned(String),
}

impl<'a> AnyValue<'a> {
Expand Down Expand Up @@ -426,26 +428,27 @@ impl<'a> AnyValue<'a> {

/// Try to coerce to an AnyValue with static lifetime.
/// This can be done if it does not borrow any values.
pub fn to_static(&self) -> Result<AnyValue<'static>> {
pub fn into_static(self) -> Result<AnyValue<'static>> {
use AnyValue::*;
let av = match self {
Null => AnyValue::Null,
Int8(v) => AnyValue::Int8(*v),
Int16(v) => AnyValue::Int16(*v),
Int32(v) => AnyValue::Int32(*v),
Int64(v) => AnyValue::Int64(*v),
UInt8(v) => AnyValue::UInt8(*v),
UInt16(v) => AnyValue::UInt16(*v),
UInt32(v) => AnyValue::UInt32(*v),
UInt64(v) => AnyValue::UInt64(*v),
Boolean(v) => AnyValue::Boolean(*v),
Float32(v) => AnyValue::Float32(*v),
Float64(v) => AnyValue::Float64(*v),
Int8(v) => AnyValue::Int8(v),
Int16(v) => AnyValue::Int16(v),
Int32(v) => AnyValue::Int32(v),
Int64(v) => AnyValue::Int64(v),
UInt8(v) => AnyValue::UInt8(v),
UInt16(v) => AnyValue::UInt16(v),
UInt32(v) => AnyValue::UInt32(v),
UInt64(v) => AnyValue::UInt64(v),
Boolean(v) => AnyValue::Boolean(v),
Float32(v) => AnyValue::Float32(v),
Float64(v) => AnyValue::Float64(v),
#[cfg(feature = "dtype-date")]
Date(v) => AnyValue::Date(*v),
Date(v) => AnyValue::Date(v),
#[cfg(feature = "dtype-time")]
Time(v) => AnyValue::Time(*v),
List(v) => AnyValue::List(v.clone()),
Time(v) => AnyValue::Time(v),
List(v) => AnyValue::List(v),
Utf8(s) => AnyValue::Utf8Owned(s.to_string()),
dt => {
return Err(PolarsError::ComputeError(
format!("cannot get static AnyValue from {}", dt).into(),
Expand Down Expand Up @@ -509,7 +512,8 @@ impl Display for DataType {
2 => {
write!(
f,
"struct{{{}: {}, {}: {}}}",
"struct[{}]{{'{}': {}, '{}': {}}}",
fields.len(),
fields[0].name(),
fields[0].dtype,
fields[1].name(),
Expand All @@ -519,7 +523,8 @@ impl Display for DataType {
3 => {
write!(
f,
"struct{{{}, {}, {}}}",
"struct[{}]{{'{}', '{}', '{}'}}",
fields.len(),
fields[0].name(),
fields[1].name(),
fields[2].name()
Expand All @@ -528,7 +533,8 @@ impl Display for DataType {
_ => {
write!(
f,
"struct{{{}, ... {}}}",
"struct[{}]{{'{}',...,'{}'}}",
fields.len(),
fields[0].name(),
fields[fields.len() - 1].name()
)
Expand Down
1 change: 1 addition & 0 deletions polars/polars-core/src/fmt.rs
Original file line number Diff line number Diff line change
Expand Up @@ -606,6 +606,7 @@ impl Display for AnyValue<'_> {
AnyValue::Float64(v) => fmt_float(f, width, *v),
AnyValue::Boolean(v) => write!(f, "{}", *v),
AnyValue::Utf8(v) => write!(f, "{}", format_args!("\"{}\"", v)),
AnyValue::Utf8Owned(v) => write!(f, "{}", format_args!("\"{}\"", v)),
#[cfg(feature = "dtype-date")]
AnyValue::Date(v) => write!(f, "{}", date32_to_date(*v)),
#[cfg(feature = "dtype-datetime")]
Expand Down
88 changes: 88 additions & 0 deletions polars/polars-core/src/series/any_value.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
use crate::prelude::*;

fn any_values_to_primitive<T: PolarsNumericType>(avs: &[AnyValue]) -> ChunkedArray<T> {
avs.iter()
.map(|av| av.extract::<T::Native>())
.collect_trusted()
}

fn any_values_to_utf8(avs: &[AnyValue]) -> Utf8Chunked {
avs.iter()
.map(|av| match av {
AnyValue::Utf8(s) => Some(*s),
AnyValue::Utf8Owned(s) => Some(&**s),
_ => None,
})
.collect_trusted()
}

fn any_values_to_bool(avs: &[AnyValue]) -> BooleanChunked {
avs.iter()
.map(|av| match av {
AnyValue::Boolean(b) => Some(*b),
_ => None,
})
.collect_trusted()
}

fn any_values_to_list(avs: &[AnyValue]) -> ListChunked {
avs.iter()
.map(|av| match av {
AnyValue::List(b) => Some(b),
_ => None,
})
.collect_trusted()
}

impl<'a, T: AsRef<[AnyValue<'a>]>> NamedFrom<T, [AnyValue<'a>]> for Series {
fn new(name: &str, v: T) -> Self {
let av = v.as_ref();
Series::from_any_values(name, av)
}
}

impl Series {
fn from_any_values<'a>(name: &str, av: &[AnyValue<'a>]) -> Series {
match av.iter().find(|av| !matches!(av, AnyValue::Null)) {
None => Series::full_null(name, av.len(), &DataType::Int32),
Some(av_) => {
let mut s = match av_ {
AnyValue::Int32(_) => any_values_to_primitive::<Int32Type>(av).into_series(),
AnyValue::Int64(_) => any_values_to_primitive::<Int64Type>(av).into_series(),
AnyValue::UInt32(_) => any_values_to_primitive::<UInt32Type>(av).into_series(),
AnyValue::UInt64(_) => any_values_to_primitive::<UInt64Type>(av).into_series(),
AnyValue::Float32(_) => {
any_values_to_primitive::<Float32Type>(av).into_series()
}
AnyValue::Float64(_) => {
any_values_to_primitive::<Float64Type>(av).into_series()
}
AnyValue::Utf8(_) | AnyValue::Utf8Owned(_) => {
any_values_to_utf8(av).into_series()
}
AnyValue::Boolean(_) => any_values_to_bool(av).into_series(),
AnyValue::List(_) => any_values_to_list(av).into_series(),
#[cfg(feature = "dtype-date")]
AnyValue::Date(_) => any_values_to_primitive::<Int32Type>(av)
.into_date()
.into_series(),
#[cfg(feature = "dtype-datetime")]
AnyValue::Datetime(_, tu, tz) => any_values_to_primitive::<Int64Type>(av)
.into_datetime(*tu, (*tz).clone())
.into_series(),
#[cfg(feature = "dtype-time")]
AnyValue::Time(_) => any_values_to_primitive::<Int64Type>(av)
.into_time()
.into_series(),
#[cfg(feature = "dtype-duration")]
AnyValue::Duration(_, tu) => any_values_to_primitive::<Int64Type>(av)
.into_duration(*tu)
.into_series(),
_ => todo!(),
};
s.rename(name);
s
}
}
}
}
7 changes: 7 additions & 0 deletions polars/polars-core/src/series/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@ use polars_arrow::prelude::QuantileInterpolOptions;
#[cfg(any(feature = "dtype-struct", feature = "object"))]
use std::any::Any;

#[cfg(feature = "series_from_anyvalue")]
mod any_value;
pub(crate) mod arithmetic;
mod comparison;
mod from;
Expand Down Expand Up @@ -152,6 +154,11 @@ impl Hash for Wrap<Series> {
}

impl Series {
/// Create a new empty Series
pub fn new_empty(name: &str, dtype: &DataType) -> Series {
Series::full_null(name, 0, dtype)
}

pub(crate) fn get_inner_mut(&mut self) -> &mut dyn SeriesTrait {
if Arc::weak_count(&self.0) + Arc::strong_count(&self.0) != 1 {
self.0 = self.0.clone_inner();
Expand Down
1 change: 1 addition & 0 deletions py-polars/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,7 @@ features = [
"string_encoding",
"product",
"ndarray",
"series_from_anyvalue",
]

# [patch.crates-io]
Expand Down
64 changes: 64 additions & 0 deletions py-polars/src/apply/mod.rs
Original file line number Diff line number Diff line change
@@ -1,9 +1,14 @@
pub mod dataframe;
pub mod series;

use crate::prelude::ObjectValue;
use crate::{PySeries, Wrap};
use polars::chunked_array::builder::get_list_builder;
use polars::prelude::*;
use polars_core::utils::CustomIterTools;
use polars_core::{export::rayon::prelude::*, POOL};
use pyo3::types::PyTuple;
use pyo3::{PyAny, PyResult};

pub trait PyArrowPrimitiveType: PolarsNumericType {}

Expand All @@ -18,6 +23,65 @@ impl PyArrowPrimitiveType for Int64Type {}
impl PyArrowPrimitiveType for Float32Type {}
impl PyArrowPrimitiveType for Float64Type {}

fn iterator_to_struct<'a>(
it: impl Iterator<Item = Option<&'a PyAny>>,
init_null_count: usize,
first_value: AnyValue<'a>,
name: &str,
capacity: usize,
) -> PyResult<PySeries> {
if let AnyValue::Struct(fields) = &first_value {
let struct_width = fields.len();

let mut items = Vec::with_capacity(fields.len());
for item in fields {
let mut buf = Vec::with_capacity(capacity);
for _ in 0..init_null_count {
buf.push(AnyValue::Null);
}
buf.push(item.clone());
items.push(buf);
}

for tuple in it {
match tuple {
None => {
for field_items in &mut items {
field_items.push(AnyValue::Null);
}
}
Some(tuple) => {
let tuple = tuple.downcast::<PyTuple>()?;
if tuple.len() != struct_width {
return Err(crate::error::ComputeError::new_err(
"all tuples must have equal size",
));
}
for (item, field_items) in tuple.iter().zip(&mut items) {
let item = item.extract::<Wrap<AnyValue>>()?;
field_items.push(item.0)
}
}
}
}

let fields = POOL.install(|| {
items
.par_iter()
.enumerate()
.map(|(i, av)| Series::new(&format!("field_{i}"), av))
.collect::<Vec<_>>()
});

Ok(StructChunked::new(name, &fields)
.unwrap()
.into_series()
.into())
} else {
Err(crate::error::ComputeError::new_err("expected struct"))
}
}

fn iterator_to_primitive<T>(
it: impl Iterator<Item = Option<T::Native>>,
init_null_count: usize,
Expand Down

0 comments on commit 96980ff

Please sign in to comment.