Skip to content

Commit

Permalink
python all anyvalue to python conversion return python types (#3194)
Browse files Browse the repository at this point in the history
  • Loading branch information
ritchie46 committed Apr 20, 2022
1 parent 493c29f commit b4e2e56
Show file tree
Hide file tree
Showing 9 changed files with 129 additions and 101 deletions.
6 changes: 5 additions & 1 deletion polars/polars-core/src/chunked_array/logical/struct_/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -133,7 +133,11 @@ impl LogicalType for StructChunked {

/// Gets AnyValue from LogicalType
fn get_any_value(&self, i: usize) -> AnyValue<'_> {
AnyValue::Struct(self.fields.iter().map(|s| s.get(i)).collect())
if let DataType::Struct(flds) = self.dtype() {
AnyValue::Struct(self.fields.iter().map(|s| s.get(i)).collect(), flds)
} else {
unreachable!()
}
}

// in case of a struct, a cast will coerce the inner types
Expand Down
2 changes: 1 addition & 1 deletion polars/polars-core/src/chunked_array/ops/any_value.rs
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ pub(crate) unsafe fn arr_to_any_value<'a>(
.zip(flds)
.map(|(arr, fld)| arr_to_any_value(&**arr, idx, fld.data_type()))
.collect();
AnyValue::Struct(vals)
AnyValue::Struct(vals, flds)
}
#[cfg(feature = "dtype-datetime")]
DataType::Datetime(tu, tz) => {
Expand Down
4 changes: 3 additions & 1 deletion polars/polars-core/src/datatypes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -250,7 +250,9 @@ pub enum AnyValue<'a> {
/// Can be used to fmt and implements Any, so can be downcasted to the proper value type.
Object(&'a dyn PolarsObjectSafe),
#[cfg(feature = "dtype-struct")]
Struct(Vec<AnyValue<'a>>),
Struct(Vec<AnyValue<'a>>, &'a [Field]),
#[cfg(feature = "dtype-struct")]
StructOwned(Box<(Vec<AnyValue<'a>>, Vec<Field>)>),
/// A UTF8 encoded string type.
Utf8Owned(String),
}
Expand Down
28 changes: 17 additions & 11 deletions polars/polars-core/src/fmt.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ use std::{
fmt,
fmt::{Debug, Display, Formatter},
};

const LIMIT: usize = 25;

use arrow::temporal_conversions::{timestamp_ms_to_datetime, timestamp_us_to_datetime};
Expand Down Expand Up @@ -635,19 +636,24 @@ impl Display for AnyValue<'_> {
#[cfg(feature = "object")]
AnyValue::Object(v) => write!(f, "{}", v),
#[cfg(feature = "dtype-struct")]
AnyValue::Struct(vals) => {
write!(f, "{{")?;
if !vals.is_empty() {
for v in &vals[..vals.len() - 1] {
write!(f, "{},", v)?;
}
// last value has no trailing comma
write!(f, "{}", vals[vals.len() - 1])?;
}
write!(f, "}}")
}
AnyValue::Struct(vals, _) => fmt_struct(f, vals),
#[cfg(feature = "dtype-struct")]
AnyValue::StructOwned(payload) => fmt_struct(f, &payload.0),
}
}
}

#[cfg(feature = "dtype-struct")]
fn fmt_struct(f: &mut Formatter<'_>, vals: &[AnyValue]) -> fmt::Result {
write!(f, "{{")?;
if !vals.is_empty() {
for v in &vals[..vals.len() - 1] {
write!(f, "{},", v)?;
}
// last value has no trailing comma
write!(f, "{}", vals[vals.len() - 1])?;
}
write!(f, "}}")
}

macro_rules! impl_fmt_list {
Expand Down
100 changes: 58 additions & 42 deletions py-polars/src/apply/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ use polars::chunked_array::builder::get_list_builder;
use polars::prelude::*;
use polars_core::utils::CustomIterTools;
use polars_core::{export::rayon::prelude::*, POOL};
use pyo3::types::PyTuple;
use pyo3::types::{PyDict, PyTuple};
use pyo3::{PyAny, PyResult};

pub trait PyArrowPrimitiveType: PolarsNumericType {}
Expand All @@ -30,56 +30,72 @@ fn iterator_to_struct<'a>(
name: &str,
capacity: usize,
) -> PyResult<PySeries> {
if let AnyValue::Struct(fields) = &first_value {
let struct_width = fields.len();
let (vals, flds) = match &first_value {
AnyValue::Struct(vals, flds) => (&**vals, *flds),
AnyValue::StructOwned(payload) => (&*payload.0, &*payload.1),
_ => {
return Err(crate::error::ComputeError::new_err(format!(
"expected struct got {:?}",
first_value
)))
}
};

let mut items = Vec::with_capacity(fields.len());
for item in fields {
let mut buf = Vec::with_capacity(capacity);
for _ in 0..init_null_count {
buf.push(AnyValue::Null);
}
buf.push(item.clone());
items.push(buf);
let struct_width = vals.len();

// every item in the struct is kept as its own buffer of anyvalues
// so as struct with 2 items: {a, b}
// will have
// [
// [ a values ]
// [ b values ]
// ]
let mut items = Vec::with_capacity(vals.len());
for item in vals {
let mut buf = Vec::with_capacity(capacity);
for _ in 0..init_null_count {
buf.push(AnyValue::Null);
}
buf.push(item.clone());
items.push(buf);
}

for tuple in it {
match tuple {
None => {
for field_items in &mut items {
field_items.push(AnyValue::Null);
}
for dict in it {
match dict {
None => {
for field_items in &mut items {
field_items.push(AnyValue::Null);
}
Some(tuple) => {
let tuple = tuple.downcast::<PyTuple>()?;
if tuple.len() != struct_width {
return Err(crate::error::ComputeError::new_err(
"all tuples must have equal size",
));
}
for (item, field_items) in tuple.iter().zip(&mut items) {
let item = item.extract::<Wrap<AnyValue>>()?;
field_items.push(item.0)
}
}
Some(dict) => {
let dict = dict.downcast::<PyDict>()?;
if dict.len() != struct_width {
return Err(crate::error::ComputeError::new_err(
"all tuples must have equal size",
));
}
// we ignore the keys of the rest of the dicts
// the first item determines the output name
for ((_, val), field_items) in dict.iter().zip(&mut items) {
let item = val.extract::<Wrap<AnyValue>>()?;
field_items.push(item.0)
}
}
}
}

let fields = POOL.install(|| {
items
.par_iter()
.enumerate()
.map(|(i, av)| Series::new(&format!("field_{i}"), av))
.collect::<Vec<_>>()
});
let fields = POOL.install(|| {
items
.par_iter()
.zip(flds)
.map(|(av, fld)| Series::new(fld.name(), av))
.collect::<Vec<_>>()
});

Ok(StructChunked::new(name, &fields)
.unwrap()
.into_series()
.into())
} else {
Err(crate::error::ComputeError::new_err("expected struct"))
}
Ok(StructChunked::new(name, &fields)
.unwrap()
.into_series()
.into())
}

fn iterator_to_primitive<T>(
Expand Down
2 changes: 1 addition & 1 deletion py-polars/src/apply/series.rs
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ fn infer_and_finish<'a, A: ApplyLambda<'a>>(
applyer
.apply_lambda_with_list_out_type(py, new_lambda, null_count, &series, dt)
.map(|ca| ca.into_series().into())
} else if out.is_instance_of::<PyTuple>().unwrap() {
} else if out.is_instance_of::<PyDict>().unwrap() {
let first = out.extract::<Wrap<AnyValue<'_>>>()?;
applyer.apply_to_struct(py, lambda, null_count, first.0)
}
Expand Down
56 changes: 27 additions & 29 deletions py-polars/src/conversion.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ use pyo3::basic::CompareOp;
use pyo3::conversion::{FromPyObject, IntoPy};
use pyo3::exceptions::PyValueError;
use pyo3::prelude::*;
use pyo3::types::{PyBool, PyDict, PyList, PySequence, PyTuple};
use pyo3::types::{PyBool, PyDict, PyList, PySequence};
use pyo3::{PyAny, PyResult};
use std::fmt::{Display, Formatter};
use std::hash::{Hash, Hasher};
Expand Down Expand Up @@ -159,6 +159,14 @@ impl<'a> FromPyObject<'a> for Wrap<NullValues> {
}
}

fn struct_dict(py: Python, vals: Vec<AnyValue>, flds: &[Field]) -> PyObject {
let dict = PyDict::new(py);
for (fld, val) in flds.iter().zip(vals) {
dict.set_item(fld.name(), Wrap(val)).unwrap()
}
dict.into_py(py)
}

impl IntoPy<PyObject> for Wrap<AnyValue<'_>> {
fn into_py(self, py: Python) -> PyObject {
match self.0 {
Expand Down Expand Up @@ -222,22 +230,9 @@ impl IntoPy<PyObject> for Wrap<AnyValue<'_>> {
}
}
AnyValue::Time(v) => v.into_py(py),
AnyValue::List(v) => {
let pypolars = PyModule::import(py, "polars").unwrap();
let pyseries = PySeries::new(v);
let python_series_wrapper = pypolars
.getattr("wrap_s")
.unwrap()
.call1((pyseries,))
.unwrap();
python_series_wrapper.into()
}
AnyValue::Struct(vals) => {
// Safety:
// Wrap<T> is transparent
let vals = unsafe { std::mem::transmute::<_, Vec<Wrap<AnyValue>>>(vals) };
PyTuple::new(py, vals).into_py(py)
}
AnyValue::List(v) => PySeries::new(v).to_list(),
AnyValue::Struct(vals, flds) => struct_dict(py, vals, flds),
AnyValue::StructOwned(payload) => struct_dict(py, payload.0, &payload.1),
AnyValue::Object(v) => {
let s = format!("{}", v);
s.into_py(py)
Expand Down Expand Up @@ -412,8 +407,8 @@ impl ToPyObject for Wrap<&StructChunked> {
fn to_object(&self, py: Python) -> PyObject {
let s = self.0.clone().into_series();
let iter = s.iter().map(|av| {
if let AnyValue::Struct(vals) = av {
PyTuple::new(py, vals.into_iter().map(Wrap))
if let AnyValue::Struct(vals, flds) = av {
struct_dict(py, vals, flds)
} else {
unreachable!()
}
Expand Down Expand Up @@ -519,16 +514,19 @@ impl<'s> FromPyObject<'s> for Wrap<AnyValue<'s>> {
}
} else if ob.is_none() {
Ok(AnyValue::Null.into())
} else if ob.is_instance_of::<PyTuple>()? {
let tuple = ob.downcast::<PyTuple>().unwrap();
let items = tuple
.iter()
.map(|ob| {
let av = ob.extract::<Wrap<AnyValue>>()?;
Ok(av.0)
})
.collect::<PyResult<Vec<_>>>()?;
Ok(Wrap(AnyValue::Struct(items)))
} else if ob.is_instance_of::<PyDict>()? {
let dict = ob.downcast::<PyDict>().unwrap();
let len = dict.len();
let mut keys = Vec::with_capacity(len);
let mut vals = Vec::with_capacity(len);
for (k, v) in dict.into_iter() {
let key = k.extract::<&str>()?;
let val = v.extract::<Wrap<AnyValue>>()?.0;
let dtype = DataType::from(&val);
keys.push(Field::new(key, dtype));
vals.push(val)
}
Ok(Wrap(AnyValue::StructOwned(Box::new((vals, keys)))))
} else if ob.is_instance_of::<PyList>()? {
Python::with_gil(|py| {
let pypolars = PyModule::import(py, "polars").unwrap().to_object(py);
Expand Down
4 changes: 2 additions & 2 deletions py-polars/tests/test_interop.py
Original file line number Diff line number Diff line change
Expand Up @@ -185,8 +185,8 @@ def test_from_dict_struct() -> None:
}
df = pl.from_dict(data)
assert df.shape == (2, 2)
assert df["a"][0] == (1, 2)
assert df["a"][1] == (3, 4)
assert df["a"][0] == {"b": 1, "c": 2}
assert df["a"][1] == {"b": 3, "c": 4}


def test_from_dicts() -> None:
Expand Down
28 changes: 15 additions & 13 deletions py-polars/tests/test_struct.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,8 @@ def test_struct_various() -> None:
s = df.to_struct("my_struct")

assert s.struct.fields == ["int", "str", "bool", "list"]
assert s[0] == (1, "a", True, pl.Series([1, 2]))
assert s[1] == (2, "b", None, pl.Series([3]))
assert s[0] == {"int": 1, "str": "a", "bool": True, "list": [1, 2]}
assert s[1] == {"int": 2, "str": "b", "bool": None, "list": [3]}
assert s.struct.field("list").to_list() == [[1, 2], [3]]
assert s.struct.field("int").to_list() == [1, 2]

Expand All @@ -25,25 +25,25 @@ def test_struct_to_list() -> None:
assert pl.DataFrame(
{"int": [1, 2], "str": ["a", "b"], "bool": [True, None], "list": [[1, 2], [3]]}
).select([pl.struct(pl.all()).alias("my_struct")]).to_series().to_list() == [
(1, "a", True, pl.Series([1, 2])),
(2, "b", None, pl.Series([3])),
{"int": 1, "str": "a", "bool": True, "list": [1, 2]},
{"int": 2, "str": "b", "bool": None, "list": [3]},
]


def test_apply_to_struct() -> None:
df = (
pl.Series([None, 2, 3, 4])
.apply(lambda x: (x, x * 2, True, [1, 2], "foo"))
.apply(lambda x: {"a": x, "b": x * 2, "c": True, "d": [1, 2], "e": "foo"})
.struct.to_frame()
)

expected = pl.DataFrame(
{
"field_0": [None, 2, 3, 4],
"field_1": [None, 4, 6, 8],
"field_2": [None, True, True, True],
"field_3": [None, [1, 2], [1, 2], [1, 2]],
"field_4": [None, "foo", "foo", "foo"],
"a": [None, 2, 3, 4],
"b": [None, 4, 6, 8],
"c": [None, True, True, True],
"d": [None, [1, 2], [1, 2], [1, 2]],
"e": [None, "foo", "foo", "foo"],
}
)

Expand Down Expand Up @@ -124,9 +124,11 @@ def test_value_counts_expr() -> None:
.to_series()
.to_list()
)

out = sorted(out) # type: ignore
assert out == [("a", 1), ("b", 2), ("c", 3)]
assert out == [
{"id": "c", "counts": 3},
{"id": "b", "counts": 2},
{"id": "a", "counts": 1},
]


def test_struct_comparison() -> None:
Expand Down

0 comments on commit b4e2e56

Please sign in to comment.