Skip to content

Commit

Permalink
feat[python]: allow object literals (#4708)
Browse files Browse the repository at this point in the history
  • Loading branch information
ritchie46 committed Sep 3, 2022
1 parent fb0aaa3 commit a507e90
Show file tree
Hide file tree
Showing 9 changed files with 74 additions and 32 deletions.
10 changes: 10 additions & 0 deletions polars/polars-core/src/series/implementations/object.rs
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,10 @@ where
Cow::Borrowed(self.0.ref_field())
}

/// Returns the data type of the wrapped value by delegating to `self.0.dtype()`.
fn _dtype(&self) -> &DataType {
self.0.dtype()
}

/// Forwards to the wrapped value's `agg_list` for the given `groups` and
/// returns the resulting `Series`.
///
/// # Safety
///
/// NOTE(review): the safety contract is not visible in this hunk; presumably
/// `groups` must only reference in-bounds rows of the wrapped value — confirm
/// against the trait definition.
unsafe fn agg_list(&self, groups: &GroupsProxy) -> Series {
self.0.agg_list(groups)
}
Expand All @@ -50,6 +54,12 @@ where
/// Computes the groups of the wrapped value via `IntoGroupsProxy::group_tuples`,
/// passing `multithreaded` and `sorted` through unchanged.
fn group_tuples(&self, multithreaded: bool, sorted: bool) -> GroupsProxy {
IntoGroupsProxy::group_tuples(&self.0, multithreaded, sorted)
}
/// Zips the wrapped value with `other` under `mask` by calling `zip_with`
/// on the wrapped value, then converts the result back into a `Series`.
///
/// `other` is converted with `as_ref().as_ref()` to the argument type that
/// `zip_with` expects. Any error returned by `zip_with` is propagated
/// unchanged.
#[cfg(feature = "zip_with")]
fn zip_with_same_type(&self, mask: &BooleanChunked, other: &Series) -> Result<Series> {
    let zipped = self.0.zip_with(mask, other.as_ref().as_ref())?;
    Ok(zipped.into_series())
}
}
#[cfg_attr(docsrs, doc(cfg(feature = "object")))]
impl<T> SeriesTrait for SeriesWrap<ObjectChunked<T>>
Expand Down
2 changes: 1 addition & 1 deletion polars/polars-lazy/src/logical_plan/lit.rs
Original file line number Diff line number Diff line change
Expand Up @@ -178,7 +178,7 @@ impl TryFrom<AnyValue<'_>> for LiteralValue {
#[cfg(feature = "dtype-categorical")]
AnyValue::Categorical(c, rev_mapping) => Ok(Self::Utf8(rev_mapping.get(c).to_string())),
_ => Err(PolarsError::ComputeError(
"Unsupporten AnyValue type variant, cannot convert to Literal".into(),
"Unsupported AnyValue type variant, cannot convert to Literal".into(),
)),
}
}
Expand Down
6 changes: 3 additions & 3 deletions py-polars/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -156,9 +156,9 @@ name = "polars"
crate-type = ["cdylib"]

[profile.release]
codegen-units = 1
lto = "fat"
# codegen-units = 1
# lto = "fat"

# This is ignored here; would be set in .cargo/config.toml.
# Should not be used when packaging
# target-cpu = "native"
target-cpu = "native"
13 changes: 5 additions & 8 deletions py-polars/polars/_html.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,12 @@
"""Module for formatting output data in HTML."""
from __future__ import annotations

import html
import os
from textwrap import dedent
from types import TracebackType
from typing import Iterable

from polars.datatypes import Object


class Tag:
"""Class for representing an HTML tag."""
Expand Down Expand Up @@ -104,12 +103,10 @@ def write_body(self) -> None:
self.elements.append("...")
else:
series = self.df[:, c]
if series.dtype == Object:
self.elements.append(f"{series[r]}")
else:
self.elements.append(
f"{series._s.get_fmt(r, str_lengths)}"
)

self.elements.append(
html.escape(series._s.get_fmt(r, str_lengths))
)

def write(self, inner: str) -> None:
self.elements.append(inner)
Expand Down
14 changes: 10 additions & 4 deletions py-polars/polars/internals/lazy_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -674,7 +674,9 @@ def tail(column: str | pli.Series, n: int = 10) -> pli.Expr | pli.Series:
return col(column).tail(n)


def lit(value: Any, dtype: type[DataType] | None = None) -> pli.Expr:
def lit(
value: Any, dtype: type[DataType] | None = None, allow_object: bool = False
) -> pli.Expr:
"""
Return an expression representing a literal value.
Expand All @@ -684,6 +686,10 @@ def lit(value: Any, dtype: type[DataType] | None = None) -> pli.Expr:
Value that should be used as a `literal`.
dtype
Optionally define a dtype.
allow_object
If type is unknown use an 'object' type.
By default, we will raise a `ValueError`
if the type is unknown.
Examples
--------
Expand Down Expand Up @@ -724,7 +730,7 @@ def lit(value: Any, dtype: type[DataType] | None = None) -> pli.Expr:
elif isinstance(value, pli.Series):
name = value.name
value = value._s
e = pli.wrap_expr(pylit(value))
e = pli.wrap_expr(pylit(value, allow_object))
if name == "":
return e
return e.alias(name)
Expand All @@ -733,7 +739,7 @@ def lit(value: Any, dtype: type[DataType] | None = None) -> pli.Expr:
return lit(pli.Series("", value))

if dtype:
return pli.wrap_expr(pylit(value)).cast(dtype)
return pli.wrap_expr(pylit(value, allow_object)).cast(dtype)

try:
# numpy literals like np.float32(0) have item/dtype
Expand All @@ -755,7 +761,7 @@ def lit(value: Any, dtype: type[DataType] | None = None) -> pli.Expr:
except AttributeError:
item = value

return pli.wrap_expr(pylit(item))
return pli.wrap_expr(pylit(item, allow_object))


def spearman_rank_corr(a: str | pli.Expr, b: str | pli.Expr, ddof: int = 1) -> pli.Expr:
Expand Down
1 change: 1 addition & 0 deletions py-polars/src/conversion.rs
Original file line number Diff line number Diff line change
Expand Up @@ -609,6 +609,7 @@ impl FromPyObject<'_> for Wrap<Schema> {
}

/// Wrapper around a single Python object (`PyObject`).
///
/// `#[repr(transparent)]` gives this struct the same in-memory layout as its
/// only field, `inner`.
#[derive(Clone, Debug)]
#[repr(transparent)]
pub struct ObjectValue {
/// The wrapped Python object.
pub inner: PyObject,
}
Expand Down
19 changes: 13 additions & 6 deletions py-polars/src/lazy/dsl.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ use super::apply::*;
use crate::conversion::{parse_fill_null_strategy, Wrap};
use crate::lazy::map_single;
use crate::lazy::utils::py_exprs_to_exprs;
use crate::prelude::ObjectValue;
use crate::series::PySeries;
use crate::utils::reinterpret;

Expand Down Expand Up @@ -1668,7 +1669,7 @@ pub fn fold(acc: PyExpr, lambda: PyObject, exprs: Vec<PyExpr>) -> PyExpr {
polars::lazy::dsl::fold_exprs(acc.inner, func, exprs).into()
}

pub fn lit(value: &PyAny) -> PyResult<PyExpr> {
pub fn lit(value: &PyAny, allow_object: bool) -> PyResult<PyExpr> {
if let Ok(true) = value.is_instance_of::<PyBool>() {
let val = value.extract::<bool>().unwrap();
Ok(dsl::lit(val).into())
Expand All @@ -1695,11 +1696,17 @@ pub fn lit(value: &PyAny) -> PyResult<PyExpr> {
} else if value.is_none() {
Ok(dsl::lit(Null {}).into())
} else {
let value = value.str()?;
Err(PyValueError::new_err(format!(
"could not convert value {:?} as a Literal",
value
)))
if allow_object {
let s = Python::with_gil(|py| {
PySeries::new_object("", vec![ObjectValue::from(value.into_py(py))], false).series
});
Ok(dsl::lit(s).into())
} else {
Err(PyValueError::new_err(format!(
"could not convert value {:?} as a Literal",
value.str()?
)))
}
}
}

Expand Down
24 changes: 14 additions & 10 deletions py-polars/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ use polars::prelude::Null;
use polars_core::datatypes::TimeUnit;
use polars_core::prelude::{DataFrame, IntoSeries, IDX_DTYPE};
use polars_core::POOL;
use pyo3::exceptions::PyValueError;
use pyo3::panic::PanicException;
use pyo3::prelude::*;
use pyo3::types::{PyBool, PyDict, PyFloat, PyInt, PyString};
Expand Down Expand Up @@ -96,8 +97,8 @@ fn dtype_str_repr(dtype: Wrap<DataType>) -> PyResult<String> {
}

#[pyfunction]
fn lit(value: &PyAny) -> PyResult<dsl::PyExpr> {
dsl::lit(value)
fn lit(value: &PyAny, allow_object: bool) -> PyResult<dsl::PyExpr> {
dsl::lit(value, allow_object)
}

#[pyfunction]
Expand All @@ -116,30 +117,33 @@ fn arange(low: PyExpr, high: PyExpr, step: usize) -> PyExpr {
}

#[pyfunction]
fn repeat(value: &PyAny, n_times: PyExpr) -> PyExpr {
fn repeat(value: &PyAny, n_times: PyExpr) -> PyResult<PyExpr> {
if let Ok(true) = value.is_instance_of::<PyBool>() {
let val = value.extract::<bool>().unwrap();
polars::lazy::dsl::repeat(val, n_times.inner).into()
Ok(polars::lazy::dsl::repeat(val, n_times.inner).into())
} else if let Ok(int) = value.downcast::<PyInt>() {
let val = int.extract::<i64>().unwrap();

if val > 0 && val < i32::MAX as i64 || val < 0 && val > i32::MIN as i64 {
polars::lazy::dsl::repeat(val as i32, n_times.inner).into()
Ok(polars::lazy::dsl::repeat(val as i32, n_times.inner).into())
} else {
polars::lazy::dsl::repeat(val, n_times.inner).into()
Ok(polars::lazy::dsl::repeat(val, n_times.inner).into())
}
} else if let Ok(float) = value.downcast::<PyFloat>() {
let val = float.extract::<f64>().unwrap();
polars::lazy::dsl::repeat(val, n_times.inner).into()
Ok(polars::lazy::dsl::repeat(val, n_times.inner).into())
} else if let Ok(pystr) = value.downcast::<PyString>() {
let val = pystr
.to_str()
.expect("could not transform Python string to Rust Unicode");
polars::lazy::dsl::repeat(val, n_times.inner).into()
Ok(polars::lazy::dsl::repeat(val, n_times.inner).into())
} else if value.is_none() {
polars::lazy::dsl::repeat(Null {}, n_times.inner).into()
Ok(polars::lazy::dsl::repeat(Null {}, n_times.inner).into())
} else {
panic!("could not convert value {:?} as a Literal", value)
Err(PyValueError::new_err(format!(
"could not convert value {:?} as a Literal",
value.str()?
)))
}
}

Expand Down
17 changes: 17 additions & 0 deletions py-polars/tests/test_object.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
import polars as pl


def test_object_when_then_4702() -> None:
    """Object literals can be used in both branches of a when/then/otherwise."""
    # please don't ever do this
    frame = pl.DataFrame({"Row": [1, 2], "Type": [pl.Date, pl.UInt8]})

    # Both branches yield Python objects, so `allow_object=True` is passed
    # to `pl.lit` for each of them.
    new_type = (
        pl.when(pl.col("Row") == 1)
        .then(pl.lit(pl.UInt16, allow_object=True))
        .otherwise(pl.lit(pl.UInt8, allow_object=True))
        .alias("New_Type")
    )
    expected = {
        "Row": [1, 2],
        "Type": [pl.Date, pl.UInt8],
        "New_Type": [pl.UInt16, pl.UInt8],
    }

    assert frame.with_column(new_type).to_dict(False) == expected

0 comments on commit a507e90

Please sign in to comment.