Skip to content

Commit

Permalink
depr(python): Rename Series/Expr.apply to map_elements (#10678)
Browse files Browse the repository at this point in the history
  • Loading branch information
ritchie46 committed Aug 27, 2023
1 parent be38947 commit f3142cc
Show file tree
Hide file tree
Showing 32 changed files with 329 additions and 235 deletions.
1 change: 1 addition & 0 deletions py-polars/docs/source/reference/expressions/functions.rst
Expand Up @@ -97,6 +97,7 @@ These functions are available from the polars module root and can be used as exp
Expr.head
Expr.implode
Expr.map
Expr.map_elements
Expr.max
Expr.mean
Expr.median
Expand Down
1 change: 1 addition & 0 deletions py-polars/docs/source/reference/series/miscellaneous.rst
Expand Up @@ -7,6 +7,7 @@ Miscellaneous
:toctree: api/

Series.apply
Series.map_elements
Series.reinterpret
Series.series_equal
Series.set_sorted
Expand Down
4 changes: 2 additions & 2 deletions py-polars/polars/exceptions.py
Expand Up @@ -86,7 +86,7 @@ class ChronoFormatWarning(Warning):
"""


class PolarsInefficientApplyWarning(Warning):
class PolarsInefficientMapWarning(Warning):
"""
Warning raised when a potentially slow `map_elements` (previously `apply`) operation is performed.
Expand All @@ -103,7 +103,7 @@ class PolarsInefficientApplyWarning(Warning):
"InvalidOperationError",
"NoDataError",
"NoRowsReturnedError",
"PolarsInefficientApplyWarning",
"PolarsInefficientMapWarning",
"PolarsPanicError",
"RowsError",
"SchemaError",
Expand Down
92 changes: 73 additions & 19 deletions py-polars/polars/expr/expr.py
Expand Up @@ -35,7 +35,7 @@
)
from polars.dependencies import _check_for_numpy
from polars.dependencies import numpy as np
from polars.exceptions import PolarsInefficientApplyWarning
from polars.exceptions import PolarsInefficientMapWarning
from polars.expr.array import ExprArrayNameSpace
from polars.expr.binary import ExprBinaryNameSpace
from polars.expr.categorical import ExprCatNameSpace
Expand All @@ -52,6 +52,7 @@
from polars.utils.deprecation import (
deprecate_function,
deprecate_nonkeyword_arguments,
deprecate_renamed_function,
deprecate_renamed_parameter,
warn_closed_future_change,
)
Expand All @@ -70,11 +71,11 @@

from polars import DataFrame, LazyFrame, Series
from polars.type_aliases import (
ApplyStrategy,
ClosedInterval,
FillNullStrategy,
InterpolationMethod,
IntoExpr,
MapElementsStrategy,
NullBehavior,
PolarsDataType,
PythonLiteral,
Expand Down Expand Up @@ -3692,17 +3693,17 @@ def map(
return_dtype = py_type_to_dtype(return_dtype)
return self._from_pyexpr(self._pyexpr.map(function, return_dtype, agg_list))

def apply(
def map_elements(
self,
function: Callable[[Series], Series] | Callable[[Any], Any],
return_dtype: PolarsDataType | None = None,
*,
skip_nulls: bool = True,
pass_name: bool = False,
strategy: ApplyStrategy = "thread_local",
strategy: MapElementsStrategy = "thread_local",
) -> Self:
"""
Apply a custom/user-defined function (UDF) in a GroupBy or Projection context.
Map a custom/user-defined function (UDF) in a GroupBy or Projection context.
.. warning::
This method is much slower than the native expressions API.
Expand All @@ -3720,13 +3721,13 @@ def apply(
Parameters
----------
function
Lambda/ function to apply.
Lambda/ function to map.
return_dtype
Dtype of the output Series.
If not set, the dtype will be
``polars.Unknown``.
skip_nulls
Don't apply the function over values
Don't map the function over values
that contain nulls. This is faster.
pass_name
Pass the Series name to the custom function
Expand All @@ -3744,7 +3745,7 @@ def apply(
Notes
-----
* Using ``apply`` is strongly discouraged as you will be effectively running
* Using ``map_elements`` is strongly discouraged as you will be effectively
running python "for" loops. This will be very slow. Wherever possible you
should strongly prefer the native expression API to achieve the best
performance.
Expand All @@ -3769,7 +3770,7 @@ def apply(
In a selection context, the function is applied by row.
>>> df.with_columns( # doctest: +SKIP
... pl.col("a").apply(lambda x: x * 2).alias("a_times_2"),
... pl.col("a").map_elements(lambda x: x * 2).alias("a_times_2"),
... )
shape: (4, 3)
┌─────┬─────┬───────────┐
Expand All @@ -3792,7 +3793,7 @@ def apply(
In a GroupBy context the function is applied by group:
>>> df.lazy().group_by("b", maintain_order=True).agg(
... pl.col("a").apply(lambda x: x.sum())
... pl.col("a").map_elements(lambda x: x.sum())
... ).collect()
shape: (3, 2)
┌─────┬─────┐
Expand All @@ -3813,11 +3814,11 @@ def apply(
"""
# input x: Series of type list containing the group values
from polars.utils.udfs import warn_on_inefficient_apply
from polars.utils.udfs import warn_on_inefficient_map

root_names = self.meta.root_names()
if len(root_names) > 0:
warn_on_inefficient_apply(function, columns=root_names, apply_target="expr")
warn_on_inefficient_map(function, columns=root_names, map_target="expr")

if pass_name:

Expand All @@ -3826,17 +3827,17 @@ def inner(s: Series) -> Series: # pragma: no cover
return function(s.alias(x.name))

with warnings.catch_warnings():
warnings.simplefilter("ignore", PolarsInefficientApplyWarning)
return x.apply(
warnings.simplefilter("ignore", PolarsInefficientMapWarning)
return x.map_elements(
inner, return_dtype=return_dtype, skip_nulls=skip_nulls
)

else:

def wrap_f(x: Series) -> Series: # pragma: no cover
with warnings.catch_warnings():
warnings.simplefilter("ignore", PolarsInefficientApplyWarning)
return x.apply(
warnings.simplefilter("ignore", PolarsInefficientMapWarning)
return x.map_elements(
function, return_dtype=return_dtype, skip_nulls=skip_nulls
)

Expand Down Expand Up @@ -4748,10 +4749,13 @@ def xor(self, other: Any) -> Self:
... schema={"x": pl.UInt8, "y": pl.UInt8},
... )
>>> df.with_columns(
... pl.col("x").apply(binary_string).alias("bin_x"),
... pl.col("y").apply(binary_string).alias("bin_y"),
... pl.col("x").map_elements(binary_string).alias("bin_x"),
... pl.col("y").map_elements(binary_string).alias("bin_y"),
... pl.col("x").xor(pl.col("y")).alias("xor_xy"),
... pl.col("x").xor(pl.col("y")).apply(binary_string).alias("bin_xor_xy"),
... pl.col("x")
... .xor(pl.col("y"))
... .map_elements(binary_string)
... .alias("bin_xor_xy"),
... )
shape: (4, 6)
┌─────┬─────┬──────────┬──────────┬────────┬────────────┐
Expand Down Expand Up @@ -9023,6 +9027,56 @@ def inner(s: Series) -> Series:
func = inner_with_default if default is not None else inner
return self.map(func)

@deprecate_renamed_function("map_elements", version="0.19.0")
def apply(
    self,
    function: Callable[[Series], Series] | Callable[[Any], Any],
    return_dtype: PolarsDataType | None = None,
    *,
    skip_nulls: bool = True,
    pass_name: bool = False,
    strategy: MapElementsStrategy = "thread_local",
) -> Self:
    """
    Run a custom/user-defined function (UDF) in a GroupBy or Projection context.

    .. deprecated:: 0.19.0
        This method has been renamed to :func:`Expr.map_elements`.

    Parameters
    ----------
    function
        Lambda or function to apply.
    return_dtype
        Dtype of the output Series. When left unset, the dtype will be
        ``polars.Unknown``.
    skip_nulls
        Do not call the function on values that contain nulls. This is faster.
    pass_name
        Pass the Series name to the custom function. This is more expensive.
    strategy : {'thread_local', 'threading'}
        This functionality is in `alpha` stage and may be removed or changed
        without it being considered a breaking change.

        - 'thread_local': run the python function on a single thread.
        - 'threading': run the python function on separate threads. Use with
          care as this can slow performance. This might only speed up your
          code if the amount of work per element is significant and the
          python function releases the GIL (e.g. via calling a C function).

    """
    # Deprecated alias: every argument is handed to the renamed method unchanged.
    renamed_method = self.map_elements
    return renamed_method(
        function,
        return_dtype=return_dtype,
        skip_nulls=skip_nulls,
        pass_name=pass_name,
        strategy=strategy,
    )

@property
def bin(self) -> ExprBinaryNameSpace:
"""
Expand Down
40 changes: 35 additions & 5 deletions py-polars/polars/series/series.py
Expand Up @@ -92,6 +92,7 @@
)
from polars.utils.deprecation import (
deprecate_nonkeyword_arguments,
deprecate_renamed_function,
deprecate_renamed_parameter,
issue_deprecation_warning,
)
Expand Down Expand Up @@ -4824,15 +4825,15 @@ def tanh(self) -> Series:
"""

def apply(
def map_elements(
self,
function: Callable[[Any], Any],
return_dtype: PolarsDataType | None = None,
*,
skip_nulls: bool = True,
) -> Self:
"""
Apply a custom/user-defined function (UDF) over elements in this Series.
Map a custom/user-defined function (UDF) over elements in this Series.
.. warning::
This method is much slower than the native expressions API.
Expand Down Expand Up @@ -4879,7 +4880,7 @@ def apply(
Examples
--------
>>> s = pl.Series("a", [1, 2, 3])
>>> s.apply(lambda x: x + 10) # doctest: +SKIP
>>> s.map_elements(lambda x: x + 10) # doctest: +SKIP
shape: (3,)
Series: 'a' [i64]
[
Expand All @@ -4893,14 +4894,14 @@ def apply(
Series
"""
from polars.utils.udfs import warn_on_inefficient_apply
from polars.utils.udfs import warn_on_inefficient_map

if return_dtype is None:
pl_return_dtype = None
else:
pl_return_dtype = py_type_to_dtype(return_dtype)

warn_on_inefficient_apply(function, columns=[self.name], apply_target="series")
warn_on_inefficient_map(function, columns=[self.name], map_target="series")
return self._from_pyseries(
self._s.apply_lambda(function, pl_return_dtype, skip_nulls)
)
Expand Down Expand Up @@ -6557,6 +6558,35 @@ def get_chunks(self) -> list[Series]:
def implode(self) -> Self:
"""Aggregate values into a list."""

@deprecate_renamed_function("map_elements", version="0.19.0")
def apply(
    self,
    function: Callable[[Any], Any],
    return_dtype: PolarsDataType | None = None,
    *,
    skip_nulls: bool = True,
) -> Self:
    """
    Apply a custom/user-defined function (UDF) over elements in this Series.

    .. deprecated:: 0.19.0
        This method has been renamed to :func:`Series.map_elements`.

    Parameters
    ----------
    function
        Custom function or lambda.
    return_dtype
        Output datatype. If none is given, the same datatype as this Series will be
        used.
    skip_nulls
        Nulls will be skipped and not passed to the python function.
        This is faster because python can be skipped and because we call
        more specialized functions.

    Returns
    -------
    Self
        Whatever :func:`Series.map_elements` returns for the same arguments.

    """
    # Forward every option by keyword, as the ``Expr.apply`` alias does, so the
    # wrapper does not depend on the positional order of ``map_elements``.
    return self.map_elements(
        function,
        return_dtype=return_dtype,
        skip_nulls=skip_nulls,
    )

# Keep the `list` and `str` properties below at the end of the definition of Series,
# as to not confuse mypy with the type annotation `str` and `list`

Expand Down
2 changes: 1 addition & 1 deletion py-polars/polars/type_aliases.py
Expand Up @@ -122,7 +122,7 @@
TimeUnit: TypeAlias = Literal["ns", "us", "ms"]
UniqueKeepStrategy: TypeAlias = Literal["first", "last", "any", "none"]
UnstackDirection: TypeAlias = Literal["vertical", "horizontal"]
ApplyStrategy: TypeAlias = Literal["thread_local", "threading"]
MapElementsStrategy: TypeAlias = Literal["thread_local", "threading"]

# The following have a Rust enum equivalent with a different name
AsofJoinStrategy: TypeAlias = Literal["backward", "forward", "nearest"] # AsofStrategy
Expand Down

0 comments on commit f3142cc

Please sign in to comment.