Skip to content

Commit

Permalink
depr(python): Deprecate default delimiter value for str.concat (pol…
Browse files Browse the repository at this point in the history
  • Loading branch information
stinodego authored and r-brink committed Jan 22, 2024
1 parent 303674c commit ecda888
Show file tree
Hide file tree
Showing 4 changed files with 98 additions and 86 deletions.
20 changes: 14 additions & 6 deletions py-polars/polars/expr/string.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
from polars.utils.deprecation import (
deprecate_renamed_function,
deprecate_renamed_parameter,
issue_deprecation_warning,
rename_use_earliest_to_ambiguous,
)
from polars.utils.various import find_stacklevel
Expand Down Expand Up @@ -448,19 +449,20 @@ def len_chars(self) -> Expr:
"""
return wrap_expr(self._pyexpr.str_len_chars())

def concat(self, delimiter: str = "-", *, ignore_nulls: bool = True) -> Expr:
def concat(
self, delimiter: str | None = None, *, ignore_nulls: bool = True
) -> Expr:
"""
Vertically concat the values in the Series to a single string value.
Vertically concatenate the string values in the column to a single string value.
Parameters
----------
delimiter
The delimiter to insert between consecutive string values.
ignore_nulls
Ignore null values (default).
If set to ``False``, null values will be propagated.
if the column contains any null values, the output is ``None``.
If set to `False`, null values will be propagated. This means that
if the column contains any null values, the output is null.
Returns
-------
Expand All @@ -479,7 +481,6 @@ def concat(self, delimiter: str = "-", *, ignore_nulls: bool = True) -> Expr:
╞═════╡
│ 1-2 │
└─────┘
>>> df = pl.DataFrame({"foo": [1, None, 2]})
>>> df.select(pl.col("foo").str.concat("-", ignore_nulls=False))
shape: (1, 1)
┌──────┐
Expand All @@ -490,6 +491,13 @@ def concat(self, delimiter: str = "-", *, ignore_nulls: bool = True) -> Expr:
│ null │
└──────┘
"""
if delimiter is None:
issue_deprecation_warning(
"The default `delimiter` for `str.concat` will change from '-' to an empty string."
" Pass a delimiter to silence this warning.",
version="0.20.5",
)
delimiter = "-"
return wrap_expr(self._pyexpr.str_concat(delimiter, ignore_nulls))

def to_uppercase(self) -> Expr:
Expand Down
11 changes: 6 additions & 5 deletions py-polars/polars/series/string.py
Original file line number Diff line number Diff line change
Expand Up @@ -385,19 +385,20 @@ def len_chars(self) -> Series:
]
"""

def concat(self, delimiter: str = "-", *, ignore_nulls: bool = True) -> Series:
def concat(
self, delimiter: str | None = None, *, ignore_nulls: bool = True
) -> Series:
"""
Vertically concat the values in the Series to a single string value.
Vertically concatenate the string values in the column to a single string value.
Parameters
----------
delimiter
The delimiter to insert between consecutive string values.
ignore_nulls
Ignore null values (default).
If set to ``False``, null values will be propagated.
if the column contains any null values, the output is ``None``.
If set to `False`, null values will be propagated. This means that
if the column contains any null values, the output is null.
Returns
-------
Expand Down
78 changes: 78 additions & 0 deletions py-polars/tests/unit/namespaces/string/test_concat.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
from datetime import datetime

import pytest

import polars as pl
from polars.testing import assert_series_equal


def test_str_concat() -> None:
    """Check `Series.str.concat` with real nulls and with null-looking strings."""
    null_result = pl.Series([None], dtype=pl.String)
    with_nulls = pl.Series(["1", None, "2", None])

    # With `ignore_nulls=False` the nulls in the input make the result null.
    propagated = with_nulls.str.concat("-", ignore_nulls=False)
    assert_series_equal(propagated, null_result)

    # By default the nulls are skipped and only real values are joined.
    skipped = with_nulls.str.concat("-")
    assert_series_equal(skipped, pl.Series(["1-2"]))

    # The strings "None" and "null" are ordinary values, not missing data.
    lookalikes = pl.Series(["1", "None", "2", "null"])
    joined = pl.Series(["1-None-2-null"])
    assert_series_equal(lookalikes.str.concat("-", ignore_nulls=False), joined)
    assert_series_equal(lookalikes.str.concat("-"), joined)


def test_str_concat2() -> None:
    """Check `Expr.str.concat` on an integer column that contains nulls."""
    frame = pl.DataFrame({"foo": [1, None, 2, None]})
    column = pl.col("foo")

    # Null propagation: the single output value is null.
    propagated = frame.select(column.str.concat("-", ignore_nulls=False)).item()
    assert propagated is None

    # Default behaviour: nulls are dropped before joining.
    skipped = frame.select(column.str.concat("-")).item()
    assert skipped == "1-2"


def test_str_concat_all_null() -> None:
    """
    Check `str.concat` on String columns that contain nothing but nulls.

    Both a one-row and a three-row column are exercised, so the single-null
    boundary case is covered alongside the multi-null case.
    """
    for n_rows in (1, 3):
        s = pl.Series([None] * n_rows, dtype=pl.String)
        # Propagating nulls yields a single null value.
        assert_series_equal(
            s.str.concat("-", ignore_nulls=False), pl.Series([None], dtype=pl.String)
        )
        # Ignoring nulls leaves nothing to join, which yields an empty string.
        assert_series_equal(s.str.concat("-", ignore_nulls=True), pl.Series([""]))


def test_str_concat_empty_list() -> None:
    """An empty String Series concatenates to "" for either `ignore_nulls` value."""
    empty = pl.Series([], dtype=pl.String)
    for ignore in (False, True):
        result = empty.str.concat("-", ignore_nulls=ignore)
        assert_series_equal(result, pl.Series([""]))


def test_str_concat_empty_list2() -> None:
    """An empty String column concatenates to "" through the expression API."""
    frame = pl.DataFrame({"foo": pl.Series([], dtype=pl.String)})
    joined = frame.select(pl.col("foo").str.concat("-"))
    assert joined.item() == ""


def test_str_concat_empty_list_agg_context() -> None:
    """A group left empty by `drop_nulls` concatenates to "" inside `agg`."""
    frame = pl.DataFrame(data={"i": [1], "v": [None]}, schema_overrides={"v": pl.String})
    # The only value in the group is null, so nothing remains after drop_nulls.
    non_null_joined = pl.col("v").drop_nulls().str.concat("-")
    aggregated = frame.group_by("i").agg(non_null_joined)
    assert aggregated["v"].item() == ""


def test_str_concat_datetime() -> None:
    """Check `str.concat` on a datetime column with a custom delimiter."""
    frame = pl.DataFrame({"d": [datetime(2020, 1, 1), None, datetime(2022, 1, 1)]})
    column = pl.col("d")

    # Nulls ignored: the two datetimes are rendered as strings and joined.
    skipped = frame.select(column.str.concat("|", ignore_nulls=True)).item()
    assert skipped == "2020-01-01 00:00:00.000000|2022-01-01 00:00:00.000000"

    # Nulls propagated: the null row makes the result null.
    propagated = frame.select(column.str.concat("|", ignore_nulls=False)).item()
    assert propagated is None


def test_str_concat_delimiter_deprecated() -> None:
    """
    Omitting `delimiter` warns about the deprecation but still joins with "-".

    The warning is matched on its text so that an unrelated DeprecationWarning
    raised during the call cannot make this test pass by accident.
    """
    s = pl.Series(["1", None, "2", None])
    with pytest.deprecated_call(match="delimiter"):
        result = s.str.concat()
    # Until the default changes, the legacy "-" delimiter is still applied.
    expected = pl.Series(["1-2"])
    assert_series_equal(result, expected)
75 changes: 0 additions & 75 deletions py-polars/tests/unit/namespaces/string/test_string.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,5 @@
from __future__ import annotations

from datetime import datetime
from typing import cast

import pytest

import polars as pl
Expand Down Expand Up @@ -49,78 +46,6 @@ def test_str_slice_expr() -> None:
df.select(pl.col("a").str.slice(0, -1))


def test_str_concat() -> None:
    """
    Check `Series.str.concat` with real nulls and with null-looking strings.

    The delimiter is always passed explicitly, because calling `str.concat`
    without one emits a deprecation warning.
    """
    s = pl.Series(["1", None, "2", None])
    # propagate null
    assert_series_equal(
        s.str.concat("-", ignore_nulls=False), pl.Series([None], dtype=pl.String)
    )
    # ignore null
    assert_series_equal(s.str.concat("-"), pl.Series(["1-2"]))

    # the strings "None" and "null" are regular values, not missing data
    s = pl.Series(["1", "None", "2", "null"])
    assert_series_equal(
        s.str.concat("-", ignore_nulls=False), pl.Series(["1-None-2-null"])
    )
    assert_series_equal(s.str.concat("-"), pl.Series(["1-None-2-null"]))


def test_str_concat2() -> None:
    """
    Check `Expr.str.concat` on an integer column that contains nulls.

    The results are compared directly: wrapping a value in `cast(str, ...)` and
    then asserting it is None contradicts the declared type and has no runtime
    effect.
    """
    df = pl.DataFrame({"foo": [1, None, 2, None]})

    # Null propagation: the single output value is null.
    out = df.select(pl.col("foo").str.concat("-", ignore_nulls=False))
    assert out.item() is None

    # Default behaviour: nulls are dropped before joining.
    out = df.select(pl.col("foo").str.concat("-"))
    assert out.item() == "1-2"


def test_str_concat_all_null() -> None:
    """
    Check `str.concat` on a String Series that contains only nulls.

    The delimiter is passed explicitly, because calling `str.concat` without
    one emits a deprecation warning.
    """
    s = pl.Series([None, None, None], dtype=pl.String)
    # Propagating nulls yields a single null value.
    assert_series_equal(
        s.str.concat("-", ignore_nulls=False), pl.Series([None], dtype=pl.String)
    )
    # Ignoring nulls leaves nothing to join, which yields an empty string.
    assert_series_equal(s.str.concat("-", ignore_nulls=True), pl.Series([""]))


def test_str_concat_single_null() -> None:
    """
    Check `str.concat` on a String Series holding exactly one null.

    The delimiter is passed explicitly, because calling `str.concat` without
    one emits a deprecation warning.
    """
    s = pl.Series([None], dtype=pl.String)
    # Propagating the null yields a single null value.
    assert_series_equal(
        s.str.concat("-", ignore_nulls=False), pl.Series([None], dtype=pl.String)
    )
    # Ignoring the null leaves nothing to join, which yields an empty string.
    assert_series_equal(s.str.concat("-", ignore_nulls=True), pl.Series([""]))


def test_str_concat_empty_list() -> None:
    """
    An empty String Series concatenates to "" for either `ignore_nulls` value.

    The delimiter is passed explicitly, because calling `str.concat` without
    one emits a deprecation warning.
    """
    s = pl.Series([], dtype=pl.String)
    assert_series_equal(s.str.concat("-", ignore_nulls=False), pl.Series([""]))
    assert_series_equal(s.str.concat("-", ignore_nulls=True), pl.Series([""]))


def test_str_concat_empty_list2() -> None:
    """
    An empty String column concatenates to "" through the expression API.

    The delimiter is passed explicitly, because calling `str.concat` without
    one emits a deprecation warning.
    """
    s = pl.Series([], dtype=pl.String)
    df = pl.DataFrame({"foo": s})
    result = df.select(pl.col("foo").str.concat("-")).item()
    expected = ""
    assert result == expected


def test_str_concat_empty_list_agg_context() -> None:
    """
    A group left empty by `drop_nulls` concatenates to "" inside `agg`.

    The delimiter is passed explicitly, because calling `str.concat` without
    one emits a deprecation warning.
    """
    df = pl.DataFrame(data={"i": [1], "v": [None]}, schema_overrides={"v": pl.String})
    # The only value in the group is null, so nothing remains after drop_nulls.
    result = df.group_by("i").agg(pl.col("v").drop_nulls().str.concat("-"))["v"].item()
    expected = ""
    assert result == expected


def test_str_concat_datetime() -> None:
    """
    Check `str.concat` on a datetime column with a custom delimiter.

    The results are compared directly: wrapping a value in `cast(str, ...)` and
    then asserting it is None contradicts the declared type and has no runtime
    effect.
    """
    df = pl.DataFrame({"d": [datetime(2020, 1, 1), None, datetime(2022, 1, 1)]})

    # Nulls ignored: the two datetimes are rendered as strings and joined.
    out = df.select(pl.col("d").str.concat("|", ignore_nulls=True))
    assert out.item() == "2020-01-01 00:00:00.000000|2022-01-01 00:00:00.000000"

    # Nulls propagated: the null row makes the result null.
    out = df.select(pl.col("d").str.concat("|", ignore_nulls=False))
    assert out.item() is None


def test_str_len_bytes() -> None:
s = pl.Series(["Café", None, "345", "東京"])
result = s.str.len_bytes()
Expand Down

0 comments on commit ecda888

Please sign in to comment.