Skip to content

Commit

Permalink
feat(python): Improve Expr.is_between API (#5981)
Browse files: browse the repository at this point in the history
  • Loading branch information
stinodego committed Jan 2, 2023
1 parent d5793fd commit 1b04b36
Show file tree
Hide file tree
Showing 3 changed files with 70 additions and 30 deletions.
79 changes: 63 additions & 16 deletions py-polars/polars/internals/expr/expr.py
Original file line number Diff line number Diff line change
Expand Up @@ -3327,7 +3327,8 @@ def is_between(
self,
start: Expr | datetime | date | int | float,
end: Expr | datetime | date | int | float,
include_bounds: bool | tuple[bool, bool] = False,
include_bounds: bool | tuple[bool, bool] | None = None,
closed: ClosedWindow | None = None,
) -> Expr:
"""
Check if this expression is between start and end.
Expand All @@ -3339,12 +3340,16 @@ def is_between(
end
Upper bound as primitive type or datetime.
include_bounds
False: Exclude both start and end (default).
True: Include both start and end.
(False, False): Exclude start and exclude end.
(True, True): Include start and include end.
(False, True): Exclude start and include end.
(True, False): Include start and exclude end.
This argument is deprecated. Use ``closed`` instead!
- False: Exclude both start and end (default).
- True: Include both start and end.
- (False, False): Exclude start and exclude end.
- (True, True): Include start and include end.
- (False, True): Exclude start and include end.
- (True, False): Include start and exclude end.
closed : {'none', 'left', 'right', 'both'}
Define which sides of the interval are closed (inclusive). Defaults to 'none'.
Returns
-------
Expand All @@ -3367,25 +3372,67 @@ def is_between(
│ 5 ┆ false │
└─────┴────────────┘
Use the ``closed`` argument to include or exclude the values at the bounds.
>>> df.with_column(pl.col("num").is_between(2, 4, closed="left"))
shape: (5, 2)
┌─────┬────────────┐
│ num ┆ is_between │
│ --- ┆ --- │
│ i64 ┆ bool │
╞═════╪════════════╡
│ 1 ┆ false │
│ 2 ┆ true │
│ 3 ┆ true │
│ 4 ┆ false │
│ 5 ┆ false │
└─────┴────────────┘
"""
if isinstance(include_bounds, list):
if include_bounds is not None:
warnings.warn(
"include_bounds: list[bool] will not be supported in a future "
"version; pass include_bounds: tuple[bool, bool] instead",
"The `include_bounds` argument will be replaced in a future version."
" Use the `closed` argument to silence this warning.",
category=DeprecationWarning,
)
include_bounds = tuple(include_bounds)
if isinstance(include_bounds, list):
include_bounds = tuple(include_bounds)

if include_bounds is False or include_bounds == (False, False):
closed = "none"
elif include_bounds is True or include_bounds == (True, True):
closed = "both"
elif include_bounds == (False, True):
closed = "right"
elif include_bounds == (True, False):
closed = "left"
else:
raise ValueError(
"include_bounds should be a bool or tuple[bool, bool]."
)

if closed is None:
warnings.warn(
"Default behaviour will change from excluding both bounds to including"
" both bounds. Provide a value for the `closed` argument to silence"
" this warning.",
category=FutureWarning,
)
closed = "none"

if include_bounds is False or include_bounds == (False, False):
if closed == "none":
return ((self > start) & (self < end)).alias("is_between")
elif include_bounds is True or include_bounds == (True, True):
elif closed == "both":
return ((self >= start) & (self <= end)).alias("is_between")
elif include_bounds == (False, True):
elif closed == "right":
return ((self > start) & (self <= end)).alias("is_between")
elif include_bounds == (True, False):
elif closed == "left":
return ((self >= start) & (self < end)).alias("is_between")
else:
raise ValueError("include_bounds should be a bool or tuple[bool, bool].")
raise ValueError(
"closed must be one of {'left', 'right', 'both', 'none'},"
f" got {closed}"
)

def hash(
self,
Expand Down
20 changes: 6 additions & 14 deletions py-polars/tests/unit/test_lazy.py
Original file line number Diff line number Diff line change
Expand Up @@ -1168,51 +1168,43 @@ def test_quantile(fruits_cars: pl.DataFrame) -> None:
assert fruits_cars.select(pl.col("A").quantile(0.24, "linear"))["A"][0] == 1.96


@pytest.mark.filterwarnings("ignore::FutureWarning")
def test_is_between(fruits_cars: pl.DataFrame) -> None:
result = fruits_cars.select(pl.col("A").is_between(2, 4))["is_between"]
assert result.series_equal(
pl.Series("is_between", [False, False, True, False, False])
)

result = fruits_cars.select(pl.col("A").is_between(2, 4, False))["is_between"]
assert result.series_equal(
pl.Series("is_between", [False, False, True, False, False])
)

result = fruits_cars.select(pl.col("A").is_between(2, 4, (False, False)))[
result = fruits_cars.select(pl.col("A").is_between(2, 4, closed="none"))[
"is_between"
]
assert result.series_equal(
pl.Series("is_between", [False, False, True, False, False])
)

result = fruits_cars.select(pl.col("A").is_between(2, 4, True))["is_between"]
assert result.series_equal(
pl.Series("is_between", [False, True, True, True, False])
)

result = fruits_cars.select(pl.col("A").is_between(2, 4, (True, True)))[
result = fruits_cars.select(pl.col("A").is_between(2, 4, closed="both"))[
"is_between"
]
assert result.series_equal(
pl.Series("is_between", [False, True, True, True, False])
)

result = fruits_cars.select(pl.col("A").is_between(2, 4, (False, True)))[
result = fruits_cars.select(pl.col("A").is_between(2, 4, closed="right"))[
"is_between"
]
assert result.series_equal(
pl.Series("is_between", [False, False, True, True, False])
)

result = fruits_cars.select(pl.col("A").is_between(2, 4, (True, False)))[
result = fruits_cars.select(pl.col("A").is_between(2, 4, closed="left"))[
"is_between"
]
assert result.series_equal(
pl.Series("is_between", [False, True, True, False, False])
)


@pytest.mark.filterwarnings("ignore::FutureWarning")
def test_is_between_data_types() -> None:
df = pl.DataFrame(
{
Expand Down
1 change: 1 addition & 0 deletions py-polars/tests/unit/test_series.py
Original file line number Diff line number Diff line change
Expand Up @@ -2050,6 +2050,7 @@ def test_to_physical() -> None:
verify_series_and_expr_api(a, expected, "to_physical")


@pytest.mark.filterwarnings("ignore::FutureWarning")
def test_is_between_datetime() -> None:
s = pl.Series("a", [datetime(2020, 1, 1, 10, 0, 0), datetime(2020, 1, 1, 20, 0, 0)])
start = datetime(2020, 1, 1, 12, 0, 0)
Expand Down

0 comments on commit 1b04b36

Please sign in to comment.