From 212622bf776351112743cb07841e39682fc17857 Mon Sep 17 00:00:00 2001 From: Kapil <90269125+kapiliyer@users.noreply.github.com> Date: Tue, 2 Aug 2022 01:08:52 -0400 Subject: [PATCH 1/9] ENH: Support For Interval __contains__ Other Interval (#46613) --- doc/source/whatsnew/v1.5.0.rst | 1 + pandas/_libs/interval.pyi | 11 +++- pandas/_libs/interval.pyx | 14 +++-- pandas/tests/scalar/interval/test_interval.py | 4 -- pandas/tests/scalar/interval/test_ops.py | 55 +++++++++++++++++++ 5 files changed, 74 insertions(+), 11 deletions(-) diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst index bdf811f6a8f6a..8eb1b54d93118 100644 --- a/doc/source/whatsnew/v1.5.0.rst +++ b/doc/source/whatsnew/v1.5.0.rst @@ -282,6 +282,7 @@ Other enhancements - :class:`Series` reducers (e.g. ``min``, ``max``, ``sum``, ``mean``) will now successfully operate when the dtype is numeric and ``numeric_only=True`` is provided; previously this would raise a ``NotImplementedError`` (:issue:`47500`) - :meth:`RangeIndex.union` now can return a :class:`RangeIndex` instead of a :class:`Int64Index` if the resulting values are equally spaced (:issue:`47557`, :issue:`43885`) - :meth:`DataFrame.compare` now accepts an argument ``result_names`` to allow the user to specify the result's names of both left and right DataFrame which are being compared. This is by default ``'self'`` and ``'other'`` (:issue:`44354`) +- :class:`Interval` now supports checking whether one interval is inside of another interval (:issue:`46613`) .. --------------------------------------------------------------------------- .. _whatsnew_150.notable_bug_fixes: diff --git a/pandas/_libs/interval.pyi b/pandas/_libs/interval.pyi index b27d2b5f8fd4d..9b73e9d0bf54a 100644 --- a/pandas/_libs/interval.pyi +++ b/pandas/_libs/interval.pyi @@ -79,10 +79,17 @@ class Interval(IntervalMixin, Generic[_OrderableT]): def __hash__(self) -> int: ... @overload def __contains__( - self: Interval[_OrderableTimesT], key: _OrderableTimesT + self: Interval[Timedelta], key: Timedelta | Interval[Timedelta] ) -> bool: ... @overload - def __contains__(self: Interval[_OrderableScalarT], key: float) -> bool: ... + def __contains__( + self: Interval[Timestamp], key: Timestamp | Interval[Timestamp] + ) -> bool: ... + @overload + def __contains__( + self: Interval[_OrderableScalarT], + key: _OrderableScalarT | Interval[_OrderableScalarT], + ) -> bool: ... @overload def __add__( self: Interval[_OrderableTimesT], y: Timedelta diff --git a/pandas/_libs/interval.pyx b/pandas/_libs/interval.pyx index ec1dbff6903e7..f6e4304b77fee 100644 --- a/pandas/_libs/interval.pyx +++ b/pandas/_libs/interval.pyx @@ -297,10 +297,12 @@ cdef class Interval(IntervalMixin): >>> iv Interval(0, 5, inclusive='right') - You can check if an element belongs to it + You can check if an element belongs to it, or if it contains another interval: >>> 2.5 in iv True + >>> pd.Interval(left=2, right=5, inclusive='both') in iv + True You can test the bounds (``inclusive='right'``, so ``0 < x <= 5``): @@ -409,10 +411,12 @@ cdef class Interval(IntervalMixin): return hash((self.left, self.right, self.inclusive)) def __contains__(self, key) -> bool: - if _interval_like(key): - raise TypeError("__contains__ not defined for two intervals") - return ((self.left < key if self.open_left else self.left <= key) and - (key < self.right if self.open_right else key <= self.right)) + if isinstance(key, Interval): + return ((self.left < key.left if self.open_left and key.closed_left else self.left <= key.left) and + (key.right < self.right if self.open_right and key.closed_right else key.right <= self.right)) + elif isinstance(key, _Timestamp) or is_timedelta64_object(key) or is_float_object(key) or is_integer_object(key): + return ((self.left < key if self.open_left else self.left <= key) and + (key < self.right if self.open_right else key <= self.right)) def __richcmp__(self, other, op: int): if isinstance(other, Interval): diff --git a/pandas/tests/scalar/interval/test_interval.py b/pandas/tests/scalar/interval/test_interval.py index 878b5e6ec0167..c5644b2f36ead 100644 --- a/pandas/tests/scalar/interval/test_interval.py +++ b/pandas/tests/scalar/interval/test_interval.py @@ -36,10 +36,6 @@ def test_contains(self, interval): assert 1 in interval assert 0 not in interval - msg = "__contains__ not defined for two intervals" - with pytest.raises(TypeError, match=msg): - interval in interval - interval_both = Interval(0, 1, "both") assert 0 in interval_both assert 1 in interval_both diff --git a/pandas/tests/scalar/interval/test_ops.py b/pandas/tests/scalar/interval/test_ops.py index 9fe40c208d880..27cc44a009af9 100644 --- a/pandas/tests/scalar/interval/test_ops.py +++ b/pandas/tests/scalar/interval/test_ops.py @@ -66,3 +66,58 @@ def test_overlaps_invalid_type(self, other): msg = f"`other` must be an Interval, got {type(other).__name__}" with pytest.raises(TypeError, match=msg): interval.overlaps(other) + + +class TestContains: + @pytest.mark.parametrize( + "closed", + ["neither", "left", "right"], + ) + def test_contains_interval(self, closed): + interval1 = Interval(0, 1, "both") + interval2 = Interval(0, 1, closed) + assert interval1 in interval1 + assert interval2 in interval2 + assert interval2 in interval1 + assert interval1 not in interval2 + + def test_contains_infinite_length(self): + interval1 = Interval(0, 1, "both") + interval2 = Interval(float("-inf"), float("inf"), "neither") + assert interval1 in interval2 + assert interval2 not in interval1 + + def test_contains_zero_length(self): + interval1 = Interval(0, 1, "both") + interval2 = Interval(-1, -1, "both") + interval3 = Interval(0.5, 0.5, "both") + assert interval2 not in interval1 + assert interval3 in interval1 + assert interval2 not in interval3 and interval3 not in interval2 + assert interval1 not in interval2 and interval1 not in interval3 + + @pytest.mark.parametrize( + "type1", + [ + (0, 1), + (Timestamp(2000, 1, 1, 0), Timestamp(2000, 1, 1, 1)), + (Timedelta("0h"), Timedelta("1h")), + ], + ) + @pytest.mark.parametrize( + "type2", + [ + (0, 1), + (Timestamp(2000, 1, 1, 0), Timestamp(2000, 1, 1, 1)), + (Timedelta("0h"), Timedelta("1h")), + ], + ) + def test_contains_mixed_types(self, type1, type2): + interval1 = Interval(*type1) + interval2 = Interval(*type2) + if type1 == type2: + assert interval1 in interval2 + else: + msg = "^'<=' not supported between instances of" + with pytest.raises(TypeError, match=msg): + interval1 in interval2 From 87ee5baa05d37e72b0ab4167397d3de44dc1cf45 Mon Sep 17 00:00:00 2001 From: Kapil <90269125+kapiliyer@users.noreply.github.com> Date: Tue, 2 Aug 2022 17:20:33 -0400 Subject: [PATCH 2/9] ENH: Support For Interval __contains__ Other Interval (#46613) --- pandas/_libs/interval.pyx | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/pandas/_libs/interval.pyx b/pandas/_libs/interval.pyx index f6e4304b77fee..6c79f494ea549 100644 --- a/pandas/_libs/interval.pyx +++ b/pandas/_libs/interval.pyx @@ -414,9 +414,8 @@ cdef class Interval(IntervalMixin): if isinstance(key, Interval): return ((self.left < key.left if self.open_left and key.closed_left else self.left <= key.left) and (key.right < self.right if self.open_right and key.closed_right else key.right <= self.right)) - elif isinstance(key, _Timestamp) or is_timedelta64_object(key) or is_float_object(key) or is_integer_object(key): - return ((self.left < key if self.open_left else self.left <= key) and - (key < self.right if self.open_right else key <= self.right)) + return ((self.left < key if self.open_left else self.left <= key) and + (key < self.right if self.open_right else key <= self.right)) def __richcmp__(self, other, op: int): if isinstance(other, Interval): From 6dc7da3fbe8f11623abb575da2f5c399babeab99 Mon Sep 17 00:00:00 2001 From: "Kapil E. Iyer" <90269125+kapiliyer@users.noreply.github.com> Date: Wed, 3 Aug 2022 11:19:47 -0400 Subject: [PATCH 3/9] Update doc/source/whatsnew/v1.5.0.rst Co-authored-by: Valentin Iovene --- doc/source/whatsnew/v1.5.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst index 8eb1b54d93118..7c5ef248392e5 100644 --- a/doc/source/whatsnew/v1.5.0.rst +++ b/doc/source/whatsnew/v1.5.0.rst @@ -282,7 +282,7 @@ Other enhancements - :class:`Series` reducers (e.g. ``min``, ``max``, ``sum``, ``mean``) will now successfully operate when the dtype is numeric and ``numeric_only=True`` is provided; previously this would raise a ``NotImplementedError`` (:issue:`47500`) - :meth:`RangeIndex.union` now can return a :class:`RangeIndex` instead of a :class:`Int64Index` if the resulting values are equally spaced (:issue:`47557`, :issue:`43885`) - :meth:`DataFrame.compare` now accepts an argument ``result_names`` to allow the user to specify the result's names of both left and right DataFrame which are being compared. This is by default ``'self'`` and ``'other'`` (:issue:`44354`) -- :class:`Interval` now supports checking whether one interval is inside of another interval (:issue:`46613`) +- :class:`Interval` now supports checking whether one interval is contained by another interval (:issue:`46613`) .. --------------------------------------------------------------------------- .. _whatsnew_150.notable_bug_fixes: From 29336eb42f5fbf1fa4fe6633fe8ed14662b3e638 Mon Sep 17 00:00:00 2001 From: Kapil <90269125+kapiliyer@users.noreply.github.com> Date: Wed, 3 Aug 2022 17:20:52 -0400 Subject: [PATCH 4/9] ENH: Support For Interval __contains__ Other Interval (#46613) --- pandas/_libs/interval.pyx | 8 +++++--- pandas/core/indexes/range.py | 2 +- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/pandas/_libs/interval.pyx b/pandas/_libs/interval.pyx index 6c79f494ea549..31a733434375e 100644 --- a/pandas/_libs/interval.pyx +++ b/pandas/_libs/interval.pyx @@ -411,9 +411,11 @@ cdef class Interval(IntervalMixin): return hash((self.left, self.right, self.inclusive)) def __contains__(self, key) -> bool: - if isinstance(key, Interval): - return ((self.left < key.left if self.open_left and key.closed_left else self.left <= key.left) and - (key.right < self.right if self.open_right and key.closed_right else key.right <= self.right)) + if _interval_like(key): + key_closed_left = key.inclusive in ('left', 'both') + key_closed_right = key.inclusive in ('right', 'both') + return ((self.left < key.left if self.open_left and key_closed_left else self.left <= key.left) and + (key.right < self.right if self.open_right and key_closed_right else key.right <= self.right)) return ((self.left < key if self.open_left else self.left <= key) and (key < self.right if self.open_right else key <= self.right)) diff --git a/pandas/core/indexes/range.py b/pandas/core/indexes/range.py index 376c98b6e176f..07e852ca761a5 100644 --- a/pandas/core/indexes/range.py +++ b/pandas/core/indexes/range.py @@ -135,7 +135,7 @@ def __new__( # validate the arguments if com.all_none(start, stop, step): - raise TypeError("RangeIndex(...) must be called with integers") + raise TypeError("RangeIndex(...) must be called with integers/floats") start = ensure_python_int(start) if start is not None else 0 From d94cf2b3a7dd27026acdfbf4b8003b34761a7fed Mon Sep 17 00:00:00 2001 From: Kapil <90269125+kapiliyer@users.noreply.github.com> Date: Wed, 3 Aug 2022 17:24:54 -0400 Subject: [PATCH 5/9] ENH: Support For Interval __contains__ Other Interval (#46613) --- pandas/_libs/interval.pyx | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/pandas/_libs/interval.pyx b/pandas/_libs/interval.pyx index 31a733434375e..1dcf03735a60b 100644 --- a/pandas/_libs/interval.pyx +++ b/pandas/_libs/interval.pyx @@ -414,8 +414,15 @@ cdef class Interval(IntervalMixin): if _interval_like(key): key_closed_left = key.inclusive in ('left', 'both') key_closed_right = key.inclusive in ('right', 'both') - return ((self.left < key.left if self.open_left and key_closed_left else self.left <= key.left) and - (key.right < self.right if self.open_right and key_closed_right else key.right <= self.right)) + if self.open_left and key_closed_left: + left_contained = self.left < key.left + else: + left_contained = self.left <= key.left + if self.open_right and key_closed_right: + right_contained = key.right < self.right + else: + right_contained = key.right <= self.right + return left_contained and right_contained return ((self.left < key if self.open_left else self.left <= key) and (key < self.right if self.open_right else key <= self.right)) From 2297d408f3f76fabe2b1737c55060415af8cde8e Mon Sep 17 00:00:00 2001 From: "Kapil E. Iyer" <90269125+kapiliyer@users.noreply.github.com> Date: Wed, 3 Aug 2022 17:26:38 -0400 Subject: [PATCH 6/9] Fix: Unintentionally Modified Range --- pandas/core/indexes/range.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/indexes/range.py b/pandas/core/indexes/range.py index 07e852ca761a5..376c98b6e176f 100644 --- a/pandas/core/indexes/range.py +++ b/pandas/core/indexes/range.py @@ -135,7 +135,7 @@ def __new__( # validate the arguments if com.all_none(start, stop, step): - raise TypeError("RangeIndex(...) must be called with integers/floats") + raise TypeError("RangeIndex(...) must be called with integers") start = ensure_python_int(start) if start is not None else 0 From d640efdcba2767a292ce35692e1c90ccfa90d391 Mon Sep 17 00:00:00 2001 From: Kapil <90269125+kapiliyer@users.noreply.github.com> Date: Wed, 3 Aug 2022 17:20:52 -0400 Subject: [PATCH 7/9] ENH: Support For Interval __contains__ Other Interval (#46613) --- pandas/core/indexes/range.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/indexes/range.py b/pandas/core/indexes/range.py index 376c98b6e176f..07e852ca761a5 100644 --- a/pandas/core/indexes/range.py +++ b/pandas/core/indexes/range.py @@ -135,7 +135,7 @@ def __new__( # validate the arguments if com.all_none(start, stop, step): - raise TypeError("RangeIndex(...) must be called with integers") + raise TypeError("RangeIndex(...) must be called with integers/floats") start = ensure_python_int(start) if start is not None else 0 From a9ad87eae444f709d6c301bb00450000677983d1 Mon Sep 17 00:00:00 2001 From: "Kapil E. Iyer" <90269125+kapiliyer@users.noreply.github.com> Date: Wed, 3 Aug 2022 17:26:38 -0400 Subject: [PATCH 8/9] Fix: Unintentionally Modified Range --- pandas/core/indexes/range.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/indexes/range.py b/pandas/core/indexes/range.py index 07e852ca761a5..376c98b6e176f 100644 --- a/pandas/core/indexes/range.py +++ b/pandas/core/indexes/range.py @@ -135,7 +135,7 @@ def __new__( # validate the arguments if com.all_none(start, stop, step): - raise TypeError("RangeIndex(...) must be called with integers/floats") + raise TypeError("RangeIndex(...) must be called with integers") start = ensure_python_int(start) if start is not None else 0 From 48d9aa452fe7a68a3245b442cc0cc1630f7ccc32 Mon Sep 17 00:00:00 2001 From: Kapil <90269125+kapiliyer@users.noreply.github.com> Date: Mon, 8 Aug 2022 21:49:37 -0400 Subject: [PATCH 9/9] ENH: Support For Interval __contains__ Other Interval (#46613) --- pandas/tests/scalar/interval/test_ops.py | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/pandas/tests/scalar/interval/test_ops.py b/pandas/tests/scalar/interval/test_ops.py index 27cc44a009af9..92db6ac772830 100644 --- a/pandas/tests/scalar/interval/test_ops.py +++ b/pandas/tests/scalar/interval/test_ops.py @@ -69,17 +69,13 @@ def test_overlaps_invalid_type(self, other): class TestContains: - @pytest.mark.parametrize( - "closed", - ["neither", "left", "right"], - ) - def test_contains_interval(self, closed): + def test_contains_interval(self, inclusive_endpoints_fixture): interval1 = Interval(0, 1, "both") - interval2 = Interval(0, 1, closed) + interval2 = Interval(0, 1, inclusive_endpoints_fixture) assert interval1 in interval1 assert interval2 in interval2 assert interval2 in interval1 - assert interval1 not in interval2 + assert interval1 not in interval2 or inclusive_endpoints_fixture == "both" def test_contains_infinite_length(self): interval1 = Interval(0, 1, "both")