Skip to content

Commit

Permalink
ENH: consistency of input args for boundaries - pd.interval_range (#4…
Browse files Browse the repository at this point in the history
  • Loading branch information
weikhor committed Apr 4, 2022
1 parent 0141d92 commit 073b353
Show file tree
Hide file tree
Showing 14 changed files with 106 additions and 53 deletions.
8 changes: 4 additions & 4 deletions doc/source/user_guide/advanced.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1082,14 +1082,14 @@ of :ref:`frequency aliases <timeseries.offset_aliases>` with datetime-like inter
pd.interval_range(start=pd.Timedelta("0 days"), periods=3, freq="9H")
Additionally, the ``closed`` parameter can be used to specify which side(s) the intervals
are closed on. Intervals are closed on the right side by default.
Additionally, the ``inclusive`` parameter can be used to specify which side(s) the intervals
are closed on. Intervals are closed on both sides by default.

.. ipython:: python
pd.interval_range(start=0, end=4, closed="both")
pd.interval_range(start=0, end=4, inclusive="both")
pd.interval_range(start=0, end=4, closed="neither")
pd.interval_range(start=0, end=4, inclusive="neither")
Specifying ``start``, ``end``, and ``periods`` will generate a range of evenly spaced
intervals from ``start`` to ``end`` inclusively, with ``periods`` number of elements
Expand Down
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.5.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -426,6 +426,7 @@ Other Deprecations
- Deprecated behavior of method :meth:`DataFrame.quantile`, attribute ``numeric_only`` will default False. Including datetime/timedelta columns in the result (:issue:`7308`).
- Deprecated :attr:`Timedelta.freq` and :attr:`Timedelta.is_populated` (:issue:`46430`)
- Deprecated :attr:`Timedelta.delta` (:issue:`46476`)
- Deprecated the ``closed`` argument in :func:`interval_range` in favor of the ``inclusive`` argument; in a future version, passing ``closed`` will raise (:issue:`40245`)
-

.. ---------------------------------------------------------------------------
Expand Down
57 changes: 45 additions & 12 deletions pandas/core/indexes/interval.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
Hashable,
Literal,
)
import warnings

import numpy as np

Expand Down Expand Up @@ -160,7 +161,7 @@ def _new_IntervalIndex(cls, d):
A new ``IntervalIndex`` is typically constructed using
:func:`interval_range`:
>>> pd.interval_range(start=0, end=5)
>>> pd.interval_range(start=0, end=5, inclusive="right")
IntervalIndex([(0, 1], (1, 2], (2, 3], (3, 4], (4, 5]],
dtype='interval[int64, right]')
Expand Down Expand Up @@ -443,7 +444,7 @@ def is_overlapping(self) -> bool:
Intervals that share closed endpoints overlap:
>>> index = pd.interval_range(0, 3, closed='both')
>>> index = pd.interval_range(0, 3, inclusive='both')
>>> index
IntervalIndex([[0, 1], [1, 2], [2, 3]],
dtype='interval[int64, both]')
Expand All @@ -452,7 +453,7 @@ def is_overlapping(self) -> bool:
Intervals that only have an open endpoint in common do not overlap:
>>> index = pd.interval_range(0, 3, closed='left')
>>> index = pd.interval_range(0, 3, inclusive='left')
>>> index
IntervalIndex([[0, 1), [1, 2), [2, 3)],
dtype='interval[int64, left]')
Expand Down Expand Up @@ -956,7 +957,8 @@ def interval_range(
periods=None,
freq=None,
name: Hashable = None,
closed: IntervalClosedType = "right",
closed: lib.NoDefault = lib.no_default,
inclusive: IntervalClosedType | None = None,
) -> IntervalIndex:
"""
Return a fixed frequency IntervalIndex.
Expand All @@ -979,6 +981,14 @@ def interval_range(
Whether the intervals are closed on the left-side, right-side, both
or neither.
.. deprecated:: 1.5.0
Argument `closed` has been deprecated to standardize boundary inputs.
Use `inclusive` instead, to set each bound as closed or open.
inclusive : {"both", "neither", "left", "right"}, default "both"
Include boundaries; Whether to set each bound as closed or open.
.. versionadded:: 1.5.0
Returns
-------
IntervalIndex
Expand All @@ -1001,14 +1011,14 @@ def interval_range(
--------
Numeric ``start`` and ``end`` are supported.
>>> pd.interval_range(start=0, end=5)
>>> pd.interval_range(start=0, end=5, inclusive="right")
IntervalIndex([(0, 1], (1, 2], (2, 3], (3, 4], (4, 5]],
dtype='interval[int64, right]')
Additionally, datetime-like input is also supported.
>>> pd.interval_range(start=pd.Timestamp('2017-01-01'),
... end=pd.Timestamp('2017-01-04'))
... end=pd.Timestamp('2017-01-04'), inclusive="right")
IntervalIndex([(2017-01-01, 2017-01-02], (2017-01-02, 2017-01-03],
(2017-01-03, 2017-01-04]],
dtype='interval[datetime64[ns], right]')
Expand All @@ -1017,33 +1027,56 @@ def interval_range(
endpoints of the individual intervals within the ``IntervalIndex``. For
numeric ``start`` and ``end``, the frequency must also be numeric.
>>> pd.interval_range(start=0, periods=4, freq=1.5)
>>> pd.interval_range(start=0, periods=4, freq=1.5, inclusive="right")
IntervalIndex([(0.0, 1.5], (1.5, 3.0], (3.0, 4.5], (4.5, 6.0]],
dtype='interval[float64, right]')
Similarly, for datetime-like ``start`` and ``end``, the frequency must be
convertible to a DateOffset.
>>> pd.interval_range(start=pd.Timestamp('2017-01-01'),
... periods=3, freq='MS')
... periods=3, freq='MS', inclusive="right")
IntervalIndex([(2017-01-01, 2017-02-01], (2017-02-01, 2017-03-01],
(2017-03-01, 2017-04-01]],
dtype='interval[datetime64[ns], right]')
Specify ``start``, ``end``, and ``periods``; the frequency is generated
automatically (linearly spaced).
>>> pd.interval_range(start=0, end=6, periods=4)
>>> pd.interval_range(start=0, end=6, periods=4, inclusive="right")
IntervalIndex([(0.0, 1.5], (1.5, 3.0], (3.0, 4.5], (4.5, 6.0]],
dtype='interval[float64, right]')
The ``closed`` parameter specifies which endpoints of the individual
The ``inclusive`` parameter specifies which endpoints of the individual
intervals within the ``IntervalIndex`` are closed.
>>> pd.interval_range(end=5, periods=4, closed='both')
>>> pd.interval_range(end=5, periods=4, inclusive='both')
IntervalIndex([[1, 2], [2, 3], [3, 4], [4, 5]],
dtype='interval[int64, both]')
"""
if inclusive is not None and not isinstance(closed, lib.NoDefault):
raise ValueError(
"Deprecated argument `closed` cannot be passed "
"if argument `inclusive` is not None"
)
elif not isinstance(closed, lib.NoDefault):
warnings.warn(
"Argument `closed` is deprecated in favor of `inclusive`.",
FutureWarning,
stacklevel=2,
)
if closed is None:
inclusive = "both"
elif closed in ("both", "neither", "left", "right"):
inclusive = closed
else:
raise ValueError(
"Argument `closed` has to be either"
"'both', 'neither', 'left' or 'right'"
)
elif inclusive is None:
inclusive = "both"

start = maybe_box_datetimelike(start)
end = maybe_box_datetimelike(end)
endpoint = start if start is not None else end
Expand Down Expand Up @@ -1120,4 +1153,4 @@ def interval_range(
else:
breaks = timedelta_range(start=start, end=end, periods=periods, freq=freq)

return IntervalIndex.from_breaks(breaks, name=name, closed=closed)
return IntervalIndex.from_breaks(breaks, name=name, closed=inclusive)
2 changes: 1 addition & 1 deletion pandas/tests/frame/methods/test_round.py
Original file line number Diff line number Diff line change
Expand Up @@ -210,7 +210,7 @@ def test_round_nonunique_categorical(self):

def test_round_interval_category_columns(self):
# GH#30063
columns = pd.CategoricalIndex(pd.interval_range(0, 2))
columns = pd.CategoricalIndex(pd.interval_range(0, 2, inclusive="right"))
df = DataFrame([[0.66, 1.1], [0.3, 0.25]], columns=columns)

result = df.round()
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/groupby/aggregate/test_cython.py
Original file line number Diff line number Diff line change
Expand Up @@ -214,7 +214,7 @@ def test_cython_agg_empty_buckets_nanops(observed):
result = df.groupby(pd.cut(df["a"], grps), observed=observed)._cython_agg_general(
"add", alt=None, numeric_only=True
)
intervals = pd.interval_range(0, 20, freq=5)
intervals = pd.interval_range(0, 20, freq=5, inclusive="right")
expected = DataFrame(
{"a": [0, 0, 36, 0]},
index=pd.CategoricalIndex(intervals, name="a", ordered=True),
Expand Down
10 changes: 5 additions & 5 deletions pandas/tests/indexes/interval/test_astype.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,7 @@ def test_subtype_integer(self, subtype_start, subtype_end):
@pytest.mark.xfail(reason="GH#15832")
def test_subtype_integer_errors(self):
# int64 -> uint64 fails with negative values
index = interval_range(-10, 10)
index = interval_range(-10, 10, inclusive="right")
dtype = IntervalDtype("uint64", "right")

# Until we decide what the exception message _should_ be, we
Expand All @@ -133,7 +133,7 @@ class TestFloatSubtype(AstypeTests):
"""Tests specific to IntervalIndex with float subtype"""

indexes = [
interval_range(-10.0, 10.0, closed="neither"),
interval_range(-10.0, 10.0, inclusive="neither"),
IntervalIndex.from_arrays(
[-1.5, np.nan, 0.0, 0.0, 1.5], [-0.5, np.nan, 1.0, 1.0, 3.0], closed="both"
),
Expand Down Expand Up @@ -170,7 +170,7 @@ def test_subtype_integer_with_non_integer_borders(self, subtype):

def test_subtype_integer_errors(self):
# float64 -> uint64 fails with negative values
index = interval_range(-10.0, 10.0)
index = interval_range(-10.0, 10.0, inclusive="right")
dtype = IntervalDtype("uint64", "right")
msg = re.escape(
"Cannot convert interval[float64, right] to interval[uint64, right]; "
Expand All @@ -191,10 +191,10 @@ class TestDatetimelikeSubtype(AstypeTests):
"""Tests specific to IntervalIndex with datetime-like subtype"""

indexes = [
interval_range(Timestamp("2018-01-01"), periods=10, closed="neither"),
interval_range(Timestamp("2018-01-01"), periods=10, inclusive="neither"),
interval_range(Timestamp("2018-01-01"), periods=10).insert(2, NaT),
interval_range(Timestamp("2018-01-01", tz="US/Eastern"), periods=10),
interval_range(Timedelta("0 days"), periods=10, closed="both"),
interval_range(Timedelta("0 days"), periods=10, inclusive="both"),
interval_range(Timedelta("0 days"), periods=10).insert(2, NaT),
]

Expand Down
12 changes: 6 additions & 6 deletions pandas/tests/indexes/interval/test_interval.py
Original file line number Diff line number Diff line change
Expand Up @@ -167,10 +167,10 @@ def test_delete(self, closed):
@pytest.mark.parametrize(
"data",
[
interval_range(0, periods=10, closed="neither"),
interval_range(1.7, periods=8, freq=2.5, closed="both"),
interval_range(Timestamp("20170101"), periods=12, closed="left"),
interval_range(Timedelta("1 day"), periods=6, closed="right"),
interval_range(0, periods=10, inclusive="neither"),
interval_range(1.7, periods=8, freq=2.5, inclusive="both"),
interval_range(Timestamp("20170101"), periods=12, inclusive="left"),
interval_range(Timedelta("1 day"), periods=6, inclusive="right"),
],
)
def test_insert(self, data):
Expand Down Expand Up @@ -868,9 +868,9 @@ def test_nbytes(self):
@pytest.mark.parametrize("new_closed", ["left", "right", "both", "neither"])
def test_set_closed(self, name, closed, new_closed):
# GH 21670
index = interval_range(0, 5, closed=closed, name=name)
index = interval_range(0, 5, inclusive=closed, name=name)
result = index.set_closed(new_closed)
expected = interval_range(0, 5, closed=new_closed, name=name)
expected = interval_range(0, 5, inclusive=new_closed, name=name)
tm.assert_index_equal(result, expected)

@pytest.mark.parametrize("bad_closed", ["foo", 10, "LEFT", True, False])
Expand Down
44 changes: 30 additions & 14 deletions pandas/tests/indexes/interval/test_interval_range.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,25 +34,25 @@ def test_constructor_numeric(self, closed, name, freq, periods):

# defined from start/end/freq
result = interval_range(
start=start, end=end, freq=freq, name=name, closed=closed
start=start, end=end, freq=freq, name=name, inclusive=closed
)
tm.assert_index_equal(result, expected)

# defined from start/periods/freq
result = interval_range(
start=start, periods=periods, freq=freq, name=name, closed=closed
start=start, periods=periods, freq=freq, name=name, inclusive=closed
)
tm.assert_index_equal(result, expected)

# defined from end/periods/freq
result = interval_range(
end=end, periods=periods, freq=freq, name=name, closed=closed
end=end, periods=periods, freq=freq, name=name, inclusive=closed
)
tm.assert_index_equal(result, expected)

# GH 20976: linspace behavior defined from start/end/periods
result = interval_range(
start=start, end=end, periods=periods, name=name, closed=closed
start=start, end=end, periods=periods, name=name, inclusive=closed
)
tm.assert_index_equal(result, expected)

Expand All @@ -67,19 +67,19 @@ def test_constructor_timestamp(self, closed, name, freq, periods, tz):

# defined from start/end/freq
result = interval_range(
start=start, end=end, freq=freq, name=name, closed=closed
start=start, end=end, freq=freq, name=name, inclusive=closed
)
tm.assert_index_equal(result, expected)

# defined from start/periods/freq
result = interval_range(
start=start, periods=periods, freq=freq, name=name, closed=closed
start=start, periods=periods, freq=freq, name=name, inclusive=closed
)
tm.assert_index_equal(result, expected)

# defined from end/periods/freq
result = interval_range(
end=end, periods=periods, freq=freq, name=name, closed=closed
end=end, periods=periods, freq=freq, name=name, inclusive=closed
)
tm.assert_index_equal(result, expected)

Expand All @@ -88,7 +88,7 @@ def test_constructor_timestamp(self, closed, name, freq, periods, tz):
# matches expected only for non-anchored offsets and tz naive
# (anchored/DST transitions cause unequal spacing in expected)
result = interval_range(
start=start, end=end, periods=periods, name=name, closed=closed
start=start, end=end, periods=periods, name=name, inclusive=closed
)
tm.assert_index_equal(result, expected)

Expand All @@ -102,25 +102,25 @@ def test_constructor_timedelta(self, closed, name, freq, periods):

# defined from start/end/freq
result = interval_range(
start=start, end=end, freq=freq, name=name, closed=closed
start=start, end=end, freq=freq, name=name, inclusive=closed
)
tm.assert_index_equal(result, expected)

# defined from start/periods/freq
result = interval_range(
start=start, periods=periods, freq=freq, name=name, closed=closed
start=start, periods=periods, freq=freq, name=name, inclusive=closed
)
tm.assert_index_equal(result, expected)

# defined from end/periods/freq
result = interval_range(
end=end, periods=periods, freq=freq, name=name, closed=closed
end=end, periods=periods, freq=freq, name=name, inclusive=closed
)
tm.assert_index_equal(result, expected)

# GH 20976: linspace behavior defined from start/end/periods
result = interval_range(
start=start, end=end, periods=periods, name=name, closed=closed
start=start, end=end, periods=periods, name=name, inclusive=closed
)
tm.assert_index_equal(result, expected)

Expand Down Expand Up @@ -163,7 +163,9 @@ def test_no_invalid_float_truncation(self, start, end, freq):
breaks = [0.5, 2.0, 3.5, 5.0, 6.5]
expected = IntervalIndex.from_breaks(breaks)

result = interval_range(start=start, end=end, periods=4, freq=freq)
result = interval_range(
start=start, end=end, periods=4, freq=freq, inclusive="right"
)
tm.assert_index_equal(result, expected)

@pytest.mark.parametrize(
Expand All @@ -184,7 +186,7 @@ def test_no_invalid_float_truncation(self, start, end, freq):
def test_linspace_dst_transition(self, start, mid, end):
# GH 20976: linspace behavior defined from start/end/periods
# accounts for the hour gained/lost during DST transition
result = interval_range(start=start, end=end, periods=2)
result = interval_range(start=start, end=end, periods=2, inclusive="right")
expected = IntervalIndex.from_breaks([start, mid, end])
tm.assert_index_equal(result, expected)

Expand Down Expand Up @@ -353,3 +355,17 @@ def test_errors(self):
msg = "Start and end cannot both be tz-aware with different timezones"
with pytest.raises(TypeError, match=msg):
interval_range(start=start, end=end)

def test_interval_range_error_and_warning(self):
# GH 40245
# Exercises the two user-visible outcomes of passing the deprecated
# `closed` keyword to interval_range: an error when it is combined with
# `inclusive`, and a FutureWarning when it is used on its own.

# Case 1: `closed` and `inclusive` are both supplied -> ValueError.
# The values agree ("both"/"both") on purpose: the combination itself is
# rejected, not a conflict between the two values.
msg = (
"Deprecated argument `closed` cannot "
"be passed if argument `inclusive` is not None"
)
with pytest.raises(ValueError, match=msg):
interval_range(end=5, periods=4, closed="both", inclusive="both")

# Case 2: only the deprecated `closed` is supplied -> FutureWarning.
# The pattern omits the trailing period of the emitted warning text.
msg = "Argument `closed` is deprecated in favor of `inclusive`"
with tm.assert_produces_warning(FutureWarning, match=msg):
interval_range(end=5, periods=4, closed="right")
2 changes: 1 addition & 1 deletion pandas/tests/indexes/interval/test_setops.py
Original file line number Diff line number Diff line change
Expand Up @@ -194,7 +194,7 @@ def test_set_incompatible_types(self, closed, op_name, sort):
tm.assert_index_equal(result, expected)

# GH 19016: incompatible dtypes -> cast to object
other = interval_range(Timestamp("20180101"), periods=9, closed=closed)
other = interval_range(Timestamp("20180101"), periods=9, inclusive=closed)
expected = getattr(index.astype(object), op_name)(other, sort=sort)
if op_name == "difference":
expected = index
Expand Down

0 comments on commit 073b353

Please sign in to comment.