Skip to content

Commit

Permalink
ENH: consistency of input args for boundaries (pd.date_range) (#43504)
Browse files Browse the repository at this point in the history
Co-authored-by: JHM Darbyshire <24256554+attack68@users.noreply.github.com>
  • Loading branch information
zyc09 and attack68 committed Oct 3, 2021
1 parent 4463fb1 commit 3db765a
Show file tree
Hide file tree
Showing 9 changed files with 192 additions and 72 deletions.
2 changes: 2 additions & 0 deletions doc/source/whatsnew/v1.4.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -336,6 +336,7 @@ Other Deprecations
- Deprecated the 'include_start' and 'include_end' arguments in :meth:`DataFrame.between_time`; in a future version passing 'include_start' or 'include_end' will raise (:issue:`40245`)
- Deprecated the ``squeeze`` argument to :meth:`read_csv`, :meth:`read_table`, and :meth:`read_excel`. Users should squeeze the DataFrame afterwards with ``.squeeze("columns")`` instead. (:issue:`43242`)
- Deprecated the ``index`` argument to :class:`SparseArray` construction (:issue:`23089`)
- Deprecated the ``closed`` argument in :meth:`date_range` and :meth:`bdate_range` in favor of the ``inclusive`` argument; in a future version passing ``closed`` will raise (:issue:`40245`)
- Deprecated :meth:`.Rolling.validate`, :meth:`.Expanding.validate`, and :meth:`.ExponentialMovingWindow.validate` (:issue:`43665`)
- Deprecated silent dropping of columns that raised a ``TypeError`` in :class:`Series.transform` and :class:`DataFrame.transform` when used with a dictionary (:issue:`43740`)
- Deprecated silent dropping of columns that raised a ``TypeError``, ``DataError``, and some cases of ``ValueError`` in :meth:`Series.aggregate`, :meth:`DataFrame.aggregate`, :meth:`Series.groupby.aggregate`, and :meth:`DataFrame.groupby.aggregate` when used with a list (:issue:`43740`)
Expand Down Expand Up @@ -385,6 +386,7 @@ Datetimelike
- Bug in :class:`DataFrame` constructor unnecessarily copying non-datetimelike 2D object arrays (:issue:`39272`)
- Bug in :func:`to_datetime` with ``format`` and ``pandas.NA`` was raising ``ValueError`` (:issue:`42957`)
- :func:`to_datetime` would silently swap ``MM/DD/YYYY`` and ``DD/MM/YYYY`` formats if the given ``dayfirst`` option could not be respected - now, a warning is raised in the case of delimited date strings (e.g. ``31-12-2012``) (:issue:`12585`)
- Bug in :meth:`date_range` and :meth:`bdate_range` not returning the right bound when ``start`` equals ``end`` and the interval is closed on one side (:issue:`43394`)
-

Timedelta
Expand Down
8 changes: 8 additions & 0 deletions pandas/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -242,6 +242,14 @@ def keep(request):
return request.param


@pytest.fixture(params=["both", "neither", "left", "right"])
def inclusive_endpoints_fixture(request):
"""
Fixture for trying all interval 'inclusive' parameters.
"""
return request.param


@pytest.fixture(params=["left", "right", "both", "neither"])
def closed(request):
"""
Expand Down
19 changes: 12 additions & 7 deletions pandas/core/arrays/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@
)
from pandas._typing import npt
from pandas.errors import PerformanceWarning
from pandas.util._validators import validate_endpoints
from pandas.util._validators import validate_inclusive

from pandas.core.dtypes.cast import astype_dt64_to_dt64tz
from pandas.core.dtypes.common import (
Expand Down Expand Up @@ -394,7 +394,7 @@ def _generate_range(
normalize=False,
ambiguous="raise",
nonexistent="raise",
closed=None,
inclusive="both",
):

periods = dtl.validate_periods(periods)
Expand All @@ -417,7 +417,7 @@ def _generate_range(
if start is NaT or end is NaT:
raise ValueError("Neither `start` nor `end` can be NaT")

left_closed, right_closed = validate_endpoints(closed)
left_inclusive, right_inclusive = validate_inclusive(inclusive)
start, end, _normalized = _maybe_normalize_endpoints(start, end, normalize)
tz = _infer_tz_from_endpoints(start, end, tz)

Expand Down Expand Up @@ -477,10 +477,15 @@ def _generate_range(
arr = arr.astype("M8[ns]", copy=False)
index = cls._simple_new(arr, freq=None, dtype=dtype)

if not left_closed and len(index) and index[0] == start:
index = index[1:]
if not right_closed and len(index) and index[-1] == end:
index = index[:-1]
if start == end:
if not left_inclusive and not right_inclusive:
index = index[1:-1]
else:
if not left_inclusive or not right_inclusive:
if not left_inclusive and len(index) and index[0] == start:
index = index[1:]
if not right_inclusive and len(index) and index[-1] == end:
index = index[:-1]

dtype = tz_to_dtype(tz)
return cls._simple_new(index._ndarray, freq=freq, dtype=dtype)
Expand Down
47 changes: 44 additions & 3 deletions pandas/core/indexes/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -881,7 +881,8 @@ def date_range(
tz=None,
normalize: bool = False,
name: Hashable = None,
closed=None,
closed: str | None | lib.NoDefault = lib.no_default,
inclusive: str | None = None,
**kwargs,
) -> DatetimeIndex:
"""
Expand Down Expand Up @@ -919,6 +920,14 @@ def date_range(
closed : {None, 'left', 'right'}, optional
Make the interval closed with respect to the given frequency to
the 'left', 'right', or both sides (None, the default).
.. deprecated:: 1.4.0
Argument `closed` has been deprecated to standardize boundary inputs.
Use `inclusive` instead, to set each bound as closed or open.
inclusive : {"both", "neither", "left", "right"}, default "both"
Include boundaries; Whether to set each bound as closed or open.
.. versionadded:: 1.4.0
**kwargs
For compatibility. Has no effect on the result.
Expand Down Expand Up @@ -1029,6 +1038,28 @@ def date_range(
DatetimeIndex(['2017-01-02', '2017-01-03', '2017-01-04'],
dtype='datetime64[ns]', freq='D')
"""
if inclusive is not None and not isinstance(closed, lib.NoDefault):
raise ValueError(
"Deprecated argument `closed` cannot be passed"
"if argument `inclusive` is not None"
)
elif not isinstance(closed, lib.NoDefault):
warnings.warn(
"Argument `closed` is deprecated in favor of `inclusive`.",
FutureWarning,
stacklevel=2,
)
if closed is None:
inclusive = "both"
elif closed in ("left", "right"):
inclusive = closed
else:
raise ValueError(
"Argument `closed` has to be either 'left', 'right' or None"
)
elif inclusive is None:
inclusive = "both"

if freq is None and com.any_none(periods, start, end):
freq = "D"

Expand All @@ -1039,7 +1070,7 @@ def date_range(
freq=freq,
tz=tz,
normalize=normalize,
closed=closed,
inclusive=inclusive,
**kwargs,
)
return DatetimeIndex._simple_new(dtarr, name=name)
Expand All @@ -1055,7 +1086,8 @@ def bdate_range(
name: Hashable = None,
weekmask=None,
holidays=None,
closed=None,
closed: lib.NoDefault = lib.no_default,
inclusive: str | None = None,
**kwargs,
) -> DatetimeIndex:
"""
Expand Down Expand Up @@ -1090,6 +1122,14 @@ def bdate_range(
closed : str, default None
Make the interval closed with respect to the given frequency to
the 'left', 'right', or both sides (None).
.. deprecated:: 1.4.0
Argument `closed` has been deprecated to standardize boundary inputs.
Use `inclusive` instead, to set each bound as closed or open.
inclusive : {"both", "neither", "left", "right"}, default "both"
Include boundaries; Whether to set each bound as closed or open.
.. versionadded:: 1.4.0
**kwargs
For compatibility. Has no effect on the result.
Expand Down Expand Up @@ -1143,6 +1183,7 @@ def bdate_range(
normalize=normalize,
name=name,
closed=closed,
inclusive=inclusive,
**kwargs,
)

Expand Down
5 changes: 0 additions & 5 deletions pandas/tests/frame/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,6 @@
import pandas._testing as tm


@pytest.fixture(params=["both", "neither", "left", "right"])
def inclusive_endpoints_fixture(request):
return request.param


@pytest.fixture
def float_frame_with_na():
"""
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/groupby/test_timegrouper.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ def test_groupby_with_timegrouper(self):
expected = DataFrame(
{"Quantity": 0},
index=date_range(
"20130901", "20131205", freq="5D", name="Date", closed="left"
"20130901", "20131205", freq="5D", name="Date", inclusive="left"
),
)
expected.iloc[[0, 6, 18], 0] = np.array([24, 6, 9], dtype="int64")
Expand Down

0 comments on commit 3db765a

Please sign in to comment.