ENH: consistency of input args for boundaries (pd.date_range) (#43504)

Co-authored-by: JHM Darbyshire <24256554+attack68@users.noreply.github.com>
pandas-dev · Oct 3, 2021 · 3db765a · 3db765a
1 parent 4463fb1
commit 3db765a
Show file tree

Hide file tree

Showing 9 changed files with 192 additions and 72 deletions.
diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst
@@ -336,6 +336,7 @@ Other Deprecations
 - Deprecated the 'include_start' and 'include_end' arguments in :meth:`DataFrame.between_time`; in a future version passing 'include_start' or 'include_end' will raise (:issue:`40245`)
 - Deprecated the ``squeeze`` argument to :meth:`read_csv`, :meth:`read_table`, and :meth:`read_excel`. Users should squeeze the DataFrame afterwards with ``.squeeze("columns")`` instead. (:issue:`43242`)
 - Deprecated the ``index`` argument to :class:`SparseArray` construction (:issue:`23089`)
+- Deprecated the ``closed`` argument in :meth:`date_range` and :meth:`bdate_range` in favor of ``inclusive`` argument; In a future version passing ``closed`` will raise (:issue:`40245`)
 - Deprecated :meth:`.Rolling.validate`, :meth:`.Expanding.validate`, and :meth:`.ExponentialMovingWindow.validate` (:issue:`43665`)
 - Deprecated silent dropping of columns that raised a ``TypeError`` in :class:`Series.transform` and :class:`DataFrame.transform` when used with a dictionary (:issue:`43740`)
 - Deprecated silent dropping of columns that raised a ``TypeError``, ``DataError``, and some cases of ``ValueError`` in :meth:`Series.aggregate`, :meth:`DataFrame.aggregate`, :meth:`Series.groupby.aggregate`, and :meth:`DataFrame.groupby.aggregate` when used with a list (:issue:`43740`)
@@ -385,6 +386,7 @@ Datetimelike
 - Bug in :class:`DataFrame` constructor unnecessarily copying non-datetimelike 2D object arrays (:issue:`39272`)
 - Bug in :func:`to_datetime` with ``format`` and ``pandas.NA`` was raising ``ValueError`` (:issue:`42957`)
 - :func:`to_datetime` would silently swap ``MM/DD/YYYY`` and ``DD/MM/YYYY`` formats if the given ``dayfirst`` option could not be respected - now, a warning is raised in the case of delimited date strings (e.g. ``31-12-2012``) (:issue:`12585`)
+- Bug in :meth:`date_range` and :meth:`bdate_range` do not return right bound when ``start`` = ``end`` and set is closed on one side (:issue:`43394`)
 -
 
 Timedelta

diff --git a/pandas/conftest.py b/pandas/conftest.py
@@ -242,6 +242,14 @@ def keep(request):
     return request.param
 
 
+@pytest.fixture(params=["both", "neither", "left", "right"])
+def inclusive_endpoints_fixture(request):
+    """
+    Fixture for trying all interval 'inclusive' parameters.
+    """
+    return request.param
+
+
 @pytest.fixture(params=["left", "right", "both", "neither"])
 def closed(request):
     """

diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py
@@ -39,7 +39,7 @@
 )
 from pandas._typing import npt
 from pandas.errors import PerformanceWarning
-from pandas.util._validators import validate_endpoints
+from pandas.util._validators import validate_inclusive
 
 from pandas.core.dtypes.cast import astype_dt64_to_dt64tz
 from pandas.core.dtypes.common import (
@@ -394,7 +394,7 @@ def _generate_range(
         normalize=False,
         ambiguous="raise",
         nonexistent="raise",
-        closed=None,
+        inclusive="both",
     ):
 
         periods = dtl.validate_periods(periods)
@@ -417,7 +417,7 @@ def _generate_range(
         if start is NaT or end is NaT:
             raise ValueError("Neither `start` nor `end` can be NaT")
 
-        left_closed, right_closed = validate_endpoints(closed)
+        left_inclusive, right_inclusive = validate_inclusive(inclusive)
         start, end, _normalized = _maybe_normalize_endpoints(start, end, normalize)
         tz = _infer_tz_from_endpoints(start, end, tz)
 
@@ -477,10 +477,15 @@ def _generate_range(
             arr = arr.astype("M8[ns]", copy=False)
             index = cls._simple_new(arr, freq=None, dtype=dtype)
 
-        if not left_closed and len(index) and index[0] == start:
-            index = index[1:]
-        if not right_closed and len(index) and index[-1] == end:
-            index = index[:-1]
+        if start == end:
+            if not left_inclusive and not right_inclusive:
+                index = index[1:-1]
+        else:
+            if not left_inclusive or not right_inclusive:
+                if not left_inclusive and len(index) and index[0] == start:
+                    index = index[1:]
+                if not right_inclusive and len(index) and index[-1] == end:
+                    index = index[:-1]
 
         dtype = tz_to_dtype(tz)
         return cls._simple_new(index._ndarray, freq=freq, dtype=dtype)

diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py
@@ -881,7 +881,8 @@ def date_range(
     tz=None,
     normalize: bool = False,
     name: Hashable = None,
-    closed=None,
+    closed: str | None | lib.NoDefault = lib.no_default,
+    inclusive: str | None = None,
     **kwargs,
 ) -> DatetimeIndex:
     """
@@ -919,6 +920,14 @@ def date_range(
     closed : {None, 'left', 'right'}, optional
         Make the interval closed with respect to the given frequency to
         the 'left', 'right', or both sides (None, the default).
+
+        .. deprecated:: 1.4.0
+           Argument `closed` has been deprecated to standardize boundary inputs.
+           Use `inclusive` instead, to set each bound as closed or open.
+    inclusive : {"both", "neither", "left", "right"}, default "both"
+        Include boundaries; Whether to set each bound as closed or open.
+
+        .. versionadded:: 1.4.0
     **kwargs
         For compatibility. Has no effect on the result.
 
@@ -1029,6 +1038,28 @@ def date_range(
     DatetimeIndex(['2017-01-02', '2017-01-03', '2017-01-04'],
                   dtype='datetime64[ns]', freq='D')
     """
+    if inclusive is not None and not isinstance(closed, lib.NoDefault):
+        raise ValueError(
+            "Deprecated argument `closed` cannot be passed"
+            "if argument `inclusive` is not None"
+        )
+    elif not isinstance(closed, lib.NoDefault):
+        warnings.warn(
+            "Argument `closed` is deprecated in favor of `inclusive`.",
+            FutureWarning,
+            stacklevel=2,
+        )
+        if closed is None:
+            inclusive = "both"
+        elif closed in ("left", "right"):
+            inclusive = closed
+        else:
+            raise ValueError(
+                "Argument `closed` has to be either 'left', 'right' or None"
+            )
+    elif inclusive is None:
+        inclusive = "both"
+
     if freq is None and com.any_none(periods, start, end):
         freq = "D"
 
@@ -1039,7 +1070,7 @@ def date_range(
         freq=freq,
         tz=tz,
         normalize=normalize,
-        closed=closed,
+        inclusive=inclusive,
         **kwargs,
     )
     return DatetimeIndex._simple_new(dtarr, name=name)
@@ -1055,7 +1086,8 @@ def bdate_range(
     name: Hashable = None,
     weekmask=None,
     holidays=None,
-    closed=None,
+    closed: lib.NoDefault = lib.no_default,
+    inclusive: str | None = None,
     **kwargs,
 ) -> DatetimeIndex:
     """
@@ -1090,6 +1122,14 @@ def bdate_range(
     closed : str, default None
         Make the interval closed with respect to the given frequency to
         the 'left', 'right', or both sides (None).
+
+        .. deprecated:: 1.4.0
+           Argument `closed` has been deprecated to standardize boundary inputs.
+           Use `inclusive` instead, to set each bound as closed or open.
+    inclusive : {"both", "neither", "left", "right"}, default "both"
+        Include boundaries; Whether to set each bound as closed or open.
+
+        .. versionadded:: 1.4.0
     **kwargs
         For compatibility. Has no effect on the result.
 
@@ -1143,6 +1183,7 @@ def bdate_range(
         normalize=normalize,
         name=name,
         closed=closed,
+        inclusive=inclusive,
         **kwargs,
     )
 

diff --git a/pandas/tests/frame/conftest.py b/pandas/tests/frame/conftest.py
@@ -9,11 +9,6 @@
 import pandas._testing as tm
 
 
-@pytest.fixture(params=["both", "neither", "left", "right"])
-def inclusive_endpoints_fixture(request):
-    return request.param
-
-
 @pytest.fixture
 def float_frame_with_na():
     """

diff --git a/pandas/tests/groupby/test_timegrouper.py b/pandas/tests/groupby/test_timegrouper.py
@@ -100,7 +100,7 @@ def test_groupby_with_timegrouper(self):
             expected = DataFrame(
                 {"Quantity": 0},
                 index=date_range(
-                    "20130901", "20131205", freq="5D", name="Date", closed="left"
+                    "20130901", "20131205", freq="5D", name="Date", inclusive="left"
                 ),
             )
             expected.iloc[[0, 6, 18], 0] = np.array([24, 6, 9], dtype="int64")