DEPR: positional indexing on Series __getitem__/__setitem__ (#53201)

* DEPR: positional indexing on Series __getitem__/__setitem__ * troubleshoot docs * troubleshoot docs * update doc * update docs * docs * update fixture * update doctest
pandas-dev · May 19, 2023 · 7b22448 · 7b22448
1 parent b968ce5
commit 7b22448
Show file tree

Hide file tree

Showing 61 changed files with 293 additions and 176 deletions.
diff --git a/doc/source/user_guide/basics.rst b/doc/source/user_guide/basics.rst
@@ -1364,7 +1364,7 @@ We illustrate these fill methods on a simple Series:
 
    rng = pd.date_range("1/3/2000", periods=8)
    ts = pd.Series(np.random.randn(8), index=rng)
-   ts2 = ts[[0, 3, 6]]
+   ts2 = ts.iloc[[0, 3, 6]]
    ts
    ts2
 

diff --git a/doc/source/user_guide/cookbook.rst b/doc/source/user_guide/cookbook.rst
@@ -546,7 +546,7 @@ Unlike agg, apply's callable is passed a sub-DataFrame which gives you access to
 
    def MyCust(x):
        if len(x) > 2:
-           return x[1] * 1.234
+           return x.iloc[1] * 1.234
        return pd.NaT
 
    mhc = {"Mean": np.mean, "Max": np.max, "Custom": MyCust}

diff --git a/doc/source/user_guide/dsintro.rst b/doc/source/user_guide/dsintro.rst
@@ -102,15 +102,15 @@ However, operations such as slicing will also slice the index.
 
 .. ipython:: python
 
-    s[0]
-    s[:3]
+    s.iloc[0]
+    s.iloc[:3]
     s[s > s.median()]
-    s[[4, 3, 1]]
+    s.iloc[[4, 3, 1]]
     np.exp(s)
 
 .. note::
 
-   We will address array-based indexing like ``s[[4, 3, 1]]``
+   We will address array-based indexing like ``s.iloc[[4, 3, 1]]``
    in :ref:`section on indexing <indexing>`.
 
 Like a NumPy array, a pandas :class:`Series` has a single :attr:`~Series.dtype`.
@@ -201,7 +201,7 @@ labels.
 
 .. ipython:: python
 
-    s[1:] + s[:-1]
+    s.iloc[1:] + s.iloc[:-1]
 
 The result of an operation between unaligned :class:`Series` will have the **union** of
 the indexes involved. If a label is not found in one :class:`Series` or the other, the

diff --git a/doc/source/user_guide/missing_data.rst b/doc/source/user_guide/missing_data.rst
@@ -368,7 +368,7 @@ Index aware interpolation is available via the ``method`` keyword:
 .. ipython:: python
    :suppress:
 
-   ts2 = ts[[0, 1, 30, 60, 99]]
+   ts2 = ts.iloc[[0, 1, 30, 60, 99]]
 
 .. ipython:: python
 
@@ -443,7 +443,7 @@ Compare several methods:
 
    ser = pd.Series(np.arange(1, 10.1, 0.25) ** 2 + np.random.randn(37))
    missing = np.array([4, 13, 14, 15, 16, 17, 18, 20, 29])
-   ser[missing] = np.nan
+   ser.iloc[missing] = np.nan
    methods = ["linear", "quadratic", "cubic"]
 
    df = pd.DataFrame({m: ser.interpolate(method=m) for m in methods})

diff --git a/doc/source/user_guide/timeseries.rst b/doc/source/user_guide/timeseries.rst
@@ -777,7 +777,7 @@ regularity will result in a ``DatetimeIndex``, although frequency is lost:
 
 .. ipython:: python
 
-   ts2[[0, 2, 6]].index
+   ts2.iloc[[0, 2, 6]].index
 
 .. _timeseries.components:
 

diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst
@@ -55,7 +55,7 @@ For example:
    pi = dti.to_period("D")
    ser_monotonic = pd.Series(np.arange(30), index=pi)
    shuffler = list(range(0, 30, 2)) + list(range(1, 31, 2))
-   ser = ser_monotonic[shuffler]
+   ser = ser_monotonic.iloc[shuffler]
    ser
 
 .. ipython:: python

diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst
@@ -264,7 +264,7 @@ Deprecations
 - Deprecated allowing ``downcast`` keyword other than ``None``, ``False``, "infer", or a dict with these as values in :meth:`Series.fillna`, :meth:`DataFrame.fillna` (:issue:`40988`)
 - Deprecated allowing arbitrary ``fill_value`` in :class:`SparseDtype`, in a future version the ``fill_value`` will need to be compatible with the ``dtype.subtype``, either a scalar that can be held by that subtype or ``NaN`` for integer or bool subtypes (:issue:`23124`)
 - Deprecated constructing :class:`SparseArray` from scalar data, pass a sequence instead (:issue:`53039`)
--
+- Deprecated positional indexing on :class:`Series` with :meth:`Series.__getitem__` and :meth:`Series.__setitem__`, in a future version ``ser[item]`` will *always* interpret ``item`` as a label, not a position (:issue:`50617`)
 
 .. ---------------------------------------------------------------------------
 .. _whatsnew_210.performance:

diff --git a/pandas/conftest.py b/pandas/conftest.py
@@ -746,7 +746,7 @@ def series_with_multilevel_index() -> Series:
     index = MultiIndex.from_tuples(tuples)
     data = np.random.randn(8)
     ser = Series(data, index=index)
-    ser[3] = np.NaN
+    ser.iloc[3] = np.NaN
     return ser
 
 

diff --git a/pandas/core/apply.py b/pandas/core/apply.py
@@ -1435,7 +1435,7 @@ def relabel_result(
                 com.get_callable_name(f) if not isinstance(f, str) else f for f in fun
             ]
             col_idx_order = Index(s.index).get_indexer(fun)
-            s = s[col_idx_order]
+            s = s.iloc[col_idx_order]
 
         # assign the new user-provided "named aggregation" as index names, and reindex
         # it based on the whole user-provided names.

diff --git a/pandas/core/generic.py b/pandas/core/generic.py
@@ -6512,8 +6512,8 @@ def copy(self, deep: bool_t | None = True) -> Self:
         Updates to the data shared by shallow copy and original is reflected
         in both; deep copy remains unchanged.
 
-        >>> s[0] = 3
-        >>> shallow[1] = 4
+        >>> s.iloc[0] = 3
+        >>> shallow.iloc[1] = 4
         >>> s
         a    3
         b    4

diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py
@@ -1905,7 +1905,7 @@ def _setitem_with_indexer_split_path(self, indexer, value, name: str):
                 pass
 
             elif self._is_scalar_access(indexer) and is_object_dtype(
-                self.obj.dtypes[ilocs[0]]
+                self.obj.dtypes._values[ilocs[0]]
             ):
                 # We are setting nested data, only possible for object dtype data
                 self._setitem_single_column(indexer[1], value, pi)

diff --git a/pandas/core/series.py b/pandas/core/series.py
@@ -973,6 +973,15 @@ def __getitem__(self, key):
             key = unpack_1tuple(key)
 
         if is_integer(key) and self.index._should_fallback_to_positional:
+            warnings.warn(
+                # GH#50617
+                "Series.__getitem__ treating keys as positions is deprecated. "
+                "In a future version, integer keys will always be treated "
+                "as labels (consistent with DataFrame behavior). To access "
+                "a value by position, use `ser.iloc[pos]`",
+                FutureWarning,
+                stacklevel=find_stack_level(),
+            )
             return self._values[key]
 
         elif key_is_scalar:
@@ -1035,6 +1044,15 @@ def _get_with(self, key):
             if not self.index._should_fallback_to_positional:
                 return self.loc[key]
             else:
+                warnings.warn(
+                    # GH#50617
+                    "Series.__getitem__ treating keys as positions is deprecated. "
+                    "In a future version, integer keys will always be treated "
+                    "as labels (consistent with DataFrame behavior). To access "
+                    "a value by position, use `ser.iloc[pos]`",
+                    FutureWarning,
+                    stacklevel=find_stack_level(),
+                )
                 return self.iloc[key]
 
         # handle the dup indexing case GH#4246
@@ -1136,6 +1154,15 @@ def __setitem__(self, key, value) -> None:
                     # positional setter
                     # can't use _mgr.setitem_inplace yet bc could have *both*
                     #  KeyError and then ValueError, xref GH#45070
+                    warnings.warn(
+                        # GH#50617
+                        "Series.__setitem__ treating keys as positions is deprecated. "
+                        "In a future version, integer keys will always be treated "
+                        "as labels (consistent with DataFrame behavior). To set "
+                        "a value by position, use `ser.iloc[pos] = value`",
+                        FutureWarning,
+                        stacklevel=find_stack_level(),
+                    )
                     self._set_values(key, value)
             else:
                 # GH#12862 adding a new key to the Series
@@ -1211,6 +1238,15 @@ def _set_with(self, key, value) -> None:
             key_type = lib.infer_dtype(key, skipna=False)
 
             if key_type == "integer":
+                warnings.warn(
+                    # GH#50617
+                    "Series.__setitem__ treating keys as positions is deprecated. "
+                    "In a future version, integer keys will always be treated "
+                    "as labels (consistent with DataFrame behavior). To set "
+                    "a value by position, use `ser.iloc[pos] = value`",
+                    FutureWarning,
+                    stacklevel=find_stack_level(),
+                )
                 self._set_values(key, value)
             else:
                 self._set_labels(key, value)

diff --git a/pandas/io/stata.py b/pandas/io/stata.py
@@ -2637,7 +2637,7 @@ def _prepare_pandas(self, data: DataFrame) -> None:
         )
         for key in self._convert_dates:
             new_type = _convert_datetime_to_stata_type(self._convert_dates[key])
-            dtypes[key] = np.dtype(new_type)
+            dtypes.iloc[key] = np.dtype(new_type)
 
         # Verify object arrays are strings and encode to bytes
         self._encode_strings()

diff --git a/pandas/tests/apply/test_frame_apply.py b/pandas/tests/apply/test_frame_apply.py
@@ -1385,7 +1385,7 @@ def sum_div2(s):
 def test_apply_getitem_axis_1():
     # GH 13427
     df = DataFrame({"a": [0, 1, 2], "b": [1, 2, 3]})
-    result = df[["a", "a"]].apply(lambda x: x[0] + x[1], axis=1)
+    result = df[["a", "a"]].apply(lambda x: x.iloc[0] + x.iloc[1], axis=1)
     expected = Series([0, 2, 4])
     tm.assert_series_equal(result, expected)
 

diff --git a/pandas/tests/copy_view/test_indexing.py b/pandas/tests/copy_view/test_indexing.py
@@ -802,7 +802,17 @@ def test_series_subset_set_with_indexer(
     s_orig = s.copy()
     subset = s[:]
 
-    indexer_si(subset)[indexer] = 0
+    warn = None
+    msg = "Series.__setitem__ treating keys as positions is deprecated"
+    if (
+        indexer_si is tm.setitem
+        and isinstance(indexer, np.ndarray)
+        and indexer.dtype.kind == "i"
+    ):
+        warn = FutureWarning
+
+    with tm.assert_produces_warning(warn, match=msg):
+        indexer_si(subset)[indexer] = 0
     expected = Series([0, 0, 3], index=["a", "b", "c"])
     tm.assert_series_equal(subset, expected)
 

diff --git a/pandas/tests/copy_view/test_interp_fillna.py b/pandas/tests/copy_view/test_interp_fillna.py
@@ -287,7 +287,7 @@ def test_fillna_ea_noop_shares_memory(
     if using_copy_on_write:
         assert np.shares_memory(get_array(df, "b"), get_array(df2, "b"))
         assert not df2._mgr._has_no_reference(1)
-    elif isinstance(df.dtypes[0], ArrowDtype):
+    elif isinstance(df.dtypes.iloc[0], ArrowDtype):
         # arrow is immutable, so no-ops do not need to copy underlying array
         assert np.shares_memory(get_array(df, "b"), get_array(df2, "b"))
     else:
@@ -317,7 +317,7 @@ def test_fillna_inplace_ea_noop_shares_memory(
         assert np.shares_memory(get_array(df, "b"), get_array(view, "b"))
         assert not df._mgr._has_no_reference(1)
         assert not view._mgr._has_no_reference(1)
-    elif isinstance(df.dtypes[0], ArrowDtype):
+    elif isinstance(df.dtypes.iloc[0], ArrowDtype):
         # arrow is immutable, so no-ops do not need to copy underlying array
         assert np.shares_memory(get_array(df, "b"), get_array(view, "b"))
     else:

diff --git a/pandas/tests/extension/base/getitem.py b/pandas/tests/extension/base/getitem.py
@@ -330,9 +330,11 @@ def test_get(self, data):
         result = s.get("Z")
         assert result is None
 
-        assert s.get(4) == s.iloc[4]
-        assert s.get(-1) == s.iloc[-1]
-        assert s.get(len(s)) is None
+        msg = "Series.__getitem__ treating keys as positions is deprecated"
+        with tm.assert_produces_warning(FutureWarning, match=msg):
+            assert s.get(4) == s.iloc[4]
+            assert s.get(-1) == s.iloc[-1]
+            assert s.get(len(s)) is None
 
         # GH 21257
         s = pd.Series(data)

diff --git a/pandas/tests/frame/indexing/test_indexing.py b/pandas/tests/frame/indexing/test_indexing.py
@@ -454,7 +454,7 @@ def test_setitem_corner(self, float_frame):
 
         # set existing column
         dm["A"] = "bar"
-        assert "bar" == dm["A"][0]
+        assert "bar" == dm["A"].iloc[0]
 
         dm = DataFrame(index=np.arange(3))
         dm["A"] = 1

diff --git a/pandas/tests/frame/methods/test_map.py b/pandas/tests/frame/methods/test_map.py
@@ -19,7 +19,7 @@ def test_map(float_frame):
     float_frame.map(type)
 
     # GH 465: function returning tuples
-    result = float_frame.map(lambda x: (x, x))["A"][0]
+    result = float_frame.map(lambda x: (x, x))["A"].iloc[0]
     assert isinstance(result, tuple)
 
 

diff --git a/pandas/tests/frame/methods/test_replace.py b/pandas/tests/frame/methods/test_replace.py
@@ -614,8 +614,8 @@ def test_replace_mixed3(self):
         result = df.replace(3, df.mean().to_dict())
         expected = df.copy().astype("float64")
         m = df.mean()
-        expected.iloc[0, 0] = m[0]
-        expected.iloc[1, 1] = m[1]
+        expected.iloc[0, 0] = m.iloc[0]
+        expected.iloc[1, 1] = m.iloc[1]
         tm.assert_frame_equal(result, expected)
 
     def test_replace_nullable_int_with_string_doesnt_cast(self):
@@ -1072,7 +1072,7 @@ def test_replace_period(self):
         assert set(df.fname.values) == set(d["fname"].keys())
 
         expected = DataFrame({"fname": [d["fname"][k] for k in df.fname.values]})
-        assert expected.dtypes[0] == "Period[M]"
+        assert expected.dtypes.iloc[0] == "Period[M]"
         result = df.replace(d)
         tm.assert_frame_equal(result, expected)
 

diff --git a/pandas/tests/frame/methods/test_values.py b/pandas/tests/frame/methods/test_values.py
@@ -38,9 +38,9 @@ def test_values_mixed_dtypes(self, float_frame, float_string_frame):
             for j, value in enumerate(row):
                 col = frame_cols[j]
                 if np.isnan(value):
-                    assert np.isnan(frame[col][i])
+                    assert np.isnan(frame[col].iloc[i])
                 else:
-                    assert value == frame[col][i]
+                    assert value == frame[col].iloc[i]
 
         # mixed type
         arr = float_string_frame[["foo", "A"]].values

diff --git a/pandas/tests/frame/test_reductions.py b/pandas/tests/frame/test_reductions.py
@@ -611,16 +611,16 @@ def test_operators_timedelta64(self):
 
         # min
         result = diffs.min()
-        assert result[0] == diffs.loc[0, "A"]
-        assert result[1] == diffs.loc[0, "B"]
+        assert result.iloc[0] == diffs.loc[0, "A"]
+        assert result.iloc[1] == diffs.loc[0, "B"]
 
         result = diffs.min(axis=1)
         assert (result == diffs.loc[0, "B"]).all()
 
         # max
         result = diffs.max()
-        assert result[0] == diffs.loc[2, "A"]
-        assert result[1] == diffs.loc[2, "B"]
+        assert result.iloc[0] == diffs.loc[2, "A"]
+        assert result.iloc[1] == diffs.loc[2, "B"]
 
         result = diffs.max(axis=1)
         assert (result == diffs["A"]).all()

diff --git a/pandas/tests/frame/test_stack_unstack.py b/pandas/tests/frame/test_stack_unstack.py
@@ -1075,7 +1075,7 @@ def test_stack_full_multiIndex(self):
             ),
             columns=Index(["B", "C"], name="Upper"),
         )
-        expected["B"] = expected["B"].astype(df.dtypes[0])
+        expected["B"] = expected["B"].astype(df.dtypes.iloc[0])
         tm.assert_frame_equal(result, expected)
 
     @pytest.mark.parametrize("ordered", [False, True])

diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py
@@ -1344,11 +1344,11 @@ def convert_force_pure(x):
 
     result = grouped.agg(convert_fast)
     assert result.dtype == np.object_
-    assert isinstance(result[0], Decimal)
+    assert isinstance(result.iloc[0], Decimal)
 
     result = grouped.agg(convert_force_pure)
     assert result.dtype == np.object_
-    assert isinstance(result[0], Decimal)
+    assert isinstance(result.iloc[0], Decimal)
 
 
 def test_groupby_dtype_inference_empty():
@@ -1967,8 +1967,8 @@ def get_categorical_invalid_expected():
         expected = DataFrame([], columns=[], index=idx)
         return expected
 
-    is_per = isinstance(df.dtypes[0], pd.PeriodDtype)
-    is_dt64 = df.dtypes[0].kind == "M"
+    is_per = isinstance(df.dtypes.iloc[0], pd.PeriodDtype)
+    is_dt64 = df.dtypes.iloc[0].kind == "M"
     is_cat = isinstance(values, Categorical)
 
     if isinstance(values, Categorical) and not values.ordered and op in ["min", "max"]:

diff --git a/pandas/tests/indexes/datetimes/test_misc.py b/pandas/tests/indexes/datetimes/test_misc.py
@@ -50,8 +50,8 @@ def test_datetimeindex_accessors(self):
             assert dti.dayofyear[0] == 1
             assert dti.dayofyear[120] == 121
 
-            assert dti.isocalendar().week[0] == 1
-            assert dti.isocalendar().week[120] == 18
+            assert dti.isocalendar().week.iloc[0] == 1
+            assert dti.isocalendar().week.iloc[120] == 18
 
             assert dti.quarter[0] == 1
             assert dti.quarter[120] == 2

diff --git a/pandas/tests/indexes/datetimes/test_partial_slicing.py b/pandas/tests/indexes/datetimes/test_partial_slicing.py
@@ -379,7 +379,7 @@ def test_partial_slice_requires_monotonicity(self):
         # Disallowed since 2.0 (GH 37819)
         ser = Series(np.arange(10), date_range("2014-01-01", periods=10))
 
-        nonmonotonic = ser[[3, 5, 4]]
+        nonmonotonic = ser.iloc[[3, 5, 4]]
         timestamp = Timestamp("2014-01-10")
         with pytest.raises(
             KeyError, match="Value based partial slicing on non-monotonic"