Revert set_index inplace and copy keyword changes (#48417)
* Revert "DEPR: inplace kwarg in set_index (#48115)"

This reverts commit 9de1f0b.

* Revert "ENH: set_index copy kwd (#48043)"

This reverts commit 9716fcb.

* fixup merge of whatsnew file
jorisvandenbossche committed Sep 14, 2022
1 parent 4fb83b0 commit a551f1b
Showing 24 changed files with 51 additions and 121 deletions.
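This revert restores the long-standing `inplace` keyword in `DataFrame.set_index` and removes the `copy` keyword that only existed on the 1.5 development branch. A minimal sketch of the two call styles involved (the `copy=` form is shown commented out because it is no longer accepted after this commit):

```python
import pandas as pd

df = pd.DataFrame({"a": [1, 2], "b": [3, 4]})

# Behaviour restored by this revert: inplace=True mutates df and returns None.
ret = df.set_index("a", inplace=True)
assert ret is None

# The reverted pre-release API asked callers to reassign instead, optionally
# skipping the defensive copy:
# df = df.set_index("a", copy=False)   # only valid before this revert
```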
5 changes: 3 additions & 2 deletions doc/source/user_guide/indexing.rst
@@ -1723,12 +1723,13 @@ the given columns to a MultiIndex:
frame
Other options in ``set_index`` allow you to not drop the index columns or to add
the index without creating a copy of the underlying data:
the index in-place (without creating a new object):

.. ipython:: python
data.set_index('c', drop=False)
data.set_index(['a', 'b'], copy=False)
data.set_index(['a', 'b'], inplace=True)
data
Reset the index
~~~~~~~~~~~~~~~
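The restored user-guide snippet refers to a `data` frame defined earlier on that page; a self-contained approximation of the example (the `data` construction here is an assumption for illustration):

```python
import pandas as pd

# Stand-in for the `data` frame used earlier in the indexing user guide.
data = pd.DataFrame({"a": ["bar", "bar", "foo", "foo"],
                     "b": ["one", "two", "one", "two"],
                     "c": ["z", "y", "x", "w"],
                     "d": [1.0, 2.0, 3.0, 4.0]})

# drop=False keeps 'c' as both the index and a regular column.
data.set_index("c", drop=False)

# inplace=True modifies `data` itself rather than returning a new DataFrame.
data.set_index(["a", "b"], inplace=True)
print(data)
```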
2 changes: 0 additions & 2 deletions doc/source/whatsnew/v1.5.0.rst
@@ -330,7 +330,6 @@ Other enhancements
- :meth:`DataFrame.quantile` gained a ``method`` argument that can accept ``table`` to evaluate multi-column quantiles (:issue:`43881`)
- :class:`Interval` now supports checking whether one interval is contained by another interval (:issue:`46613`)
- Added ``copy`` keyword to :meth:`Series.set_axis` and :meth:`DataFrame.set_axis` to allow user to set axis on a new object without necessarily copying the underlying data (:issue:`47932`)
- :meth:`DataFrame.set_index` now supports a ``copy`` keyword. If ``False``, the underlying data is not copied when a new :class:`DataFrame` is returned (:issue:`48043`)
- The method :meth:`.ExtensionArray.factorize` accepts ``use_na_sentinel=False`` for determining how null values are to be treated (:issue:`46601`)
- The ``Dockerfile`` now installs a dedicated ``pandas-dev`` virtual environment for pandas development instead of using the ``base`` environment (:issue:`48427`)

@@ -934,7 +933,6 @@ Other Deprecations
- Deprecated the ``inplace`` keyword in :meth:`DataFrame.set_axis` and :meth:`Series.set_axis`, use ``obj = obj.set_axis(..., copy=False)`` instead (:issue:`48130`)
- Deprecated producing a single element when iterating over a :class:`DataFrameGroupBy` or a :class:`SeriesGroupBy` that has been grouped by a list of length 1; A tuple of length one will be returned instead (:issue:`42795`)
- Fixed up warning message of deprecation of :meth:`MultiIndex.lesort_depth` as public method, as the message previously referred to :meth:`MultiIndex.is_lexsorted` instead (:issue:`38701`)
- Deprecated the ``inplace`` keyword in :meth:`DataFrame.set_index`, use ``df = df.set_index(..., copy=False)`` instead (:issue:`48115`)
- Deprecated the ``sort_columns`` argument in :meth:`DataFrame.plot` and :meth:`Series.plot` (:issue:`47563`).
- Deprecated positional arguments for all but the first argument of :meth:`DataFrame.to_stata` and :func:`read_stata`, use keyword arguments instead (:issue:`48128`).
- Deprecated the ``mangle_dupe_cols`` argument in :func:`read_csv`, :func:`read_fwf`, :func:`read_table` and :func:`read_excel`. The argument was never implemented, and a new argument where the renaming pattern can be specified will be added instead (:issue:`47718`)
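The `set_index` entries drop out of the release notes, while the related `set_axis` changes above remain in 1.5.0: a new `copy` keyword plus a deprecation of its `inplace` keyword. The recommended replacement pattern from those notes, assuming pandas 1.5.0:

```python
import pandas as pd

df = pd.DataFrame({"a": [1, 2], "b": [3, 4]})

# Instead of df.set_axis([...], axis=1, inplace=True), reassign and opt out
# of the defensive copy with copy=False (pandas 1.5 set_axis enhancement).
df = df.set_axis(["x", "y"], axis=1, copy=False)
```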
37 changes: 4 additions & 33 deletions pandas/core/frame.py
@@ -5869,9 +5869,8 @@ def set_index(
*,
drop: bool = ...,
append: bool = ...,
inplace: Literal[False] | lib.NoDefault = ...,
inplace: Literal[False] = ...,
verify_integrity: bool = ...,
copy: bool | lib.NoDefault = ...,
) -> DataFrame:
...

@@ -5884,7 +5883,6 @@ def set_index(
append: bool = ...,
inplace: Literal[True],
verify_integrity: bool = ...,
copy: bool | lib.NoDefault = ...,
) -> None:
...

@@ -5894,9 +5892,8 @@ def set_index(
keys,
drop: bool = True,
append: bool = False,
inplace: bool | lib.NoDefault = lib.no_default,
inplace: bool = False,
verify_integrity: bool = False,
copy: bool | lib.NoDefault = lib.no_default,
) -> DataFrame | None:
"""
Set the DataFrame index using existing columns.
@@ -5919,18 +5916,10 @@
Whether to append columns to existing index.
inplace : bool, default False
Whether to modify the DataFrame rather than creating a new one.
.. deprecated:: 1.5.0
verify_integrity : bool, default False
Check the new index for duplicates. Otherwise defer the check until
necessary. Setting to False will improve the performance of this
method.
copy : bool, default True
Whether to make a copy of the underlying data when returning a new
DataFrame.
.. versionadded:: 1.5.0
Returns
-------
@@ -5995,25 +5984,7 @@
3 9 7 2013 84
4 16 10 2014 31
"""
if inplace is not lib.no_default:
inplace = validate_bool_kwarg(inplace, "inplace")
warnings.warn(
"The 'inplace' keyword in DataFrame.set_index is deprecated "
"and will be removed in a future version. Use "
"`df = df.set_index(..., copy=False)` instead.",
FutureWarning,
stacklevel=find_stack_level(inspect.currentframe()),
)
else:
inplace = False

if inplace:
if copy is not lib.no_default:
raise ValueError("Cannot specify copy when inplace=True")
copy = False
elif copy is lib.no_default:
copy = True

inplace = validate_bool_kwarg(inplace, "inplace")
self._check_inplace_and_allows_duplicate_labels(inplace)
if not isinstance(keys, list):
keys = [keys]
@@ -6049,7 +6020,7 @@
if inplace:
frame = self
else:
frame = self.copy(deep=copy)
frame = self.copy()

arrays = []
names: list[Hashable] = []
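With the deprecation branch removed from `frame.py`, `set_index` is back to its classic contract: `inplace=False` returns a new (copied) DataFrame, `inplace=True` mutates and returns `None`, and `copy` is no longer an accepted keyword. A quick sketch of the restored behaviour:

```python
import pandas as pd

df = pd.DataFrame({"A": [1, 2], "B": [3, 4]})

new = df.set_index("A")        # returns a new DataFrame built from a copy
assert "A" in df.columns       # the original is untouched

ret = df.set_index("A", inplace=True)
assert ret is None             # in-place mutation, nothing returned
assert df.index.name == "A"

# After the revert there is no `copy` keyword, so this would raise a
# TypeError (unexpected keyword argument):
# df.set_index("B", copy=False)
```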
19 changes: 8 additions & 11 deletions pandas/core/reshape/merge.py
@@ -783,9 +783,9 @@ def get_result(self, copy: bool = True) -> DataFrame:
if self.indicator:
result = self._indicator_post_merge(result)

result = self._maybe_add_join_keys(result, left_indexer, right_indexer)
self._maybe_add_join_keys(result, left_indexer, right_indexer)

result = self._maybe_restore_index_levels(result)
self._maybe_restore_index_levels(result)

self._maybe_drop_cross_column(result, self._cross)

@@ -852,7 +852,7 @@ def _indicator_post_merge(self, result: DataFrame) -> DataFrame:
result = result.drop(labels=["_left_indicator", "_right_indicator"], axis=1)
return result

def _maybe_restore_index_levels(self, result: DataFrame) -> DataFrame:
def _maybe_restore_index_levels(self, result: DataFrame) -> None:
"""
Restore index levels specified as `on` parameters
@@ -870,7 +870,7 @@ def _maybe_restore_index_levels(self, result: DataFrame) -> DataFrame:
Returns
-------
DataFrame
None
"""
names_to_restore = []
for name, left_key, right_key in zip(
@@ -894,15 +894,14 @@ def _maybe_restore_index_levels(self, result: DataFrame) -> DataFrame:
names_to_restore.append(name)

if names_to_restore:
result = result.set_index(names_to_restore, copy=False)
return result
result.set_index(names_to_restore, inplace=True)

def _maybe_add_join_keys(
self,
result: DataFrame,
left_indexer: np.ndarray | None,
right_indexer: np.ndarray | None,
) -> DataFrame:
) -> None:

left_has_missing = None
right_has_missing = None
@@ -993,12 +992,11 @@
for level_name in result.index.names
]

result = result.set_index(idx_list, copy=False)
result.set_index(idx_list, inplace=True)
else:
result.index = Index(key_col, name=name)
else:
result.insert(i, name or f"key_{i}", key_col)
return result

def _get_join_indexers(self) -> tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]]:
"""return the join indexers"""
@@ -1771,8 +1769,7 @@ def get_result(self, copy: bool = True) -> DataFrame:
result = self._reindex_and_concat(
join_index, left_join_indexer, right_join_indexer, copy=copy
)

result = self._maybe_add_join_keys(result, left_indexer, right_indexer)
self._maybe_add_join_keys(result, left_indexer, right_indexer)

return result

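The merge internals switch back from returning a modified frame to mutating `result` in place and returning `None`, so the call sites above no longer reassign. A simplified sketch of that pattern (the helper name and data are illustrative, not the actual pandas internals):

```python
import pandas as pd

def restore_index_levels(result: pd.DataFrame, names) -> None:
    """Move `names` back into the index by mutating `result`; returns nothing."""
    if names:
        result.set_index(names, inplace=True)  # no return value, no reassignment

merged = pd.DataFrame({"key": ["a", "b"], "left": [1, 2], "right": [3, 4]})
restore_index_levels(merged, ["key"])
assert merged.index.name == "key"
```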
2 changes: 1 addition & 1 deletion pandas/io/parsers/arrow_parser_wrapper.py
@@ -117,7 +117,7 @@ def _finalize_output(self, frame: DataFrame) -> DataFrame:
# String case
if item not in frame.columns:
raise ValueError(f"Index {item} invalid")
frame = frame.set_index(self.index_col, drop=True, copy=False)
frame.set_index(self.index_col, drop=True, inplace=True)
# Clear names if headerless and no name given
if self.header is None and not multi_index_named:
frame.index.names = [None] * len(frame.index.names)
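The pyarrow CSV engine hits this code path whenever `index_col` is requested. A short sketch, assuming `pyarrow` is installed:

```python
import io
import pandas as pd

data = io.BytesIO(b"id,name,score\n1,a,10\n2,b,20\n")

# The arrow parser wrapper applies index_col via set_index after reading.
df = pd.read_csv(data, engine="pyarrow", index_col="id")
assert df.index.name == "id"
```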
2 changes: 1 addition & 1 deletion pandas/io/pytables.py
@@ -4673,7 +4673,7 @@ def read(
columns.insert(0, n)
s = super().read(where=where, columns=columns, start=start, stop=stop)
if is_multi_index:
s = s.set_index(self.levels, copy=False)
s.set_index(self.levels, inplace=True)

s = s.iloc[:, 0]

6 changes: 3 additions & 3 deletions pandas/io/sql.py
@@ -152,7 +152,7 @@ def _wrap_result(
frame = _parse_date_columns(frame, parse_dates)

if index_col is not None:
frame = frame.set_index(index_col, copy=False)
frame.set_index(index_col, inplace=True)

return frame

@@ -980,7 +980,7 @@ def _query_iterator(
self._harmonize_columns(parse_dates=parse_dates)

if self.index is not None:
self.frame = self.frame.set_index(self.index, copy=False)
self.frame.set_index(self.index, inplace=True)

yield self.frame

@@ -1021,7 +1021,7 @@ def read(
self._harmonize_columns(parse_dates=parse_dates)

if self.index is not None:
self.frame = self.frame.set_index(self.index, copy=False)
self.frame.set_index(self.index, inplace=True)

return self.frame

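Both SQL read paths apply `index_col` the same way. A minimal round trip using the standard library's sqlite3 driver to exercise `_wrap_result`:

```python
import sqlite3
import pandas as pd

con = sqlite3.connect(":memory:")
pd.DataFrame({"id": [1, 2], "val": [10.0, 20.0]}).to_sql("t", con, index=False)

# index_col is applied inside the SQL readers via set_index(..., inplace=True).
df = pd.read_sql("SELECT * FROM t", con, index_col="id")
assert df.index.name == "id"
```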
4 changes: 2 additions & 2 deletions pandas/tests/frame/methods/test_combine_first.py
@@ -394,12 +394,12 @@ def test_combine_first_string_dtype_only_na(self, nullable_string_dtype):
PerformanceWarning,
pa_version_under7p0 and nullable_string_dtype == "string[pyarrow]",
):
df = df.set_index(["a", "b"], copy=False)
df.set_index(["a", "b"], inplace=True)
with tm.maybe_produces_warning(
PerformanceWarning,
pa_version_under7p0 and nullable_string_dtype == "string[pyarrow]",
):
df2 = df2.set_index(["a", "b"], copy=False)
df2.set_index(["a", "b"], inplace=True)
result = df.combine_first(df2)
with tm.maybe_produces_warning(
PerformanceWarning,
26 changes: 1 addition & 25 deletions pandas/tests/frame/methods/test_set_index.py
@@ -25,27 +25,6 @@


class TestSetIndex:
def test_set_index_copy(self):
# GH#48043
df = DataFrame({"A": [1, 2], "B": [3, 4], "C": [5, 6]})
expected = DataFrame({"B": [3, 4], "C": [5, 6]}, index=Index([1, 2], name="A"))

res = df.set_index("A", copy=True)
tm.assert_frame_equal(res, expected)
assert not any(tm.shares_memory(df[col], res[col]) for col in res.columns)

res = df.set_index("A", copy=False)
tm.assert_frame_equal(res, expected)
assert all(tm.shares_memory(df[col], res[col]) for col in res.columns)

msg = "Cannot specify copy when inplace=True"
with pytest.raises(ValueError, match=msg):
with tm.assert_produces_warning(FutureWarning, match="The 'inplace'"):
df.set_index("A", inplace=True, copy=True)
with pytest.raises(ValueError, match=msg):
with tm.assert_produces_warning(FutureWarning, match="The 'inplace'"):
df.set_index("A", inplace=True, copy=False)

def test_set_index_multiindex(self):
# segfault in GH#3308
d = {"t1": [2, 2.5, 3], "t2": [4, 5, 6]}
@@ -199,10 +178,7 @@ def test_set_index_drop_inplace(self, frame_of_index_cols, drop, inplace, keys):

if inplace:
result = df.copy()
with tm.assert_produces_warning(
FutureWarning, match="The 'inplace' keyword"
):
return_value = result.set_index(keys, drop=drop, inplace=True)
return_value = result.set_index(keys, drop=drop, inplace=True)
assert return_value is None
else:
result = df.set_index(keys, drop=drop)
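The `copy`-specific test goes away with the keyword, and the parametrized drop/inplace test calls `inplace=True` without expecting a warning again. The equivalence it checks looks roughly like this:

```python
import pandas as pd
import pandas.testing as tm

df = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})

inplace_df = df.copy()
ret = inplace_df.set_index("a", drop=True, inplace=True)
assert ret is None

# Same outcome as the non-mutating call.
tm.assert_frame_equal(inplace_df, df.set_index("a", drop=True))
```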
3 changes: 1 addition & 2 deletions pandas/tests/frame/test_api.py
@@ -244,8 +244,7 @@ def _check_f(base, f):

# set_index
f = lambda x: x.set_index("a", inplace=True)
with tm.assert_produces_warning(FutureWarning, match="The 'inplace' keyword"):
_check_f(data.copy(), f)
_check_f(data.copy(), f)

# reset_index
f = lambda x: x.reset_index(inplace=True)
18 changes: 6 additions & 12 deletions pandas/tests/frame/test_query_eval.py
@@ -436,8 +436,7 @@ def test_date_index_query(self):
df = DataFrame(np.random.randn(n, 3))
df["dates1"] = date_range("1/1/2012", periods=n)
df["dates3"] = date_range("1/1/2014", periods=n)
with tm.assert_produces_warning(FutureWarning, match="The 'inplace' keyword"):
return_value = df.set_index("dates1", inplace=True, drop=True)
return_value = df.set_index("dates1", inplace=True, drop=True)
assert return_value is None
res = df.query("index < 20130101 < dates3", engine=engine, parser=parser)
expec = df[(df.index < "20130101") & ("20130101" < df.dates3)]
@@ -450,8 +449,7 @@ def test_date_index_query_with_NaT(self):
df["dates1"] = date_range("1/1/2012", periods=n)
df["dates3"] = date_range("1/1/2014", periods=n)
df.iloc[0, 0] = pd.NaT
with tm.assert_produces_warning(FutureWarning, match="The 'inplace' keyword"):
return_value = df.set_index("dates1", inplace=True, drop=True)
return_value = df.set_index("dates1", inplace=True, drop=True)
assert return_value is None
res = df.query("index < 20130101 < dates3", engine=engine, parser=parser)
expec = df[(df.index < "20130101") & ("20130101" < df.dates3)]
@@ -465,8 +463,7 @@ def test_date_index_query_with_NaT_duplicates(self):
d["dates3"] = date_range("1/1/2014", periods=n)
df = DataFrame(d)
df.loc[np.random.rand(n) > 0.5, "dates1"] = pd.NaT
with tm.assert_produces_warning(FutureWarning, match="The 'inplace' keyword"):
return_value = df.set_index("dates1", inplace=True, drop=True)
return_value = df.set_index("dates1", inplace=True, drop=True)
assert return_value is None
res = df.query("dates1 < 20130101 < dates3", engine=engine, parser=parser)
expec = df[(df.index.to_series() < "20130101") & ("20130101" < df.dates3)]
@@ -797,8 +794,7 @@ def test_date_index_query(self):
df = DataFrame(np.random.randn(n, 3))
df["dates1"] = date_range("1/1/2012", periods=n)
df["dates3"] = date_range("1/1/2014", periods=n)
with tm.assert_produces_warning(FutureWarning, match="The 'inplace' keyword"):
return_value = df.set_index("dates1", inplace=True, drop=True)
return_value = df.set_index("dates1", inplace=True, drop=True)
assert return_value is None
res = df.query(
"(index < 20130101) & (20130101 < dates3)", engine=engine, parser=parser
@@ -813,8 +809,7 @@ def test_date_index_query_with_NaT(self):
df["dates1"] = date_range("1/1/2012", periods=n)
df["dates3"] = date_range("1/1/2014", periods=n)
df.iloc[0, 0] = pd.NaT
with tm.assert_produces_warning(FutureWarning, match="The 'inplace' keyword"):
return_value = df.set_index("dates1", inplace=True, drop=True)
return_value = df.set_index("dates1", inplace=True, drop=True)
assert return_value is None
res = df.query(
"(index < 20130101) & (20130101 < dates3)", engine=engine, parser=parser
@@ -829,8 +824,7 @@ def test_date_index_query_with_NaT_duplicates(self):
df["dates1"] = date_range("1/1/2012", periods=n)
df["dates3"] = date_range("1/1/2014", periods=n)
df.loc[np.random.rand(n) > 0.5, "dates1"] = pd.NaT
with tm.assert_produces_warning(FutureWarning, match="The 'inplace' keyword"):
return_value = df.set_index("dates1", inplace=True, drop=True)
return_value = df.set_index("dates1", inplace=True, drop=True)
assert return_value is None
msg = r"'BoolOp' nodes are not implemented"
with pytest.raises(NotImplementedError, match=msg):
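These query tests build a datetime index in place and then filter against it. A condensed, standalone version of the pattern (using the default engine and parser rather than the test fixtures):

```python
import numpy as np
import pandas as pd

n = 10
df = pd.DataFrame(np.random.randn(n, 3))
df["dates1"] = pd.date_range("1/1/2012", periods=n)
df["dates3"] = pd.date_range("1/1/2014", periods=n)

ret = df.set_index("dates1", inplace=True, drop=True)
assert ret is None

# Compare the index and a column against a date literal, as the tests do.
res = df.query("(index < 20130101) & (20130101 < dates3)")
expected = df[(df.index < "20130101") & ("20130101" < df.dates3)]
pd.testing.assert_frame_equal(res, expected)
```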
2 changes: 1 addition & 1 deletion pandas/tests/groupby/test_apply.py
@@ -678,7 +678,7 @@ def test_apply_groupby_datetimeindex():
result = df.groupby("Name").sum()

expected = DataFrame({"Name": ["A", "B", "C"], "Value": [10, 50, 90]})
expected = expected.set_index("Name", copy=False)
expected.set_index("Name", inplace=True)

tm.assert_frame_equal(result, expected)

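The groupby tests construct their expected frames the same way, building a flat frame and then setting the group key as the index in place. Roughly:

```python
import pandas as pd
import pandas.testing as tm

df = pd.DataFrame({"Name": ["A", "A", "B", "C"], "Value": [4, 6, 50, 90]})
result = df.groupby("Name").sum()

expected = pd.DataFrame({"Name": ["A", "B", "C"], "Value": [10, 50, 90]})
expected.set_index("Name", inplace=True)

tm.assert_frame_equal(result, expected)
```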
4 changes: 2 additions & 2 deletions pandas/tests/groupby/test_function.py
@@ -98,7 +98,7 @@ def test_builtins_apply(keys, f):

if f != sum:
expected = gb.agg(fname).reset_index()
expected = expected.set_index(keys, copy=False, drop=False)
expected.set_index(keys, inplace=True, drop=False)
tm.assert_frame_equal(result, expected, check_dtype=False)

tm.assert_series_equal(getattr(result, fname)(), getattr(df, fname)())
@@ -454,7 +454,7 @@ def test_groupby_non_arithmetic_agg_types(dtype, method, data):
df_out = DataFrame(exp)

df_out["b"] = df_out.b.astype(out_type)
df_out = df_out.set_index("a", copy=False)
df_out.set_index("a", inplace=True)

grpd = df.groupby("a")
t = getattr(grpd, method)(*data["args"])
