Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

DEPR: inplace kwarg in set_index #48115

Merged
merged 3 commits into from
Aug 17, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
5 changes: 2 additions & 3 deletions doc/source/user_guide/indexing.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1723,13 +1723,12 @@ the given columns to a MultiIndex:
frame
Other options in ``set_index`` allow you to not drop the index columns or to add
the index in-place (without creating a new object):
the index without creating a copy of the underlying data:

.. ipython:: python
data.set_index('c', drop=False)
data.set_index(['a', 'b'], inplace=True)
data
data.set_index(['a', 'b'], copy=False)
Reset the index
~~~~~~~~~~~~~~~
Expand Down
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.5.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -849,6 +849,7 @@ Other Deprecations
- Deprecated unused arguments ``encoding`` and ``verbose`` in :meth:`Series.to_excel` and :meth:`DataFrame.to_excel` (:issue:`47912`)
- Deprecated producing a single element when iterating over a :class:`DataFrameGroupBy` or a :class:`SeriesGroupBy` that has been grouped by a list of length 1; a tuple of length one will be returned instead (:issue:`42795`)
- Fixed up warning message of deprecation of :meth:`MultiIndex.lexsort_depth` as public method, as the message previously referred to :meth:`MultiIndex.is_lexsorted` instead (:issue:`38701`)
- Deprecated the ``inplace`` keyword in :meth:`DataFrame.set_index`, use ``df = df.set_index(..., copy=False)`` instead (:issue:`48115`)
- Deprecated the ``sort_columns`` argument in :meth:`DataFrame.plot` and :meth:`Series.plot` (:issue:`47563`).

.. ---------------------------------------------------------------------------
Expand Down
20 changes: 17 additions & 3 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -5818,7 +5818,7 @@ def set_index(
*,
drop: bool = ...,
append: bool = ...,
inplace: Literal[False] = ...,
inplace: Literal[False] | lib.NoDefault = ...,
verify_integrity: bool = ...,
copy: bool | lib.NoDefault = ...,
) -> DataFrame:
Expand All @@ -5843,7 +5843,7 @@ def set_index(
keys,
drop: bool = True,
append: bool = False,
inplace: bool = False,
inplace: bool | lib.NoDefault = lib.no_default,
verify_integrity: bool = False,
copy: bool | lib.NoDefault = lib.no_default,
) -> DataFrame | None:
Expand All @@ -5868,6 +5868,9 @@ def set_index(
Whether to append columns to existing index.
inplace : bool, default False
Whether to modify the DataFrame rather than creating a new one.

.. deprecated:: 1.5.0

verify_integrity : bool, default False
Check the new index for duplicates. Otherwise defer the check until
necessary. Setting to False will improve the performance of this
Expand Down Expand Up @@ -5941,7 +5944,18 @@ def set_index(
3 9 7 2013 84
4 16 10 2014 31
"""
inplace = validate_bool_kwarg(inplace, "inplace")
if inplace is not lib.no_default:
inplace = validate_bool_kwarg(inplace, "inplace")
warnings.warn(
"The 'inplace' keyword in DataFrame.set_index is deprecated "
"and will be removed in a future version. Use "
"`df = df.set_index(..., copy=False)` instead.",
FutureWarning,
stacklevel=find_stack_level(inspect.currentframe()),
)
else:
inplace = False

if inplace:
if copy is not lib.no_default:
raise ValueError("Cannot specify copy when inplace=True")
Expand Down
19 changes: 11 additions & 8 deletions pandas/core/reshape/merge.py
Original file line number Diff line number Diff line change
Expand Up @@ -782,9 +782,9 @@ def get_result(self, copy: bool = True) -> DataFrame:
if self.indicator:
result = self._indicator_post_merge(result)

self._maybe_add_join_keys(result, left_indexer, right_indexer)
result = self._maybe_add_join_keys(result, left_indexer, right_indexer)

self._maybe_restore_index_levels(result)
result = self._maybe_restore_index_levels(result)

self._maybe_drop_cross_column(result, self._cross)

Expand Down Expand Up @@ -851,7 +851,7 @@ def _indicator_post_merge(self, result: DataFrame) -> DataFrame:
result = result.drop(labels=["_left_indicator", "_right_indicator"], axis=1)
return result

def _maybe_restore_index_levels(self, result: DataFrame) -> None:
def _maybe_restore_index_levels(self, result: DataFrame) -> DataFrame:
"""
Restore index levels specified as `on` parameters

Expand All @@ -869,7 +869,7 @@ def _maybe_restore_index_levels(self, result: DataFrame) -> None:

Returns
-------
None
DataFrame
"""
names_to_restore = []
for name, left_key, right_key in zip(
Expand All @@ -893,14 +893,15 @@ def _maybe_restore_index_levels(self, result: DataFrame) -> None:
names_to_restore.append(name)

if names_to_restore:
result.set_index(names_to_restore, inplace=True)
result = result.set_index(names_to_restore, copy=False)
return result

def _maybe_add_join_keys(
self,
result: DataFrame,
left_indexer: np.ndarray | None,
right_indexer: np.ndarray | None,
) -> None:
) -> DataFrame:

left_has_missing = None
right_has_missing = None
Expand Down Expand Up @@ -996,11 +997,12 @@ def _maybe_add_join_keys(
for level_name in result.index.names
]

result.set_index(idx_list, inplace=True)
result = result.set_index(idx_list, copy=False)
else:
result.index = Index(key_col, name=name)
else:
result.insert(i, name or f"key_{i}", key_col)
return result

def _get_join_indexers(self) -> tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]]:
"""return the join indexers"""
Expand Down Expand Up @@ -1768,7 +1770,8 @@ def get_result(self, copy: bool = True) -> DataFrame:
result = self._reindex_and_concat(
join_index, left_join_indexer, right_join_indexer, copy=copy
)
self._maybe_add_join_keys(result, left_indexer, right_indexer)

result = self._maybe_add_join_keys(result, left_indexer, right_indexer)

return result

Expand Down
2 changes: 1 addition & 1 deletion pandas/io/parsers/arrow_parser_wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,7 @@ def _finalize_output(self, frame: DataFrame) -> DataFrame:
# String case
if item not in frame.columns:
raise ValueError(f"Index {item} invalid")
frame.set_index(self.index_col, drop=True, inplace=True)
frame = frame.set_index(self.index_col, drop=True, copy=False)
# Clear names if headerless and no name given
if self.header is None and not multi_index_named:
frame.index.names = [None] * len(frame.index.names)
Expand Down
2 changes: 1 addition & 1 deletion pandas/io/pytables.py
Original file line number Diff line number Diff line change
Expand Up @@ -4663,7 +4663,7 @@ def read(
columns.insert(0, n)
s = super().read(where=where, columns=columns, start=start, stop=stop)
if is_multi_index:
s.set_index(self.levels, inplace=True)
s = s.set_index(self.levels, copy=False)

s = s.iloc[:, 0]

Expand Down
6 changes: 3 additions & 3 deletions pandas/io/sql.py
Original file line number Diff line number Diff line change
Expand Up @@ -152,7 +152,7 @@ def _wrap_result(
frame = _parse_date_columns(frame, parse_dates)

if index_col is not None:
frame.set_index(index_col, inplace=True)
frame = frame.set_index(index_col, copy=False)

return frame

Expand Down Expand Up @@ -979,7 +979,7 @@ def _query_iterator(
self._harmonize_columns(parse_dates=parse_dates)

if self.index is not None:
self.frame.set_index(self.index, inplace=True)
self.frame = self.frame.set_index(self.index, copy=False)

yield self.frame

Expand Down Expand Up @@ -1020,7 +1020,7 @@ def read(
self._harmonize_columns(parse_dates=parse_dates)

if self.index is not None:
self.frame.set_index(self.index, inplace=True)
self.frame = self.frame.set_index(self.index, copy=False)

return self.frame

Expand Down
4 changes: 2 additions & 2 deletions pandas/tests/frame/methods/test_combine_first.py
Original file line number Diff line number Diff line change
Expand Up @@ -387,8 +387,8 @@ def test_combine_first_string_dtype_only_na(self, nullable_string_dtype):
{"a": ["962", "85"], "b": [pd.NA] * 2}, dtype=nullable_string_dtype
)
df2 = DataFrame({"a": ["85"], "b": [pd.NA]}, dtype=nullable_string_dtype)
df.set_index(["a", "b"], inplace=True)
df2.set_index(["a", "b"], inplace=True)
df = df.set_index(["a", "b"], copy=False)
df2 = df2.set_index(["a", "b"], copy=False)
result = df.combine_first(df2)
expected = DataFrame(
{"a": ["962", "85"], "b": [pd.NA] * 2}, dtype=nullable_string_dtype
Expand Down
11 changes: 8 additions & 3 deletions pandas/tests/frame/methods/test_set_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,9 +40,11 @@ def test_set_index_copy(self):

msg = "Cannot specify copy when inplace=True"
with pytest.raises(ValueError, match=msg):
df.set_index("A", inplace=True, copy=True)
with tm.assert_produces_warning(FutureWarning, match="The 'inplace'"):
df.set_index("A", inplace=True, copy=True)
with pytest.raises(ValueError, match=msg):
df.set_index("A", inplace=True, copy=False)
with tm.assert_produces_warning(FutureWarning, match="The 'inplace'"):
df.set_index("A", inplace=True, copy=False)

def test_set_index_multiindex(self):
# segfault in GH#3308
Expand Down Expand Up @@ -197,7 +199,10 @@ def test_set_index_drop_inplace(self, frame_of_index_cols, drop, inplace, keys):

if inplace:
result = df.copy()
return_value = result.set_index(keys, drop=drop, inplace=True)
with tm.assert_produces_warning(
FutureWarning, match="The 'inplace' keyword"
):
return_value = result.set_index(keys, drop=drop, inplace=True)
assert return_value is None
else:
result = df.set_index(keys, drop=drop)
Expand Down
3 changes: 2 additions & 1 deletion pandas/tests/frame/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -244,7 +244,8 @@ def _check_f(base, f):

# set_index
f = lambda x: x.set_index("a", inplace=True)
_check_f(data.copy(), f)
with tm.assert_produces_warning(FutureWarning, match="The 'inplace' keyword"):
_check_f(data.copy(), f)

# reset_index
f = lambda x: x.reset_index(inplace=True)
Expand Down
18 changes: 12 additions & 6 deletions pandas/tests/frame/test_query_eval.py
Original file line number Diff line number Diff line change
Expand Up @@ -436,7 +436,8 @@ def test_date_index_query(self):
df = DataFrame(np.random.randn(n, 3))
df["dates1"] = date_range("1/1/2012", periods=n)
df["dates3"] = date_range("1/1/2014", periods=n)
return_value = df.set_index("dates1", inplace=True, drop=True)
with tm.assert_produces_warning(FutureWarning, match="The 'inplace' keyword"):
return_value = df.set_index("dates1", inplace=True, drop=True)
assert return_value is None
res = df.query("index < 20130101 < dates3", engine=engine, parser=parser)
expec = df[(df.index < "20130101") & ("20130101" < df.dates3)]
Expand All @@ -449,7 +450,8 @@ def test_date_index_query_with_NaT(self):
df["dates1"] = date_range("1/1/2012", periods=n)
df["dates3"] = date_range("1/1/2014", periods=n)
df.iloc[0, 0] = pd.NaT
return_value = df.set_index("dates1", inplace=True, drop=True)
with tm.assert_produces_warning(FutureWarning, match="The 'inplace' keyword"):
return_value = df.set_index("dates1", inplace=True, drop=True)
assert return_value is None
res = df.query("index < 20130101 < dates3", engine=engine, parser=parser)
expec = df[(df.index < "20130101") & ("20130101" < df.dates3)]
Expand All @@ -463,7 +465,8 @@ def test_date_index_query_with_NaT_duplicates(self):
d["dates3"] = date_range("1/1/2014", periods=n)
df = DataFrame(d)
df.loc[np.random.rand(n) > 0.5, "dates1"] = pd.NaT
return_value = df.set_index("dates1", inplace=True, drop=True)
with tm.assert_produces_warning(FutureWarning, match="The 'inplace' keyword"):
return_value = df.set_index("dates1", inplace=True, drop=True)
assert return_value is None
res = df.query("dates1 < 20130101 < dates3", engine=engine, parser=parser)
expec = df[(df.index.to_series() < "20130101") & ("20130101" < df.dates3)]
Expand Down Expand Up @@ -794,7 +797,8 @@ def test_date_index_query(self):
df = DataFrame(np.random.randn(n, 3))
df["dates1"] = date_range("1/1/2012", periods=n)
df["dates3"] = date_range("1/1/2014", periods=n)
return_value = df.set_index("dates1", inplace=True, drop=True)
with tm.assert_produces_warning(FutureWarning, match="The 'inplace' keyword"):
return_value = df.set_index("dates1", inplace=True, drop=True)
assert return_value is None
res = df.query(
"(index < 20130101) & (20130101 < dates3)", engine=engine, parser=parser
Expand All @@ -809,7 +813,8 @@ def test_date_index_query_with_NaT(self):
df["dates1"] = date_range("1/1/2012", periods=n)
df["dates3"] = date_range("1/1/2014", periods=n)
df.iloc[0, 0] = pd.NaT
return_value = df.set_index("dates1", inplace=True, drop=True)
with tm.assert_produces_warning(FutureWarning, match="The 'inplace' keyword"):
return_value = df.set_index("dates1", inplace=True, drop=True)
assert return_value is None
res = df.query(
"(index < 20130101) & (20130101 < dates3)", engine=engine, parser=parser
Expand All @@ -824,7 +829,8 @@ def test_date_index_query_with_NaT_duplicates(self):
df["dates1"] = date_range("1/1/2012", periods=n)
df["dates3"] = date_range("1/1/2014", periods=n)
df.loc[np.random.rand(n) > 0.5, "dates1"] = pd.NaT
return_value = df.set_index("dates1", inplace=True, drop=True)
with tm.assert_produces_warning(FutureWarning, match="The 'inplace' keyword"):
return_value = df.set_index("dates1", inplace=True, drop=True)
assert return_value is None
msg = r"'BoolOp' nodes are not implemented"
with pytest.raises(NotImplementedError, match=msg):
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/groupby/test_apply.py
Original file line number Diff line number Diff line change
Expand Up @@ -678,7 +678,7 @@ def test_apply_groupby_datetimeindex():
result = df.groupby("Name").sum()

expected = DataFrame({"Name": ["A", "B", "C"], "Value": [10, 50, 90]})
expected.set_index("Name", inplace=True)
expected = expected.set_index("Name", copy=False)

tm.assert_frame_equal(result, expected)

Expand Down
4 changes: 2 additions & 2 deletions pandas/tests/groupby/test_function.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ def test_builtins_apply(keys, f):

if f != sum:
expected = gb.agg(fname).reset_index()
expected.set_index(keys, inplace=True, drop=False)
expected = expected.set_index(keys, copy=False, drop=False)
tm.assert_frame_equal(result, expected, check_dtype=False)

tm.assert_series_equal(getattr(result, fname)(), getattr(df, fname)())
Expand Down Expand Up @@ -454,7 +454,7 @@ def test_groupby_non_arithmetic_agg_types(dtype, method, data):
df_out = DataFrame(exp)

df_out["b"] = df_out.b.astype(out_type)
df_out.set_index("a", inplace=True)
df_out = df_out.set_index("a", copy=False)

grpd = df.groupby("a")
t = getattr(grpd, method)(*data["args"])
Expand Down
4 changes: 2 additions & 2 deletions pandas/tests/indexes/multi/test_reshape.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ def test_insert(idx):
idx.insert(0, ("foo2",))

left = pd.DataFrame([["a", "b", 0], ["b", "d", 1]], columns=["1st", "2nd", "3rd"])
left.set_index(["1st", "2nd"], inplace=True)
left = left.set_index(["1st", "2nd"], copy=False)
ts = left["3rd"].copy(deep=True)

left.loc[("b", "x"), "3rd"] = 2
Expand Down Expand Up @@ -65,7 +65,7 @@ def test_insert(idx):
],
columns=["1st", "2nd", "3rd"],
)
right.set_index(["1st", "2nd"], inplace=True)
right = right.set_index(["1st", "2nd"], copy=False)
# FIXME data types changes to float because
# of intermediate nan insertion;
tm.assert_frame_equal(left, right, check_dtype=False)
Expand Down
7 changes: 5 additions & 2 deletions pandas/tests/indexing/multiindex/test_indexing_slow.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,15 +60,18 @@ def validate(mi, df, key):
assert key[: i + 1] in mi.index
right = df[mask].copy()

msg = "The 'inplace' keyword in DataFrame.set_index is deprecated"
if i + 1 != len(key): # partial key
return_value = right.drop(cols[: i + 1], axis=1, inplace=True)
assert return_value is None
return_value = right.set_index(cols[i + 1 : -1], inplace=True)
with tm.assert_produces_warning(FutureWarning, match=msg):
return_value = right.set_index(cols[i + 1 : -1], inplace=True)
assert return_value is None
tm.assert_frame_equal(mi.loc[key[: i + 1]], right)

else: # full key
return_value = right.set_index(cols[:-1], inplace=True)
with tm.assert_produces_warning(FutureWarning, match=msg):
return_value = right.set_index(cols[:-1], inplace=True)
assert return_value is None
if len(right) == 1: # single hit
right = Series(
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/indexing/multiindex/test_multiindex.py
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,7 @@ def test_multiindex_complex(self):
"z": non_complex_data,
}
)
result.set_index(["x", "y"], inplace=True)
result = result.set_index(["x", "y"], copy=False)
expected = DataFrame(
{"z": non_complex_data},
index=MultiIndex.from_arrays(
Expand Down
4 changes: 2 additions & 2 deletions pandas/tests/io/pytables/test_append.py
Original file line number Diff line number Diff line change
Expand Up @@ -137,7 +137,7 @@ def test_append_series(setup_path):
mi["B"] = np.arange(len(mi))
mi["C"] = "foo"
mi.loc[3:5, "C"] = "bar"
mi.set_index(["C", "B"], inplace=True)
mi = mi.set_index(["C", "B"], copy=False)
s = mi.stack()
s.index = s.index.droplevel(2)
store.append("mi", s)
Expand Down Expand Up @@ -326,7 +326,7 @@ def test_append_with_different_block_ordering(setup_path):
a = df.pop("A")
df["A"] = a

df.set_index("index", inplace=True)
df = df.set_index("index", copy=False)

store.append("df", df)

Expand Down