Skip to content

Commit

Permalink
DEPR: Index.reindex with duplicate index (#42568)
Browse files Browse the repository at this point in the history
  • Loading branch information
jbrockmendel committed Aug 8, 2021
1 parent e042219 commit 8fb347f
Show file tree
Hide file tree
Showing 10 changed files with 49 additions and 16 deletions.
1 change: 1 addition & 0 deletions doc/source/user_guide/duplicates.rst
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ duplicates present. The output can't be determined, and so pandas raises.

.. ipython:: python
:okexcept:
:okwarning:
s1 = pd.Series([0, 1, 2], index=["a", "b", "b"])
s1.reindex(["a", "b", "c"])
Expand Down
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.4.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -162,6 +162,7 @@ Deprecations
- Deprecated ignoring missing labels when indexing with a sequence of labels on a level of a MultiIndex (:issue:`42351`)
- Creating an empty Series without a dtype will now raise a more visible ``FutureWarning`` instead of a ``DeprecationWarning`` (:issue:`30017`)
- Deprecated the 'kind' argument in :meth:`Index.get_slice_bound`, :meth:`Index.slice_indexer`, :meth:`Index.slice_locs`; in a future version passing 'kind' will raise (:issue:`42857`)
- Deprecated :meth:`Index.reindex` with a non-unique index; in a future version this will raise (:issue:`42568`)
-

.. ---------------------------------------------------------------------------
Expand Down
9 changes: 9 additions & 0 deletions pandas/core/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -3915,6 +3915,15 @@ def reindex(
)
indexer, _ = self.get_indexer_non_unique(target)

if not self.is_unique:
# GH#42568
warnings.warn(
"reindexing with a non-unique Index is deprecated and "
"will raise in a future version",
FutureWarning,
stacklevel=2,
)

target = self._wrap_reindex_result(target, indexer, preserve_names)
return target, indexer

Expand Down
8 changes: 8 additions & 0 deletions pandas/core/indexes/category.py
Original file line number Diff line number Diff line change
Expand Up @@ -426,6 +426,14 @@ def reindex(
missing = np.array([], dtype=np.intp)
else:
indexer, missing = self.get_indexer_non_unique(target)
if not self.is_unique:
# GH#42568
warnings.warn(
"reindexing with a non-unique Index is deprecated and will "
"raise in a future version",
FutureWarning,
stacklevel=2,
)

if len(self) and indexer is not None:
new_target = self.take(indexer)
Expand Down
3 changes: 2 additions & 1 deletion pandas/tests/frame/indexing/test_getitem.py
Original file line number Diff line number Diff line change
Expand Up @@ -301,7 +301,8 @@ def test_getitem_boolean_frame_unaligned_with_duplicate_columns(self, df_dup_col
df = df_dup_cols
msg = "cannot reindex on an axis with duplicate labels"
with pytest.raises(ValueError, match=msg):
df[df.A > 6]
with tm.assert_produces_warning(FutureWarning, match="non-unique"):
df[df.A > 6]

def test_getitem_boolean_series_with_duplicate_columns(self, df_dup_cols):
# boolean indexing
Expand Down
3 changes: 2 additions & 1 deletion pandas/tests/frame/indexing/test_setitem.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,8 @@ def test_setitem_error_msmgs(self):
)
msg = "cannot reindex on an axis with duplicate labels"
with pytest.raises(ValueError, match=msg):
df["newcol"] = ser
with tm.assert_produces_warning(FutureWarning, match="non-unique"):
df["newcol"] = ser

# GH 4107, more descriptive error message
df = DataFrame(np.random.randint(0, 2, (4, 4)), columns=["a", "b", "c", "d"])
Expand Down
12 changes: 8 additions & 4 deletions pandas/tests/frame/methods/test_reindex.py
Original file line number Diff line number Diff line change
Expand Up @@ -674,7 +674,8 @@ def test_reindex_dups(self):
# reindex fails
msg = "cannot reindex on an axis with duplicate labels"
with pytest.raises(ValueError, match=msg):
df.reindex(index=list(range(len(df))))
with tm.assert_produces_warning(FutureWarning, match="non-unique"):
df.reindex(index=list(range(len(df))))

def test_reindex_with_duplicate_columns(self):

Expand All @@ -684,9 +685,11 @@ def test_reindex_with_duplicate_columns(self):
)
msg = "cannot reindex on an axis with duplicate labels"
with pytest.raises(ValueError, match=msg):
df.reindex(columns=["bar"])
with tm.assert_produces_warning(FutureWarning, match="non-unique"):
df.reindex(columns=["bar"])
with pytest.raises(ValueError, match=msg):
df.reindex(columns=["bar", "foo"])
with tm.assert_produces_warning(FutureWarning, match="non-unique"):
df.reindex(columns=["bar", "foo"])

def test_reindex_axis_style(self):
# https://github.com/pandas-dev/pandas/issues/12392
Expand Down Expand Up @@ -958,7 +961,8 @@ def test_reindex_with_categoricalindex(self):
# passed duplicate indexers are not allowed
msg = "cannot reindex on an axis with duplicate labels"
with pytest.raises(ValueError, match=msg):
df2.reindex(["a", "b"])
with tm.assert_produces_warning(FutureWarning, match="non-unique"):
df2.reindex(["a", "b"])

# args NotImplemented ATM
msg = r"argument {} is not implemented for CategoricalIndex\.reindex"
Expand Down
22 changes: 14 additions & 8 deletions pandas/tests/indexes/categorical/test_reindex.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,26 +13,32 @@

class TestReindex:
def test_reindex_dtype(self):
c = CategoricalIndex(["a", "b", "c", "a"])
res, indexer = c.reindex(["a", "c"])
# GH#11586
ci = CategoricalIndex(["a", "b", "c", "a"])
with tm.assert_produces_warning(FutureWarning, match="non-unique"):
res, indexer = ci.reindex(["a", "c"])

tm.assert_index_equal(res, Index(["a", "a", "c"]), exact=True)
tm.assert_numpy_array_equal(indexer, np.array([0, 3, 2], dtype=np.intp))

c = CategoricalIndex(["a", "b", "c", "a"])
res, indexer = c.reindex(Categorical(["a", "c"]))
ci = CategoricalIndex(["a", "b", "c", "a"])
with tm.assert_produces_warning(FutureWarning, match="non-unique"):
res, indexer = ci.reindex(Categorical(["a", "c"]))

exp = CategoricalIndex(["a", "a", "c"], categories=["a", "c"])
tm.assert_index_equal(res, exp, exact=True)
tm.assert_numpy_array_equal(indexer, np.array([0, 3, 2], dtype=np.intp))

c = CategoricalIndex(["a", "b", "c", "a"], categories=["a", "b", "c", "d"])
res, indexer = c.reindex(["a", "c"])
ci = CategoricalIndex(["a", "b", "c", "a"], categories=["a", "b", "c", "d"])
with tm.assert_produces_warning(FutureWarning, match="non-unique"):
res, indexer = ci.reindex(["a", "c"])
exp = Index(["a", "a", "c"], dtype="object")
tm.assert_index_equal(res, exp, exact=True)
tm.assert_numpy_array_equal(indexer, np.array([0, 3, 2], dtype=np.intp))

c = CategoricalIndex(["a", "b", "c", "a"], categories=["a", "b", "c", "d"])
res, indexer = c.reindex(Categorical(["a", "c"]))
ci = CategoricalIndex(["a", "b", "c", "a"], categories=["a", "b", "c", "d"])
with tm.assert_produces_warning(FutureWarning, match="non-unique"):
res, indexer = ci.reindex(Categorical(["a", "c"]))
exp = CategoricalIndex(["a", "a", "c"], categories=["a", "c"])
tm.assert_index_equal(res, exp, exact=True)
tm.assert_numpy_array_equal(indexer, np.array([0, 3, 2], dtype=np.intp))
Expand Down
3 changes: 2 additions & 1 deletion pandas/tests/indexes/multi/test_reindex.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,8 @@ def test_reindex_non_unique():

msg = "cannot handle a non-unique multi-index!"
with pytest.raises(ValueError, match=msg):
a.reindex(new_idx)
with tm.assert_produces_warning(FutureWarning, match="non-unique"):
a.reindex(new_idx)


@pytest.mark.parametrize("values", [[["a"], ["x"]], [[], []]])
Expand Down
3 changes: 2 additions & 1 deletion pandas/tests/resample/test_datetime_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -694,7 +694,8 @@ def test_asfreq_non_unique():

msg = "cannot reindex on an axis with duplicate labels"
with pytest.raises(ValueError, match=msg):
ts.asfreq("B")
with tm.assert_produces_warning(FutureWarning, match="non-unique"):
ts.asfreq("B")


def test_resample_axis1():
Expand Down

0 comments on commit 8fb347f

Please sign in to comment.