diff --git a/doc/source/whatsnew/v0.18.0.txt b/doc/source/whatsnew/v0.18.0.txt index 4d35d7f833ddf..be1a62424dacc 100644 --- a/doc/source/whatsnew/v0.18.0.txt +++ b/doc/source/whatsnew/v0.18.0.txt @@ -468,9 +468,11 @@ Bug Fixes - Bug in ``to_numeric`` where it does not raise if input is more than one dimension (:issue:`11776`) - Bug in parsing timezone offset strings with non-zero minutes (:issue:`11708`) + - Bug in ``df.plot`` using incorrect colors for bar plots under matplotlib 1.5+ (:issue:`11614`) - Bug in the ``groupby`` ``plot`` method when using keyword arguments (:issue:`11805`). +- Bug in ``DataFrame.duplicated`` and ``drop_duplicates`` causing spurious matches when setting ``keep=False`` (:issue:`11864`) - Bug in ``.loc`` result with duplicated key may have ``Index`` with incorrect dtype (:issue:`11497`) - Bug in ``pd.rolling_median`` where memory allocation failed even with sufficient memory (:issue:`11696`) diff --git a/pandas/hashtable.pyx b/pandas/hashtable.pyx index dfa7930ada62f..58e9d64921e0d 100644 --- a/pandas/hashtable.pyx +++ b/pandas/hashtable.pyx @@ -1067,7 +1067,8 @@ def mode_int64(int64_t[:] values): @cython.boundscheck(False) def duplicated_int64(ndarray[int64_t, ndim=1] values, object keep='first'): cdef: - int ret = 0, value, k + int ret = 0, k + int64_t value Py_ssize_t i, n = len(values) kh_int64_t * table = kh_init_int64() ndarray[uint8_t, ndim=1, cast=True] out = np.empty(n, dtype='bool') diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py index adb1b538fc2aa..960e931383310 100644 --- a/pandas/tests/test_frame.py +++ b/pandas/tests/test_frame.py @@ -8532,6 +8532,13 @@ def test_drop_duplicates(self): df = pd.DataFrame([[-x, x], [x, x + 4]]) assert_frame_equal(df.drop_duplicates(), df) + # GH 11864 + df = pd.DataFrame([i] * 9 for i in range(16)) + df = df.append([[1] + [0] * 8], ignore_index=True) + + for keep in ['first', 'last', False]: + assert_equal(df.duplicated(keep=keep).sum(), 0) + def test_drop_duplicates_for_take_all(self): df = DataFrame({'AAA': ['foo', 'bar', 'baz', 'bar', 'foo', 'bar', 'qux', 'foo'],