From fc4849a74bf8e80227634d34536faace79f8c3a2 Mon Sep 17 00:00:00 2001 From: dhirschfeld Date: Tue, 5 Dec 2017 22:24:06 +1000 Subject: [PATCH 1/8] Fixed handling of boolean indexing with 2-d ndarrays Fixes #18582 --- pandas/core/frame.py | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 12a4a7fdaedad..faf9f2673b0ba 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -2532,10 +2532,10 @@ def __setitem__(self, key, value): if indexer is not None: return self._setitem_slice(indexer, value) - if isinstance(key, (Series, np.ndarray, list, Index)): - self._setitem_array(key, value) - elif isinstance(key, DataFrame): + if isinstance(key, DataFrame) or getattr(key, 'ndim', None) == 2: self._setitem_frame(key, value) + elif isinstance(key, (Series, np.ndarray, list, Index)): + self._setitem_array(key, value) else: # set column self._set_item(key, value) @@ -2568,8 +2568,17 @@ def _setitem_array(self, key, value): def _setitem_frame(self, key, value): # support boolean setting with DataFrame input, e.g. # df[df > df2] = 0 + if isinstance(key, np.ndarray): + if key.shape != self.shape: + raise ValueError( + 'Array conditional must be same shape as self' + ) + key = self._constructor(key, **self._construct_axes_dict()) + if key.values.size and not is_bool_dtype(key.values): - raise TypeError('Must pass DataFrame with boolean values only') + raise TypeError( + 'Must pass DataFrame or 2-d ndarray with boolean values only' + ) self._check_inplace_setting(value) self._check_setitem_copy() From a8a4ea692b89aaee9196d326e069b3ac931d82ee Mon Sep 17 00:00:00 2001 From: dhirschfeld Date: Wed, 6 Dec 2017 18:35:38 +1000 Subject: [PATCH 2/8] Added test for 2-d ndarray boolean indexing --- pandas/tests/frame/test_indexing.py | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/pandas/tests/frame/test_indexing.py b/pandas/tests/frame/test_indexing.py index 62bc0eada9d89..d928585a270c3 100644 --- a/pandas/tests/frame/test_indexing.py +++ b/pandas/tests/frame/test_indexing.py @@ -524,9 +524,8 @@ def test_setitem_boolean(self): values[values == 2] = 3 assert_almost_equal(df.values, values) - with tm.assert_raises_regex(TypeError, 'Must pass ' - 'DataFrame with ' - 'boolean values only'): + msg = "Must pass DataFrame or 2-d ndarray with boolean values only" + with tm.assert_raises_regex(TypeError, msg): df[df * 0] = 2 # index with DataFrame @@ -542,6 +541,16 @@ def test_setitem_boolean(self): np.putmask(expected.values, mask.values, df.values * 2) assert_frame_equal(df, expected) + def test_setitem_boolean_ndarary(self): + df = self.frame.copy() + mask = df > np.abs(df) + expected = df.copy() + expected.values[mask.values] = nan + # index with 2-d boolean ndarray + actual = df.copy() + actual[mask.values] = nan + assert_frame_equal(actual, expected) + def test_setitem_cast(self): self.frame['D'] = self.frame['D'].astype('i8') assert self.frame['D'].dtype == np.int64 From 7c47379200950df4f589d3178f334545d98a9957 Mon Sep 17 00:00:00 2001 From: dhirschfeld Date: Sun, 10 Dec 2017 12:17:42 +1000 Subject: [PATCH 3/8] Address review comments --- pandas/tests/frame/test_indexing.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/pandas/tests/frame/test_indexing.py b/pandas/tests/frame/test_indexing.py index d928585a270c3..a2c80c4bcf8e4 100644 --- a/pandas/tests/frame/test_indexing.py +++ b/pandas/tests/frame/test_indexing.py @@ -542,15 +542,23 @@ def test_setitem_boolean(self): assert_frame_equal(df, expected) def test_setitem_boolean_ndarary(self): + # Test for issue #18582 + df = self.frame.copy() mask = df > np.abs(df) expected = df.copy() expected.values[mask.values] = nan + # index with 2-d boolean ndarray actual = df.copy() actual[mask.values] = nan assert_frame_equal(actual, expected) + # index with boolean DataFrame + actual = df.copy() + actual[mask] = nan + assert_frame_equal(actual, expected) + def test_setitem_cast(self): self.frame['D'] = self.frame['D'].astype('i8') assert self.frame['D'].dtype == np.int64 From d3d731f2546e318bcae41199a2a34734b41c4358 Mon Sep 17 00:00:00 2001 From: dhirschfeld Date: Sat, 30 Dec 2017 16:00:59 +1000 Subject: [PATCH 4/8] Parameterized test_setitem_boolean_mask --- pandas/tests/frame/test_indexing.py | 39 ++++++++++++++++++++--------- 1 file changed, 27 insertions(+), 12 deletions(-) diff --git a/pandas/tests/frame/test_indexing.py b/pandas/tests/frame/test_indexing.py index a2c80c4bcf8e4..ded501858bd64 100644 --- a/pandas/tests/frame/test_indexing.py +++ b/pandas/tests/frame/test_indexing.py @@ -38,6 +38,22 @@ from pandas.tests.frame.common import TestData +@pytest.fixture(scope='module', + params=['dataframe', 'ndarray']) +def f_get_mask(request): + from functools import partial + + def get_mask(df, mask_type): + if mask_type == 'dataframe': + return df > np.abs(df) + elif mask_type == 'ndarray': + return (df > np.abs(df)).values + else: + raise ValueError("Unknown `mask_type` '{}'".format(mask_type)) + + return partial(get_mask, mask_type=request.param) + + class TestDataFrameIndexing(TestData): def test_getitem(self): @@ -541,22 +557,21 @@ def test_setitem_boolean(self): np.putmask(expected.values, mask.values, df.values * 2) assert_frame_equal(df, expected) - def test_setitem_boolean_ndarary(self): + def test_setitem_boolean_mask(self, f_get_mask): # Test for issue #18582 - df = self.frame.copy() - mask = df > np.abs(df) - expected = df.copy() - expected.values[mask.values] = nan - - # index with 2-d boolean ndarray - actual = df.copy() - actual[mask.values] = nan - assert_frame_equal(actual, expected) - - # index with boolean DataFrame + mask = f_get_mask(df) + + # index with boolean mask actual = df.copy() actual[mask] = nan + + # compare against ndarray boolean indexing + if isinstance(mask, pd.DataFrame): + mask = mask.values + + expected = df.copy() + expected.values[mask] = nan assert_frame_equal(actual, expected) def test_setitem_cast(self): From 96047f33e363679d0b1248f89f900a6eb845427f Mon Sep 17 00:00:00 2001 From: dhirschfeld Date: Sat, 30 Dec 2017 16:17:03 +1000 Subject: [PATCH 5/8] Add whatsnew entry --- doc/source/whatsnew/v0.23.0.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt index 0301bf0a23dd5..430637ac6d384 100644 --- a/doc/source/whatsnew/v0.23.0.txt +++ b/doc/source/whatsnew/v0.23.0.txt @@ -314,6 +314,7 @@ Indexing - :func:`DatetimeIndex.to_series` now accepts ``index`` and ``name`` kwargs (:issue:`18699`) - Bug in indexing non-scalar value from ``Series`` having non-unique ``Index`` will return value flattened (:issue:`17610`) - Bug in :func:`DatetimeIndex.insert` where inserting ``NaT`` into a timezone-aware index incorrectly raised (:issue:`16357`) +- Bug in ``__setitem__`` when indexing a :class:`DataFrame` with a 2-d boolean ndarray (:issue:`18582`) I/O From 60fe96fec53d57eed8f4392fe3da1fdf7371f103 Mon Sep 17 00:00:00 2001 From: dhirschfeld Date: Sat, 30 Dec 2017 17:56:10 +1000 Subject: [PATCH 6/8] Fix lint errors --- pandas/tests/frame/test_indexing.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/frame/test_indexing.py b/pandas/tests/frame/test_indexing.py index ded501858bd64..12bdcd5873bb9 100644 --- a/pandas/tests/frame/test_indexing.py +++ b/pandas/tests/frame/test_indexing.py @@ -50,7 +50,7 @@ def get_mask(df, mask_type): return (df > np.abs(df)).values else: raise ValueError("Unknown `mask_type` '{}'".format(mask_type)) - + return partial(get_mask, mask_type=request.param) @@ -561,7 +561,7 @@ def test_setitem_boolean_mask(self, f_get_mask): # Test for issue #18582 df = self.frame.copy() mask = f_get_mask(df) - + # index with boolean mask actual = df.copy() actual[mask] = nan From 5072dd079d702fc5c36345d376027262774ce70c Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Sat, 30 Dec 2017 07:51:48 -0500 Subject: [PATCH 7/8] fixup paramaterization --- pandas/tests/frame/test_indexing.py | 38 +++++++++-------------------- 1 file changed, 12 insertions(+), 26 deletions(-) diff --git a/pandas/tests/frame/test_indexing.py b/pandas/tests/frame/test_indexing.py index 12bdcd5873bb9..430e653ed8e96 100644 --- a/pandas/tests/frame/test_indexing.py +++ b/pandas/tests/frame/test_indexing.py @@ -38,22 +38,6 @@ from pandas.tests.frame.common import TestData -@pytest.fixture(scope='module', - params=['dataframe', 'ndarray']) -def f_get_mask(request): - from functools import partial - - def get_mask(df, mask_type): - if mask_type == 'dataframe': - return df > np.abs(df) - elif mask_type == 'ndarray': - return (df > np.abs(df)).values - else: - raise ValueError("Unknown `mask_type` '{}'".format(mask_type)) - - return partial(get_mask, mask_type=request.param) - - class TestDataFrameIndexing(TestData): def test_getitem(self): @@ -557,22 +541,24 @@ def test_setitem_boolean(self): np.putmask(expected.values, mask.values, df.values * 2) assert_frame_equal(df, expected) - def test_setitem_boolean_mask(self, f_get_mask): + @pytest.mark.parametrize( + "mask_type", + [lambda df: df > np.abs(df) / 2, + lambda df: (df > np.abs(df) / 2).values], + ids=['dataframe', 'array']) + def test_setitem_boolean_mask(self, mask_type): + # Test for issue #18582 df = self.frame.copy() - mask = f_get_mask(df) + mask = mask_type(df) # index with boolean mask - actual = df.copy() - actual[mask] = nan - - # compare against ndarray boolean indexing - if isinstance(mask, pd.DataFrame): - mask = mask.values + result = df.copy() + result[mask] = np.nan expected = df.copy() - expected.values[mask] = nan - assert_frame_equal(actual, expected) + expected.values[mask] = np.nan + assert_frame_equal(result, expected) def test_setitem_cast(self): self.frame['D'] = self.frame['D'].astype('i8') From c2da170405ea0eb7cbc1225f3abc7b94d0108775 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Sat, 30 Dec 2017 09:23:43 -0500 Subject: [PATCH 8/8] fix for older numpies --- pandas/tests/frame/test_indexing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/frame/test_indexing.py b/pandas/tests/frame/test_indexing.py index 430e653ed8e96..882fa634d167d 100644 --- a/pandas/tests/frame/test_indexing.py +++ b/pandas/tests/frame/test_indexing.py @@ -557,7 +557,7 @@ def test_setitem_boolean_mask(self, mask_type): result[mask] = np.nan expected = df.copy() - expected.values[mask] = np.nan + expected.values[np.array(mask)] = np.nan assert_frame_equal(result, expected) def test_setitem_cast(self):