From 75c915f9c72394c24fd96c936f5aee1f49e17ff2 Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Fri, 31 Jan 2020 23:13:15 -0600 Subject: [PATCH 01/58] Add test --- pandas/tests/arrays/test_boolean.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/pandas/tests/arrays/test_boolean.py b/pandas/tests/arrays/test_boolean.py index cb9b07db4a0df..d5b9a5b0a3f10 100644 --- a/pandas/tests/arrays/test_boolean.py +++ b/pandas/tests/arrays/test_boolean.py @@ -929,3 +929,13 @@ def test_diff(): result = s.diff() expected = pd.Series(expected) tm.assert_series_equal(result, expected) + + +def test_nullable_boolean_filter(): + s = pd.Series([1, 2, 3]) + mask = pd.array([True, True, None], dtype="boolean") + + result = s[mask] + expected = s.iloc[:2] + + tm.assert_series_equal(result, expected) From 9c5b9f06f54e570134a846dfde488c5c7b720671 Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Sun, 2 Feb 2020 17:53:52 -0600 Subject: [PATCH 02/58] Remove test that checks for error --- pandas/tests/indexing/test_na_indexing.py | 18 ------------------ 1 file changed, 18 deletions(-) diff --git a/pandas/tests/indexing/test_na_indexing.py b/pandas/tests/indexing/test_na_indexing.py index befe4fee8ecf8..f1ccaacfec049 100644 --- a/pandas/tests/indexing/test_na_indexing.py +++ b/pandas/tests/indexing/test_na_indexing.py @@ -59,21 +59,3 @@ def test_series_mask_boolean(values, dtype, mask, box_mask, frame): result = ser.loc[mask] tm.assert_equal(result, expected) - - -@pytest.mark.parametrize("frame", [True, False]) -def test_indexing_with_na_raises(frame): - s = pd.Series([1, 2, 3], name="name") - - if frame: - s = s.to_frame() - mask = pd.array([True, False, None], dtype="boolean") - match = "cannot mask with array containing NA / NaN values" - with pytest.raises(ValueError, match=match): - s[mask] - - with pytest.raises(ValueError, match=match): - s.loc[mask] - - with pytest.raises(ValueError, match=match): - s.iloc[mask] From 2441b40a3eb1360134bb9843b8d2e05f30f09bba Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Sun, 2 Feb 2020 18:00:05 -0600 Subject: [PATCH 03/58] Add frame test --- pandas/tests/arrays/test_boolean.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/pandas/tests/arrays/test_boolean.py b/pandas/tests/arrays/test_boolean.py index d5b9a5b0a3f10..01a9604edc580 100644 --- a/pandas/tests/arrays/test_boolean.py +++ b/pandas/tests/arrays/test_boolean.py @@ -931,7 +931,7 @@ def test_diff(): tm.assert_series_equal(result, expected) -def test_nullable_boolean_filter(): +def test_nullable_boolean_index_series(): s = pd.Series([1, 2, 3]) mask = pd.array([True, True, None], dtype="boolean") @@ -939,3 +939,13 @@ def test_nullable_boolean_filter(): expected = s.iloc[:2] tm.assert_series_equal(result, expected) + + +def test_nullable_boolean_index_frame(): + df = pd.DataFrame({"a": [1, 2, 3]}) + mask = pd.array([True, True, None], dtype="boolean") + + result = df[mask] + expected = df.iloc[:2, :] + + tm.assert_frame_equal(result, expected) From d71d1ba7ba7ceb3f95d5f2bed3aa819724a1c74d Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Sun, 2 Feb 2020 18:02:32 -0600 Subject: [PATCH 04/58] Don't raise with nullable boolean --- pandas/core/common.py | 6 ------ pandas/core/indexing.py | 7 +++++++ 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/pandas/core/common.py b/pandas/core/common.py index a76119da2707a..ea0a219e9537d 100644 --- a/pandas/core/common.py +++ b/pandas/core/common.py @@ -114,7 +114,6 @@ def is_bool_indexer(key: Any) -> bool: check_array_indexer : Check that `key` is a valid array to index, and convert to an ndarray. """ - na_msg = "cannot mask with array containing NA / NaN values" if isinstance(key, (ABCSeries, np.ndarray, ABCIndex)) or ( is_array_like(key) and is_extension_array_dtype(key.dtype) ): @@ -127,11 +126,6 @@ def is_bool_indexer(key: Any) -> bool: return False return True elif is_bool_dtype(key.dtype): - # an ndarray with bool-dtype by definition has no missing values. - # So we only need to check for NAs in ExtensionArrays - if is_extension_array_dtype(key.dtype): - if np.any(key.isna()): - raise ValueError(na_msg) return True elif isinstance(key, list): try: diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 7e56148b7569e..4a60e3ff6d24f 100755 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -8,6 +8,8 @@ from pandas.util._decorators import Appender from pandas.core.dtypes.common import ( + is_bool_dtype, + is_extension_array_dtype, is_float, is_integer, is_iterator, @@ -2229,6 +2231,11 @@ def check_bool_indexer(index: Index, key) -> np.ndarray: "the indexed object do not match)." ) result = result.astype(bool)._values + elif is_extension_array_dtype(key) and is_bool_dtype(key): + mask = isna(key) + if mask.any(): + result[mask] = False + result = np.asarray(result, dtype=bool) else: # key might be sparse / object-dtype bool, check_array_indexer needs bool array result = np.asarray(result, dtype=bool) From 4d3a264f47497f03f51795e964395118d618e77e Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Sun, 2 Feb 2020 18:10:53 -0600 Subject: [PATCH 05/58] Don't modify result --- pandas/core/indexing.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 4a60e3ff6d24f..dcf9c676e99de 100755 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -2234,8 +2234,9 @@ def check_bool_indexer(index: Index, key) -> np.ndarray: elif is_extension_array_dtype(key) and is_bool_dtype(key): mask = isna(key) if mask.any(): - result[mask] = False - result = np.asarray(result, dtype=bool) + result = np.asarray(np.where(~mask, result, False), dtype=bool) + else: + result = np.asarray(result, dtype=bool) else: # key might be sparse / object-dtype bool, check_array_indexer needs bool array result = np.asarray(result, dtype=bool) From 543ef9ab262e1004e967f112d4a37927f10ebe7f Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Sun, 2 Feb 2020 18:11:12 -0600 Subject: [PATCH 06/58] Add frame test --- pandas/tests/arrays/test_boolean.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/arrays/test_boolean.py b/pandas/tests/arrays/test_boolean.py index 01a9604edc580..b3dc4dbc11b3d 100644 --- a/pandas/tests/arrays/test_boolean.py +++ b/pandas/tests/arrays/test_boolean.py @@ -931,7 +931,7 @@ def test_diff(): tm.assert_series_equal(result, expected) -def test_nullable_boolean_index_series(): +def test_nullable_boolean_mask_series(): s = pd.Series([1, 2, 3]) mask = pd.array([True, True, None], dtype="boolean") @@ -941,7 +941,7 @@ def test_nullable_boolean_index_series(): tm.assert_series_equal(result, expected) -def test_nullable_boolean_index_frame(): +def test_nullable_boolean_mask_frame(): df = pd.DataFrame({"a": [1, 2, 3]}) mask = pd.array([True, True, None], dtype="boolean") From d3e7a6909207f93e9e172cdc93bb6c0217f2badf Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Sun, 2 Feb 2020 18:16:42 -0600 Subject: [PATCH 07/58] Update whatsnew --- doc/source/whatsnew/v1.1.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index e07a8fa0469f4..f47038ff8300c 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -202,6 +202,7 @@ Sparse ExtensionArray ^^^^^^^^^^^^^^ +- Enabled filtering with nullable Boolean arrays. (:issue:`31503`) - - From ad7ae6684b99636ac17b65a3bc79671f695fb8f2 Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Sun, 2 Feb 2020 20:04:37 -0600 Subject: [PATCH 08/58] Fill NA --- pandas/core/indexing.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index dcf9c676e99de..bf96bd09b77b4 100755 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -2234,9 +2234,9 @@ def check_bool_indexer(index: Index, key) -> np.ndarray: elif is_extension_array_dtype(key) and is_bool_dtype(key): mask = isna(key) if mask.any(): - result = np.asarray(np.where(~mask, result, False), dtype=bool) + result = np.asarray(key.fillna(False), dtype=bool) else: - result = np.asarray(result, dtype=bool) + result = np.asarray(key, dtype=bool) else: # key might be sparse / object-dtype bool, check_array_indexer needs bool array result = np.asarray(result, dtype=bool) From f6e9ce598ffa103159b9a6b6fb3b13a2730ebc0c Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Mon, 3 Feb 2020 12:24:50 -0600 Subject: [PATCH 09/58] Remove some more tests --- pandas/tests/arrays/categorical/test_indexing.py | 11 ----------- pandas/tests/extension/base/getitem.py | 16 ---------------- 2 files changed, 27 deletions(-) diff --git a/pandas/tests/arrays/categorical/test_indexing.py b/pandas/tests/arrays/categorical/test_indexing.py index 85d5a6a3dc3ac..652ae2a9dde87 100644 --- a/pandas/tests/arrays/categorical/test_indexing.py +++ b/pandas/tests/arrays/categorical/test_indexing.py @@ -239,17 +239,6 @@ def test_mask_with_boolean(index): tm.assert_series_equal(result, expected) -@pytest.mark.parametrize("index", [True, False]) -def test_mask_with_boolean_raises(index): - s = Series(range(3)) - idx = Categorical([True, False, None]) - if index: - idx = CategoricalIndex(idx) - - with pytest.raises(ValueError, match="NA / NaN"): - s[idx] - - @pytest.fixture def non_coercible_categorical(monkeypatch): """ diff --git a/pandas/tests/extension/base/getitem.py b/pandas/tests/extension/base/getitem.py index 8615a8df22dcc..50be6494a8e63 100644 --- a/pandas/tests/extension/base/getitem.py +++ b/pandas/tests/extension/base/getitem.py @@ -158,22 +158,6 @@ def test_getitem_boolean_array_mask(self, data): result = pd.Series(data)[mask] self.assert_series_equal(result, expected) - def test_getitem_boolean_array_mask_raises(self, data): - mask = pd.array(np.zeros(data.shape, dtype="bool"), dtype="boolean") - mask[:2] = pd.NA - - msg = ( - "Cannot mask with a boolean indexer containing NA values|" - "cannot mask with array containing NA / NaN values" - ) - with pytest.raises(ValueError, match=msg): - data[mask] - - s = pd.Series(data) - - with pytest.raises(ValueError): - s[mask] - @pytest.mark.parametrize( "idx", [[0, 1, 2], pd.array([0, 1, 2], dtype="Int64"), np.array([0, 1, 2])], From 12344071edfcbe84007457ad0f812fe367dbc67b Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Mon, 3 Feb 2020 12:49:25 -0600 Subject: [PATCH 10/58] Delete another test --- pandas/tests/series/indexing/test_boolean.py | 8 -------- 1 file changed, 8 deletions(-) diff --git a/pandas/tests/series/indexing/test_boolean.py b/pandas/tests/series/indexing/test_boolean.py index 28f3c0f7429f8..b78dd2fc84f12 100644 --- a/pandas/tests/series/indexing/test_boolean.py +++ b/pandas/tests/series/indexing/test_boolean.py @@ -70,14 +70,6 @@ def test_getitem_boolean_object(string_series): s2[mask] = 5 tm.assert_series_equal(cop, s2) - # nans raise exception - omask[5:10] = np.nan - msg = "cannot mask with array containing NA / NaN values" - with pytest.raises(ValueError, match=msg): - s[omask] - with pytest.raises(ValueError, match=msg): - s[omask] = 5 - def test_getitem_setitem_boolean_corner(datetime_series): ts = datetime_series From 9b7e87936f84678e74c4a955017c28b0d3b146f0 Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Mon, 3 Feb 2020 17:35:36 -0600 Subject: [PATCH 11/58] Use to_numpy --- pandas/core/indexing.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 77ce84509a25b..3785ecbd07818 100755 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -2225,11 +2225,7 @@ def check_bool_indexer(index: Index, key) -> np.ndarray: ) result = result.astype(bool)._values elif is_extension_array_dtype(key) and is_bool_dtype(key): - mask = isna(key) - if mask.any(): - result = np.asarray(key.fillna(False), dtype=bool) - else: - result = np.asarray(key, dtype=bool) + result = key.to_numpy(dtype=bool, na_value=False) else: # key might be sparse / object-dtype bool, check_array_indexer needs bool array result = np.asarray(result, dtype=bool) From efdd29a9284646f61de00f7ecb6ebd6eae390b24 Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Mon, 3 Feb 2020 17:56:32 -0600 Subject: [PATCH 12/58] Update whatsnew --- doc/source/whatsnew/v1.1.0.rst | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index 52bd13eff3bc1..d7c1864d21fb2 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -36,6 +36,11 @@ For example: ser["2014"] ser.loc["May 2015"] +Indexing with Nullable Boolean Arrays +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Previously indexing with a nullable Boolean array would raise a ``ValueError``, however this is now permitted with ``NA`` being treated as ``False``. (:issue:`31503`) + .. _whatsnew_110.enhancements.other: Other enhancements @@ -212,7 +217,6 @@ Sparse ExtensionArray ^^^^^^^^^^^^^^ -- Enabled filtering with nullable Boolean arrays. (:issue:`31503`) - - From 7fa36b6a3fd051412d9af46d97ab616eb98830aa Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Mon, 3 Feb 2020 17:59:18 -0600 Subject: [PATCH 13/58] Don't check for NA --- pandas/core/common.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/pandas/core/common.py b/pandas/core/common.py index e7b1d1097a085..9b1394daf671f 100644 --- a/pandas/core/common.py +++ b/pandas/core/common.py @@ -121,8 +121,6 @@ def is_bool_indexer(key: Any) -> bool: key = np.asarray(values_from_object(key)) if not lib.is_bool_array(key): - if isna(key).any(): - raise ValueError(na_msg) return False return True elif is_bool_dtype(key.dtype): From b8e3d6b11957ee3d8ad1f2c2e8ab25484c6c18a9 Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Mon, 3 Feb 2020 18:30:50 -0600 Subject: [PATCH 14/58] Revert "Remove test that checks for error" This reverts commit 9c5b9f06f54e570134a846dfde488c5c7b720671. --- pandas/tests/indexing/test_na_indexing.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/pandas/tests/indexing/test_na_indexing.py b/pandas/tests/indexing/test_na_indexing.py index f1ccaacfec049..befe4fee8ecf8 100644 --- a/pandas/tests/indexing/test_na_indexing.py +++ b/pandas/tests/indexing/test_na_indexing.py @@ -59,3 +59,21 @@ def test_series_mask_boolean(values, dtype, mask, box_mask, frame): result = ser.loc[mask] tm.assert_equal(result, expected) + + +@pytest.mark.parametrize("frame", [True, False]) +def test_indexing_with_na_raises(frame): + s = pd.Series([1, 2, 3], name="name") + + if frame: + s = s.to_frame() + mask = pd.array([True, False, None], dtype="boolean") + match = "cannot mask with array containing NA / NaN values" + with pytest.raises(ValueError, match=match): + s[mask] + + with pytest.raises(ValueError, match=match): + s.loc[mask] + + with pytest.raises(ValueError, match=match): + s.iloc[mask] From bc3fe3f0c2c9a2f1f51e46bf0fd6f0e4a4794c54 Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Mon, 3 Feb 2020 18:40:04 -0600 Subject: [PATCH 15/58] Update NA test --- pandas/tests/indexing/test_na_indexing.py | 26 ++++++++++++++++------- 1 file changed, 18 insertions(+), 8 deletions(-) diff --git a/pandas/tests/indexing/test_na_indexing.py b/pandas/tests/indexing/test_na_indexing.py index befe4fee8ecf8..4a9c93d753cc7 100644 --- a/pandas/tests/indexing/test_na_indexing.py +++ b/pandas/tests/indexing/test_na_indexing.py @@ -62,18 +62,28 @@ def test_series_mask_boolean(values, dtype, mask, box_mask, frame): @pytest.mark.parametrize("frame", [True, False]) -def test_indexing_with_na_raises(frame): +def test_na_treated_as_false(frame): s = pd.Series([1, 2, 3], name="name") if frame: s = s.to_frame() + mask = pd.array([True, False, None], dtype="boolean") - match = "cannot mask with array containing NA / NaN values" - with pytest.raises(ValueError, match=match): - s[mask] - with pytest.raises(ValueError, match=match): - s.loc[mask] + result = s[mask] + expected = s[mask.fillna(False)] + + result_loc = s.loc[mask] + expected_loc = s.loc[mask.fillna(False)] - with pytest.raises(ValueError, match=match): - s.iloc[mask] + result_iloc = s.iloc[mask] + expected_iloc = s.iloc[mask.fillna(False)] + + if frame: + tm.assert_frame_equal(result, expected) + tm.assert_frame_equal(result_loc, expected_loc) + tm.assert_frame_equal(result_iloc, expected_iloc) + else: + tm.assert_series_equal(result, expected) + tm.assert_series_equal(result_loc, expected_loc) + tm.assert_series_equal(result_iloc, expected_iloc) From 73ad2211ac0ad37a502d8eee52805d6f87ae7454 Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Mon, 3 Feb 2020 18:45:42 -0600 Subject: [PATCH 16/58] Revert "Remove some more tests" This reverts commit f6e9ce598ffa103159b9a6b6fb3b13a2730ebc0c. --- pandas/tests/arrays/categorical/test_indexing.py | 11 +++++++++++ pandas/tests/extension/base/getitem.py | 16 ++++++++++++++++ 2 files changed, 27 insertions(+) diff --git a/pandas/tests/arrays/categorical/test_indexing.py b/pandas/tests/arrays/categorical/test_indexing.py index 652ae2a9dde87..85d5a6a3dc3ac 100644 --- a/pandas/tests/arrays/categorical/test_indexing.py +++ b/pandas/tests/arrays/categorical/test_indexing.py @@ -239,6 +239,17 @@ def test_mask_with_boolean(index): tm.assert_series_equal(result, expected) +@pytest.mark.parametrize("index", [True, False]) +def test_mask_with_boolean_raises(index): + s = Series(range(3)) + idx = Categorical([True, False, None]) + if index: + idx = CategoricalIndex(idx) + + with pytest.raises(ValueError, match="NA / NaN"): + s[idx] + + @pytest.fixture def non_coercible_categorical(monkeypatch): """ diff --git a/pandas/tests/extension/base/getitem.py b/pandas/tests/extension/base/getitem.py index 50be6494a8e63..8615a8df22dcc 100644 --- a/pandas/tests/extension/base/getitem.py +++ b/pandas/tests/extension/base/getitem.py @@ -158,6 +158,22 @@ def test_getitem_boolean_array_mask(self, data): result = pd.Series(data)[mask] self.assert_series_equal(result, expected) + def test_getitem_boolean_array_mask_raises(self, data): + mask = pd.array(np.zeros(data.shape, dtype="bool"), dtype="boolean") + mask[:2] = pd.NA + + msg = ( + "Cannot mask with a boolean indexer containing NA values|" + "cannot mask with array containing NA / NaN values" + ) + with pytest.raises(ValueError, match=msg): + data[mask] + + s = pd.Series(data) + + with pytest.raises(ValueError): + s[mask] + @pytest.mark.parametrize( "idx", [[0, 1, 2], pd.array([0, 1, 2], dtype="Int64"), np.array([0, 1, 2])], From 547d7bc7cd1fdbd38320723fb28c4a4906e51366 Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Mon, 3 Feb 2020 18:51:07 -0600 Subject: [PATCH 17/58] Update Categorical test Not sure how applicable the CategoricalIndex part is here so going to remove for now --- pandas/tests/arrays/categorical/test_indexing.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/pandas/tests/arrays/categorical/test_indexing.py b/pandas/tests/arrays/categorical/test_indexing.py index 85d5a6a3dc3ac..b01c5d5f178c5 100644 --- a/pandas/tests/arrays/categorical/test_indexing.py +++ b/pandas/tests/arrays/categorical/test_indexing.py @@ -239,15 +239,14 @@ def test_mask_with_boolean(index): tm.assert_series_equal(result, expected) -@pytest.mark.parametrize("index", [True, False]) -def test_mask_with_boolean_raises(index): +def test_mask_with_boolean_na_treated_as_false(): s = Series(range(3)) idx = Categorical([True, False, None]) - if index: - idx = CategoricalIndex(idx) - with pytest.raises(ValueError, match="NA / NaN"): - s[idx] + result = s[idx] + expected = s[idx.fillna(False)] + + tm.assert_series_equal(result, expected) @pytest.fixture From 5649445e86e1d3e553823efba187661725c9eab3 Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Mon, 3 Feb 2020 18:56:08 -0600 Subject: [PATCH 18/58] Update getitem tests --- pandas/tests/extension/base/getitem.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/pandas/tests/extension/base/getitem.py b/pandas/tests/extension/base/getitem.py index 8615a8df22dcc..cfa883a07eed0 100644 --- a/pandas/tests/extension/base/getitem.py +++ b/pandas/tests/extension/base/getitem.py @@ -158,21 +158,21 @@ def test_getitem_boolean_array_mask(self, data): result = pd.Series(data)[mask] self.assert_series_equal(result, expected) - def test_getitem_boolean_array_mask_raises(self, data): + def test_getitem_boolean_na_treated_as_false(self, data): mask = pd.array(np.zeros(data.shape, dtype="bool"), dtype="boolean") mask[:2] = pd.NA - msg = ( - "Cannot mask with a boolean indexer containing NA values|" - "cannot mask with array containing NA / NaN values" - ) - with pytest.raises(ValueError, match=msg): - data[mask] + result = data[mask] + expected = data[mask.fillna(False)] + + tm.assert_frame_equal(result, expected) s = pd.Series(data) - with pytest.raises(ValueError): - s[mask] + result = s[mask] + expected = s[mask.fillna(False)] + + tm.assert_series_equal(result, expected) @pytest.mark.parametrize( "idx", From bb3d143f8b769f79b6cdfb95a74196cc29150e85 Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Mon, 3 Feb 2020 19:55:07 -0600 Subject: [PATCH 19/58] Update indexers.py --- pandas/core/indexers.py | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/pandas/core/indexers.py b/pandas/core/indexers.py index fe475527f4596..6da2150595a09 100644 --- a/pandas/core/indexers.py +++ b/pandas/core/indexers.py @@ -10,6 +10,7 @@ from pandas.core.dtypes.common import ( is_array_like, is_bool_dtype, + is_extension_array_dtype, is_integer_dtype, is_list_like, ) @@ -333,14 +334,11 @@ def check_array_indexer(array: AnyArrayLike, indexer: Any) -> Any: ... IndexError: Boolean index has wrong length: 3 instead of 2. - A ValueError is raised when the mask cannot be converted to - a bool-dtype ndarray. + NA values are treated as False. >>> mask = pd.array([True, pd.NA]) >>> pd.api.indexers.check_array_indexer(arr, mask) - Traceback (most recent call last): - ... - ValueError: Cannot mask with a boolean indexer containing NA values + array([ True, False]) A numpy boolean mask will get passed through (if the length is correct): @@ -392,10 +390,17 @@ def check_array_indexer(array: AnyArrayLike, indexer: Any) -> Any: dtype = indexer.dtype if is_bool_dtype(dtype): - try: - indexer = np.asarray(indexer, dtype=bool) - except ValueError: - raise ValueError("Cannot mask with a boolean indexer containing NA values") + if is_extension_array_dtype(dtype): + indexer = indexer.to_numpy(dtype=bool, na_value=False) + else: + try: + indexer = np.asarray(indexer, dtype=bool) + except ValueError: + msg = ( + "Cannot mask with a non-ExtensionArray boolean indexer " + " containing missing values" + ) + raise ValueError(msg) # GH26658 if len(indexer) != len(array): From f107252431710d535aa9a5652ad852e05571bb29 Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Mon, 3 Feb 2020 20:19:16 -0600 Subject: [PATCH 20/58] tm -> self --- pandas/tests/extension/base/getitem.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/extension/base/getitem.py b/pandas/tests/extension/base/getitem.py index cfa883a07eed0..eb0f0e91e8fe6 100644 --- a/pandas/tests/extension/base/getitem.py +++ b/pandas/tests/extension/base/getitem.py @@ -165,14 +165,14 @@ def test_getitem_boolean_na_treated_as_false(self, data): result = data[mask] expected = data[mask.fillna(False)] - tm.assert_frame_equal(result, expected) + self.assert_frame_equal(result, expected) s = pd.Series(data) result = s[mask] expected = s[mask.fillna(False)] - tm.assert_series_equal(result, expected) + self.assert_series_equal(result, expected) @pytest.mark.parametrize( "idx", From 7b924b7ca4f6fc15a1b071d7553a3b6ceb23e50d Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Mon, 3 Feb 2020 20:23:22 -0600 Subject: [PATCH 21/58] Assert for EA not DataFrame --- pandas/tests/extension/base/getitem.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/extension/base/getitem.py b/pandas/tests/extension/base/getitem.py index eb0f0e91e8fe6..aa1fbbb41bfb9 100644 --- a/pandas/tests/extension/base/getitem.py +++ b/pandas/tests/extension/base/getitem.py @@ -165,7 +165,7 @@ def test_getitem_boolean_na_treated_as_false(self, data): result = data[mask] expected = data[mask.fillna(False)] - self.assert_frame_equal(result, expected) + self.assert_extension_array_equal(result, expected) s = pd.Series(data) From 46d77dfc39d426971a2a9fc6ad5e594ed75e2ebb Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Tue, 4 Feb 2020 12:50:09 -0600 Subject: [PATCH 22/58] Don't try / except --- pandas/core/indexers.py | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/pandas/core/indexers.py b/pandas/core/indexers.py index 6da2150595a09..8b7716707bb04 100644 --- a/pandas/core/indexers.py +++ b/pandas/core/indexers.py @@ -393,14 +393,7 @@ def check_array_indexer(array: AnyArrayLike, indexer: Any) -> Any: if is_extension_array_dtype(dtype): indexer = indexer.to_numpy(dtype=bool, na_value=False) else: - try: - indexer = np.asarray(indexer, dtype=bool) - except ValueError: - msg = ( - "Cannot mask with a non-ExtensionArray boolean indexer " - " containing missing values" - ) - raise ValueError(msg) + indexer = np.asarray(indexer, dtype=bool) # GH26658 if len(indexer) != len(array): From ac71cbf1bf3dae6e1114e9578543711ed8046789 Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Tue, 4 Feb 2020 12:50:37 -0600 Subject: [PATCH 23/58] Change check_indexer test Removed the test on [True, False, None] which is no longer raising. Not clear to me what the correct behavior should be, but this is now returning a pandas array with pd.NA instead of None. --- pandas/tests/indexing/test_check_indexer.py | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/pandas/tests/indexing/test_check_indexer.py b/pandas/tests/indexing/test_check_indexer.py index 82f8c12229824..0275a286bff29 100644 --- a/pandas/tests/indexing/test_check_indexer.py +++ b/pandas/tests/indexing/test_check_indexer.py @@ -31,15 +31,16 @@ def test_valid_input(indexer, expected): tm.assert_numpy_array_equal(result, expected) -@pytest.mark.parametrize( - "indexer", [[True, False, None], pd.array([True, False, None], dtype="boolean")], -) -def test_bool_raise_missing_values(indexer): - array = np.array([1, 2, 3]) +def test_boolean_na_returns_indexer(): + # TODO: What to do now about list input that contains + # bools and None? + arr = np.array([1, 2, 3]) + indexer = pd.array([True, False, None], dtype="boolean") - msg = "Cannot mask with a boolean indexer containing NA values" - with pytest.raises(ValueError, match=msg): - check_array_indexer(array, indexer) + result = check_array_indexer(arr, indexer) + expected = np.array([True, False, False], dtype=bool) + + tm.assert_numpy_array_equal(result, expected) @pytest.mark.parametrize( From e5ed092fd8a7af391c09525918ffe4e3e14f5f1d Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Tue, 4 Feb 2020 16:48:59 -0600 Subject: [PATCH 24/58] Modify __getitem__ for datetimelike --- pandas/core/arrays/datetimelike.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 4f14ac2a14157..22b2dffb4e97b 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -24,6 +24,7 @@ is_datetime64tz_dtype, is_datetime_or_timedelta_dtype, is_dtype_equal, + is_extension_array_dtype, is_float_dtype, is_integer_dtype, is_list_like, @@ -520,7 +521,11 @@ def __getitem__(self, key): if com.is_bool_indexer(key): # first convert to boolean, because check_array_indexer doesn't # allow object dtype - key = np.asarray(key, dtype=bool) + if is_extension_array_dtype(key): + key = key.to_numpy(dtype=bool, na_value=False) + else: + key = np.asarray(key, dtype=bool) + key = check_array_indexer(self, key) if key.all(): key = slice(0, None, None) From 9fcdb23b82d0959307aa8677927ecc6f4f5579b7 Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Tue, 4 Feb 2020 17:10:12 -0600 Subject: [PATCH 25/58] Add back ValueError for non-boolean with NA --- pandas/core/common.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pandas/core/common.py b/pandas/core/common.py index 9b1394daf671f..345a4fbc308d8 100644 --- a/pandas/core/common.py +++ b/pandas/core/common.py @@ -121,6 +121,9 @@ def is_bool_indexer(key: Any) -> bool: key = np.asarray(values_from_object(key)) if not lib.is_bool_array(key): + na_msg = "Cannot mask with non-boolean array containing NA / NaN values" + if isna(key).any(): + raise ValueError(na_msg) return False return True elif is_bool_dtype(key.dtype): From c2dfa93a522e3c784dde562a06c911b61e12a619 Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Tue, 4 Feb 2020 17:30:07 -0600 Subject: [PATCH 26/58] Revert "Delete another test" This reverts commit 12344071edfcbe84007457ad0f812fe367dbc67b. --- pandas/tests/series/indexing/test_boolean.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/pandas/tests/series/indexing/test_boolean.py b/pandas/tests/series/indexing/test_boolean.py index b78dd2fc84f12..28f3c0f7429f8 100644 --- a/pandas/tests/series/indexing/test_boolean.py +++ b/pandas/tests/series/indexing/test_boolean.py @@ -70,6 +70,14 @@ def test_getitem_boolean_object(string_series): s2[mask] = 5 tm.assert_series_equal(cop, s2) + # nans raise exception + omask[5:10] = np.nan + msg = "cannot mask with array containing NA / NaN values" + with pytest.raises(ValueError, match=msg): + s[omask] + with pytest.raises(ValueError, match=msg): + s[omask] = 5 + def test_getitem_setitem_boolean_corner(datetime_series): ts = datetime_series From a9a12b19f85e3c61d253093d8859f659ed9463e6 Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Tue, 4 Feb 2020 17:31:49 -0600 Subject: [PATCH 27/58] Fixup error message --- pandas/tests/series/indexing/test_boolean.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/series/indexing/test_boolean.py b/pandas/tests/series/indexing/test_boolean.py index 28f3c0f7429f8..8878a4a6526af 100644 --- a/pandas/tests/series/indexing/test_boolean.py +++ b/pandas/tests/series/indexing/test_boolean.py @@ -72,7 +72,7 @@ def test_getitem_boolean_object(string_series): # nans raise exception omask[5:10] = np.nan - msg = "cannot mask with array containing NA / NaN values" + msg = "Cannot mask with non-boolean array containing NA / NaN values" with pytest.raises(ValueError, match=msg): s[omask] with pytest.raises(ValueError, match=msg): From 7c10f33643c54cb732b8c3bd41850a646f3e9899 Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Tue, 4 Feb 2020 21:04:13 -0600 Subject: [PATCH 28/58] Add before and after examples --- doc/source/whatsnew/v1.1.0.rst | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index d7c1864d21fb2..6d1c9a6b8a2ab 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -41,6 +41,26 @@ Indexing with Nullable Boolean Arrays Previously indexing with a nullable Boolean array would raise a ``ValueError``, however this is now permitted with ``NA`` being treated as ``False``. (:issue:`31503`) +*pandas 1.0.x* + +.. code-block:: python + + >>> s = pd.Series([1, 2, 3, 4]) + >>> mask = pd.array([True, True, False, None], dtype="boolean") + >>> s[mask] + Traceback (most recent call last): + ... + ValueError: cannot mask with array containing NA / NaN values + +*pandas 1.1.0* + +.. code-block:: python + + >>> s[mask] + 0 1 + 1 2 + dtype: int64 + .. _whatsnew_110.enhancements.other: Other enhancements From cf3d60de75aa2ead0f02b71adb262658e422a894 Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Tue, 4 Feb 2020 21:21:16 -0600 Subject: [PATCH 29/58] Get rid of some tests --- pandas/tests/arrays/test_boolean.py | 20 -------------------- 1 file changed, 20 deletions(-) diff --git a/pandas/tests/arrays/test_boolean.py b/pandas/tests/arrays/test_boolean.py index b3dc4dbc11b3d..cb9b07db4a0df 100644 --- a/pandas/tests/arrays/test_boolean.py +++ b/pandas/tests/arrays/test_boolean.py @@ -929,23 +929,3 @@ def test_diff(): result = s.diff() expected = pd.Series(expected) tm.assert_series_equal(result, expected) - - -def test_nullable_boolean_mask_series(): - s = pd.Series([1, 2, 3]) - mask = pd.array([True, True, None], dtype="boolean") - - result = s[mask] - expected = s.iloc[:2] - - tm.assert_series_equal(result, expected) - - -def test_nullable_boolean_mask_frame(): - df = pd.DataFrame({"a": [1, 2, 3]}) - mask = pd.array([True, True, None], dtype="boolean") - - result = df[mask] - expected = df.iloc[:2, :] - - tm.assert_frame_equal(result, expected) From 157d8b98d00e686b0437aa91b84341aa7f261ebc Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Tue, 4 Feb 2020 22:09:26 -0600 Subject: [PATCH 30/58] Cast another way --- pandas/core/arrays/datetimelike.py | 5 +---- pandas/core/indexing.py | 6 +++--- 2 files changed, 4 insertions(+), 7 deletions(-) diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 22b2dffb4e97b..8544a2dcf1f49 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -24,7 +24,6 @@ is_datetime64tz_dtype, is_datetime_or_timedelta_dtype, is_dtype_equal, - is_extension_array_dtype, is_float_dtype, is_integer_dtype, is_list_like, @@ -521,9 +520,7 @@ def __getitem__(self, key): if com.is_bool_indexer(key): # first convert to boolean, because check_array_indexer doesn't # allow object dtype - if is_extension_array_dtype(key): - key = key.to_numpy(dtype=bool, na_value=False) - else: + if is_object_dtype(key): key = np.asarray(key, dtype=bool) key = check_array_indexer(self, key) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 3785ecbd07818..90952e201e4d2 100755 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -2224,12 +2224,12 @@ def check_bool_indexer(index: Index, key) -> np.ndarray: "the indexed object do not match)." ) result = result.astype(bool)._values - elif is_extension_array_dtype(key) and is_bool_dtype(key): - result = key.to_numpy(dtype=bool, na_value=False) - else: + elif is_object_dtype(key): # key might be sparse / object-dtype bool, check_array_indexer needs bool array result = np.asarray(result, dtype=bool) result = check_array_indexer(index, result) + else: + result = check_array_indexer(index, result) return result From 250f22828a1044ddba0bf2363e3c3500a047a30d Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Wed, 5 Feb 2020 08:03:26 -0600 Subject: [PATCH 31/58] Import --- pandas/core/indexing.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 90952e201e4d2..81e69b245d5e2 100755 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -15,6 +15,7 @@ is_iterator, is_list_like, is_numeric_dtype, + is_object_dtype, is_scalar, is_sequence, ) From 647f0f63f10b9d346f4f79692ba5cf769b797927 Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Wed, 5 Feb 2020 09:41:22 -0600 Subject: [PATCH 32/58] Don't import unused --- pandas/core/indexing.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 81e69b245d5e2..cb8575b49e37e 100755 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -8,8 +8,6 @@ from pandas.util._decorators import Appender from pandas.core.dtypes.common import ( - is_bool_dtype, - is_extension_array_dtype, is_float, is_integer, is_iterator, From a9e73deac9617e7eea12bf9aacc036605d2a1150 Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Mon, 10 Feb 2020 08:18:26 -0600 Subject: [PATCH 33/58] Update whatsnew --- doc/source/whatsnew/v1.1.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index 34779ef508149..fa53b89df5be0 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -39,7 +39,7 @@ For example: Indexing with Nullable Boolean Arrays ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -Previously indexing with a nullable Boolean array would raise a ``ValueError``, however this is now permitted with ``NA`` being treated as ``False``. (:issue:`31503`) +Previously indexing with a nullable Boolean array containing ``NA`` would raise a ``ValueError``, however this is now permitted with ``NA`` being treated as ``False``. (:issue:`31503`) *pandas 1.0.x* From adc307518db31dd4ee9c4408cdb2c87760eac4ba Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Mon, 10 Feb 2020 08:21:42 -0600 Subject: [PATCH 34/58] Update boolean.rst --- doc/source/user_guide/boolean.rst | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/doc/source/user_guide/boolean.rst b/doc/source/user_guide/boolean.rst index 5276bc6142206..3da2be09d2b7c 100644 --- a/doc/source/user_guide/boolean.rst +++ b/doc/source/user_guide/boolean.rst @@ -20,8 +20,7 @@ Nullable Boolean Data Type Indexing with NA values ----------------------- -pandas does not allow indexing with NA values. Attempting to do so -will raise a ``ValueError``. +pandas allows indexing with ``NA`` values, which are treated as ``False``. .. ipython:: python :okexcept: @@ -30,13 +29,6 @@ will raise a ``ValueError``. mask = pd.array([True, False, pd.NA], dtype="boolean") s[mask] -The missing values will need to be explicitly filled with True or False prior -to using the array as a mask. - -.. ipython:: python - - s[mask.fillna(False)] - .. _boolean.kleene: Kleene Logical Operations From 29ff823e6d4cc53ec2081a5e45fc0bb719d0bb59 Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Mon, 10 Feb 2020 08:24:06 -0600 Subject: [PATCH 35/58] check_array_indexer docstring --- pandas/core/indexers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/indexers.py b/pandas/core/indexers.py index 8b7716707bb04..e9bdc99cef3ed 100644 --- a/pandas/core/indexers.py +++ b/pandas/core/indexers.py @@ -334,7 +334,7 @@ def check_array_indexer(array: AnyArrayLike, indexer: Any) -> Any: ... IndexError: Boolean index has wrong length: 3 instead of 2. - NA values are treated as False. + NA values in a boolean array are treated as False. >>> mask = pd.array([True, pd.NA]) >>> pd.api.indexers.check_array_indexer(arr, mask) From 0a58605193c48f5a70db26a6a700cad8e4a96ced Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Mon, 10 Feb 2020 08:34:26 -0600 Subject: [PATCH 36/58] Edit 1.1.0 whatsnew --- doc/source/whatsnew/v1.1.0.rst | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index fa53b89df5be0..8e6f213a6a58a 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -41,12 +41,17 @@ Indexing with Nullable Boolean Arrays Previously indexing with a nullable Boolean array containing ``NA`` would raise a ``ValueError``, however this is now permitted with ``NA`` being treated as ``False``. (:issue:`31503`) +.. ipython:: python + + s = pd.Series([1, 2, 3, 4]) + mask = pd.array([True, True, False, None], dtype="boolean") + s + mask + *pandas 1.0.x* .. code-block:: python - >>> s = pd.Series([1, 2, 3, 4]) - >>> mask = pd.array([True, True, False, None], dtype="boolean") >>> s[mask] Traceback (most recent call last): ... @@ -54,12 +59,9 @@ Previously indexing with a nullable Boolean array containing ``NA`` would raise *pandas 1.1.0* -.. code-block:: python +.. ipython:: python - >>> s[mask] - 0 1 - 1 2 - dtype: int64 + s[mask] .. _whatsnew_110.enhancements.other: From b38a20912f4bbd62848e466c9b7a583668f38e14 Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Mon, 10 Feb 2020 09:05:45 -0600 Subject: [PATCH 37/58] Add to indexing.rst --- doc/source/user_guide/indexing.rst | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/doc/source/user_guide/indexing.rst b/doc/source/user_guide/indexing.rst index a8cdf4a61073d..cc17fcd9e4263 100644 --- a/doc/source/user_guide/indexing.rst +++ b/doc/source/user_guide/indexing.rst @@ -59,7 +59,7 @@ of multi-axis indexing. slices, **both** the start and the stop are included, when present in the index! See :ref:`Slicing with labels ` and :ref:`Endpoints are inclusive `.) - * A boolean array + * A boolean array (any ``NA`` values will be treated as ``False``). * A ``callable`` function with one argument (the calling Series or DataFrame) and that returns valid output for indexing (one of the above). @@ -75,7 +75,7 @@ of multi-axis indexing. * An integer e.g. ``5``. * A list or array of integers ``[4, 3, 0]``. * A slice object with ints ``1:7``. - * A boolean array. + * A boolean array (any ``NA`` values will be treated as ``False``). * A ``callable`` function with one argument (the calling Series or DataFrame) and that returns valid output for indexing (one of the above). @@ -374,6 +374,12 @@ For getting values with a boolean array: df1.loc['a'] > 0 df1.loc[:, df1.loc['a'] > 0] +NA values in a boolean array propogate as ``False``: + + mask = pd.array([True, False, True, False, pd.NA, False], dtype="boolean") + mask + df1[mask] + For getting a value explicitly: .. ipython:: python From 5088cbb426684baab0bc87ca85f6d940fa5c7696 Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Mon, 10 Feb 2020 09:16:13 -0600 Subject: [PATCH 38/58] Add back index parameter --- pandas/tests/arrays/categorical/test_indexing.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/pandas/tests/arrays/categorical/test_indexing.py b/pandas/tests/arrays/categorical/test_indexing.py index b01c5d5f178c5..a866eb08f1f4e 100644 --- a/pandas/tests/arrays/categorical/test_indexing.py +++ b/pandas/tests/arrays/categorical/test_indexing.py @@ -239,9 +239,12 @@ def test_mask_with_boolean(index): tm.assert_series_equal(result, expected) -def test_mask_with_boolean_na_treated_as_false(): +@pytest.mark.parametrize("index", [True, False]) +def test_mask_with_boolean_na_treated_as_false(index): s = Series(range(3)) idx = Categorical([True, False, None]) + if index: + idx = CategoricalIndex(idx) result = s[idx] expected = s[idx.fillna(False)] From 54efdd95a85cb330e296ffba2ed82409d245e3c0 Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Mon, 10 Feb 2020 09:19:49 -0600 Subject: [PATCH 39/58] Add some True values in test --- pandas/tests/extension/base/getitem.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/tests/extension/base/getitem.py b/pandas/tests/extension/base/getitem.py index aa1fbbb41bfb9..d69efb90388e5 100644 --- a/pandas/tests/extension/base/getitem.py +++ b/pandas/tests/extension/base/getitem.py @@ -161,6 +161,7 @@ def test_getitem_boolean_array_mask(self, data): def test_getitem_boolean_na_treated_as_false(self, data): mask = pd.array(np.zeros(data.shape, dtype="bool"), dtype="boolean") mask[:2] = pd.NA + mask[2:4] = True result = data[mask] expected = data[mask.fillna(False)] From c6b81ed1bdebdd360f9b50b28d89831ca21e9e15 Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Mon, 10 Feb 2020 10:09:50 -0600 Subject: [PATCH 40/58] Edit boolean.rst --- doc/source/user_guide/boolean.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/user_guide/boolean.rst b/doc/source/user_guide/boolean.rst index 3da2be09d2b7c..971fea2d6c9d3 100644 --- a/doc/source/user_guide/boolean.rst +++ b/doc/source/user_guide/boolean.rst @@ -20,7 +20,7 @@ Nullable Boolean Data Type Indexing with NA values ----------------------- -pandas allows indexing with ``NA`` values, which are treated as ``False``. +pandas allows indexing with ``NA`` values in a boolean array, which are treated as ``False``. .. ipython:: python :okexcept: From 67800c67b257e381b9aacf9bd751108cc03708b4 Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Mon, 10 Feb 2020 10:44:11 -0600 Subject: [PATCH 41/58] Add list back to check_array_indexer test --- pandas/tests/indexing/test_check_indexer.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pandas/tests/indexing/test_check_indexer.py b/pandas/tests/indexing/test_check_indexer.py index 0275a286bff29..64f841fdcc41b 100644 --- a/pandas/tests/indexing/test_check_indexer.py +++ b/pandas/tests/indexing/test_check_indexer.py @@ -31,11 +31,11 @@ def test_valid_input(indexer, expected): tm.assert_numpy_array_equal(result, expected) -def test_boolean_na_returns_indexer(): - # TODO: What to do now about list input that contains - # bools and None? +@pytest.mark.parametrize( + "indexer", [[True, False, None], pd.array([True, False, None], dtype="boolean")], +) +def test_boolean_na_returns_indexer(indexer): arr = np.array([1, 2, 3]) - indexer = pd.array([True, False, None], dtype="boolean") result = check_array_indexer(arr, indexer) expected = np.array([True, False, False], dtype=bool) From 578fd3cdf134627013a084e703f6186d019eef75 Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Mon, 10 Feb 2020 14:54:12 -0600 Subject: [PATCH 42/58] Account for pd.NA in is_bool_indexer --- pandas/core/common.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/pandas/core/common.py b/pandas/core/common.py index 526a9144ddcbf..1892e19dd3681 100644 --- a/pandas/core/common.py +++ b/pandas/core/common.py @@ -13,7 +13,7 @@ import numpy as np -from pandas._libs import lib, tslibs +from pandas._libs import lib, tslibs, missing as libmissing from pandas._typing import T from pandas.core.dtypes.cast import construct_1d_object_array_from_listlike @@ -133,11 +133,7 @@ def is_bool_indexer(key: Any) -> bool: elif is_bool_dtype(key.dtype): return True elif isinstance(key, list): - try: - arr = np.asarray(key) - return arr.dtype == np.bool_ and len(arr) == len(key) - except TypeError: # pragma: no cover - return False + return all(k is libmissing.NA or k is True or k is False for k in key) return False From a5593850b97f09fc72e9411851addcd433290b5c Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Mon, 10 Feb 2020 14:55:14 -0600 Subject: [PATCH 43/58] Include list mask in test --- pandas/tests/extension/base/getitem.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/pandas/tests/extension/base/getitem.py b/pandas/tests/extension/base/getitem.py index d69efb90388e5..8eae9425b01a1 100644 --- a/pandas/tests/extension/base/getitem.py +++ b/pandas/tests/extension/base/getitem.py @@ -158,20 +158,26 @@ def test_getitem_boolean_array_mask(self, data): result = pd.Series(data)[mask] self.assert_series_equal(result, expected) - def test_getitem_boolean_na_treated_as_false(self, data): + @pytest.mark.parametrize("use_list_mask", [True, False]) + def test_getitem_boolean_na_treated_as_false(self, data, use_list_mask): mask = pd.array(np.zeros(data.shape, dtype="bool"), dtype="boolean") mask[:2] = pd.NA mask[2:4] = True + filled_mask = mask.fillna(False) + + if use_list_mask: + mask = list(mask) + filled_mask = list(filled_mask) result = data[mask] - expected = data[mask.fillna(False)] + expected = data[filled_mask] self.assert_extension_array_equal(result, expected) s = pd.Series(data) result = s[mask] - expected = s[mask.fillna(False)] + expected = s[filled_mask] self.assert_series_equal(result, expected) From 705947e238c8db258b9df86581368aa68df5260f Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Mon, 10 Feb 2020 16:07:59 -0600 Subject: [PATCH 44/58] Account for empty key --- pandas/core/common.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pandas/core/common.py b/pandas/core/common.py index 1892e19dd3681..59127286cc19d 100644 --- a/pandas/core/common.py +++ b/pandas/core/common.py @@ -133,7 +133,9 @@ def is_bool_indexer(key: Any) -> bool: elif is_bool_dtype(key.dtype): return True elif isinstance(key, list): - return all(k is libmissing.NA or k is True or k is False for k in key) + return all(k is libmissing.NA or k is True or k is False for k in key) and ( + len(key) > 0 + ) return False From 4974778384444d8ff779a85f9456d63f90301589 Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Mon, 10 Feb 2020 17:00:57 -0600 Subject: [PATCH 45/58] Revert "Account for empty key" This reverts commit 705947e238c8db258b9df86581368aa68df5260f. --- pandas/core/common.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/pandas/core/common.py b/pandas/core/common.py index 59127286cc19d..1892e19dd3681 100644 --- a/pandas/core/common.py +++ b/pandas/core/common.py @@ -133,9 +133,7 @@ def is_bool_indexer(key: Any) -> bool: elif is_bool_dtype(key.dtype): return True elif isinstance(key, list): - return all(k is libmissing.NA or k is True or k is False for k in key) and ( - len(key) > 0 - ) + return all(k is libmissing.NA or k is True or k is False for k in key) return False From 319b52549825ea4aa7f268a505d12089f311c91d Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Mon, 10 Feb 2020 17:01:21 -0600 Subject: [PATCH 46/58] Revert "Account for pd.NA in is_bool_indexer" This reverts commit 578fd3cdf134627013a084e703f6186d019eef75. --- pandas/core/common.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/pandas/core/common.py b/pandas/core/common.py index 1892e19dd3681..526a9144ddcbf 100644 --- a/pandas/core/common.py +++ b/pandas/core/common.py @@ -13,7 +13,7 @@ import numpy as np -from pandas._libs import lib, tslibs, missing as libmissing +from pandas._libs import lib, tslibs from pandas._typing import T from pandas.core.dtypes.cast import construct_1d_object_array_from_listlike @@ -133,7 +133,11 @@ def is_bool_indexer(key: Any) -> bool: elif is_bool_dtype(key.dtype): return True elif isinstance(key, list): - return all(k is libmissing.NA or k is True or k is False for k in key) + try: + arr = np.asarray(key) + return arr.dtype == np.bool_ and len(arr) == len(key) + except TypeError: # pragma: no cover + return False return False From 8007ce43a5b609cac8ddf46b5f5cc306f6b70eda Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Tue, 11 Feb 2020 13:24:10 -0600 Subject: [PATCH 47/58] Try modifying is_bool_indexer --- pandas/core/common.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/pandas/core/common.py b/pandas/core/common.py index 526a9144ddcbf..b6cb97082f18b 100644 --- a/pandas/core/common.py +++ b/pandas/core/common.py @@ -27,6 +27,8 @@ from pandas.core.dtypes.inference import _iterable_not_string from pandas.core.dtypes.missing import isna, isnull, notnull # noqa +import pandas as pd + class SettingWithCopyError(ValueError): pass @@ -134,8 +136,8 @@ def is_bool_indexer(key: Any) -> bool: return True elif isinstance(key, list): try: - arr = np.asarray(key) - return arr.dtype == np.bool_ and len(arr) == len(key) + arr = pd.array(key) + return is_bool_dtype(arr.dtype) and (len(arr) == len(key)) except TypeError: # pragma: no cover return False From a10765f217ae3e8e566f230c0093cf00b5e4c8ca Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Tue, 11 Feb 2020 14:37:43 -0600 Subject: [PATCH 48/58] Revert "Try modifying is_bool_indexer" This reverts commit 8007ce43a5b609cac8ddf46b5f5cc306f6b70eda. --- pandas/core/common.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/pandas/core/common.py b/pandas/core/common.py index b6cb97082f18b..526a9144ddcbf 100644 --- a/pandas/core/common.py +++ b/pandas/core/common.py @@ -27,8 +27,6 @@ from pandas.core.dtypes.inference import _iterable_not_string from pandas.core.dtypes.missing import isna, isnull, notnull # noqa -import pandas as pd - class SettingWithCopyError(ValueError): pass @@ -136,8 +134,8 @@ def is_bool_indexer(key: Any) -> bool: return True elif isinstance(key, list): try: - arr = pd.array(key) - return is_bool_dtype(arr.dtype) and (len(arr) == len(key)) + arr = np.asarray(key) + return arr.dtype == np.bool_ and len(arr) == len(key) except TypeError: # pragma: no cover return False From d7fc3b7a65e2303e25256966974e377c7d719fc2 Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Tue, 11 Feb 2020 14:38:08 -0600 Subject: [PATCH 49/58] Revert "Include list mask in test" This reverts commit a5593850b97f09fc72e9411851addcd433290b5c. --- pandas/tests/extension/base/getitem.py | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/pandas/tests/extension/base/getitem.py b/pandas/tests/extension/base/getitem.py index 8eae9425b01a1..d69efb90388e5 100644 --- a/pandas/tests/extension/base/getitem.py +++ b/pandas/tests/extension/base/getitem.py @@ -158,26 +158,20 @@ def test_getitem_boolean_array_mask(self, data): result = pd.Series(data)[mask] self.assert_series_equal(result, expected) - @pytest.mark.parametrize("use_list_mask", [True, False]) - def test_getitem_boolean_na_treated_as_false(self, data, use_list_mask): + def test_getitem_boolean_na_treated_as_false(self, data): mask = pd.array(np.zeros(data.shape, dtype="bool"), dtype="boolean") mask[:2] = pd.NA mask[2:4] = True - filled_mask = mask.fillna(False) - - if use_list_mask: - mask = list(mask) - filled_mask = list(filled_mask) result = data[mask] - expected = data[filled_mask] + expected = data[mask.fillna(False)] self.assert_extension_array_equal(result, expected) s = pd.Series(data) result = s[mask] - expected = s[filled_mask] + expected = s[mask.fillna(False)] self.assert_series_equal(result, expected) From 6f9a2982179818176c156c2d11296fe8cc3770d0 Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Thu, 13 Feb 2020 08:44:28 -0600 Subject: [PATCH 50/58] Update release notes and docs --- doc/source/user_guide/boolean.rst | 2 ++ doc/source/user_guide/indexing.rst | 2 ++ doc/source/whatsnew/v1.0.2.rst | 27 +++++++++++++++++++++++++++ doc/source/whatsnew/v1.1.0.rst | 27 --------------------------- 4 files changed, 31 insertions(+), 27 deletions(-) diff --git a/doc/source/user_guide/boolean.rst b/doc/source/user_guide/boolean.rst index de0130bcd890e..d45bdc08e4a60 100644 --- a/doc/source/user_guide/boolean.rst +++ b/doc/source/user_guide/boolean.rst @@ -22,6 +22,8 @@ Indexing with NA values pandas allows indexing with ``NA`` values in a boolean array, which are treated as ``False``. +.. versionchanged:: 1.0.2 + .. ipython:: python :okexcept: diff --git a/doc/source/user_guide/indexing.rst b/doc/source/user_guide/indexing.rst index cc17fcd9e4263..2bd3ff626f2e1 100644 --- a/doc/source/user_guide/indexing.rst +++ b/doc/source/user_guide/indexing.rst @@ -376,6 +376,8 @@ For getting values with a boolean array: NA values in a boolean array propogate as ``False``: +.. versionchanged:: 1.0.2 + mask = pd.array([True, False, True, False, pd.NA, False], dtype="boolean") mask df1[mask] diff --git a/doc/source/whatsnew/v1.0.2.rst b/doc/source/whatsnew/v1.0.2.rst index 0216007ea5ba8..3815229dc092c 100644 --- a/doc/source/whatsnew/v1.0.2.rst +++ b/doc/source/whatsnew/v1.0.2.rst @@ -46,6 +46,33 @@ Bug fixes .. --------------------------------------------------------------------------- +Indexing with Nullable Boolean Arrays +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Previously indexing with a nullable Boolean array containing ``NA`` would raise a ``ValueError``, however this is now permitted with ``NA`` being treated as ``False``. (:issue:`31503`) + +.. ipython:: python + + s = pd.Series([1, 2, 3, 4]) + mask = pd.array([True, True, False, None], dtype="boolean") + s + mask + +*pandas 1.0.0-1.0.1* + +.. code-block:: python + + >>> s[mask] + Traceback (most recent call last): + ... + ValueError: cannot mask with array containing NA / NaN values + +*pandas 1.0.2* + +.. ipython:: python + + s[mask] + .. _whatsnew_102.contributors: Contributors diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index b09edaedef625..13827e8fc4c33 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -36,33 +36,6 @@ For example: ser["2014"] ser.loc["May 2015"] -Indexing with Nullable Boolean Arrays -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -Previously indexing with a nullable Boolean array containing ``NA`` would raise a ``ValueError``, however this is now permitted with ``NA`` being treated as ``False``. (:issue:`31503`) - -.. ipython:: python - - s = pd.Series([1, 2, 3, 4]) - mask = pd.array([True, True, False, None], dtype="boolean") - s - mask - -*pandas 1.0.x* - -.. code-block:: python - - >>> s[mask] - Traceback (most recent call last): - ... - ValueError: cannot mask with array containing NA / NaN values - -*pandas 1.1.0* - -.. ipython:: python - - s[mask] - .. _whatsnew_110.enhancements.other: Other enhancements From e1e39fe14d8fd4d5db429579db113022acf44231 Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Thu, 13 Feb 2020 08:55:22 -0600 Subject: [PATCH 51/58] Add issue number to tests --- pandas/tests/arrays/categorical/test_indexing.py | 1 + pandas/tests/extension/base/getitem.py | 1 + pandas/tests/indexing/test_check_indexer.py | 1 + pandas/tests/indexing/test_na_indexing.py | 1 + 4 files changed, 4 insertions(+) diff --git a/pandas/tests/arrays/categorical/test_indexing.py b/pandas/tests/arrays/categorical/test_indexing.py index a866eb08f1f4e..3d9469c252914 100644 --- a/pandas/tests/arrays/categorical/test_indexing.py +++ b/pandas/tests/arrays/categorical/test_indexing.py @@ -241,6 +241,7 @@ def test_mask_with_boolean(index): @pytest.mark.parametrize("index", [True, False]) def test_mask_with_boolean_na_treated_as_false(index): + # https://github.com/pandas-dev/pandas/issues/31503 s = Series(range(3)) idx = Categorical([True, False, None]) if index: diff --git a/pandas/tests/extension/base/getitem.py b/pandas/tests/extension/base/getitem.py index d69efb90388e5..b08a64cc076b6 100644 --- a/pandas/tests/extension/base/getitem.py +++ b/pandas/tests/extension/base/getitem.py @@ -159,6 +159,7 @@ def test_getitem_boolean_array_mask(self, data): self.assert_series_equal(result, expected) def test_getitem_boolean_na_treated_as_false(self, data): + # https://github.com/pandas-dev/pandas/issues/31503 mask = pd.array(np.zeros(data.shape, dtype="bool"), dtype="boolean") mask[:2] = pd.NA mask[2:4] = True diff --git a/pandas/tests/indexing/test_check_indexer.py b/pandas/tests/indexing/test_check_indexer.py index 64f841fdcc41b..69d4065234d93 100644 --- a/pandas/tests/indexing/test_check_indexer.py +++ b/pandas/tests/indexing/test_check_indexer.py @@ -35,6 +35,7 @@ def test_valid_input(indexer, expected): "indexer", [[True, False, None], pd.array([True, False, None], dtype="boolean")], ) def test_boolean_na_returns_indexer(indexer): + # https://github.com/pandas-dev/pandas/issues/31503 arr = np.array([1, 2, 3]) result = check_array_indexer(arr, indexer) diff --git a/pandas/tests/indexing/test_na_indexing.py b/pandas/tests/indexing/test_na_indexing.py index 4a9c93d753cc7..345ca30ec77eb 100644 --- a/pandas/tests/indexing/test_na_indexing.py +++ b/pandas/tests/indexing/test_na_indexing.py @@ -63,6 +63,7 @@ def test_series_mask_boolean(values, dtype, mask, box_mask, frame): @pytest.mark.parametrize("frame", [True, False]) def test_na_treated_as_false(frame): + # https://github.com/pandas-dev/pandas/issues/31503 s = pd.Series([1, 2, 3], name="name") if frame: From 5a72b2f9e9c99b133a6173e695c77da759fedd74 Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Thu, 13 Feb 2020 09:36:59 -0600 Subject: [PATCH 52/58] Add some setitem tests --- pandas/tests/extension/base/setitem.py | 30 ++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/pandas/tests/extension/base/setitem.py b/pandas/tests/extension/base/setitem.py index 590bcd586900a..79b0a37c06f5f 100644 --- a/pandas/tests/extension/base/setitem.py +++ b/pandas/tests/extension/base/setitem.py @@ -133,6 +133,36 @@ def test_setitem_mask_broadcast(self, data, setter): assert ser[0] == data[10] assert ser[1] == data[10] + def test_setitem_boolean_na_mask(self, data, box_in_series): + # https://github.com/pandas-dev/pandas/issues/31503 + if box_in_series: + data = pd.Series(data) + + mask = pd.array(np.zeros(len(data), dtype=bool), dtype="boolean") + mask[:2] = pd.NA + mask[2:4] = True + original = data.copy() + + data[mask] = data[mask.fillna(False)] + + if box_in_series: + tm.assert_series_equal(data, original) + else: + tm.assert_extension_array_equal(data, original) + + def test_setitem_boolean_na_mask_frame(self, data): + # https://github.com/pandas-dev/pandas/issues/31503 + df = pd.DataFrame({"a": range(len(data)), "b": data}) + original = df.copy() + + mask = pd.array(np.zeros(len(data), dtype=bool), dtype="boolean") + mask[:2] = pd.NA + mask[2:4] = True + + df.loc[mask, "b"] = data[mask.fillna(False)] + + tm.assert_frame_equal(df, original) + def test_setitem_expand_columns(self, data): df = pd.DataFrame({"A": data}) result = df.copy() From c0e8dc72e2e78f1d4290ab60c583bc4b034c20f7 Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Thu, 13 Feb 2020 10:35:29 -0600 Subject: [PATCH 53/58] Revert "Add some setitem tests" This reverts commit 5a72b2f9e9c99b133a6173e695c77da759fedd74. --- pandas/tests/extension/base/setitem.py | 30 -------------------------- 1 file changed, 30 deletions(-) diff --git a/pandas/tests/extension/base/setitem.py b/pandas/tests/extension/base/setitem.py index 79b0a37c06f5f..590bcd586900a 100644 --- a/pandas/tests/extension/base/setitem.py +++ b/pandas/tests/extension/base/setitem.py @@ -133,36 +133,6 @@ def test_setitem_mask_broadcast(self, data, setter): assert ser[0] == data[10] assert ser[1] == data[10] - def test_setitem_boolean_na_mask(self, data, box_in_series): - # https://github.com/pandas-dev/pandas/issues/31503 - if box_in_series: - data = pd.Series(data) - - mask = pd.array(np.zeros(len(data), dtype=bool), dtype="boolean") - mask[:2] = pd.NA - mask[2:4] = True - original = data.copy() - - data[mask] = data[mask.fillna(False)] - - if box_in_series: - tm.assert_series_equal(data, original) - else: - tm.assert_extension_array_equal(data, original) - - def test_setitem_boolean_na_mask_frame(self, data): - # https://github.com/pandas-dev/pandas/issues/31503 - df = pd.DataFrame({"a": range(len(data)), "b": data}) - original = df.copy() - - mask = pd.array(np.zeros(len(data), dtype=bool), dtype="boolean") - mask[:2] = pd.NA - mask[2:4] = True - - df.loc[mask, "b"] = data[mask.fillna(False)] - - tm.assert_frame_equal(df, original) - def test_setitem_expand_columns(self, data): df = pd.DataFrame({"A": data}) result = df.copy() From 607d9ed7e8c41b0607f87092d04f8506c97a3ea2 Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Thu, 13 Feb 2020 11:09:04 -0600 Subject: [PATCH 54/58] Update setitem tests --- pandas/tests/extension/base/setitem.py | 18 ++---------------- pandas/tests/extension/test_numpy.py | 4 ---- 2 files changed, 2 insertions(+), 20 deletions(-) diff --git a/pandas/tests/extension/base/setitem.py b/pandas/tests/extension/base/setitem.py index af70799c0236e..b7bb9fd7d5a08 100644 --- a/pandas/tests/extension/base/setitem.py +++ b/pandas/tests/extension/base/setitem.py @@ -98,8 +98,9 @@ def test_setitem_iloc_scalar_multiple_homogoneous(self, data): [ np.array([True, True, True, False, False]), pd.array([True, True, True, False, False], dtype="boolean"), + pd.array([True, True, True, pd.NA, pd.NA], dtype="boolean"), ], - ids=["numpy-array", "boolean-array"], + ids=["numpy-array", "boolean-array", "boolean-array-na"], ) def test_setitem_mask(self, data, mask, box_in_series): arr = data[:5].copy() @@ -124,21 +125,6 @@ def test_setitem_mask_raises(self, data, box_in_series): with pytest.raises(IndexError, match="wrong length"): data[mask] = data[0] - def test_setitem_mask_boolean_array_raises(self, data, box_in_series): - # missing values in mask - mask = pd.array(np.zeros(data.shape, dtype="bool"), dtype="boolean") - mask[:2] = pd.NA - - if box_in_series: - data = pd.Series(data) - - msg = ( - "Cannot mask with a boolean indexer containing NA values|" - "cannot mask with array containing NA / NaN values" - ) - with pytest.raises(ValueError, match=msg): - data[mask] = data[0] - @pytest.mark.parametrize( "idx", [[0, 1, 2], pd.array([0, 1, 2], dtype="Int64"), np.array([0, 1, 2])], diff --git a/pandas/tests/extension/test_numpy.py b/pandas/tests/extension/test_numpy.py index 80a093530a8cd..61c5925383f88 100644 --- a/pandas/tests/extension/test_numpy.py +++ b/pandas/tests/extension/test_numpy.py @@ -415,10 +415,6 @@ def test_setitem_mask(self, data, mask, box_in_series): def test_setitem_mask_raises(self, data, box_in_series): super().test_setitem_mask_raises(data, box_in_series) - @skip_nested - def test_setitem_mask_boolean_array_raises(self, data, box_in_series): - super().test_setitem_mask_boolean_array_raises(data, box_in_series) - @skip_nested @pytest.mark.parametrize( "idx", From c6d23f68f1d732a1657f07ccd37ceb877ca61706 Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Mon, 17 Feb 2020 11:55:35 -0600 Subject: [PATCH 55/58] Add setitem test --- pandas/tests/extension/base/setitem.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/pandas/tests/extension/base/setitem.py b/pandas/tests/extension/base/setitem.py index b7bb9fd7d5a08..a4fe89df158fa 100644 --- a/pandas/tests/extension/base/setitem.py +++ b/pandas/tests/extension/base/setitem.py @@ -125,6 +125,18 @@ def test_setitem_mask_raises(self, data, box_in_series): with pytest.raises(IndexError, match="wrong length"): data[mask] = data[0] + def test_setitem_mask_boolean_array_with_na(self, data, box_in_series): + mask = pd.array(np.zeros(data.shape, dtype="bool"), dtype="boolean") + mask[:3] = True + mask[3:5] = pd.NA + + if box_in_series: + data = pd.Series(data) + + data[mask] = data[0] + + assert (data[:3] == data[0]).all() + @pytest.mark.parametrize( "idx", [[0, 1, 2], pd.array([0, 1, 2], dtype="Int64"), np.array([0, 1, 2])], From fbda99d30496e9e3a2c037b24bf475ea046f2ecf Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Wed, 19 Feb 2020 09:04:53 -0600 Subject: [PATCH 56/58] Move whatsnew note --- doc/source/whatsnew/v1.0.2.rst | 48 ++++++++++++++++------------------ 1 file changed, 23 insertions(+), 25 deletions(-) diff --git a/doc/source/whatsnew/v1.0.2.rst b/doc/source/whatsnew/v1.0.2.rst index 093c8df1ba788..25fc9a4590fce 100644 --- a/doc/source/whatsnew/v1.0.2.rst +++ b/doc/source/whatsnew/v1.0.2.rst @@ -23,32 +23,8 @@ Fixed regressions .. --------------------------------------------------------------------------- -.. _whatsnew_102.bug_fixes: - -Bug fixes -~~~~~~~~~ - -**Categorical** - -- Fixed bug where :meth:`Categorical.from_codes` improperly raised a ``ValueError`` when passed nullable integer codes. (:issue:`31779`) -- Bug in :class:`Categorical` that would ignore or crash when calling :meth:`Series.replace` with a list-like ``to_replace`` (:issue:`31720`) - -**I/O** - -- Using ``pd.NA`` with :meth:`DataFrame.to_json` now correctly outputs a null value instead of an empty object (:issue:`31615`) -- Fixed bug in parquet roundtrip with nullable unsigned integer dtypes (:issue:`31896`). - - - -**Experimental dtypes** - -- Fix bug in :meth:`DataFrame.convert_dtypes` for columns that were already using the ``"string"`` dtype (:issue:`31731`). -- Fixed bug in setting values using a slice indexer with string dtype (:issue:`31772`) - -.. --------------------------------------------------------------------------- - Indexing with Nullable Boolean Arrays -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Previously indexing with a nullable Boolean array containing ``NA`` would raise a ``ValueError``, however this is now permitted with ``NA`` being treated as ``False``. (:issue:`31503`) @@ -74,6 +50,28 @@ Previously indexing with a nullable Boolean array containing ``NA`` would raise s[mask] +.. _whatsnew_102.bug_fixes: + +Bug fixes +~~~~~~~~~ + +**Categorical** + +- Fixed bug where :meth:`Categorical.from_codes` improperly raised a ``ValueError`` when passed nullable integer codes. (:issue:`31779`) +- Bug in :class:`Categorical` that would ignore or crash when calling :meth:`Series.replace` with a list-like ``to_replace`` (:issue:`31720`) + +**I/O** + +- Using ``pd.NA`` with :meth:`DataFrame.to_json` now correctly outputs a null value instead of an empty object (:issue:`31615`) +- Fixed bug in parquet roundtrip with nullable unsigned integer dtypes (:issue:`31896`). + +**Experimental dtypes** + +- Fix bug in :meth:`DataFrame.convert_dtypes` for columns that were already using the ``"string"`` dtype (:issue:`31731`). +- Fixed bug in setting values using a slice indexer with string dtype (:issue:`31772`) + +.. --------------------------------------------------------------------------- + .. _whatsnew_102.contributors: Contributors From 3bf932740a25ad51f825912a7c076418394a3183 Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Wed, 19 Feb 2020 09:09:11 -0600 Subject: [PATCH 57/58] Add back example --- doc/source/user_guide/boolean.rst | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/doc/source/user_guide/boolean.rst b/doc/source/user_guide/boolean.rst index d45bdc08e4a60..6370a523b9a0d 100644 --- a/doc/source/user_guide/boolean.rst +++ b/doc/source/user_guide/boolean.rst @@ -31,6 +31,12 @@ pandas allows indexing with ``NA`` values in a boolean array, which are treated mask = pd.array([True, False, pd.NA], dtype="boolean") s[mask] +If you would prefer to keep the ``NA`` values you can manually fill them with ``fillna(True)``. + +.. ipython:: python + + s[mask.fillna(True)] + .. _boolean.kleene: Kleene Logical Operations From 080d1d2129f101460af12b23d6f2b400e1a66e61 Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Fri, 21 Feb 2020 13:44:06 -0600 Subject: [PATCH 58/58] Update comment --- pandas/core/indexing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index ab909a5792f66..5ae237eb7dc32 100755 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -2191,7 +2191,7 @@ def check_bool_indexer(index: Index, key) -> np.ndarray: ) result = result.astype(bool)._values elif is_object_dtype(key): - # key might be sparse / object-dtype bool, check_array_indexer needs bool array + # key might be object-dtype bool, check_array_indexer needs bool array result = np.asarray(result, dtype=bool) result = check_array_indexer(index, result) else: