pandas-dev · TomAugspurger · Dec 13, 2019
diff --git a/pandas/core/arrays/boolean.py b/pandas/core/arrays/boolean.py
@@ -24,6 +24,7 @@
 )
 from pandas.core.dtypes.dtypes import register_extension_dtype
 from pandas.core.dtypes.generic import ABCDataFrame, ABCIndexClass, ABCSeries
+from pandas.core.dtypes.inference import is_array_like
 from pandas.core.dtypes.missing import isna, notna
 
 from pandas.core import nanops, ops
@@ -294,6 +295,14 @@ def __getitem__(self, item):
             if self._mask[item]:
                 return self.dtype.na_value
             return self._data[item]
+        elif is_array_like(item) and is_bool_dtype(item.dtype):
+            if isinstance(item, BooleanArray):
+                # items, mask = item._data, item._mask
+                take = item._data | item._mask
+                result = self._data[take]
+                # output masked anywhere where self is masked, or the input was masked
+                omask = (self._mask | item._mask)[take]
+                return type(self)(result, omask)
         return type(self)(self._data[item], self._mask[item])
 
     def _coerce_to_ndarray(self, dtype=None, na_value: "Scalar" = libmissing.NA):

diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py
@@ -46,6 +46,7 @@
 from pandas.core.accessor import PandasDelegate, delegate_names
 import pandas.core.algorithms as algorithms
 from pandas.core.algorithms import _get_data_algo, factorize, take, take_1d, unique1d
+from pandas.core.arrays import BooleanArray
 from pandas.core.base import NoNewAttributesMixin, PandasObject, _shared_docs
 import pandas.core.common as com
 from pandas.core.construction import array, extract_array, sanitize_array
@@ -1996,10 +1997,14 @@ def __getitem__(self, key):
                 return np.nan
             else:
                 return self.categories[i]
+        elif com.is_bool_indexer(key) and isinstance(key, BooleanArray):
+            take = key._data | key._mask
+            values = self._codes[take]
+            values[key._mask[take]] = -1
         else:
-            return self._constructor(
-                values=self._codes[key], dtype=self.dtype, fastpath=True
-            )
+            values = self._codes[key]
+
+        return self._constructor(values=values, dtype=self.dtype, fastpath=True)
 
     def __setitem__(self, key, value):
         """

diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py
@@ -40,6 +40,7 @@
 from pandas._typing import DatetimeLikeScalar
 from pandas.core import missing, nanops
 from pandas.core.algorithms import checked_add_with_arr, take, unique1d, value_counts
+from pandas.core.arrays import BooleanArray
 import pandas.core.common as com
 from pandas.core.ops.common import unpack_zerodim_and_defer
 from pandas.core.ops.invalid import make_invalid_op
@@ -415,8 +416,15 @@ def __getitem__(self, key):
             val = getitem(key)
             return self._box_func(val)
 
+        mask = None
         if com.is_bool_indexer(key):
-            key = np.asarray(key, dtype=bool)
+            if isinstance(key, BooleanArray):
+                # TODO: Handle all boolean indexers.
+                mask = key._mask
+                key = key._data | mask
+                mask = mask[key]
+            else:
+                key = np.asarray(key, dtype=bool)
             if key.all():
                 key = slice(0, None, None)
             else:
@@ -438,6 +446,10 @@ def __getitem__(self, key):
                 freq = self.freq
 
         result = getitem(key)
+        if mask is not None:
+            # TODO: Check that we've copied!
+            result[mask] = iNaT
+
         if result.ndim > 1:
             # To support MPL which performs slicing with 2 dim
             # even though it only has 1 dim by definition

diff --git a/pandas/core/arrays/integer.py b/pandas/core/arrays/integer.py
@@ -21,6 +21,7 @@
     is_scalar,
 )
 from pandas.core.dtypes.dtypes import register_extension_dtype
+from pandas.core.dtypes.inference import is_array_like
 from pandas.core.dtypes.missing import isna, notna
 
 from pandas.core import nanops, ops
@@ -366,10 +367,20 @@ def fmt(x):
         return fmt
 
     def __getitem__(self, item):
+        from pandas.core.arrays import BooleanArray
+
         if is_integer(item):
             if self._mask[item]:
                 return self.dtype.na_value
             return self._data[item]
+        elif is_array_like(item) and is_bool_dtype(item.dtype):
+            if isinstance(item, BooleanArray):
+                # items, mask = item._data, item._mask
+                take = item._data | item._mask
+                result = self._data[take]
+                # output masked anywhere where self is masked, or the input was masked
+                omask = (self._mask | item._mask)[take]
+                return type(self)(result, omask)
         return type(self)(self._data[item], self._mask[item])
 
     def _coerce_to_ndarray(self):

diff --git a/pandas/core/arrays/string_.py b/pandas/core/arrays/string_.py
@@ -13,7 +13,8 @@
 
 from pandas import compat
 from pandas.core import ops
-from pandas.core.arrays import PandasArray
+from pandas.core.arrays import BooleanArray, PandasArray
+import pandas.core.common as com
 from pandas.core.construction import extract_array
 from pandas.core.missing import isna
 
@@ -232,6 +233,19 @@ def __setitem__(self, key, value):
 
         super().__setitem__(key, value)
 
+    def __getitem__(self, item):
+        # Doing this here, as PandasArray.__getitem__ can't guarantee dtype stability
+        # when getting with a boolean mask.
+        if com.is_bool_indexer(item):
+            if isinstance(item, BooleanArray):
+                # items, mask = item._data, item._mask
+                take = item._data | item._mask
+                result = self[take]
+                # That copies, right?
+                result[item._mask[take]] = self.dtype.na_value
+                return result
+        return super().__getitem__(item)
+
     def fillna(self, value=None, method=None, limit=None):
         # TODO: validate dtype
         return super().fillna(value, method, limit)

diff --git a/pandas/core/common.py b/pandas/core/common.py
@@ -132,9 +132,9 @@ def is_bool_indexer(key: Any) -> bool:
         elif is_bool_dtype(key.dtype):
             # an ndarray with bool-dtype by definition has no missing values.
             # So we only need to check for NAs in ExtensionArrays
-            if is_extension_array_dtype(key.dtype):
-                if np.any(key.isna()):
-                    raise ValueError(na_msg)
+            # if is_extension_array_dtype(key.dtype):
+            #     if np.any(key.isna()):
+            #         raise ValueError(na_msg)
             return True
     elif isinstance(key, list):
         try:

diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
@@ -65,7 +65,7 @@
 from pandas.core import ops
 from pandas.core.accessor import CachedAccessor
 import pandas.core.algorithms as algos
-from pandas.core.arrays import ExtensionArray
+from pandas.core.arrays import BooleanArray, ExtensionArray
 from pandas.core.base import IndexOpsMixin, PandasObject
 import pandas.core.common as com
 from pandas.core.construction import extract_array
@@ -4014,7 +4014,11 @@ def __getitem__(self, key):
             return promote(getitem(key))
 
         if com.is_bool_indexer(key):
-            key = np.asarray(key, dtype=bool)
+            if isinstance(key, BooleanArray):
+                # TODO: Handle all boolean indexers.
+                key = key._data | key._mask
+            else:
+                key = np.asarray(key, dtype=bool)
 
         key = com.values_from_object(key)
         result = getitem(key)

diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py
@@ -8,6 +8,7 @@
 from pandas.util._decorators import Appender
 
 from pandas.core.dtypes.common import (
+    is_extension_array_dtype,
     is_float,
     is_integer,
     is_iterator,
@@ -19,6 +20,7 @@
 )
 from pandas.core.dtypes.concat import concat_compat
 from pandas.core.dtypes.generic import ABCDataFrame, ABCMultiIndex, ABCSeries
+from pandas.core.dtypes.inference import is_array_like
 from pandas.core.dtypes.missing import _infer_fill_value, isna
 
 import pandas.core.common as com
@@ -2309,6 +2311,7 @@ def check_bool_indexer(index: Index, key) -> np.ndarray:
         If the index of the key is unalignable to index.
     """
     result = key
+    mask = None
     if isinstance(key, ABCSeries) and not key.index.equals(index):
         result = result.reindex(index)
         mask = isna(result._values)
@@ -2319,6 +2322,9 @@ def check_bool_indexer(index: Index, key) -> np.ndarray:
                 "the indexed object do not match)."
             )
         result = result.astype(bool)._values
+    elif is_array_like(result) and is_extension_array_dtype(result.dtype):
+        assert result.dtype.kind == "b"
+        mask = isna(result)
     else:
         if is_sparse(result):
             result = result.to_dense()
@@ -2330,7 +2336,7 @@ def check_bool_indexer(index: Index, key) -> np.ndarray:
                 "Item wrong length {} instead of {}.".format(len(result), len(index))
             )
 
-    return result
+    return result, mask
 
 
 def convert_missing_indexer(indexer):

diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py
@@ -291,7 +291,7 @@ def __setstate__(self, state):
         self.values = state[1]
         self.ndim = self.values.ndim
 
-    def _slice(self, slicer):
+    def _slice(self, slicer, mask=None):
         """ return a slice of my values """
         return self.values[slicer]
 
@@ -1810,7 +1810,7 @@ def _can_hold_element(self, element: Any) -> bool:
         # We're doing the same as CategoricalBlock here.
         return True
 
-    def _slice(self, slicer):
+    def _slice(self, slicer, mask=None):
         """ return a slice of my values """
 
         # slice the category
@@ -2307,7 +2307,7 @@ def to_dense(self):
         # expects that behavior.
         return np.asarray(self.values, dtype=_NS_DTYPE)
 
-    def _slice(self, slicer):
+    def _slice(self, slicer, mask=None):
         """ return a slice of my values """
         if isinstance(slicer, tuple):
             col, loc = slicer

diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py
@@ -709,7 +709,7 @@ def combine(self, blocks, copy=True):
 
         return type(self)(new_blocks, axes, do_integrity_check=False)
 
-    def get_slice(self, slobj, axis=0):
+    def get_slice(self, slobj, axis=0, mask=None):
         if axis >= self.ndim:
             raise IndexError("Requested axis not found in manager")
 
@@ -1505,11 +1505,13 @@ def _blklocs(self):
         """ compat with BlockManager """
         return None
 
-    def get_slice(self, slobj, axis=0):
+    def get_slice(self, slobj, axis=0, mask=None):
         if axis >= self.ndim:
             raise IndexError("Requested axis not found in manager")
 
-        return type(self)(self._block._slice(slobj), self.index[slobj], fastpath=True)
+        return type(self)(
+            self._block._slice(slobj, mask=mask), self.index[slobj], fastpath=True
+        )
 
     @property
     def index(self):

diff --git a/pandas/core/series.py b/pandas/core/series.py
@@ -892,11 +892,13 @@ def __getitem__(self, key):
             key = list(key)
 
         if com.is_bool_indexer(key):
-            key = check_bool_indexer(self.index, key)
+            key, mask = check_bool_indexer(self.index, key)
+        else:
+            mask = None
 
-        return self._get_with(key)
+        return self._get_with(key, mask)
 
-    def _get_with(self, key):
+    def _get_with(self, key, mask=None):
         # other: fancy integer or otherwise
         if isinstance(key, slice):
             return self._slice(key)
@@ -917,12 +919,13 @@ def _get_with(self, key):
                         return self._get_values(key)
                 raise
 
-        if not isinstance(key, (list, np.ndarray, Series, Index)):
+        if not is_list_like(key):
             key = list(key)
 
         if isinstance(key, Index):
             key_type = key.inferred_type
         else:
+            # TODO: why not use key.dtype?
             key_type = lib.infer_dtype(key, skipna=False)
 
         if key_type == "integer":
@@ -931,7 +934,7 @@ def _get_with(self, key):
             else:
                 return self._get_values(key)
         elif key_type == "boolean":
-            return self._get_values(key)
+            return self._get_values(key, mask)
 
         if isinstance(key, (list, tuple)):
             # TODO: de-dup with tuple case handled above?
@@ -960,10 +963,10 @@ def _get_values_tuple(self, key):
             self
         )
 
-    def _get_values(self, indexer):
+    def _get_values(self, indexer, mask=None):
         try:
             return self._constructor(
-                self._data.get_slice(indexer), fastpath=True
+                self._data.get_slice(indexer, mask=mask), fastpath=True
             ).__finalize__(self)
         except ValueError:
             # mpl compat if we look up e.g. ser[:, np.newaxis];

diff --git a/pandas/tests/frame/indexing/test_indexing.py b/pandas/tests/frame/indexing/test_indexing.py
@@ -2013,6 +2013,7 @@ def test_index_single_double_tuples(self, tpl):
         tm.assert_frame_equal(result, expected)
 
     def test_boolean_indexing(self):
+        # TODO: parametrize
         idx = list(range(3))
         cols = ["A", "B", "C"]
         df1 = DataFrame(
@@ -2036,6 +2037,7 @@ def test_boolean_indexing(self):
             df1[df1.index[:-1] > 2] = -1
 
     def test_boolean_indexing_mixed(self):
+        # TODO: parametrize?
         df = DataFrame(
             {
                 0: {35: np.nan, 40: np.nan, 43: np.nan, 49: np.nan, 50: np.nan},