pandas-dev · jreback · Jun 20, 2018 · Jun 16, 2018 · Jun 17, 2018 · Jun 18, 2018
diff --git a/asv_bench/benchmarks/categoricals.py b/asv_bench/benchmarks/categoricals.py
@@ -202,7 +202,11 @@ class Contains(object):
     def setup(self):
         N = 10**5
         self.ci = tm.makeCategoricalIndex(N)
-        self.cat = self.ci.categories[0]
+        self.c = self.ci.values
+        self.key = self.ci.categories[0]
 
-    def time_contains(self):
-        self.cat in self.ci
+    def time_categorical_index_contains(self):
+        self.key in self.ci
+
+    def time_categorical_contains(self):
+        self.key in self.c
diff --git a/doc/source/whatsnew/v0.23.2.txt b/doc/source/whatsnew/v0.23.2.txt
@@ -26,7 +26,7 @@ Performance Improvements
 
 - Improved performance of membership checks in :class:`CategoricalIndex`
   (i.e. ``x in ci``-style checks are much faster). :meth:`CategoricalIndex.contains`
-  is likewise much faster (:issue:`21369`)
+  is likewise much faster (:issue:`21369`, :issue:`21508`)
 - Improved performance of :meth:`MultiIndex.is_unique` (:issue:`21522`)
 -
 

diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py
@@ -157,6 +157,57 @@ def _maybe_to_categorical(array):
     return array
 
 
+def contains(cat, key, container):
+    """
+    Helper for membership check for ``key`` in ``cat``.
+
+    This is a helper function for :meth:`Categorical.__contains__`
+    and :meth:`CategoricalIndex.__contains__`.
+
+    Returns True if ``key`` is in ``cat.categories`` and the
+    location of ``key`` in ``categories`` is in ``container``.
+
+    Parameters
+    ----------
+    cat : :class:`Categorical` or :class:`CategoricalIndex`
+    key : a hashable object
+        The key to check membership for.
+    container : Container (e.g. list-like or mapping)
+        The container to check for membership in.
+
+    Returns
+    -------
+    is_in : bool
+        True if ``key`` is in ``cat.categories`` and location of
+        ``key`` in ``categories`` is in ``container``, else False.
+
+    Notes
+    -----
+    This function does not check for NaN values. Do that separately
+    before calling this function.
+    """
+    hash(key)
+
+    # get location of key in categories.
+    # If a KeyError, the key isn't in categories, so logically
+    #  can't be in container either.
+    try:
+        loc = cat.categories.get_loc(key)
+    except KeyError:
+        return False
+
+    # loc is the location of key in categories, but also the *value*
+    # for key in container. So, `key` may be in categories,
+    # but still not in `container`. Example ('b' in categories,
+    # but not in values):
+    # 'b' in Categorical(['a'], categories=['a', 'b'])  # False
+    if is_scalar(loc):
+        return loc in container
+    else:
+        # if categories is an IntervalIndex, loc is an array.
+        return any(loc_ in container for loc_ in loc)
+
+
 _codes_doc = """The category codes of this categorical.
 
 Level codes are an array if integer which are the positions of the real
@@ -1846,6 +1897,14 @@ def __iter__(self):
         """Returns an Iterator over the values of this Categorical."""
         return iter(self.get_values().tolist())
 
+    def __contains__(self, key):
+        """Returns True if `key` is in this Categorical."""
+        # if key is a NaN, check if any NaN is in self.
+        if isna(key):
+            return self.isna().any()
+
+        return contains(self, key, container=self._codes)
+
     def _tidy_repr(self, max_vals=10, footer=True):
         """ a short repr displaying only max_vals and an optional (but default
         footer)

diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py
@@ -24,6 +24,7 @@
 import pandas.core.common as com
 import pandas.core.missing as missing
 import pandas.core.indexes.base as ibase
+from pandas.core.arrays.categorical import Categorical, contains
 
 _index_doc_kwargs = dict(ibase._index_doc_kwargs)
 _index_doc_kwargs.update(dict(target_klass='CategoricalIndex'))
@@ -125,7 +126,6 @@ def _create_from_codes(self, codes, categories=None, ordered=None,
         CategoricalIndex
         """
 
-        from pandas.core.arrays import Categorical
         if categories is None:
             categories = self.categories
         if ordered is None:
@@ -162,7 +162,6 @@ def _create_categorical(self, data, categories=None, ordered=None,
         if not isinstance(data, ABCCategorical):
             if ordered is None and dtype is None:
                 ordered = False
-            from pandas.core.arrays import Categorical
             data = Categorical(data, categories=categories, ordered=ordered,
                                dtype=dtype)
         else:
@@ -323,32 +322,14 @@ def _reverse_indexer(self):
 
     @Appender(_index_shared_docs['__contains__'] % _index_doc_kwargs)
     def __contains__(self, key):
-        hash(key)
-
-        if isna(key):  # if key is a NaN, check if any NaN is in self.
+        # if key is a NaN, check if any NaN is in self.
+        if isna(key):
             return self.hasnans
 
-        # is key in self.categories? Then get its location.
-        # If not (i.e. KeyError), it logically can't be in self either
-        try:
-            loc = self.categories.get_loc(key)
-        except KeyError:
-            return False
-
-        # loc is the location of key in self.categories, but also the value
-        # for key in self.codes and in self._engine. key may be in categories,
-        # but still not in self, check this. Example:
-        # 'b' in CategoricalIndex(['a'], categories=['a', 'b']) #  False
-        if is_scalar(loc):
-            return loc in self._engine
-        else:
-            # if self.categories is IntervalIndex, loc is an array
-            # check if any scalar of the array is in self._engine
-            return any(loc_ in self._engine for loc_ in loc)
+        return contains(self, key, container=self._engine)
 
     @Appender(_index_shared_docs['contains'] % _index_doc_kwargs)
     def contains(self, key):
-        hash(key)
         return key in self
 
     def __array__(self, dtype=None):
@@ -479,7 +460,6 @@ def where(self, cond, other=None):
             other = self._na_value
         values = np.where(cond, self.values, other)
 
-        from pandas.core.arrays import Categorical
         cat = Categorical(values,
                           categories=self.categories,
                           ordered=self.ordered)
@@ -862,7 +842,6 @@ def _delegate_method(self, name, *args, **kwargs):
     def _add_accessors(cls):
         """ add in Categorical accessor methods """
 
-        from pandas.core.arrays import Categorical
         CategoricalIndex._add_delegate_accessors(
             delegate=Categorical, accessors=["rename_categories",
                                              "reorder_categories",

diff --git a/pandas/tests/categorical/test_operators.py b/pandas/tests/categorical/test_operators.py
@@ -291,3 +291,20 @@ def test_numeric_like_ops(self):
 
         # invalid ufunc
         pytest.raises(TypeError, lambda: np.log(s))
+
+    def test_contains(self):
+        # GH21508
+        c = pd.Categorical(list('aabbca'), categories=list('cab'))
+
+        assert 'b' in c
+        assert 'z' not in c
+        assert np.nan not in c
+        with pytest.raises(TypeError):
+            assert [1] in c
+
+        # assert codes NOT in categorical
+        assert 0 not in c
+        assert 1 not in c
+
+        c = pd.Categorical(list('aabbca') + [np.nan], categories=list('cab'))
+        assert np.nan in c