Skip to content

Commit

Permalink
BUG: fixed .str.contains(..., na=False) for categorical series (#22170)
Browse files Browse the repository at this point in the history
  • Loading branch information
pulkitmaloo authored and jreback committed Nov 20, 2018
1 parent 71ba5bf commit 2af56d4
Show file tree
Hide file tree
Showing 3 changed files with 30 additions and 11 deletions.
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v0.24.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1280,7 +1280,7 @@ Strings

- Bug in :meth:`Index.str.partition` was not nan-safe (:issue:`23558`).
- Bug in :meth:`Index.str.split` was not nan-safe (:issue:`23677`).
-
- Bug in :func:`Series.str.contains` not respecting the ``na`` argument for a ``Categorical`` dtype ``Series`` (:issue:`22158`).

Interval
^^^^^^^^
Expand Down
9 changes: 5 additions & 4 deletions pandas/core/strings.py
Original file line number Diff line number Diff line change
Expand Up @@ -1857,7 +1857,7 @@ def __iter__(self):
g = self.get(i)

def _wrap_result(self, result, use_codes=True,
name=None, expand=None):
name=None, expand=None, fill_value=np.nan):

from pandas.core.index import Index, MultiIndex

Expand All @@ -1867,7 +1867,8 @@ def _wrap_result(self, result, use_codes=True,
# so make it possible to skip this step as the method already did this
# before the transformation...
if use_codes and self._is_categorical:
result = take_1d(result, self._orig.cat.codes)
result = take_1d(result, self._orig.cat.codes,
fill_value=fill_value)

if not hasattr(result, 'ndim') or not hasattr(result, 'dtype'):
return result
Expand Down Expand Up @@ -2520,12 +2521,12 @@ def join(self, sep):
def contains(self, pat, case=True, flags=0, na=np.nan, regex=True):
result = str_contains(self._parent, pat, case=case, flags=flags, na=na,
regex=regex)
return self._wrap_result(result)
return self._wrap_result(result, fill_value=na)

@copy(str_match)
def match(self, pat, case=True, flags=0, na=np.nan):
result = str_match(self._parent, pat, case=case, flags=flags, na=na)
return self._wrap_result(result)
return self._wrap_result(result, fill_value=na)

@copy(str_replace)
def replace(self, pat, repl, n=-1, case=None, flags=0, regex=True):
Expand Down
30 changes: 24 additions & 6 deletions pandas/tests/test_strings.py
Original file line number Diff line number Diff line change
Expand Up @@ -512,10 +512,28 @@ def test_contains(self):
assert result.dtype == np.bool_
tm.assert_numpy_array_equal(result, expected)

# na
values = Series(['om', 'foo', np.nan])
res = values.str.contains('foo', na="foo")
assert res.loc[2] == "foo"
def test_contains_for_object_category(self):
# GH 22158: ``str.contains`` must apply the ``na`` argument to missing
# values for a category-dtype Series, just as it does for an object Series.

# na for category: the trailing NaN entry takes the value passed as ``na``
values = Series(["a", "b", "c", "a", np.nan], dtype="category")
result = values.str.contains('a', na=True)
expected = Series([True, False, False, True, True])
tm.assert_series_equal(result, expected)

result = values.str.contains('a', na=False)
expected = Series([True, False, False, True, False])
tm.assert_series_equal(result, expected)

# na for objects: same values without the category dtype, same expectations
values = Series(["a", "b", "c", "a", np.nan])
result = values.str.contains('a', na=True)
expected = Series([True, False, False, True, True])
tm.assert_series_equal(result, expected)

result = values.str.contains('a', na=False)
expected = Series([True, False, False, True, False])
tm.assert_series_equal(result, expected)

def test_startswith(self):
values = Series(['om', NA, 'foo_nom', 'nom', 'bar_foo', NA, 'foo'])
Expand Down Expand Up @@ -2893,7 +2911,7 @@ def test_get_complex_nested(self, to_type):
expected = Series([np.nan])
tm.assert_series_equal(result, expected)

def test_more_contains(self):
def test_contains_moar(self):
# PR #1179
s = Series(['A', 'B', 'C', 'Aaba', 'Baca', '', NA,
'CABA', 'dog', 'cat'])
Expand Down Expand Up @@ -2943,7 +2961,7 @@ def test_contains_nan(self):
expected = Series([np.nan, np.nan, np.nan], dtype=np.object_)
assert_series_equal(result, expected)

def test_more_replace(self):
def test_replace_moar(self):
# PR #1179
s = Series(['A', 'B', 'C', 'Aaba', 'Baca', '', NA, 'CABA',
'dog', 'cat'])
Expand Down

0 comments on commit 2af56d4

Please sign in to comment.