Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

Already on GitHub? Sign in to your account

BUG, TST: Fix pandas.core.strings.str_contains when handling regex=False and case=False #7505

Merged
merged 2 commits into from Jun 30, 2014
Jump to file or symbol
Failed to load files and symbols.
+18 −1
Split
View
@@ -249,3 +249,4 @@ Bug Fixes
- Bug in non-monotonic ``Index.union`` may preserve ``name`` incorrectly (:issue:`7458`)
- Bug in ``DatetimeIndex.intersection`` where the timezone was not preserved (:issue:`4690`)
+- Bug in ``pandas.core.strings.str_contains`` where matching was not case-insensitive when ``regex=False`` and ``case=False`` (:issue:`7505`)
View
@@ -189,7 +189,12 @@ def str_contains(arr, pat, case=True, flags=0, na=np.nan, regex=True):
f = lambda x: bool(regex.search(x))
else:
- f = lambda x: pat in x
+ if case:
+ f = lambda x: pat in x
+ else:
+ upper_pat = pat.upper()
+ f = lambda x: upper_pat in x
+ return _na_map(f, str_upper(arr), na, dtype=bool)
return _na_map(f, arr, na, dtype=bool)
@@ -189,6 +189,17 @@ def test_contains(self):
self.assertEqual(result.dtype, np.bool_)
tm.assert_almost_equal(result, expected)
+ # case insensitive using regex
+ values = ['Foo', 'xYz', 'fOOomMm__fOo', 'MMM_']
+ result = strings.str_contains(values, 'FOO|mmm', case=False)
+ expected = [True, False, True, True]
+ tm.assert_almost_equal(result, expected)
+
+ # case insensitive without regex
+ result = strings.str_contains(values, 'foo', regex=False, case=False)
+ expected = [True, False, True, False]
+ tm.assert_almost_equal(result, expected)
+
# mixed
mixed = ['a', NA, 'b', True, datetime.today(), 'foo', None, 1, 2.]
rs = strings.str_contains(mixed, 'o')