diff --git a/pandas/tests/series/test_api.py b/pandas/tests/series/test_api.py index f944d6f8c9d08..65f5c59deba36 100644 --- a/pandas/tests/series/test_api.py +++ b/pandas/tests/series/test_api.py @@ -602,82 +602,6 @@ def f(): ordered=True)) tm.assert_series_equal(result, expected) - def test_str_accessor_api_for_categorical(self): - # https://github.com/pandas-dev/pandas/issues/10661 - from pandas.core.strings import StringMethods - s = Series(list('aabb')) - s = s + " " + s - c = s.astype('category') - assert isinstance(c.str, StringMethods) - - # str functions, which need special arguments - special_func_defs = [ - ('cat', (list("zyxw"),), {"sep": ","}), - ('center', (10,), {}), - ('contains', ("a",), {}), - ('count', ("a",), {}), - ('decode', ("UTF-8",), {}), - ('encode', ("UTF-8",), {}), - ('endswith', ("a",), {}), - ('extract', ("([a-z]*) ",), {"expand": False}), - ('extract', ("([a-z]*) ",), {"expand": True}), - ('extractall', ("([a-z]*) ",), {}), - ('find', ("a",), {}), - ('findall', ("a",), {}), - ('index', (" ",), {}), - ('ljust', (10,), {}), - ('match', ("a"), {}), # deprecated... - ('normalize', ("NFC",), {}), - ('pad', (10,), {}), - ('partition', (" ",), {"expand": False}), # not default - ('partition', (" ",), {"expand": True}), # default - ('repeat', (3,), {}), - ('replace', ("a", "z"), {}), - ('rfind', ("a",), {}), - ('rindex', (" ",), {}), - ('rjust', (10,), {}), - ('rpartition', (" ",), {"expand": False}), # not default - ('rpartition', (" ",), {"expand": True}), # default - ('slice', (0, 1), {}), - ('slice_replace', (0, 1, "z"), {}), - ('split', (" ",), {"expand": False}), # default - ('split', (" ",), {"expand": True}), # not default - ('startswith', ("a",), {}), - ('wrap', (2,), {}), - ('zfill', (10,), {}) - ] - _special_func_names = [f[0] for f in special_func_defs] - - # * get, join: they need a individual elements of type lists, but - # we can't make a categorical with lists as individual categories. - # -> `s.str.split(" ").astype("category")` will error! - # * `translate` has different interfaces for py2 vs. py3 - _ignore_names = ["get", "join", "translate"] - - str_func_names = [f for f in dir(s.str) if not ( - f.startswith("_") or - f in _special_func_names or - f in _ignore_names)] - - func_defs = [(f, (), {}) for f in str_func_names] - func_defs.extend(special_func_defs) - - for func, args, kwargs in func_defs: - res = getattr(c.str, func)(*args, **kwargs) - exp = getattr(s.str, func)(*args, **kwargs) - - if isinstance(res, DataFrame): - tm.assert_frame_equal(res, exp) - else: - tm.assert_series_equal(res, exp) - - invalid = Series([1, 2, 3]).astype('category') - msg = "Can only use .str accessor with string" - - with pytest.raises(AttributeError, match=msg): - invalid.str - assert not hasattr(invalid, 'str') - def test_dt_accessor_api_for_categorical(self): # https://github.com/pandas-dev/pandas/issues/10661 from pandas.core.indexes.accessors import Properties diff --git a/pandas/tests/test_strings.py b/pandas/tests/test_strings.py index c0aab5d25e3fe..bfabaa7a1069a 100644 --- a/pandas/tests/test_strings.py +++ b/pandas/tests/test_strings.py @@ -26,6 +26,98 @@ def assert_series_or_index_equal(left, right): assert_index_equal(left, right) +_any_string_method = [ + ('cat', (), {'sep': ','}), # noqa: E241 + ('cat', (Series(list('zyx')),), {'sep': ',', # noqa: E241 + 'join': 'left'}), + ('center', (10,), {}), # noqa: E241 + ('contains', ('a',), {}), # noqa: E241 + ('count', ('a',), {}), # noqa: E241 + ('decode', ('UTF-8',), {}), # noqa: E241 + ('encode', ('UTF-8',), {}), # noqa: E241 + ('endswith', ('a',), {}), # noqa: E241 + ('extract', ('([a-z]*)',), {'expand': False}), # noqa: E241 + ('extract', ('([a-z]*)',), {'expand': True}), # noqa: E241 + ('extractall', ('([a-z]*)',), {}), # noqa: E241 + ('find', ('a',), {}), # noqa: E241 + ('findall', ('a',), {}), # noqa: E241 + ('get', (0,), {}), # noqa: E241 + # because "index" (and "rindex") fail intentionally + # if the string is not found, search only for empty string + ('index', ('',), {}), # noqa: E241 + ('join', (',',), {}), # noqa: E241 + ('ljust', (10,), {}), # noqa: E241 + ('match', ('a',), {}), # noqa: E241 + ('normalize', ('NFC',), {}), # noqa: E241 + ('pad', (10,), {}), # noqa: E241 + ('partition', (' ',), {'expand': False}), # noqa: E241 + ('partition', (' ',), {'expand': True}), # noqa: E241 + ('repeat', (3,), {}), # noqa: E241 + ('replace', ('a', 'z',), {}), # noqa: E241 + ('rfind', ('a',), {}), # noqa: E241 + ('rindex', ('',), {}), # noqa: E241 + ('rjust', (10,), {}), # noqa: E241 + ('rpartition', (' ',), {'expand': False}), # noqa: E241 + ('rpartition', (' ',), {'expand': True}), # noqa: E241 + ('slice', (0, 1,), {}), # noqa: E241 + ('slice_replace', (0, 1, 'z',), {}), # noqa: E241 + ('split', (' ',), {'expand': False}), # noqa: E241 + ('split', (' ',), {'expand': True}), # noqa: E241 + ('startswith', ('a',), {}), # noqa: E241 + # translating unicode points of "a" to "d" + ('translate', ({97: 100},), {}), # noqa: E241 + ('wrap', (2,), {}), # noqa: E241 + ('zfill', (10,), {}) # noqa: E241 +] + list(zip([ + # methods without positional arguments: zip with empty tuple and empty dict + 'capitalize', 'cat', 'get_dummies', + 'isalnum', 'isalpha', 'isdecimal', + 'isdigit', 'islower', 'isnumeric', + 'isspace', 'istitle', 'isupper', + 'len', 'lower', 'lstrip', 'partition', + 'rpartition', 'rsplit', 'rstrip', + 'slice', 'slice_replace', 'split', + 'strip', 'swapcase', 'title', 'upper' +], [()] * 100, [{}] * 100)) +ids, _, _ = zip(*_any_string_method) # use method name as fixture-id + + +# test that the above list captures all methods of StringMethods +missing_methods = {f for f in dir(strings.StringMethods) + if not f.startswith('_')} - set(ids) +assert not missing_methods + + +@pytest.fixture(params=_any_string_method, ids=ids) +def any_string_method(request): + """ + Fixture for all public methods of `StringMethods` + + This fixture returns a tuple of the method name and sample arguments + necessary to call the method. + + Returns + ------- + method_name : str + The name of the method in `StringMethods` + args : tuple + Sample values for the positional arguments + kwargs : dict + Sample values for the keyword arguments + + Examples + -------- + >>> def test_something(any_string_method): + ... s = pd.Series(['a', 'b', np.nan, 'd']) + ... + ... method_name, args, kwargs = any_string_method + ... method = getattr(s.str, method_name) + ... # will not raise + ... method(*args, **kwargs) + """ + return request.param + + class TestStringMethods(object): def test_api(self): @@ -40,6 +132,26 @@ def test_api(self): invalid.str assert not hasattr(invalid, 'str') + def test_api_for_categorical(self, any_string_method): + # https://github.com/pandas-dev/pandas/issues/10661 + s = Series(list('aabb')) + s = s + " " + s + c = s.astype('category') + assert isinstance(c.str, strings.StringMethods) + + method_name, args, kwargs = any_string_method + + result = getattr(c.str, method_name)(*args, **kwargs) + expected = getattr(s.str, method_name)(*args, **kwargs) + + if isinstance(result, DataFrame): + tm.assert_frame_equal(result, expected) + elif isinstance(result, Series): + tm.assert_series_equal(result, expected) + else: + # str.cat(others=None) returns string, for example + assert result == expected + def test_iter(self): # GH3638 strs = 'google', 'wikimedia', 'wikipedia', 'wikitravel'