Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

TST: move .str-test to strings.py & parametrize it; precursor to #23582 #23777

Merged
merged 1 commit into from
Nov 20, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
76 changes: 0 additions & 76 deletions pandas/tests/series/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -602,82 +602,6 @@ def f():
ordered=True))
tm.assert_series_equal(result, expected)

def test_str_accessor_api_for_categorical(self):
    """
    Compare ``.str`` results of a categorical Series with its source Series.

    Every public name listed by ``dir(s.str)`` (except the ignored ones) is
    called on both the categorical and the original Series and the results
    are asserted equal. Methods that cannot be called without arguments get
    sample arguments from ``special_func_defs``. Finally, accessing ``.str``
    on a categorical built from integers must raise ``AttributeError``.
    """
    # https://github.com/pandas-dev/pandas/issues/10661
    from pandas.core.strings import StringMethods
    s = Series(list('aabb'))
    s = s + " " + s
    c = s.astype('category')
    assert isinstance(c.str, StringMethods)

    # str functions, which need special arguments;
    # each entry is (method name, positional args, keyword args)
    special_func_defs = [
        ('cat', (list("zyxw"),), {"sep": ","}),
        ('center', (10,), {}),
        ('contains', ("a",), {}),
        ('count', ("a",), {}),
        ('decode', ("UTF-8",), {}),
        ('encode', ("UTF-8",), {}),
        ('endswith', ("a",), {}),
        ('extract', ("([a-z]*) ",), {"expand": False}),
        ('extract', ("([a-z]*) ",), {"expand": True}),
        ('extractall', ("([a-z]*) ",), {}),
        ('find', ("a",), {}),
        ('findall', ("a",), {}),
        ('index', (" ",), {}),
        ('ljust', (10,), {}),
        # args must be a real one-element tuple: a bare ("a") is just the
        # string "a" and only unpacks correctly because it has length one
        ('match', ("a",), {}),  # deprecated...
        ('normalize', ("NFC",), {}),
        ('pad', (10,), {}),
        ('partition', (" ",), {"expand": False}),  # not default
        ('partition', (" ",), {"expand": True}),  # default
        ('repeat', (3,), {}),
        ('replace', ("a", "z"), {}),
        ('rfind', ("a",), {}),
        ('rindex', (" ",), {}),
        ('rjust', (10,), {}),
        ('rpartition', (" ",), {"expand": False}),  # not default
        ('rpartition', (" ",), {"expand": True}),  # default
        ('slice', (0, 1), {}),
        ('slice_replace', (0, 1, "z"), {}),
        ('split', (" ",), {"expand": False}),  # default
        ('split', (" ",), {"expand": True}),  # not default
        ('startswith', ("a",), {}),
        ('wrap', (2,), {}),
        ('zfill', (10,), {})
    ]
    _special_func_names = [f[0] for f in special_func_defs]

    # * get, join: they need individual elements of type list, but
    #   we can't make a categorical with lists as individual categories.
    #   -> `s.str.split(" ").astype("category")` will error!
    # * `translate` has different interfaces for py2 vs. py3
    _ignore_names = ["get", "join", "translate"]

    # everything public that is neither special-cased nor ignored is
    # called without any arguments
    str_func_names = [f for f in dir(s.str) if not (
        f.startswith("_") or
        f in _special_func_names or
        f in _ignore_names)]

    func_defs = [(f, (), {}) for f in str_func_names]
    func_defs.extend(special_func_defs)

    for func, args, kwargs in func_defs:
        res = getattr(c.str, func)(*args, **kwargs)
        exp = getattr(s.str, func)(*args, **kwargs)

        if isinstance(res, DataFrame):
            tm.assert_frame_equal(res, exp)
        else:
            tm.assert_series_equal(res, exp)

    invalid = Series([1, 2, 3]).astype('category')
    msg = "Can only use .str accessor with string"

    with pytest.raises(AttributeError, match=msg):
        invalid.str
    assert not hasattr(invalid, 'str')

def test_dt_accessor_api_for_categorical(self):
# https://github.com/pandas-dev/pandas/issues/10661
from pandas.core.indexes.accessors import Properties
Expand Down
112 changes: 112 additions & 0 deletions pandas/tests/test_strings.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,98 @@ def assert_series_or_index_equal(left, right):
assert_index_equal(left, right)


# Sample invocations of the public ``StringMethods`` API, stored as
# (method_name, positional_args, keyword_args) triples. A method may appear
# more than once when several argument combinations are exercised.
_any_string_method = [
    ('cat', (), {'sep': ','}),
    ('cat', (Series(list('zyx')),), {'sep': ',', 'join': 'left'}),
    ('center', (10,), {}),
    ('contains', ('a',), {}),
    ('count', ('a',), {}),
    ('decode', ('UTF-8',), {}),
    ('encode', ('UTF-8',), {}),
    ('endswith', ('a',), {}),
    ('extract', ('([a-z]*)',), {'expand': False}),
    ('extract', ('([a-z]*)',), {'expand': True}),
    ('extractall', ('([a-z]*)',), {}),
    ('find', ('a',), {}),
    ('findall', ('a',), {}),
    ('get', (0,), {}),
    # because "index" (and "rindex") fail intentionally
    # if the string is not found, search only for empty string
    ('index', ('',), {}),
    ('join', (',',), {}),
    ('ljust', (10,), {}),
    ('match', ('a',), {}),
    ('normalize', ('NFC',), {}),
    ('pad', (10,), {}),
    ('partition', (' ',), {'expand': False}),
    ('partition', (' ',), {'expand': True}),
    ('repeat', (3,), {}),
    ('replace', ('a', 'z',), {}),
    ('rfind', ('a',), {}),
    ('rindex', ('',), {}),
    ('rjust', (10,), {}),
    ('rpartition', (' ',), {'expand': False}),
    ('rpartition', (' ',), {'expand': True}),
    ('slice', (0, 1,), {}),
    ('slice_replace', (0, 1, 'z',), {}),
    ('split', (' ',), {'expand': False}),
    ('split', (' ',), {'expand': True}),
    ('startswith', ('a',), {}),
    # translating unicode points of "a" to "d"
    ('translate', ({97: 100},), {}),
    ('wrap', (2,), {}),
    ('zfill', (10,), {})
] + [
    # methods without positional arguments: pair each name with an empty
    # tuple and its own empty dict. Building the triples per name avoids a
    # hard-coded filler length (zip would silently truncate a longer list)
    # and avoids sharing one mutable dict between all entries.
    (_name, (), {}) for _name in [
        'capitalize', 'cat', 'get_dummies',
        'isalnum', 'isalpha', 'isdecimal',
        'isdigit', 'islower', 'isnumeric',
        'isspace', 'istitle', 'isupper',
        'len', 'lower', 'lstrip', 'partition',
        'rpartition', 'rsplit', 'rstrip',
        'slice', 'slice_replace', 'split',
        'strip', 'swapcase', 'title', 'upper'
    ]
]
ids, _, _ = zip(*_any_string_method)  # use method name as fixture-id


# Import-time guard: every public attribute of StringMethods has to show up
# among the fixture ids collected above.
missing_methods = set(
    name for name in dir(strings.StringMethods) if not name.startswith('_')
).difference(ids)
assert not missing_methods


@pytest.fixture(params=_any_string_method, ids=ids)
def any_string_method(request):
    """
    Parametrized fixture over sample calls of `StringMethods` methods.

    Each parameter is one entry of ``_any_string_method`` and is handed to
    the test unchanged; the fixture id is the method name.

    Returns
    -------
    tuple
        A ``(method_name, args, kwargs)`` triple, where ``method_name`` is
        the name of a method of `StringMethods`, ``args`` is a tuple of
        sample positional arguments and ``kwargs`` is a dict of sample
        keyword arguments.

    Examples
    --------
    >>> def test_something(any_string_method):
    ...     s = pd.Series(['a', 'b', np.nan, 'd'])
    ...
    ...     method_name, args, kwargs = any_string_method
    ...     # look the method up on the accessor and invoke it with the
    ...     # sample arguments; this is expected not to raise
    ...     getattr(s.str, method_name)(*args, **kwargs)
    """
    sample_call = request.param
    return sample_call


class TestStringMethods(object):

def test_api(self):
Expand All @@ -40,6 +132,26 @@ def test_api(self):
invalid.str
assert not hasattr(invalid, 'str')

def test_api_for_categorical(self, any_string_method):
    """
    A categorical Series must give the same ``.str`` results as its source.

    The method named by the ``any_string_method`` fixture is called with the
    fixture's sample arguments on both a string Series and its categorical
    counterpart, and the two results are compared.
    """
    # https://github.com/pandas-dev/pandas/issues/10661
    letters = Series(list('aabb'))
    plain = letters + " " + letters
    categorical = plain.astype('category')
    assert isinstance(categorical.str, strings.StringMethods)

    method_name, args, kwargs = any_string_method

    # call on the categorical first, then on the plain Series
    result = getattr(categorical.str, method_name)(*args, **kwargs)
    expected = getattr(plain.str, method_name)(*args, **kwargs)

    if isinstance(result, DataFrame):
        tm.assert_frame_equal(result, expected)
        return

    if isinstance(result, Series):
        tm.assert_series_equal(result, expected)
        return

    # neither frame nor series:
    # str.cat(others=None) returns string, for example
    assert result == expected

def test_iter(self):
# GH3638
strs = 'google', 'wikimedia', 'wikipedia', 'wikitravel'
Expand Down