Skip to content

Commit

Permalink
Merge pull request #9282 from sinhrks/string_ismethods
Browse files Browse the repository at this point in the history
ENH: StringMethods supports is_xxx methods
  • Loading branch information
jreback committed Jan 29, 2015
2 parents 576818f + bc63677 commit ee3a1f1
Show file tree
Hide file tree
Showing 5 changed files with 106 additions and 54 deletions.
7 changes: 7 additions & 0 deletions doc/source/api.rst
Expand Up @@ -549,6 +549,13 @@ strings and apply several methods to it. These can be accessed like
Series.str.strip
Series.str.title
Series.str.upper
Series.str.isalnum
Series.str.isalpha
Series.str.isdigit
Series.str.isspace
Series.str.islower
Series.str.isupper
Series.str.istitle
Series.str.get_dummies

.. _api.categorical:
Expand Down
7 changes: 7 additions & 0 deletions doc/source/text.rst
Expand Up @@ -228,3 +228,10 @@ Method Summary
:meth:`~Series.str.lstrip`,Equivalent to ``str.lstrip``
:meth:`~Series.str.lower`,Equivalent to ``str.lower``
:meth:`~Series.str.upper`,Equivalent to ``str.upper``
:meth:`~Series.str.isalnum`,Equivalent to ``str.isalnum``
:meth:`~Series.str.isalpha`,Equivalent to ``str.isalpha``
:meth:`~Series.str.isdigit`,Equivalent to ``str.isdigit``
:meth:`~Series.str.isspace`,Equivalent to ``str.isspace``
:meth:`~Series.str.islower`,Equivalent to ``str.islower``
:meth:`~Series.str.isupper`,Equivalent to ``str.isupper``
:meth:`~Series.str.istitle`,Equivalent to ``str.istitle``
3 changes: 2 additions & 1 deletion doc/source/whatsnew/v0.16.0.txt
Expand Up @@ -104,11 +104,12 @@ Enhancements
- Added ``Series.str.slice_replace()``, which previously raised NotImplementedError (:issue:`8888`)
- Added ``Timestamp.to_datetime64()`` to complement ``Timedelta.to_timedelta64()`` (:issue:`9255`)
- ``tseries.frequencies.to_offset()`` now accepts ``Timedelta`` as input (:issue:`9064`)

- ``Timedelta`` will now accept nanoseconds keyword in constructor (:issue:`9273`)
- SQL code now safely escapes table and column names (:issue:`8986`)

- Added auto-complete for ``Series.str.<tab>``, ``Series.dt.<tab>`` and ``Series.cat.<tab>`` (:issue:`9322`)
- Added ``StringMethods.isalnum()``, ``isalpha()``, ``isdigit()``, ``isspace()``, ``islower()``,
``isupper()``, ``istitle()`` which behave the same as the standard ``str`` methods (:issue:`9282`)

Performance
~~~~~~~~~~~
Expand Down
108 changes: 55 additions & 53 deletions pandas/core/strings.py
Expand Up @@ -9,6 +9,9 @@
import textwrap


_shared_docs = dict()


def _get_array_list(arr, others):
from pandas.core.series import Series

Expand Down Expand Up @@ -124,17 +127,6 @@ def g(x):
return lib.map_infer(arr, f)


def str_title(arr):
"""
Convert strings to titlecased version
Returns
-------
titled : array
"""
return _na_map(lambda x: x.title(), arr)


def str_count(arr, pat, flags=0):
"""
Count occurrences of pattern in each string
Expand Down Expand Up @@ -197,7 +189,8 @@ def str_contains(arr, pat, case=True, flags=0, na=np.nan, regex=True):
else:
upper_pat = pat.upper()
f = lambda x: upper_pat in x
return _na_map(f, str_upper(arr), na, dtype=bool)
uppered = _na_map(lambda x: x.upper(), arr)
return _na_map(f, uppered, na, dtype=bool)
return _na_map(f, arr, na, dtype=bool)


Expand Down Expand Up @@ -239,28 +232,6 @@ def str_endswith(arr, pat, na=np.nan):
return _na_map(f, arr, na, dtype=bool)


def str_lower(arr):
"""
Convert strings in array to lowercase
Returns
-------
lowercase : array
"""
return _na_map(lambda x: x.lower(), arr)


def str_upper(arr):
"""
Convert strings in array to uppercase
Returns
-------
uppercase : array
"""
return _na_map(lambda x: x.upper(), arr)


def str_replace(arr, pat, repl, n=-1, case=True, flags=0):
"""
Replace
Expand Down Expand Up @@ -553,17 +524,6 @@ def str_join(arr, sep):
return _na_map(sep.join, arr)


def str_len(arr):
"""
Compute length of each string in array.
Returns
-------
lengths : array
"""
return _na_map(len, arr, dtype=int)


def str_findall(arr, pat, flags=0):
"""
Find all occurrences of pattern or regular expression
Expand Down Expand Up @@ -884,14 +844,16 @@ def str_encode(arr, encoding, errors="strict"):
return _na_map(f, arr)


def _noarg_wrapper(f):
def _noarg_wrapper(f, docstring=None, **kargs):
def wrapper(self):
result = f(self.series)
result = _na_map(f, self.series, **kargs)
return self._wrap_result(result)

wrapper.__name__ = f.__name__
if f.__doc__:
wrapper.__doc__ = f.__doc__
if docstring is not None:
wrapper.__doc__ = docstring
else:
raise ValueError('Provide docstring')

return wrapper

Expand Down Expand Up @@ -1076,7 +1038,47 @@ def get_dummies(self, sep='|'):
findall = _pat_wrapper(str_findall, flags=True)
extract = _pat_wrapper(str_extract, flags=True)

len = _noarg_wrapper(str_len)
lower = _noarg_wrapper(str_lower)
upper = _noarg_wrapper(str_upper)
title = _noarg_wrapper(str_title)
# Docstring for ``len``. It is passed explicitly because ``_noarg_wrapper``
# raises ``ValueError('Provide docstring')`` when ``docstring`` is None.
_shared_docs['len'] = ("""
Compute length of each string in array.
Returns
-------
lengths : array
""")
# ``dtype=int`` is forwarded by ``_noarg_wrapper`` as a keyword argument to
# ``_na_map``.
# NOTE(review): the ``len`` on the right-hand side is presumably the builtin,
# evaluated before this assignment rebinds the name -- confirm this sits in
# the class body.
len = _noarg_wrapper(len, docstring=_shared_docs['len'], dtype=int)

# Shared docstring template for the case-conversion methods. ``%s`` is filled
# in with the target case of each method, so the Returns entry must stay
# case-neutral: it previously read "uppercase" for ``lower`` and ``title`` too.
_shared_docs['casemethods'] = ("""
Convert strings in array to %s
Returns
-------
converted : array
""")
# ``_noarg_wrapper`` requires an explicit docstring, so every method
# interpolates its own case name into the shared template.
lower = _noarg_wrapper(lambda x: x.lower(),
                       docstring=_shared_docs['casemethods'] % 'lowercase')
upper = _noarg_wrapper(lambda x: x.upper(),
                       docstring=_shared_docs['casemethods'] % 'uppercase')
title = _noarg_wrapper(lambda x: x.title(),
                       docstring=_shared_docs['casemethods'] % 'titlecase')

# Shared docstring template for the ``is*`` predicates. ``%s`` is replaced
# with the character class that the individual method checks for.
_shared_docs['ismethods'] = ("""
Check whether all characters in each string in the array are %s
Returns
-------
Series of boolean values
""")
# Each predicate wraps the ``str`` method of the same name in a lambda and is
# registered through ``_noarg_wrapper`` with a docstring built from the
# template. No ``dtype`` keyword is passed here, unlike the ``len`` wrapper.
isalnum = _noarg_wrapper(lambda x: x.isalnum(),
docstring=_shared_docs['ismethods'] % 'alphanumeric')
isalpha = _noarg_wrapper(lambda x: x.isalpha(),
docstring=_shared_docs['ismethods'] % 'alphabetic')
isdigit = _noarg_wrapper(lambda x: x.isdigit(),
docstring=_shared_docs['ismethods'] % 'digits')
isspace = _noarg_wrapper(lambda x: x.isspace(),
docstring=_shared_docs['ismethods'] % 'whitespace')
islower = _noarg_wrapper(lambda x: x.islower(),
docstring=_shared_docs['ismethods'] % 'lowercase')
isupper = _noarg_wrapper(lambda x: x.isupper(),
docstring=_shared_docs['ismethods'] % 'uppercase')
istitle = _noarg_wrapper(lambda x: x.istitle(),
docstring=_shared_docs['ismethods'] % 'titlecase')
35 changes: 35 additions & 0 deletions pandas/tests/test_strings.py
Expand Up @@ -623,6 +623,41 @@ def test_empty_str_methods(self):
tm.assert_series_equal(empty_str, empty.str.get(0))
tm.assert_series_equal(empty_str, empty_bytes.str.decode('ascii'))
tm.assert_series_equal(empty_bytes, empty.str.encode('ascii'))
tm.assert_series_equal(empty_str, empty.str.isalnum())
tm.assert_series_equal(empty_str, empty.str.isalpha())
tm.assert_series_equal(empty_str, empty.str.isdigit())
tm.assert_series_equal(empty_str, empty.str.isspace())
tm.assert_series_equal(empty_str, empty.str.islower())
tm.assert_series_equal(empty_str, empty.str.isupper())
tm.assert_series_equal(empty_str, empty.str.istitle())

def test_ismethods(self):
    """Check every ``Series.str.is*`` method against fixed expectations.

    Each method is verified twice: once against a hand-written list of
    expected booleans, and once against the result of calling the
    builtin ``str`` method of the same name on every input value.
    """
    values = ['A', 'b', 'Xy', '4', '3A', '', 'TT', '55', '-', ' ']
    str_s = Series(values)

    # Expected results, positionally aligned with ``values``.
    alnum_e = [True, True, True, True, True,
               False, True, True, False, False]
    alpha_e = [True, True, True, False, False,
               False, True, False, False, False]
    digit_e = [False, False, False, True, False,
               False, False, True, False, False]
    space_e = [False, False, False, False, False,
               False, False, False, False, True]
    lower_e = [False, True, False, False, False,
               False, False, False, False, False]
    upper_e = [True, False, False, False, True,
               False, True, False, False, False]
    title_e = [True, False, True, False, True,
               False, False, False, False, False]

    tm.assert_series_equal(str_s.str.isalnum(), Series(alnum_e))
    tm.assert_series_equal(str_s.str.isalpha(), Series(alpha_e))
    tm.assert_series_equal(str_s.str.isdigit(), Series(digit_e))
    tm.assert_series_equal(str_s.str.isspace(), Series(space_e))
    tm.assert_series_equal(str_s.str.islower(), Series(lower_e))
    tm.assert_series_equal(str_s.str.isupper(), Series(upper_e))
    tm.assert_series_equal(str_s.str.istitle(), Series(title_e))

    # Cross-check against the builtin ``str`` methods. ``assertEqual`` is
    # used because the ``assertEquals`` alias is deprecated.
    self.assertEqual(str_s.str.isalnum().tolist(),
                     [v.isalnum() for v in values])
    self.assertEqual(str_s.str.isalpha().tolist(),
                     [v.isalpha() for v in values])
    self.assertEqual(str_s.str.isdigit().tolist(),
                     [v.isdigit() for v in values])
    self.assertEqual(str_s.str.isspace().tolist(),
                     [v.isspace() for v in values])
    self.assertEqual(str_s.str.islower().tolist(),
                     [v.islower() for v in values])
    self.assertEqual(str_s.str.isupper().tolist(),
                     [v.isupper() for v in values])
    self.assertEqual(str_s.str.istitle().tolist(),
                     [v.istitle() for v in values])

def test_get_dummies(self):
s = Series(['a|b', 'a|c', np.nan])
Expand Down

0 comments on commit ee3a1f1

Please sign in to comment.