diff --git a/doc/source/api.rst b/doc/source/api.rst index a8097f2648c4b..4fc9df52a4300 100644 --- a/doc/source/api.rst +++ b/doc/source/api.rst @@ -549,6 +549,13 @@ strings and apply several methods to it. These can be acccessed like Series.str.strip Series.str.title Series.str.upper + Series.str.isalnum + Series.str.isalpha + Series.str.isdigit + Series.str.isspace + Series.str.islower + Series.str.isupper + Series.str.istitle Series.str.get_dummies .. _api.categorical: diff --git a/doc/source/text.rst b/doc/source/text.rst index eb11cfb1248a9..4cd96613f6d51 100644 --- a/doc/source/text.rst +++ b/doc/source/text.rst @@ -228,3 +228,10 @@ Method Summary :meth:`~Series.str.lstrip`,Equivalent to ``str.lstrip`` :meth:`~Series.str.lower`,Equivalent to ``str.lower`` :meth:`~Series.str.upper`,Equivalent to ``str.upper`` + :meth:`~Series.str.isalnum`,Equivalent to ``str.isalnum`` + :meth:`~Series.str.isalpha`,Equivalent to ``str.isalpha`` + :meth:`~Series.str.isdigit`,Equivalent to ``str.isdigit`` + :meth:`~Series.str.isspace`,Equivalent to ``str.isspace`` + :meth:`~Series.str.islower`,Equivalent to ``str.islower`` + :meth:`~Series.str.isupper`,Equivalent to ``str.isupper`` + :meth:`~Series.str.istitle`,Equivalent to ``str.istitle`` diff --git a/doc/source/whatsnew/v0.16.0.txt b/doc/source/whatsnew/v0.16.0.txt index 6082a58687c2c..feee9e4fba4b0 100644 --- a/doc/source/whatsnew/v0.16.0.txt +++ b/doc/source/whatsnew/v0.16.0.txt @@ -104,11 +104,12 @@ Enhancements - Added ``Series.str.slice_replace()``, which previously raised NotImplementedError (:issue:`8888`) - Added ``Timestamp.to_datetime64()`` to complement ``Timedelta.to_timedelta64()`` (:issue:`9255`) - ``tseries.frequencies.to_offset()`` now accepts ``Timedelta`` as input (:issue:`9064`) - - ``Timedelta`` will now accept nanoseconds keyword in constructor (:issue:`9273`) - SQL code now safely escapes table and column names (:issue:`8986`) - Added auto-complete for ``Series.str.``, ``Series.dt.`` and ``Series.cat.`` 
(:issue:`9322`) +- Added ``StringMethods.isalnum()``, ``isalpha()``, ``isdigit()``, ``isspace()``, ``islower()``, +  ``isupper()``, ``istitle()`` which behave the same as the standard ``str`` methods (:issue:`9282`) Performance ~~~~~~~~~~~ diff --git a/pandas/core/strings.py b/pandas/core/strings.py index 75d10654977cd..1556d3290cd01 100644 --- a/pandas/core/strings.py +++ b/pandas/core/strings.py @@ -9,6 +9,9 @@ import textwrap +_shared_docs = dict() + + def _get_array_list(arr, others): from pandas.core.series import Series @@ -124,17 +127,6 @@ def g(x): return lib.map_infer(arr, f) -def str_title(arr): - """ - Convert strings to titlecased version - - Returns - ------- - titled : array - """ - return _na_map(lambda x: x.title(), arr) - - def str_count(arr, pat, flags=0): """ Count occurrences of pattern in each string @@ -197,7 +189,8 @@ def str_contains(arr, pat, case=True, flags=0, na=np.nan, regex=True): else: upper_pat = pat.upper() f = lambda x: upper_pat in x - return _na_map(f, str_upper(arr), na, dtype=bool) + uppered = _na_map(lambda x: x.upper(), arr) + return _na_map(f, uppered, na, dtype=bool) return _na_map(f, arr, na, dtype=bool) @@ -239,28 +232,6 @@ def str_endswith(arr, pat, na=np.nan): return _na_map(f, arr, na, dtype=bool) -def str_lower(arr): - """ - Convert strings in array to lowercase - - Returns - ------- - lowercase : array - """ - return _na_map(lambda x: x.lower(), arr) - - -def str_upper(arr): - """ - Convert strings in array to uppercase - - Returns - ------- - uppercase : array - """ - return _na_map(lambda x: x.upper(), arr) - - def str_replace(arr, pat, repl, n=-1, case=True, flags=0): """ Replace @@ -553,17 +524,6 @@ def str_join(arr, sep): return _na_map(sep.join, arr) -def str_len(arr): - """ - Compute length of each string in array.
- - Returns - ------- - lengths : array - """ - return _na_map(len, arr, dtype=int) - - def str_findall(arr, pat, flags=0): """ Find all occurrences of pattern or regular expression @@ -884,14 +844,16 @@ def str_encode(arr, encoding, errors="strict"): return _na_map(f, arr) -def _noarg_wrapper(f): +def _noarg_wrapper(f, docstring=None, name=None, **kargs): def wrapper(self): - result = f(self.series) + result = _na_map(f, self.series, **kargs) return self._wrap_result(result) - wrapper.__name__ = f.__name__ - if f.__doc__: - wrapper.__doc__ = f.__doc__ + wrapper.__name__ = f.__name__ if name is None else name + if docstring is not None: + wrapper.__doc__ = docstring + else: + raise ValueError('Provide docstring') return wrapper @@ -1076,7 +1038,47 @@ def get_dummies(self, sep='|'): findall = _pat_wrapper(str_findall, flags=True) extract = _pat_wrapper(str_extract, flags=True) - len = _noarg_wrapper(str_len) - lower = _noarg_wrapper(str_lower) - upper = _noarg_wrapper(str_upper) - title = _noarg_wrapper(str_title) + _shared_docs['len'] = (""" + Compute length of each string in array.
+ + Returns + ------- + lengths : array + """) + len = _noarg_wrapper(len, docstring=_shared_docs['len'], dtype=int) + + _shared_docs['casemethods'] = (""" + Convert strings in array to %s + + Returns + ------- + converted : array + """) + lower = _noarg_wrapper(lambda x: x.lower(), name='lower', + docstring=_shared_docs['casemethods'] % 'lowercase') + upper = _noarg_wrapper(lambda x: x.upper(), name='upper', + docstring=_shared_docs['casemethods'] % 'uppercase') + title = _noarg_wrapper(lambda x: x.title(), name='title', + docstring=_shared_docs['casemethods'] % 'titlecase') + + _shared_docs['ismethods'] = (""" + Check whether all characters in each string in the array are %s + + Returns + ------- + Series of boolean values + """) + isalnum = _noarg_wrapper(lambda x: x.isalnum(), name='isalnum', + docstring=_shared_docs['ismethods'] % 'alphanumeric') + isalpha = _noarg_wrapper(lambda x: x.isalpha(), name='isalpha', + docstring=_shared_docs['ismethods'] % 'alphabetic') + isdigit = _noarg_wrapper(lambda x: x.isdigit(), name='isdigit', + docstring=_shared_docs['ismethods'] % 'digits') + isspace = _noarg_wrapper(lambda x: x.isspace(), name='isspace', + docstring=_shared_docs['ismethods'] % 'whitespace') + islower = _noarg_wrapper(lambda x: x.islower(), name='islower', + docstring=_shared_docs['ismethods'] % 'lowercase') + isupper = _noarg_wrapper(lambda x: x.isupper(), name='isupper', + docstring=_shared_docs['ismethods'] % 'uppercase') + istitle = _noarg_wrapper(lambda x: x.istitle(), name='istitle', + docstring=_shared_docs['ismethods'] % 'titlecase') diff --git a/pandas/tests/test_strings.py b/pandas/tests/test_strings.py index b8f1a6ac342af..2d7463249bd65 100644 --- a/pandas/tests/test_strings.py +++ b/pandas/tests/test_strings.py @@ -623,6 +623,41 @@ def test_empty_str_methods(self): tm.assert_series_equal(empty_str, empty.str.get(0)) tm.assert_series_equal(empty_str, empty_bytes.str.decode('ascii')) tm.assert_series_equal(empty_bytes, empty.str.encode('ascii')) + tm.assert_series_equal(empty_str, empty.str.isalnum()) + tm.assert_series_equal(empty_str, empty.str.isalpha()) + tm.assert_series_equal(empty_str,
empty.str.isdigit()) + tm.assert_series_equal(empty_str, empty.str.isspace()) + tm.assert_series_equal(empty_str, empty.str.islower()) + tm.assert_series_equal(empty_str, empty.str.isupper()) + tm.assert_series_equal(empty_str, empty.str.istitle()) + + def test_ismethods(self): + values = ['A', 'b', 'Xy', '4', '3A', '', 'TT', '55', '-', ' '] + str_s = Series(values) + alnum_e = [True, True, True, True, True, False, True, True, False, False] + alpha_e = [True, True, True, False, False, False, True, False, False, False] + digit_e = [False, False, False, True, False, False, False, True, False, False] + num_e = [False, False, False, True, False, False, False, True, False, False] + space_e = [False, False, False, False, False, False, False, False, False, True] + lower_e = [False, True, False, False, False, False, False, False, False, False] + upper_e = [True, False, False, False, True, False, True, False, False, False] + title_e = [True, False, True, False, True, False, False, False, False, False] + + tm.assert_series_equal(str_s.str.isalnum(), Series(alnum_e)) + tm.assert_series_equal(str_s.str.isalpha(), Series(alpha_e)) + tm.assert_series_equal(str_s.str.isdigit(), Series(digit_e)) + tm.assert_series_equal(str_s.str.isspace(), Series(space_e)) + tm.assert_series_equal(str_s.str.islower(), Series(lower_e)) + tm.assert_series_equal(str_s.str.isupper(), Series(upper_e)) + tm.assert_series_equal(str_s.str.istitle(), Series(title_e)) + + self.assertEqual(str_s.str.isalnum().tolist(), [v.isalnum() for v in values]) + self.assertEqual(str_s.str.isalpha().tolist(), [v.isalpha() for v in values]) + self.assertEqual(str_s.str.isdigit().tolist(), [v.isdigit() for v in values]) + self.assertEqual(str_s.str.isspace().tolist(), [v.isspace() for v in values]) + self.assertEqual(str_s.str.islower().tolist(), [v.islower() for v in values]) + self.assertEqual(str_s.str.isupper().tolist(), [v.isupper() for v in values]) + self.assertEqual(str_s.str.istitle().tolist(),
[v.istitle() for v in values]) def test_get_dummies(self): s = Series(['a|b', 'a|c', np.nan])