From 9b8fed65b097aff53e112aee6716321f1df50393 Mon Sep 17 00:00:00 2001 From: Kaiqi Dong Date: Sat, 23 Feb 2019 20:42:40 +0100 Subject: [PATCH 01/16] collect updated master --- pandas/core/strings.py | 5 +++++ pandas/tests/test_strings.py | 10 +++++++++- 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/pandas/core/strings.py b/pandas/core/strings.py index cc7a4db515c42..5565cef538133 100644 --- a/pandas/core/strings.py +++ b/pandas/core/strings.py @@ -2943,6 +2943,7 @@ def rindex(self, sub, start=0, end=None): remaining to lowercase. Series.str.swapcase : Converts uppercase to lowercase and lowercase to uppercase. + Series.str.casefold: Removes all case distinctions in the string. Examples -------- @@ -2995,6 +2996,7 @@ def rindex(self, sub, start=0, end=None): _shared_docs['capitalize'] = dict(type='be capitalized', method='capitalize') _shared_docs['swapcase'] = dict(type='be swapcased', method='swapcase') + _shared_docs['casefold'] = dict(type='be casefolded', method='casefold') lower = _noarg_wrapper(lambda x: x.lower(), docstring=_shared_docs['casemethods'] % _shared_docs['lower']) @@ -3010,6 +3012,9 @@ def rindex(self, sub, start=0, end=None): swapcase = _noarg_wrapper(lambda x: x.swapcase(), docstring=_shared_docs['casemethods'] % _shared_docs['swapcase']) + casefold = _noarg_wrapper(lambda x: x.casefold(), + docstring=_shared_docs['casemethods'] % + _shared_docs['casefold']) _shared_docs['ismethods'] = (""" Check whether all characters in each string are %(type)s. diff --git a/pandas/tests/test_strings.py b/pandas/tests/test_strings.py index 7cea3be03d1a7..117ed653f2c39 100644 --- a/pandas/tests/test_strings.py +++ b/pandas/tests/test_strings.py @@ -76,7 +76,7 @@ def assert_series_or_index_equal(left, right): 'len', 'lower', 'lstrip', 'partition', 'rpartition', 'rsplit', 'rstrip', 'slice', 'slice_replace', 'split', - 'strip', 'swapcase', 'title', 'upper' + 'strip', 'swapcase', 'title', 'upper', 'casefold' ], [()] * 100, [{}] * 100)) ids, _, _ = zip(*_any_string_method) # use method name as fixture-id @@ -3424,3 +3424,11 @@ def test_method_on_bytes(self): expected = Series(np.array( ['ad', 'be', 'cf'], 'S2').astype(object)) tm.assert_series_equal(result, expected) + + def test_casefold(self): + values = Series(['ss', NA, 'case', 'ssd']) + s = Series(['ß', NA, 'case', 'ßd']) + exp = s.str.casefold() + + assert isinstance(exp, Series) + assert_series_equal(exp, values) From c0d067d6392474ff8be3ed4add3f6e84b359bfc8 Mon Sep 17 00:00:00 2001 From: Kaiqi Dong Date: Sat, 23 Feb 2019 20:46:07 +0100 Subject: [PATCH 02/16] add whatsnew --- doc/source/whatsnew/v0.25.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst index c0e00c7bf6f54..8f98566ceae56 100644 --- a/doc/source/whatsnew/v0.25.0.rst +++ b/doc/source/whatsnew/v0.25.0.rst @@ -22,7 +22,7 @@ Other Enhancements - Indexing of ``DataFrame`` and ``Series`` now accepts zerodim ``np.ndarray`` (:issue:`24919`) - :meth:`Timestamp.replace` now supports the ``fold`` argument to disambiguate DST transition times (:issue:`25017`) - :meth:`DataFrame.at_time` and :meth:`Series.at_time` now support :meth:`datetime.time` objects with timezones (:issue:`24043`) -- +- Add ``casefold`` to ``Series.str`` (:issue:`25405`) .. _whatsnew_0250.api_breaking: From bfb3fa846b338c466717e4321834c1287407bb2f Mon Sep 17 00:00:00 2001 From: Kaiqi Dong Date: Sat, 23 Feb 2019 20:48:50 +0100 Subject: [PATCH 03/16] small change --- pandas/tests/test_strings.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/test_strings.py b/pandas/tests/test_strings.py index 117ed653f2c39..803be189350a2 100644 --- a/pandas/tests/test_strings.py +++ b/pandas/tests/test_strings.py @@ -3431,4 +3431,4 @@ def test_casefold(self): exp = s.str.casefold() assert isinstance(exp, Series) - assert_series_equal(exp, values) + tm.assert_series_equal(exp, values) From 6608c25450a0e24f9823cf47bacc124aaf6908d4 Mon Sep 17 00:00:00 2001 From: Kaiqi Dong Date: Sat, 23 Feb 2019 21:04:28 +0100 Subject: [PATCH 04/16] remove unnecessary test --- pandas/tests/test_strings.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/pandas/tests/test_strings.py b/pandas/tests/test_strings.py index 803be189350a2..8213d23492001 100644 --- a/pandas/tests/test_strings.py +++ b/pandas/tests/test_strings.py @@ -3426,9 +3426,8 @@ def test_method_on_bytes(self): tm.assert_series_equal(result, expected) def test_casefold(self): - values = Series(['ss', NA, 'case', 'ssd']) + casefolded = Series(['ss', NA, 'case', 'ssd']) s = Series(['ß', NA, 'case', 'ßd']) exp = s.str.casefold() - assert isinstance(exp, Series) - tm.assert_series_equal(exp, values) + tm.assert_series_equal(exp, casefolded) From 0d9ebec9e1cdb184244ee4219d101fbc8e57c58d Mon Sep 17 00:00:00 2001 From: Kaiqi Dong Date: Sat, 23 Feb 2019 21:05:44 +0100 Subject: [PATCH 05/16] rename --- pandas/tests/test_strings.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/test_strings.py b/pandas/tests/test_strings.py index 8213d23492001..87b3b553ec8be 100644 --- a/pandas/tests/test_strings.py +++ b/pandas/tests/test_strings.py @@ -3428,6 +3428,6 @@ def test_method_on_bytes(self): def test_casefold(self): casefolded = Series(['ss', NA, 'case', 'ssd']) s = Series(['ß', NA, 'case', 'ßd']) - exp = s.str.casefold() + result = s.str.casefold() - tm.assert_series_equal(exp, casefolded) + tm.assert_series_equal(result, casefolded) From 13b2442736880d61e7410182086f026361ef5a70 Mon Sep 17 00:00:00 2001 From: Kaiqi Dong Date: Sat, 23 Feb 2019 21:50:37 +0100 Subject: [PATCH 06/16] add series.str.casefold in reference --- doc/source/reference/series.rst | 1 + pandas/tests/test_strings.py | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/doc/source/reference/series.rst b/doc/source/reference/series.rst index a6ac40b5203bf..b406893e3414a 100644 --- a/doc/source/reference/series.rst +++ b/doc/source/reference/series.rst @@ -409,6 +409,7 @@ strings and apply several methods to it. These can be accessed like :template: autosummary/accessor_method.rst Series.str.capitalize + Series.str.casefold Series.str.cat Series.str.center Series.str.contains diff --git a/pandas/tests/test_strings.py b/pandas/tests/test_strings.py index 87b3b553ec8be..53ec1cf5be369 100644 --- a/pandas/tests/test_strings.py +++ b/pandas/tests/test_strings.py @@ -3426,8 +3426,8 @@ def test_method_on_bytes(self): tm.assert_series_equal(result, expected) def test_casefold(self): - casefolded = Series(['ss', NA, 'case', 'ssd']) - s = Series(['ß', NA, 'case', 'ßd']) + casefolded = Series(['ss', 'case', 'ssd']) + s = Series(['ß', 'case', 'ßd']) result = s.str.casefold() tm.assert_series_equal(result, casefolded) From 3448d76648cdd8fb77aa595e7f1fa3e17267ca03 Mon Sep 17 00:00:00 2001 From: Kaiqi Dong Date: Sat, 23 Feb 2019 22:03:19 +0100 Subject: [PATCH 07/16] add reference in text rst --- doc/source/user_guide/text.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/user_guide/text.rst b/doc/source/user_guide/text.rst index e4f60a761750d..6b65d9e299628 100644 --- a/doc/source/user_guide/text.rst +++ b/doc/source/user_guide/text.rst @@ -618,3 +618,4 @@ Method Summary :meth:`~Series.str.istitle`;Equivalent to ``str.istitle`` :meth:`~Series.str.isnumeric`;Equivalent to ``str.isnumeric`` :meth:`~Series.str.isdecimal`;Equivalent to ``str.isdecimal`` + :meth:`~Series.str.casefold`;Equivalent to ``str.casefold`` From d1470754d9e5fce7f60941fb1b79ce1bafcc8c60 Mon Sep 17 00:00:00 2001 From: Kaiqi Dong Date: Sat, 23 Feb 2019 22:16:20 +0100 Subject: [PATCH 08/16] add skipif to avoid failure --- pandas/tests/test_strings.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pandas/tests/test_strings.py b/pandas/tests/test_strings.py index 53ec1cf5be369..db28226ef466c 100644 --- a/pandas/tests/test_strings.py +++ b/pandas/tests/test_strings.py @@ -3425,9 +3425,10 @@ def test_method_on_bytes(self): ['ad', 'be', 'cf'], 'S2').astype(object)) tm.assert_series_equal(result, expected) + @pytest.mark.skipif(compat.PY2, reason='not in python2') def test_casefold(self): - casefolded = Series(['ss', 'case', 'ssd']) - s = Series(['ß', 'case', 'ßd']) + casefolded = Series(['ss', NA, 'case', 'ssd']) + s = Series(['ß', NA, 'case', 'ßd']) result = s.str.casefold() tm.assert_series_equal(result, casefolded) From 983332ea448d859113be622cb97f8d486e50611e Mon Sep 17 00:00:00 2001 From: Kaiqi Dong Date: Sat, 23 Feb 2019 22:16:59 +0100 Subject: [PATCH 09/16] add issue number --- pandas/tests/test_strings.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/tests/test_strings.py b/pandas/tests/test_strings.py index db28226ef466c..827dc59cbf582 100644 --- a/pandas/tests/test_strings.py +++ b/pandas/tests/test_strings.py @@ -3427,6 +3427,7 @@ def test_method_on_bytes(self): @pytest.mark.skipif(compat.PY2, reason='not in python2') def test_casefold(self): + #GH25405 casefolded = Series(['ss', NA, 'case', 'ssd']) s = Series(['ß', NA, 'case', 'ßd']) result = s.str.casefold() From f9e52cc42de43c54f671db78a6ecc05b51fc618e Mon Sep 17 00:00:00 2001 From: Kaiqi Dong Date: Sat, 23 Feb 2019 22:29:45 +0100 Subject: [PATCH 10/16] fix pep8 --- pandas/tests/test_strings.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/test_strings.py b/pandas/tests/test_strings.py index 827dc59cbf582..56be5f0c6073c 100644 --- a/pandas/tests/test_strings.py +++ b/pandas/tests/test_strings.py @@ -3427,7 +3427,7 @@ def test_method_on_bytes(self): @pytest.mark.skipif(compat.PY2, reason='not in python2') def test_casefold(self): - #GH25405 + # GH25405 casefolded = Series(['ss', NA, 'case', 'ssd']) s = Series(['ß', NA, 'case', 'ßd']) result = s.str.casefold() From a1a8891782cd4caef8e4c9dc4af668a5365fa908 Mon Sep 17 00:00:00 2001 From: Kaiqi Dong Date: Sun, 24 Feb 2019 08:47:58 +0100 Subject: [PATCH 11/16] changes based on review --- doc/source/whatsnew/v0.25.0.rst | 2 +- pandas/core/strings.py | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst index 8f98566ceae56..acb1a1ffa0562 100644 --- a/doc/source/whatsnew/v0.25.0.rst +++ b/doc/source/whatsnew/v0.25.0.rst @@ -22,7 +22,7 @@ Other Enhancements - Indexing of ``DataFrame`` and ``Series`` now accepts zerodim ``np.ndarray`` (:issue:`24919`) - :meth:`Timestamp.replace` now supports the ``fold`` argument to disambiguate DST transition times (:issue:`25017`) - :meth:`DataFrame.at_time` and :meth:`Series.at_time` now support :meth:`datetime.time` objects with timezones (:issue:`24043`) -- Add ``casefold`` to ``Series.str`` (:issue:`25405`) +- ``Series.str`` has gained :meth:`Series.str.casefold` method to removes all case distinctions present in a string (:issue:`25405`) .. _whatsnew_0250.api_breaking: diff --git a/pandas/core/strings.py b/pandas/core/strings.py index 5565cef538133..c4c7f18402dad 100644 --- a/pandas/core/strings.py +++ b/pandas/core/strings.py @@ -2944,6 +2944,7 @@ def rindex(self, sub, start=0, end=None): Series.str.swapcase : Converts uppercase to lowercase and lowercase to uppercase. Series.str.casefold: Removes all case distinctions in the string. + .. versionadded:: 0.25.0 Examples -------- From 893d4266c9783c4ead1c9ef2bc9f0a09815ad310 Mon Sep 17 00:00:00 2001 From: Kaiqi Dong Date: Sun, 24 Feb 2019 09:02:07 +0100 Subject: [PATCH 12/16] move position up --- doc/source/user_guide/text.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/user_guide/text.rst b/doc/source/user_guide/text.rst index 6b65d9e299628..9304c0b1829af 100644 --- a/doc/source/user_guide/text.rst +++ b/doc/source/user_guide/text.rst @@ -606,6 +606,7 @@ Method Summary :meth:`~Series.str.index`;Equivalent to ``str.index`` :meth:`~Series.str.rindex`;Equivalent to ``str.rindex`` :meth:`~Series.str.capitalize`;Equivalent to ``str.capitalize`` + :meth:`~Series.str.casefold`;Equivalent to ``str.casefold`` :meth:`~Series.str.swapcase`;Equivalent to ``str.swapcase`` :meth:`~Series.str.normalize`;Return Unicode normal form. Equivalent to ``unicodedata.normalize`` :meth:`~Series.str.translate`;Equivalent to ``str.translate`` @@ -618,4 +619,3 @@ Method Summary :meth:`~Series.str.istitle`;Equivalent to ``str.istitle`` :meth:`~Series.str.isnumeric`;Equivalent to ``str.isnumeric`` :meth:`~Series.str.isdecimal`;Equivalent to ``str.isdecimal`` - :meth:`~Series.str.casefold`;Equivalent to ``str.casefold`` From 522c0213e711b6e295c3152ef91c45fe3f9addb1 Mon Sep 17 00:00:00 2001 From: Kaiqi Dong Date: Sun, 24 Feb 2019 11:29:13 +0100 Subject: [PATCH 13/16] minor --- doc/source/user_guide/text.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/user_guide/text.rst b/doc/source/user_guide/text.rst index 9304c0b1829af..6f21a7d9beb36 100644 --- a/doc/source/user_guide/text.rst +++ b/doc/source/user_guide/text.rst @@ -600,13 +600,13 @@ Method Summary :meth:`~Series.str.partition`;Equivalent to ``str.partition`` :meth:`~Series.str.rpartition`;Equivalent to ``str.rpartition`` :meth:`~Series.str.lower`;Equivalent to ``str.lower`` + :meth:`~Series.str.casefold`;Equivalent to ``str.casefold`` :meth:`~Series.str.upper`;Equivalent to ``str.upper`` :meth:`~Series.str.find`;Equivalent to ``str.find`` :meth:`~Series.str.rfind`;Equivalent to ``str.rfind`` :meth:`~Series.str.index`;Equivalent to ``str.index`` :meth:`~Series.str.rindex`;Equivalent to ``str.rindex`` :meth:`~Series.str.capitalize`;Equivalent to ``str.capitalize`` - :meth:`~Series.str.casefold`;Equivalent to ``str.casefold`` :meth:`~Series.str.swapcase`;Equivalent to ``str.swapcase`` :meth:`~Series.str.normalize`;Return Unicode normal form. Equivalent to ``unicodedata.normalize`` :meth:`~Series.str.translate`;Equivalent to ``str.translate`` From bf3593557da1f2e71eca198a5af587fd4b0eee2d Mon Sep 17 00:00:00 2001 From: Kaiqi Dong Date: Wed, 27 Feb 2019 20:20:17 +0100 Subject: [PATCH 14/16] minor change on naming convention --- pandas/tests/test_strings.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/test_strings.py b/pandas/tests/test_strings.py index 56be5f0c6073c..1ecfedc8685da 100644 --- a/pandas/tests/test_strings.py +++ b/pandas/tests/test_strings.py @@ -3428,8 +3428,8 @@ def test_method_on_bytes(self): @pytest.mark.skipif(compat.PY2, reason='not in python2') def test_casefold(self): # GH25405 - casefolded = Series(['ss', NA, 'case', 'ssd']) + expected = Series(['ss', NA, 'case', 'ssd']) s = Series(['ß', NA, 'case', 'ßd']) result = s.str.casefold() - tm.assert_series_equal(result, casefolded) + tm.assert_series_equal(result, expected) From bf4946790acb61cb821a66f50585dac07a24dded Mon Sep 17 00:00:00 2001 From: Kaiqi Dong Date: Thu, 28 Feb 2019 15:03:15 +0100 Subject: [PATCH 15/16] new args for version --- pandas/core/strings.py | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/pandas/core/strings.py b/pandas/core/strings.py index c4c7f18402dad..35a92b9710087 100644 --- a/pandas/core/strings.py +++ b/pandas/core/strings.py @@ -2926,7 +2926,7 @@ def rindex(self, sub, start=0, end=None): _shared_docs['casemethods'] = (""" Convert strings in the Series/Index to %(type)s. - + %(version)s Equivalent to :meth:`str.%(method)s`. Returns @@ -2944,7 +2944,6 @@ def rindex(self, sub, start=0, end=None): Series.str.swapcase : Converts uppercase to lowercase and lowercase to uppercase. Series.str.casefold: Removes all case distinctions in the string. - .. versionadded:: 0.25.0 Examples -------- @@ -2991,13 +2990,15 @@ def rindex(self, sub, start=0, end=None): 3 sWaPcAsE dtype: object """) - _shared_docs['lower'] = dict(type='lowercase', method='lower') - _shared_docs['upper'] = dict(type='uppercase', method='upper') - _shared_docs['title'] = dict(type='titlecase', method='title') + _shared_docs['lower'] = dict(type='lowercase', method='lower', version='') + _shared_docs['upper'] = dict(type='uppercase', method='upper', version='') + _shared_docs['title'] = dict(type='titlecase', method='title', version='') _shared_docs['capitalize'] = dict(type='be capitalized', - method='capitalize') - _shared_docs['swapcase'] = dict(type='be swapcased', method='swapcase') - _shared_docs['casefold'] = dict(type='be casefolded', method='casefold') + method='capitalize', version='') + _shared_docs['swapcase'] = dict(type='be swapcased', method='swapcase', + version='') + _shared_docs['casefold'] = dict(type='be casefolded', method='casefold', + version='.. versionadded:: 0.25.0') lower = _noarg_wrapper(lambda x: x.lower(), docstring=_shared_docs['casemethods'] % _shared_docs['lower']) From 22717a15c04e23a2d5fd45d566545837bf3c83d2 Mon Sep 17 00:00:00 2001 From: Kaiqi Dong Date: Thu, 28 Feb 2019 20:59:26 +0100 Subject: [PATCH 16/16] add \n --- pandas/core/strings.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/strings.py b/pandas/core/strings.py index 35a92b9710087..9577b07360f65 100644 --- a/pandas/core/strings.py +++ b/pandas/core/strings.py @@ -2998,7 +2998,7 @@ def rindex(self, sub, start=0, end=None): _shared_docs['swapcase'] = dict(type='be swapcased', method='swapcase', version='') _shared_docs['casefold'] = dict(type='be casefolded', method='casefold', - version='.. versionadded:: 0.25.0') + version='\n .. versionadded:: 0.25.0\n') lower = _noarg_wrapper(lambda x: x.lower(), docstring=_shared_docs['casemethods'] % _shared_docs['lower'])