From 82c52a4bdbfd0e225b26b682b0b018da31f3bfc3 Mon Sep 17 00:00:00 2001 From: Kaiqi Dong Date: Sat, 2 Feb 2019 16:14:38 +0000 Subject: [PATCH 01/11] one in all --- doc/source/whatsnew/v0.24.1.rst | 2 +- pandas/core/internals/managers.py | 31 +++++++++++------ pandas/core/reshape/merge.py | 10 ++++-- pandas/tests/reshape/merge/test_merge.py | 44 ++++++++++++++++++++++++ 4 files changed, 73 insertions(+), 14 deletions(-) diff --git a/doc/source/whatsnew/v0.24.1.rst b/doc/source/whatsnew/v0.24.1.rst index bc45a14666ba2..7c42db4547833 100644 --- a/doc/source/whatsnew/v0.24.1.rst +++ b/doc/source/whatsnew/v0.24.1.rst @@ -72,7 +72,7 @@ Fixed Regressions **Other** -- Fixed AttributeError when printing a DataFrame's HTML repr after accessing the IPython config object (:issue:`25036`) +- Bug in :func:`pandas.merge` doesn't work correctly if None is in suffixes (:issue: `24782`). .. _whatsnew_0.241.contributors: diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index 050c3d3e87fc6..50c26c60eb2ff 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -1967,20 +1967,31 @@ def items_overlap_with_suffix(left, lsuffix, right, rsuffix): if len(to_rename) == 0: return left, right else: - if not lsuffix and not rsuffix: - raise ValueError('columns overlap but no suffix specified: ' - '{rename}'.format(rename=to_rename)) + # if column name is string, raise error if suffix is a combination of + # empty string and None, or two Nones + if isinstance(to_rename[0], str): + if not lsuffix and not rsuffix: + raise ValueError('columns overlap but no suffix specified: ' + '{rename}'.format(rename=to_rename)) + else: + # if not, only suffix with (None, None) will raise error + if lsuffix is None and rsuffix is None: + raise ValueError('columns overlap but no suffix specified: ' + '{rename}'.format(rename=to_rename)) - def lrenamer(x): - if x in to_rename: - return '{x}{lsuffix}'.format(x=x, lsuffix=lsuffix) - return x + def renamer(x, suffix): + """Rename the left and right indices. - def rrenamer(x): - if x in to_rename: - return '{x}{rsuffix}'.format(x=x, rsuffix=rsuffix) + If there is overlap, and suffix is not None, add + suffix, otherwise, leave it as-is. + """ + if x in to_rename and suffix is not None: + return '{x}{suffix}'.format(x=x, suffix=suffix) return x + lrenamer = partial(renamer, suffix=lsuffix) + rrenamer = partial(renamer, suffix=rsuffix) + return (_transform_index(left, lrenamer), _transform_index(right, rrenamer)) diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py index 1dd19a7c1514e..2b18321b666ad 100644 --- a/pandas/core/reshape/merge.py +++ b/pandas/core/reshape/merge.py @@ -159,9 +159,13 @@ def merge_ordered(left, right, on=None, left DataFrame fill_method : {'ffill', None}, default None Interpolation method for data - suffixes : 2-length sequence (tuple, list, ...) - Suffix to apply to overlapping column names in the left and right - side, respectively + suffixes : Sequence, default is ("_x", "_y") + A length-2 sequence where each element is optionally a string + indicating the suffix to add to overlapping column names in + `left` and `right` respectively. Pass a value of `None` instead + of a string to indicate that the column name from `left` or + `right` should be left as-is, with no suffix. At least one of the + values must not be None. how : {'left', 'right', 'outer', 'inner'}, default 'outer' * left: use only keys from left frame (SQL: left outer join) * right: use only keys from right frame (SQL: right outer join) diff --git a/pandas/tests/reshape/merge/test_merge.py b/pandas/tests/reshape/merge/test_merge.py index a0a20d1da6cef..2fd0917b758e4 100644 --- a/pandas/tests/reshape/merge/test_merge.py +++ b/pandas/tests/reshape/merge/test_merge.py @@ -1526,3 +1526,47 @@ def test_merge_series(on, left_on, right_on, left_index, right_index, nm): with pytest.raises(ValueError, match=msg): result = pd.merge(a, b, on=on, left_on=left_on, right_on=right_on, left_index=left_index, right_index=right_index) + + +@pytest.mark.parametrize("col1, col2, kwargs, expected_cols", [ + (0, 0, dict(suffixes=("", "_dup")), ["0", "0_dup"]), + (0, 0, dict(suffixes=(None, "_dup")), [0, "0_dup"]), + (0, 0, dict(suffixes=("_x", "_y")), ["0_x", "0_y"]), + ("a", 0, dict(suffixes=(None, "_y")), ["a", 0]), + (0.0, 0.0, dict(suffixes=("_x", None)), ["0.0_x", 0.0]), + ("b", "b", dict(suffixes=(None, "_y")), ["b", "b_y"]), + ("a", "a", dict(suffixes=("_x", None)), ["a_x", "a"]), + ("a", "b", dict(suffixes=("_x", None)), ["a", "b"]), + ("a", "a", dict(suffixes=[None, "_x"]), ["a", "a_x"]), + (0, 0, dict(suffixes=(["_a", None])), ["0_a", 0]), + (0, 0, dict(suffixes=('', None)), ["0", 0]), + ("a", "a", dict(), ["a_x", "a_y"]), + (0, 0, dict(), ["0_x", "0_y"]) +]) +def test_merge_suffix(col1, col2, kwargs, expected_cols): + # issue: 24782 + a = pd.DataFrame({col1: [1, 2, 3]}) + b = pd.DataFrame({col2: [4, 5, 6]}) + + expected = pd.DataFrame([[1, 4], [2, 5], [3, 6]], + columns=expected_cols) + + result = a.merge(b, left_index=True, right_index=True, **kwargs) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("col, suffixes", [ + ('a', (None, None)), + ('a', [None, None]), + ('a', ('', None)), + ('a', [None, '']), + (0, (None, None)) +]) +def test_merge_suffix_errors(col, suffixes): + # issue: 24782 + a = pd.DataFrame({col: [1, 2, 3]}) + b = pd.DataFrame({col: [4, 5, 6]}) + + with pytest.raises(ValueError, + match="columns overlap but no suffix specified"): + a.merge(b, left_index=True, right_index=True, suffixes=suffixes) From dd605e0a7187e9d1832f4c60681fa6b82d4b392d Mon Sep 17 00:00:00 2001 From: Kaiqi Dong Date: Sat, 2 Feb 2019 16:16:40 +0000 Subject: [PATCH 02/11] change whatsnew to 0.25 --- doc/source/whatsnew/v0.24.1.rst | 1 - doc/source/whatsnew/v0.25.0.rst | 1 + 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.24.1.rst b/doc/source/whatsnew/v0.24.1.rst index 7c42db4547833..6640c1ba4bf92 100644 --- a/doc/source/whatsnew/v0.24.1.rst +++ b/doc/source/whatsnew/v0.24.1.rst @@ -72,7 +72,6 @@ Fixed Regressions **Other** -- Bug in :func:`pandas.merge` doesn't work correctly if None is in suffixes (:issue: `24782`). .. _whatsnew_0.241.contributors: diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst index 09626be713c4f..221de1a54213a 100644 --- a/doc/source/whatsnew/v0.25.0.rst +++ b/doc/source/whatsnew/v0.25.0.rst @@ -181,6 +181,7 @@ Groupby/Resample/Rolling Reshaping ^^^^^^^^^ +- Bug in :func:`pandas.merge` doesn't work correctly if None is in suffixes (:issue: `24782`). - Bug in :func:`merge` when merging by index name would sometimes result in an incorrectly numbered index (:issue:`24212`) - :func:`to_records` now accepts dtypes to its `column_dtypes` parameter (:issue:`24895`) - From af7f9ad3321e2d19143c997320f70eb75159fc12 Mon Sep 17 00:00:00 2001 From: Kaiqi Dong Date: Sat, 2 Feb 2019 16:22:30 +0000 Subject: [PATCH 03/11] add back other comment --- doc/source/whatsnew/v0.24.1.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v0.24.1.rst b/doc/source/whatsnew/v0.24.1.rst index 6640c1ba4bf92..bc45a14666ba2 100644 --- a/doc/source/whatsnew/v0.24.1.rst +++ b/doc/source/whatsnew/v0.24.1.rst @@ -72,6 +72,7 @@ Fixed Regressions **Other** +- Fixed AttributeError when printing a DataFrame's HTML repr after accessing the IPython config object (:issue:`25036`) .. _whatsnew_0.241.contributors: From 4d5e1a97d8cd5caf346f8f973d134e4ec89301cb Mon Sep 17 00:00:00 2001 From: Kaiqi Dong Date: Sat, 2 Feb 2019 16:32:14 +0000 Subject: [PATCH 04/11] double check test --- pandas/tests/reshape/merge/test_merge.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pandas/tests/reshape/merge/test_merge.py b/pandas/tests/reshape/merge/test_merge.py index 2fd0917b758e4..5a9566b8d51f9 100644 --- a/pandas/tests/reshape/merge/test_merge.py +++ b/pandas/tests/reshape/merge/test_merge.py @@ -1554,6 +1554,9 @@ def test_merge_suffix(col1, col2, kwargs, expected_cols): result = a.merge(b, left_index=True, right_index=True, **kwargs) tm.assert_frame_equal(result, expected) + result = pd.merge(a, b, left_index=True, right_index=True, **kwargs) + tm.assert_frame_equal(result, expected) + @pytest.mark.parametrize("col, suffixes", [ ('a', (None, None)), From 3f65bf19b2739612e068c3659ba5f2d6bdd13377 Mon Sep 17 00:00:00 2001 From: Kaiqi Dong Date: Sat, 2 Feb 2019 17:54:16 +0000 Subject: [PATCH 05/11] changes based on discussion --- pandas/core/internals/managers.py | 11 ---------- pandas/core/reshape/merge.py | 7 +++--- pandas/tests/reshape/merge/test_merge.py | 28 ++++++++---------------- 3 files changed, 13 insertions(+), 33 deletions(-) diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index 50c26c60eb2ff..2a2f4ad1a8a99 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -1967,17 +1967,6 @@ def items_overlap_with_suffix(left, lsuffix, right, rsuffix): if len(to_rename) == 0: return left, right else: - # if column name is string, raise error if suffix is a combination of - # empty string and None, or two Nones - if isinstance(to_rename[0], str): - if not lsuffix and not rsuffix: - raise ValueError('columns overlap but no suffix specified: ' - '{rename}'.format(rename=to_rename)) - else: - # if not, only suffix with (None, None) will raise error - if lsuffix is None and rsuffix is None: - raise ValueError('columns overlap but no suffix specified: ' - '{rename}'.format(rename=to_rename)) def renamer(x, suffix): """Rename the left and right indices. diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py index 2b18321b666ad..3be29bd2109b9 100644 --- a/pandas/core/reshape/merge.py +++ b/pandas/core/reshape/merge.py @@ -159,13 +159,12 @@ def merge_ordered(left, right, on=None, left DataFrame fill_method : {'ffill', None}, default None Interpolation method for data - suffixes : Sequence, default is ("_x", "_y") + suffixes : Sequence or None, default is ("_x", "_y") A length-2 sequence where each element is optionally a string indicating the suffix to add to overlapping column names in `left` and `right` respectively. Pass a value of `None` instead of a string to indicate that the column name from `left` or - `right` should be left as-is, with no suffix. At least one of the - values must not be None. + `right` should be left as-is, with no suffix. how : {'left', 'right', 'outer', 'inner'}, default 'outer' * left: use only keys from left frame (SQL: left outer join) * right: use only keys from right frame (SQL: right outer join) @@ -493,6 +492,8 @@ def __init__(self, left, right, how='inner', on=None, self.copy = copy self.suffixes = suffixes + if suffixes is None: + self.suffixes = (None, None) self.sort = sort self.left_index = left_index diff --git a/pandas/tests/reshape/merge/test_merge.py b/pandas/tests/reshape/merge/test_merge.py index 5a9566b8d51f9..7a417c0cbdfb0 100644 --- a/pandas/tests/reshape/merge/test_merge.py +++ b/pandas/tests/reshape/merge/test_merge.py @@ -1538,10 +1538,17 @@ def test_merge_series(on, left_on, right_on, left_index, right_index, nm): ("a", "a", dict(suffixes=("_x", None)), ["a_x", "a"]), ("a", "b", dict(suffixes=("_x", None)), ["a", "b"]), ("a", "a", dict(suffixes=[None, "_x"]), ["a", "a_x"]), - (0, 0, dict(suffixes=(["_a", None])), ["0_a", 0]), + (0, 0, dict(suffixes=["_a", None]), ["0_a", 0]), (0, 0, dict(suffixes=('', None)), ["0", 0]), ("a", "a", dict(), ["a_x", "a_y"]), - (0, 0, dict(), ["0_x", "0_y"]) + (0, 0, dict(), ["0_x", "0_y"]), + ("a", "a", dict(suffixes=(None, None)), ["a", "a"]), + ("a", "a", dict(suffixes=[None, None]), ["a", "a"]), + (0, 0, dict(suffixes=(None, None)), [0, 0]), + ("a", "a", dict(suffixes=['', None]), ["a", "a"]), + (0, 0, dict(suffixes=['', None]), ["0", 0]), + (0, 0, dict(suffixes=None), [0, 0]), + ("a", "a", dict(suffixes=None), ["a", "a"]) ]) def test_merge_suffix(col1, col2, kwargs, expected_cols): # issue: 24782 @@ -1556,20 +1563,3 @@ def test_merge_suffix(col1, col2, kwargs, expected_cols): result = pd.merge(a, b, left_index=True, right_index=True, **kwargs) tm.assert_frame_equal(result, expected) - - -@pytest.mark.parametrize("col, suffixes", [ - ('a', (None, None)), - ('a', [None, None]), - ('a', ('', None)), - ('a', [None, '']), - (0, (None, None)) -]) -def test_merge_suffix_errors(col, suffixes): - # issue: 24782 - a = pd.DataFrame({col: [1, 2, 3]}) - b = pd.DataFrame({col: [4, 5, 6]}) - - with pytest.raises(ValueError, - match="columns overlap but no suffix specified"): - a.merge(b, left_index=True, right_index=True, suffixes=suffixes) From ce7e4b8b63afc51093f8c7336e2bd5341868212a Mon Sep 17 00:00:00 2001 From: Kaiqi Dong Date: Sun, 3 Feb 2019 10:46:09 +0000 Subject: [PATCH 06/11] changes based on discussion with joris --- pandas/core/internals/managers.py | 11 ++++++++++ pandas/core/reshape/merge.py | 5 ++--- pandas/tests/reshape/merge/test_merge.py | 27 ++++++++++++++++-------- 3 files changed, 31 insertions(+), 12 deletions(-) diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index 2a2f4ad1a8a99..50c26c60eb2ff 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -1967,6 +1967,17 @@ def items_overlap_with_suffix(left, lsuffix, right, rsuffix): if len(to_rename) == 0: return left, right else: + # if column name is string, raise error if suffix is a combination of + # empty string and None, or two Nones + if isinstance(to_rename[0], str): + if not lsuffix and not rsuffix: + raise ValueError('columns overlap but no suffix specified: ' + '{rename}'.format(rename=to_rename)) + else: + # if not, only suffix with (None, None) will raise error + if lsuffix is None and rsuffix is None: + raise ValueError('columns overlap but no suffix specified: ' + '{rename}'.format(rename=to_rename)) def renamer(x, suffix): """Rename the left and right indices. diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py index 3be29bd2109b9..fca942e2a4aab 100644 --- a/pandas/core/reshape/merge.py +++ b/pandas/core/reshape/merge.py @@ -164,7 +164,8 @@ def merge_ordered(left, right, on=None, indicating the suffix to add to overlapping column names in `left` and `right` respectively. Pass a value of `None` instead of a string to indicate that the column name from `left` or - `right` should be left as-is, with no suffix. + `right` should be left as-is, with no suffix. At least one of the + values must not be None. how : {'left', 'right', 'outer', 'inner'}, default 'outer' * left: use only keys from left frame (SQL: left outer join) * right: use only keys from right frame (SQL: right outer join) @@ -492,8 +493,6 @@ def __init__(self, left, right, how='inner', on=None, self.copy = copy self.suffixes = suffixes - if suffixes is None: - self.suffixes = (None, None) self.sort = sort self.left_index = left_index diff --git a/pandas/tests/reshape/merge/test_merge.py b/pandas/tests/reshape/merge/test_merge.py index 7a417c0cbdfb0..5935450b9cda0 100644 --- a/pandas/tests/reshape/merge/test_merge.py +++ b/pandas/tests/reshape/merge/test_merge.py @@ -1539,16 +1539,9 @@ def test_merge_series(on, left_on, right_on, left_index, right_index, nm): ("a", "b", dict(suffixes=("_x", None)), ["a", "b"]), ("a", "a", dict(suffixes=[None, "_x"]), ["a", "a_x"]), (0, 0, dict(suffixes=["_a", None]), ["0_a", 0]), - (0, 0, dict(suffixes=('', None)), ["0", 0]), + (0, 0, dict(suffixes=(None, "")), [0, "0"]), ("a", "a", dict(), ["a_x", "a_y"]), - (0, 0, dict(), ["0_x", "0_y"]), - ("a", "a", dict(suffixes=(None, None)), ["a", "a"]), - ("a", "a", dict(suffixes=[None, None]), ["a", "a"]), - (0, 0, dict(suffixes=(None, None)), [0, 0]), - ("a", "a", dict(suffixes=['', None]), ["a", "a"]), - (0, 0, dict(suffixes=['', None]), ["0", 0]), - (0, 0, dict(suffixes=None), [0, 0]), - ("a", "a", dict(suffixes=None), ["a", "a"]) + (0, 0, dict(), ["0_x", "0_y"]) ]) def test_merge_suffix(col1, col2, kwargs, expected_cols): # issue: 24782 @@ -1563,3 +1556,19 @@ def test_merge_suffix(col1, col2, kwargs, expected_cols): result = pd.merge(a, b, left_index=True, right_index=True, **kwargs) tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("col1, col2, suffixes", [ + ("a", "a", [None, None]), + ("a", "a", (None, None)), + ("a", "a", ("", None)), + (0, 0, [None, None]) +]) +def test_merge_error(col1, col2, suffixes): + # issue: 24782 + a = pd.DataFrame({col1: [1, 2, 3]}) + b = pd.DataFrame({col2: [3, 4, 5]}) + + msg = "columns overlap but no suffix specified" + with pytest.raises(ValueError, match=msg): + pd.merge(a, b, left_index=True, right_index=True, suffixes=suffixes) From 90ca9cdd759da2432b63faf96d7235c17bca8b22 Mon Sep 17 00:00:00 2001 From: Kaiqi Dong Date: Sun, 3 Feb 2019 12:42:13 +0000 Subject: [PATCH 07/11] slight change --- pandas/core/internals/managers.py | 14 +++----------- pandas/tests/reshape/merge/test_merge.py | 4 ++-- 2 files changed, 5 insertions(+), 13 deletions(-) diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index 50c26c60eb2ff..b7314b864c054 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -1967,17 +1967,9 @@ def items_overlap_with_suffix(left, lsuffix, right, rsuffix): if len(to_rename) == 0: return left, right else: - # if column name is string, raise error if suffix is a combination of - # empty string and None, or two Nones - if isinstance(to_rename[0], str): - if not lsuffix and not rsuffix: - raise ValueError('columns overlap but no suffix specified: ' - '{rename}'.format(rename=to_rename)) - else: - # if not, only suffix with (None, None) will raise error - if lsuffix is None and rsuffix is None: - raise ValueError('columns overlap but no suffix specified: ' - '{rename}'.format(rename=to_rename)) + if not lsuffix and not rsuffix: + raise ValueError('columns overlap but no suffix specified: ' + '{rename}'.format(rename=to_rename)) def renamer(x, suffix): """Rename the left and right indices. diff --git a/pandas/tests/reshape/merge/test_merge.py b/pandas/tests/reshape/merge/test_merge.py index 5935450b9cda0..c084d21cc72ba 100644 --- a/pandas/tests/reshape/merge/test_merge.py +++ b/pandas/tests/reshape/merge/test_merge.py @@ -1539,7 +1539,6 @@ def test_merge_series(on, left_on, right_on, left_index, right_index, nm): ("a", "b", dict(suffixes=("_x", None)), ["a", "b"]), ("a", "a", dict(suffixes=[None, "_x"]), ["a", "a_x"]), (0, 0, dict(suffixes=["_a", None]), ["0_a", 0]), - (0, 0, dict(suffixes=(None, "")), [0, "0"]), ("a", "a", dict(), ["a_x", "a_y"]), (0, 0, dict(), ["0_x", "0_y"]) ]) @@ -1562,7 +1561,8 @@ def test_merge_suffix(col1, col2, kwargs, expected_cols): ("a", "a", [None, None]), ("a", "a", (None, None)), ("a", "a", ("", None)), - (0, 0, [None, None]) + (0, 0, [None, None]), + (0, 0, (None, "")) ]) def test_merge_error(col1, col2, suffixes): # issue: 24782 From e995a042d5c9724dbdc2e8a6415a2a3fd2fcd12f Mon Sep 17 00:00:00 2001 From: Kaiqi Dong Date: Mon, 4 Feb 2019 11:31:51 +0100 Subject: [PATCH 08/11] slight changes --- pandas/core/reshape/merge.py | 2 +- pandas/tests/reshape/merge/test_merge.py | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py index fca942e2a4aab..2b18321b666ad 100644 --- a/pandas/core/reshape/merge.py +++ b/pandas/core/reshape/merge.py @@ -159,7 +159,7 @@ def merge_ordered(left, right, on=None, left DataFrame fill_method : {'ffill', None}, default None Interpolation method for data - suffixes : Sequence or None, default is ("_x", "_y") + suffixes : Sequence, default is ("_x", "_y") A length-2 sequence where each element is optionally a string indicating the suffix to add to overlapping column names in `left` and `right` respectively. Pass a value of `None` instead diff --git a/pandas/tests/reshape/merge/test_merge.py b/pandas/tests/reshape/merge/test_merge.py index c084d21cc72ba..a522bd5f957f7 100644 --- a/pandas/tests/reshape/merge/test_merge.py +++ b/pandas/tests/reshape/merge/test_merge.py @@ -1569,6 +1569,7 @@ def test_merge_error(col1, col2, suffixes): a = pd.DataFrame({col1: [1, 2, 3]}) b = pd.DataFrame({col2: [3, 4, 5]}) + # TODO: might reconsider current raise behaviour, see issue 24782 msg = "columns overlap but no suffix specified" with pytest.raises(ValueError, match=msg): pd.merge(a, b, left_index=True, right_index=True, suffixes=suffixes) From 9c3dfbd94065943193f8e57e59fcd438cd52ded7 Mon Sep 17 00:00:00 2001 From: Kaiqi Dong Date: Mon, 4 Feb 2019 16:29:29 +0100 Subject: [PATCH 09/11] changes based on jeff review --- doc/source/whatsnew/v0.25.0.rst | 2 +- pandas/core/internals/managers.py | 9 +++++++++ pandas/core/reshape/merge.py | 2 ++ pandas/tests/reshape/merge/test_merge.py | 17 ++++++++++++++++- 4 files changed, 28 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst index 221de1a54213a..80c76d14a5938 100644 --- a/doc/source/whatsnew/v0.25.0.rst +++ b/doc/source/whatsnew/v0.25.0.rst @@ -181,7 +181,7 @@ Groupby/Resample/Rolling Reshaping ^^^^^^^^^ -- Bug in :func:`pandas.merge` doesn't work correctly if None is in suffixes (:issue: `24782`). +- Bug in :func:`pandas.merge` adds a string of ``None`` if ``None`` is assigned in suffixes instead of remain the column name as-is (:issue:`24782`). - Bug in :func:`merge` when merging by index name would sometimes result in an incorrectly numbered index (:issue:`24212`) - :func:`to_records` now accepts dtypes to its `column_dtypes` parameter (:issue:`24895`) - diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index b7314b864c054..5cae6e1a89170 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -1976,6 +1976,15 @@ def renamer(x, suffix): If there is overlap, and suffix is not None, add suffix, otherwise, leave it as-is. + + Parameters + ---------- + x : original column name + suffix : str or None + + Returns + ------- + x : renamed column name """ if x in to_rename and suffix is not None: return '{x}{suffix}'.format(x=x, suffix=suffix) diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py index 2b18321b666ad..ad3327e694b67 100644 --- a/pandas/core/reshape/merge.py +++ b/pandas/core/reshape/merge.py @@ -166,6 +166,8 @@ def merge_ordered(left, right, on=None, of a string to indicate that the column name from `left` or `right` should be left as-is, with no suffix. At least one of the values must not be None. + + .. versionchanged:: 0.25.0 how : {'left', 'right', 'outer', 'inner'}, default 'outer' * left: use only keys from left frame (SQL: left outer join) * right: use only keys from right frame (SQL: right outer join) diff --git a/pandas/tests/reshape/merge/test_merge.py b/pandas/tests/reshape/merge/test_merge.py index a522bd5f957f7..2dd9558056fcc 100644 --- a/pandas/tests/reshape/merge/test_merge.py +++ b/pandas/tests/reshape/merge/test_merge.py @@ -1564,7 +1564,7 @@ def test_merge_suffix(col1, col2, kwargs, expected_cols): (0, 0, [None, None]), (0, 0, (None, "")) ]) -def test_merge_error(col1, col2, suffixes): +def test_merge_suffix_error(col1, col2, suffixes): # issue: 24782 a = pd.DataFrame({col1: [1, 2, 3]}) b = pd.DataFrame({col2: [3, 4, 5]}) @@ -1573,3 +1573,18 @@ def test_merge_error(col1, col2, suffixes): msg = "columns overlap but no suffix specified" with pytest.raises(ValueError, match=msg): pd.merge(a, b, left_index=True, right_index=True, suffixes=suffixes) + + +@pytest.mark.parametrize("col1, col2, suffixes", [ + ("a", "a", None), + (0, 0, None) +]) +def test_merge_suffix_none_error(col1, col2, suffixes): + # issue: 24782 + a = pd.DataFrame({col1: [1, 2, 3]}) + b = pd.DataFrame({col2: [3, 4, 5]}) + + # TODO: might reconsider current raise behaviour, see issue 24782 + msg = "'NoneType' object is not iterable" + with pytest.raises(TypeError, match=msg): + pd.merge(a, b, left_index=True, right_index=True, suffixes=suffixes) From 441e9a519a63d59f541d79500ed353792d2c486b Mon Sep 17 00:00:00 2001 From: Kaiqi Dong Date: Mon, 4 Feb 2019 17:34:20 +0100 Subject: [PATCH 10/11] fix test error --- pandas/tests/reshape/merge/test_merge.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/reshape/merge/test_merge.py b/pandas/tests/reshape/merge/test_merge.py index 2dd9558056fcc..143e609bdbcfd 100644 --- a/pandas/tests/reshape/merge/test_merge.py +++ b/pandas/tests/reshape/merge/test_merge.py @@ -1585,6 +1585,6 @@ def test_merge_suffix_none_error(col1, col2, suffixes): b = pd.DataFrame({col2: [3, 4, 5]}) # TODO: might reconsider current raise behaviour, see issue 24782 - msg = "'NoneType' object is not iterable" + msg = "iterable" with pytest.raises(TypeError, match=msg): pd.merge(a, b, left_index=True, right_index=True, suffixes=suffixes) From 71729b242da04c01856f640194f56a76c77ebf12 Mon Sep 17 00:00:00 2001 From: Kaiqi Dong Date: Mon, 4 Feb 2019 18:16:22 +0100 Subject: [PATCH 11/11] ci fail, try again --- pandas/tests/reshape/merge/test_merge.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/reshape/merge/test_merge.py b/pandas/tests/reshape/merge/test_merge.py index 143e609bdbcfd..25487ccc76e62 100644 --- a/pandas/tests/reshape/merge/test_merge.py +++ b/pandas/tests/reshape/merge/test_merge.py @@ -1584,7 +1584,7 @@ def test_merge_suffix_none_error(col1, col2, suffixes): a = pd.DataFrame({col1: [1, 2, 3]}) b = pd.DataFrame({col2: [3, 4, 5]}) - # TODO: might reconsider current raise behaviour, see issue 24782 + # TODO: might reconsider current raise behaviour, see GH24782 msg = "iterable" with pytest.raises(TypeError, match=msg): pd.merge(a, b, left_index=True, right_index=True, suffixes=suffixes)