Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v3.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -937,6 +937,7 @@ Performance improvements
- Performance improvement in :meth:`RangeIndex.reindex` returning a :class:`RangeIndex` instead of a :class:`Index` when possible. (:issue:`57647`, :issue:`57752`)
- Performance improvement in :meth:`RangeIndex.take` returning a :class:`RangeIndex` instead of a :class:`Index` when possible. (:issue:`57445`, :issue:`57752`)
- Performance improvement in :func:`merge` if hash-join can be used (:issue:`57970`)
- Performance improvement in :func:`merge` when join keys have different dtypes and need to be upcast (:issue:`62902`)
- Performance improvement in :meth:`CategoricalDtype.update_dtype` when ``dtype`` is a :class:`CategoricalDtype` with non-``None`` ``categories`` and ``ordered`` (:issue:`59647`)
- Performance improvement in :meth:`DataFrame.__getitem__` when ``key`` is a :class:`DataFrame` with many columns (:issue:`61010`)
- Performance improvement in :meth:`DataFrame.astype` when converting to extension floating dtypes, e.g. "Float64" (:issue:`60066`)
Expand Down
8 changes: 4 additions & 4 deletions pandas/core/reshape/merge.py
Original file line number Diff line number Diff line change
Expand Up @@ -1186,8 +1186,8 @@ def _indicator_pre_merge(
"Cannot use name of an existing column for indicator column"
)

left = left.copy()
right = right.copy()
left = left.copy(deep=False)
right = right.copy(deep=False)

left["_left_indicator"] = 1
left["_left_indicator"] = left["_left_indicator"].astype("int8")
Expand Down Expand Up @@ -1865,11 +1865,11 @@ def _maybe_coerce_merge_keys(self) -> None:
# incompatible dtypes. See GH 16900.
if name in self.left.columns:
typ = cast(Categorical, lk).categories.dtype if lk_is_cat else object
self.left = self.left.copy()
self.left = self.left.copy(deep=False)
self.left[name] = self.left[name].astype(typ)
if name in self.right.columns:
typ = cast(Categorical, rk).categories.dtype if rk_is_cat else object
self.right = self.right.copy()
self.right = self.right.copy(deep=False)
self.right[name] = self.right[name].astype(typ)

def _validate_left_right_on(self, left_on, right_on):
Expand Down
23 changes: 23 additions & 0 deletions pandas/tests/copy_view/test_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -243,6 +243,29 @@ def test_merge_copy_keyword():
assert np.shares_memory(get_array(df2, "b"), get_array(result, "b"))


def test_merge_upcasting_no_copy():
    """Merging on a key with mismatched dtypes must not deep-copy ``left``.

    ``left`` holds integer columns while ``right`` is built with object
    dtype, so the ``"a"`` join key differs in dtype between the two frames.
    The same checks are run with the frames passed in both argument orders.
    """
    left = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
    right = DataFrame({"a": [1, 2, 3], "c": [7, 8, 9]}, dtype=object)
    expected_left = left.copy()

    for first, second in ((left, right), (right, left)):
        merged = merge(first, second, on="a")
        # The non-key column is still backed by the caller's buffer ...
        assert np.shares_memory(get_array(merged, "b"), get_array(left, "b"))
        # ... while the key column in the result is not.
        assert not np.shares_memory(get_array(merged, "a"), get_array(left, "a"))
        # The caller's frame itself is left untouched.
        tm.assert_frame_equal(left, expected_left)


def test_merge_indicator_no_deep_copy():
    """``indicator=True`` must neither deep-copy nor mutate the input frames."""
    left = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
    right = DataFrame({"a": [1, 2, 3], "c": [7, 8, 9]})
    left_copy = left.copy()
    right_copy = right.copy()

    result = merge(left, right, on="a", indicator=True)

    # Non-key columns are still backed by the callers' buffers.
    assert np.shares_memory(get_array(result, "b"), get_array(left, "b"))
    assert np.shares_memory(get_array(result, "c"), get_array(right, "c"))
    # The temporary indicator columns are added to shallow copies of the
    # inputs, so they must not leak back into the callers' frames.
    tm.assert_frame_equal(left, left_copy)
    tm.assert_frame_equal(right, right_copy)


@pytest.mark.parametrize("dtype", [object, "str"])
def test_join_on_key(dtype):
df_index = Index(["a", "b", "c"], name="key", dtype=dtype)
Expand Down
Loading