From adeae840585a137528ba6d8d69ffb923c2f6080c Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <patrick_hoefler@gmx.net>
Date: Tue, 28 Oct 2025 23:44:56 +0000
Subject: [PATCH 1/2] PERF: Avoid deep copies when casting dtypes in merge

---
 doc/source/whatsnew/v3.0.0.rst           |  1 +
 pandas/core/reshape/merge.py             |  8 ++++----
 pandas/tests/copy_view/test_functions.py | 23 +++++++++++++++++++++++
 3 files changed, 28 insertions(+), 4 deletions(-)

diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst
index 75b4c5c0fe14d..a6ce88dcc44e2 100644
--- a/doc/source/whatsnew/v3.0.0.rst
+++ b/doc/source/whatsnew/v3.0.0.rst
@@ -937,6 +937,7 @@ Performance improvements
 - Performance improvement in :meth:`RangeIndex.reindex` returning a :class:`RangeIndex` instead of a :class:`Index` when possible. (:issue:`57647`, :issue:`57752`)
 - Performance improvement in :meth:`RangeIndex.take` returning a :class:`RangeIndex` instead of a :class:`Index` when possible. (:issue:`57445`, :issue:`57752`)
 - Performance improvement in :func:`merge` if hash-join can be used (:issue:`57970`)
+- Performance improvement in :func:`merge` when join keys have different dtypes and need to be upcast (:issue:`60545`)
 - Performance improvement in :meth:`CategoricalDtype.update_dtype` when ``dtype`` is a :class:`CategoricalDtype` with non ``None`` categories and ordered (:issue:`59647`)
 - Performance improvement in :meth:`DataFrame.__getitem__` when ``key`` is a :class:`DataFrame` with many columns (:issue:`61010`)
 - Performance improvement in :meth:`DataFrame.astype` when converting to extension floating dtypes, e.g. "Float64" (:issue:`60066`)
diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py
index 6516c843d637e..84cf81857cd52 100644
--- a/pandas/core/reshape/merge.py
+++ b/pandas/core/reshape/merge.py
@@ -1186,8 +1186,8 @@ def _indicator_pre_merge(
                 "Cannot use name of an existing column for indicator column"
             )
 
-        left = left.copy()
-        right = right.copy()
+        left = left.copy(deep=False)
+        right = right.copy(deep=False)
 
         left["_left_indicator"] = 1
         left["_left_indicator"] = left["_left_indicator"].astype("int8")
@@ -1865,11 +1865,11 @@ def _maybe_coerce_merge_keys(self) -> None:
             # incompatible dtypes. See GH 16900.
             if name in self.left.columns:
                 typ = cast(Categorical, lk).categories.dtype if lk_is_cat else object
-                self.left = self.left.copy()
+                self.left = self.left.copy(deep=False)
                 self.left[name] = self.left[name].astype(typ)
             if name in self.right.columns:
                 typ = cast(Categorical, rk).categories.dtype if rk_is_cat else object
-                self.right = self.right.copy()
+                self.right = self.right.copy(deep=False)
                 self.right[name] = self.right[name].astype(typ)
 
     def _validate_left_right_on(self, left_on, right_on):
diff --git a/pandas/tests/copy_view/test_functions.py b/pandas/tests/copy_view/test_functions.py
index d23263835c615..7ee1e45d1fea4 100644
--- a/pandas/tests/copy_view/test_functions.py
+++ b/pandas/tests/copy_view/test_functions.py
@@ -243,6 +243,29 @@ def test_merge_copy_keyword():
     assert np.shares_memory(get_array(df2, "b"), get_array(result, "b"))
 
 
+def test_merge_upcasting_no_copy():
+    left = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
+    left_copy = left.copy()
+    right = DataFrame({"a": [1, 2, 3], "c": [7, 8, 9]}, dtype=object)
+    result = merge(left, right, on="a")
+    assert np.shares_memory(get_array(result, "b"), get_array(left, "b"))
+    assert not np.shares_memory(get_array(result, "a"), get_array(left, "a"))
+    tm.assert_frame_equal(left, left_copy)
+
+    result = merge(right, left, on="a")
+    assert np.shares_memory(get_array(result, "b"), get_array(left, "b"))
+    assert not np.shares_memory(get_array(result, "a"), get_array(left, "a"))
+    tm.assert_frame_equal(left, left_copy)
+
+
+def test_merge_indicator_no_deep_copy():
+    left = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
+    right = DataFrame({"a": [1, 2, 3], "c": [7, 8, 9]})
+    result = merge(left, right, on="a", indicator=True)
+    assert np.shares_memory(get_array(result, "b"), get_array(left, "b"))
+    assert np.shares_memory(get_array(result, "c"), get_array(right, "c"))
+
+
 @pytest.mark.parametrize("dtype", [object, "str"])
 def test_join_on_key(dtype):
     df_index = Index(["a", "b", "c"], name="key", dtype=dtype)

From 739952c619c085750d65c1c5492aad8a3a411843 Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <61934744+phofl@users.noreply.github.com>
Date: Tue, 28 Oct 2025 23:45:44 +0000
Subject: [PATCH 2/2] DOC: Fix issue number for merge perf entry in v3.0.0.rst

---
 doc/source/whatsnew/v3.0.0.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst
index a6ce88dcc44e2..06df616f0b0a2 100644
--- a/doc/source/whatsnew/v3.0.0.rst
+++ b/doc/source/whatsnew/v3.0.0.rst
@@ -937,7 +937,7 @@ Performance improvements
 - Performance improvement in :meth:`RangeIndex.reindex` returning a :class:`RangeIndex` instead of a :class:`Index` when possible. (:issue:`57647`, :issue:`57752`)
 - Performance improvement in :meth:`RangeIndex.take` returning a :class:`RangeIndex` instead of a :class:`Index` when possible. (:issue:`57445`, :issue:`57752`)
 - Performance improvement in :func:`merge` if hash-join can be used (:issue:`57970`)
-- Performance improvement in :func:`merge` when join keys have different dtypes and need to be upcast (:issue:`60545`)
+- Performance improvement in :func:`merge` when join keys have different dtypes and need to be upcast, or when ``indicator=True``, by avoiding deep copies of the inputs (:issue:`62902`)
 - Performance improvement in :meth:`CategoricalDtype.update_dtype` when ``dtype`` is a :class:`CategoricalDtype` with non ``None`` categories and ordered (:issue:`59647`)
 - Performance improvement in :meth:`DataFrame.__getitem__` when ``key`` is a :class:`DataFrame` with many columns (:issue:`61010`)
 - Performance improvement in :meth:`DataFrame.astype` when converting to extension floating dtypes, e.g. "Float64" (:issue:`60066`)