From b366c3d3925009d587dc9f9eed00433c1bf45af4 Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <61934744+phofl@users.noreply.github.com>
Date: Fri, 24 Nov 2023 23:51:29 +0100
Subject: [PATCH 1/7] BUG: Index.str.cat casting result always to object

---
 doc/source/whatsnew/v2.1.4.rst   |  2 +-
 pandas/core/strings/accessor.py  |  7 ++-
 pandas/tests/strings/test_cat.py | 85 +++++++++++++++++---------------
 3 files changed, 52 insertions(+), 42 deletions(-)

diff --git a/doc/source/whatsnew/v2.1.4.rst b/doc/source/whatsnew/v2.1.4.rst
index 543a9864ced26..0f4d3a22f5129 100644
--- a/doc/source/whatsnew/v2.1.4.rst
+++ b/doc/source/whatsnew/v2.1.4.rst
@@ -25,7 +25,7 @@ Bug fixes
 - Bug in :meth:`Index.__getitem__` returning wrong result for Arrow dtypes and negative stepsize (:issue:`55832`)
 - Fixed bug in :meth:`DataFrame.__setitem__` casting :class:`Index` with object-dtype to PyArrow backed strings when ``infer_string`` option is set (:issue:`55638`)
 - Fixed bug in :meth:`Index.insert` casting object-dtype to PyArrow backed strings when ``infer_string`` option is set (:issue:`55638`)
--
+- Fixed bug in :meth:`Index.str.cat` always casting result to object dtype (:issue:`56157`)
 
 .. ---------------------------------------------------------------------------
 .. _whatsnew_214.other:
diff --git a/pandas/core/strings/accessor.py b/pandas/core/strings/accessor.py
index 58b904fd31b6a..a05fae1524ffd 100644
--- a/pandas/core/strings/accessor.py
+++ b/pandas/core/strings/accessor.py
@@ -393,7 +393,7 @@ def cons_row(x):
                 else:
                     dtype = vdtype
             else:
-                dtype = vdtype
+                _dtype = vdtype
 
             if expand:
                 cons = self._orig._constructor_expanddim
@@ -689,8 +689,11 @@ def cat(
         out: Index | Series
         if isinstance(self._orig, ABCIndex):
             # add dtype for case that result is all-NA
+            dtype = None
+            if isna(result).all():
+                dtype = object
 
-            out = Index(result, dtype=object, name=self._orig.name)
+            out = Index(result, dtype=dtype, name=self._orig.name)
         else:  # Series
             if isinstance(self._orig.dtype, CategoricalDtype):
                 # We need to infer the new categories.
diff --git a/pandas/tests/strings/test_cat.py b/pandas/tests/strings/test_cat.py
index 3e620b7664335..497f87e245ba3 100644
--- a/pandas/tests/strings/test_cat.py
+++ b/pandas/tests/strings/test_cat.py
@@ -3,6 +3,8 @@
 import numpy as np
 import pytest
 
+import pandas.util._test_decorators as td
+
 from pandas import (
     DataFrame,
     Index,
@@ -10,6 +12,7 @@
     Series,
     _testing as tm,
     concat,
+    option_context,
 )
 
 
@@ -26,45 +29,49 @@ def test_str_cat_name(index_or_series, other):
     assert result.name == "name"
 
 
-def test_str_cat(index_or_series):
-    box = index_or_series
-    # test_cat above tests "str_cat" from ndarray;
-    # here testing "str.cat" from Series/Index to ndarray/list
-    s = box(["a", "a", "b", "b", "c", np.nan])
-
-    # single array
-    result = s.str.cat()
-    expected = "aabbc"
-    assert result == expected
-
-    result = s.str.cat(na_rep="-")
-    expected = "aabbc-"
-    assert result == expected
-
-    result = s.str.cat(sep="_", na_rep="NA")
-    expected = "a_a_b_b_c_NA"
-    assert result == expected
-
-    t = np.array(["a", np.nan, "b", "d", "foo", np.nan], dtype=object)
-    expected = box(["aa", "a-", "bb", "bd", "cfoo", "--"])
-
-    # Series/Index with array
-    result = s.str.cat(t, na_rep="-")
-    tm.assert_equal(result, expected)
-
-    # Series/Index with list
-    result = s.str.cat(list(t), na_rep="-")
-    tm.assert_equal(result, expected)
-
-    # errors for incorrect lengths
-    rgx = r"If `others` contains arrays or lists \(or other list-likes.*"
-    z = Series(["1", "2", "3"])
-
-    with pytest.raises(ValueError, match=rgx):
-        s.str.cat(z.values)
-
-    with pytest.raises(ValueError, match=rgx):
-        s.str.cat(list(z))
+@pytest.mark.parametrize(
+    "infer_string", [False, pytest.param(True, marks=td.skip_if_no("pyarrow"))]
+)
+def test_str_cat(index_or_series, infer_string):
+    with option_context("future.infer_string", infer_string):
+        box = index_or_series
+        # test_cat above tests "str_cat" from ndarray;
+        # here testing "str.cat" from Series/Index to ndarray/list
+        s = box(["a", "a", "b", "b", "c", np.nan])
+
+        # single array
+        result = s.str.cat()
+        expected = "aabbc"
+        assert result == expected
+
+        result = s.str.cat(na_rep="-")
+        expected = "aabbc-"
+        assert result == expected
+
+        result = s.str.cat(sep="_", na_rep="NA")
+        expected = "a_a_b_b_c_NA"
+        assert result == expected
+
+        t = np.array(["a", np.nan, "b", "d", "foo", np.nan], dtype=object)
+        expected = box(["aa", "a-", "bb", "bd", "cfoo", "--"])
+
+        # Series/Index with array
+        result = s.str.cat(t, na_rep="-")
+        tm.assert_equal(result, expected)
+
+        # Series/Index with list
+        result = s.str.cat(list(t), na_rep="-")
+        tm.assert_equal(result, expected)
+
+        # errors for incorrect lengths
+        rgx = r"If `others` contains arrays or lists \(or other list-likes.*"
+        z = Series(["1", "2", "3"])
+
+        with pytest.raises(ValueError, match=rgx):
+            s.str.cat(z.values)
+
+        with pytest.raises(ValueError, match=rgx):
+            s.str.cat(list(z))
 
 
 def test_str_cat_raises_intuitive_error(index_or_series):

From d43c3be8a0564975e57c2c5effdd6dc98aae8e2a Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <61934744+phofl@users.noreply.github.com>
Date: Fri, 24 Nov 2023 23:52:25 +0100
Subject: [PATCH 2/7] Update accessor.py

---
 pandas/core/strings/accessor.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/core/strings/accessor.py b/pandas/core/strings/accessor.py
index a05fae1524ffd..62f6a576db24f 100644
--- a/pandas/core/strings/accessor.py
+++ b/pandas/core/strings/accessor.py
@@ -393,7 +393,7 @@ def cons_row(x):
                 else:
                     dtype = vdtype
             else:
-                _dtype = vdtype
+                dtype = vdtype
 
             if expand:
                 cons = self._orig._constructor_expanddim

From 35bc604a8c1525ae887423547e3f7c9cd55cc941 Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <61934744+phofl@users.noreply.github.com>
Date: Sat, 25 Nov 2023 00:22:37 +0100
Subject: [PATCH 3/7] Fix further bugs

---
 pandas/core/strings/accessor.py  |  5 ++-
 pandas/tests/strings/test_cat.py | 64 ++++++++++++++++++++------------
 2 files changed, 43 insertions(+), 26 deletions(-)

diff --git a/pandas/core/strings/accessor.py b/pandas/core/strings/accessor.py
index 62f6a576db24f..35bfb3a1ad2f1 100644
--- a/pandas/core/strings/accessor.py
+++ b/pandas/core/strings/accessor.py
@@ -44,6 +44,7 @@
 )
 from pandas.core.dtypes.missing import isna
 
+from pandas.core.arrays import ExtensionArray
 from pandas.core.base import NoNewAttributesMixin
 from pandas.core.construction import extract_array
 
@@ -455,7 +456,7 @@ def _get_series_list(self, others):
                 # in case of list-like `others`, all elements must be
                 # either Series/Index/np.ndarray (1-dim)...
                 if all(
-                    isinstance(x, (ABCSeries, ABCIndex))
+                    isinstance(x, (ABCSeries, ABCIndex, ExtensionArray))
                     or (isinstance(x, np.ndarray) and x.ndim == 1)
                     for x in others
                 ):
@@ -697,7 +698,7 @@ def cat(
         else:  # Series
             if isinstance(self._orig.dtype, CategoricalDtype):
                 # We need to infer the new categories.
-                dtype = None
+                dtype = self._orig.dtype.categories.dtype
             else:
                 dtype = self._orig.dtype
             res_ser = Series(
diff --git a/pandas/tests/strings/test_cat.py b/pandas/tests/strings/test_cat.py
index 497f87e245ba3..284932491a65e 100644
--- a/pandas/tests/strings/test_cat.py
+++ b/pandas/tests/strings/test_cat.py
@@ -85,39 +85,54 @@ def test_str_cat_raises_intuitive_error(index_or_series):
         s.str.cat("    ")
 
 
+@pytest.mark.parametrize(
+    "infer_string", [False, pytest.param(True, marks=td.skip_if_no("pyarrow"))]
+)
 @pytest.mark.parametrize("sep", ["", None])
 @pytest.mark.parametrize("dtype_target", ["object", "category"])
 @pytest.mark.parametrize("dtype_caller", ["object", "category"])
-def test_str_cat_categorical(index_or_series, dtype_caller, dtype_target, sep):
+def test_str_cat_categorical(
+    index_or_series, dtype_caller, dtype_target, sep, infer_string
+):
     box = index_or_series
 
-    s = Index(["a", "a", "b", "a"], dtype=dtype_caller)
-    s = s if box == Index else Series(s, index=s)
-    t = Index(["b", "a", "b", "c"], dtype=dtype_target)
-
-    expected = Index(["ab", "aa", "bb", "ac"])
-    expected = expected if box == Index else Series(expected, index=s)
+    with option_context("future.infer_string", infer_string):
+        s = Index(["a", "a", "b", "a"], dtype=dtype_caller)
+        s = s if box == Index else Series(s, index=s)
+        t = Index(["b", "a", "b", "c"], dtype=dtype_target)
+
+        expected = Index(["ab", "aa", "bb", "ac"])
+        expected = (
+            expected
+            if box == Index
+            else Series(expected, index=Index(s, dtype=dtype_caller))
+        )
 
-    # Series/Index with unaligned Index -> t.values
-    result = s.str.cat(t.values, sep=sep)
-    tm.assert_equal(result, expected)
+        # Series/Index with unaligned Index -> t.values
+        result = s.str.cat(t.values, sep=sep)
+        tm.assert_equal(result, expected)
 
-    # Series/Index with Series having matching Index
-    t = Series(t.values, index=s)
-    result = s.str.cat(t, sep=sep)
-    tm.assert_equal(result, expected)
+        # Series/Index with Series having matching Index
+        t = Series(t.values, index=Index(s, dtype=dtype_caller))
+        result = s.str.cat(t, sep=sep)
+        tm.assert_equal(result, expected)
 
-    # Series/Index with Series.values
-    result = s.str.cat(t.values, sep=sep)
-    tm.assert_equal(result, expected)
+        # Series/Index with Series.values
+        result = s.str.cat(t.values, sep=sep)
+        tm.assert_equal(result, expected)
 
-    # Series/Index with Series having different Index
-    t = Series(t.values, index=t.values)
-    expected = Index(["aa", "aa", "bb", "bb", "aa"])
-    expected = expected if box == Index else Series(expected, index=expected.str[:1])
+        # Series/Index with Series having different Index
+        t = Series(t.values, index=t.values)
+        expected = Index(["aa", "aa", "bb", "bb", "aa"])
+        dtype = object if dtype_caller == "object" else s.dtype.categories.dtype
+        expected = (
+            expected
+            if box == Index
+            else Series(expected, index=Index(expected.str[:1], dtype=dtype))
+        )
 
-    result = s.str.cat(t, sep=sep)
-    tm.assert_equal(result, expected)
+        result = s.str.cat(t, sep=sep)
+        tm.assert_equal(result, expected)
 
 
 @pytest.mark.parametrize(
@@ -328,8 +343,9 @@ def test_str_cat_all_na(index_or_series, index_or_series2):
 
     # all-NA target
     if box == Series:
-        expected = Series([np.nan] * 4, index=s.index, dtype=object)
+        expected = Series([np.nan] * 4, index=s.index, dtype=s.dtype)
     else:  # box == Index
+        # TODO: String option, this should return string dtype
         expected = Index([np.nan] * 4, dtype=object)
     result = s.str.cat(t, join="left")
     tm.assert_equal(result, expected)

From fa99d733259cbad3638769637e00095f7cd62c6a Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <61934744+phofl@users.noreply.github.com>
Date: Sat, 25 Nov 2023 00:49:23 +0100
Subject: [PATCH 4/7] Fix

---
 pandas/tests/strings/test_api.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/pandas/tests/strings/test_api.py b/pandas/tests/strings/test_api.py
index 2914b22a52e94..fd2501835318d 100644
--- a/pandas/tests/strings/test_api.py
+++ b/pandas/tests/strings/test_api.py
@@ -2,6 +2,7 @@
 import pytest
 
 from pandas import (
+    CategoricalDtype,
     DataFrame,
     Index,
     MultiIndex,
@@ -178,6 +179,7 @@ def test_api_for_categorical(any_string_method, any_string_dtype):
     s = Series(list("aabb"), dtype=any_string_dtype)
     s = s + " " + s
     c = s.astype("category")
+    c = c.astype(CategoricalDtype(c.dtype.categories.astype("object")))
     assert isinstance(c.str, StringMethods)
 
     method_name, args, kwargs = any_string_method

From 9fd924b63af733d5b3078c2c6770648a210cd631 Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <61934744+phofl@users.noreply.github.com>
Date: Sat, 25 Nov 2023 18:21:04 +0100
Subject: [PATCH 5/7] Update accessor.py

---
 pandas/core/strings/accessor.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/core/strings/accessor.py b/pandas/core/strings/accessor.py
index 35bfb3a1ad2f1..1d118cada16c3 100644
--- a/pandas/core/strings/accessor.py
+++ b/pandas/core/strings/accessor.py
@@ -698,7 +698,7 @@ def cat(
         else:  # Series
             if isinstance(self._orig.dtype, CategoricalDtype):
                 # We need to infer the new categories.
-                dtype = self._orig.dtype.categories.dtype
+                dtype = self._orig.dtype.categories.dtype  # type: ignore[assignment]
             else:
                 dtype = self._orig.dtype
             res_ser = Series(

From 5b560687a3f1a815601a4f0c1b40096d61e46bc1 Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <61934744+phofl@users.noreply.github.com>
Date: Fri, 8 Dec 2023 22:55:56 +0100
Subject: [PATCH 6/7] Update v2.1.4.rst

---
 doc/source/whatsnew/v2.1.4.rst | 1 -
 1 file changed, 1 deletion(-)

diff --git a/doc/source/whatsnew/v2.1.4.rst b/doc/source/whatsnew/v2.1.4.rst
index 83ec9bcd6a519..ee2d8efdbcc6b 100644
--- a/doc/source/whatsnew/v2.1.4.rst
+++ b/doc/source/whatsnew/v2.1.4.rst
@@ -30,7 +30,6 @@ Bug fixes
 - Fixed bug in :meth:`DataFrame.__setitem__` casting :class:`Index` with object-dtype to PyArrow backed strings when ``infer_string`` option is set (:issue:`55638`)
 - Fixed bug in :meth:`DataFrame.to_hdf` raising when columns have ``StringDtype`` (:issue:`55088`)
 - Fixed bug in :meth:`Index.insert` casting object-dtype to PyArrow backed strings when ``infer_string`` option is set (:issue:`55638`)
-- Fixed bug in :meth:`Index.str.cat` always casting result to object dtype (:issue:`56157`)
 - Fixed bug in :meth:`Series.__ne__` resulting in False for comparison between ``NA`` and string value for ``dtype="string[pyarrow_numpy]"`` (:issue:`56122`)
 - Fixed bug in :meth:`Series.mode` not keeping object dtype when ``infer_string`` is set (:issue:`56183`)
 - Fixed bug in :meth:`Series.str.split` and :meth:`Series.str.rsplit` when ``pat=None`` for :class:`ArrowDtype` with ``pyarrow.string`` (:issue:`56271`)

From 55784175b35f82f7011aed9d3138b175fc8ecd77 Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <61934744+phofl@users.noreply.github.com>
Date: Fri, 8 Dec 2023 22:56:41 +0100
Subject: [PATCH 7/7] Update v2.2.0.rst

---
 doc/source/whatsnew/v2.2.0.rst | 1 +
 1 file changed, 1 insertion(+)

diff --git a/doc/source/whatsnew/v2.2.0.rst b/doc/source/whatsnew/v2.2.0.rst
index c878fd2664dc4..99faad8aff986 100644
--- a/doc/source/whatsnew/v2.2.0.rst
+++ b/doc/source/whatsnew/v2.2.0.rst
@@ -576,6 +576,7 @@ Strings
 ^^^^^^^
 - Bug in :func:`pandas.api.types.is_string_dtype` while checking object array with no elements is of the string dtype (:issue:`54661`)
 - Bug in :meth:`DataFrame.apply` failing when ``engine="numba"`` and columns or index have ``StringDtype`` (:issue:`56189`)
+- Bug in :meth:`Index.str.cat` always casting result to object dtype (:issue:`56157`)
 - Bug in :meth:`Series.__mul__` for :class:`ArrowDtype` with ``pyarrow.string`` dtype and ``string[pyarrow]`` for the pyarrow backend (:issue:`51970`)
 - Bug in :meth:`Series.str.startswith` and :meth:`Series.str.endswith` with arguments of type ``tuple[str, ...]`` for ``string[pyarrow]`` (:issue:`54942`)