Backport PR #53118 on branch 2.0.x (REGR: read_sql dropping duplicated columns) (#53136)

Backport PR #53118: REGR: read_sql dropping duplicated columns

Co-authored-by: Patrick Hoefler <61934744+phofl@users.noreply.github.com>
pandas-dev · May 8, 2023 · 291acfb · 291acfb
1 parent 44f0a9b
commit 291acfb
Show file tree

Hide file tree

Showing 3 changed files with 16 additions and 1 deletion.
diff --git a/doc/source/whatsnew/v2.0.2.rst b/doc/source/whatsnew/v2.0.2.rst
@@ -13,6 +13,7 @@ including other versions of pandas.
 
 Fixed regressions
 ~~~~~~~~~~~~~~~~~
+- Fixed regression in :func:`read_sql` dropping columns with duplicated column names (:issue:`53117`)
 - Fixed regression in :meth:`DataFrame.loc` losing :class:`MultiIndex` name when enlarging object (:issue:`53053`)
 - Fixed regression in :meth:`DataFrame.to_string` printing a backslash at the end of the first row of data, instead of headers, when the DataFrame doesn't fit the line width (:issue:`53054`)
 - Fixed regression in :meth:`MultiIndex.join` returning levels in wrong order (:issue:`53093`)

diff --git a/pandas/io/sql.py b/pandas/io/sql.py
@@ -158,7 +158,9 @@ def _convert_arrays_to_dataframe(
             ArrowExtensionArray(pa.array(arr, from_pandas=True)) for arr in arrays
         ]
     if arrays:
-        return DataFrame(dict(zip(columns, arrays)))
+        df = DataFrame(dict(zip(list(range(len(columns))), arrays)))
+        df.columns = columns
+        return df
     else:
         return DataFrame(columns=columns)
 

diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py
@@ -1496,6 +1496,18 @@ def test_escaped_table_name(self):
 
         tm.assert_frame_equal(res, df)
 
+    def test_read_sql_duplicate_columns(self):
+        # GH#53117: both "a" columns from the SELECT must survive read_sql
+        df = DataFrame({"a": [1, 2, 3], "b": [0.1, 0.2, 0.3], "c": 1})
+        df.to_sql("test_table", self.conn, index=False)
+
+        result = pd.read_sql("SELECT a, b, a +1 as a, c FROM test_table;", self.conn)
+        expected = DataFrame(
+            [[1, 0.1, 2, 1], [2, 0.2, 3, 1], [3, 0.3, 4, 1]],
+            columns=["a", "b", "a", "c"],
+        )
+        tm.assert_frame_equal(result, expected)
+
 
 @pytest.mark.skipif(not SQLALCHEMY_INSTALLED, reason="SQLAlchemy not installed")
 class TestSQLApi(SQLAlchemyMixIn, _TestSQLApi):