From 0d61b8dbb0e5de44b410e3a94a29e9bc8ea53779 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C3=81lvaro=20Kothe?= <kothe65@gmail.com>
Date: Sat, 4 Oct 2025 18:24:35 -0300
Subject: [PATCH 1/6] fix(parser): integer overflow reads as PyLongObject

---
 pandas/_libs/parsers.pyx | 36 +++++++++++++++++++++++++++++++++---
 1 file changed, 33 insertions(+), 3 deletions(-)

diff --git a/pandas/_libs/parsers.pyx b/pandas/_libs/parsers.pyx
index 91eddc3261164..20cc39f2760b6 100644
--- a/pandas/_libs/parsers.pyx
+++ b/pandas/_libs/parsers.pyx
@@ -29,6 +29,7 @@ from cpython.exc cimport (
     PyErr_Fetch,
     PyErr_Occurred,
 )
+from cpython.long cimport PyLong_FromString
 from cpython.object cimport PyObject
 from cpython.ref cimport (
     Py_INCREF,
@@ -1081,9 +1082,8 @@ cdef class TextReader:
                         np.dtype("object"), i, start, end, 0,
                         0, na_hashset, na_fset)
                 except OverflowError:
-                    col_res, na_count = self._convert_with_dtype(
-                        np.dtype("object"), i, start, end, na_filter,
-                        0, na_hashset, na_fset)
+                    col_res, na_count = _try_pylong(self.parser, i, start,
+                                                    end, na_filter, na_hashset)
 
                 if col_res is not None:
                     break
@@ -1873,6 +1873,36 @@ cdef int _try_int64_nogil(parser_t *parser, int64_t col,
 
     return 0
 
+cdef _try_pylong(parser_t *parser, Py_ssize_t col,
+                 int64_t line_start, int64_t line_end,
+                 bint na_filter, kh_str_starts_t *na_hashset):
+    # Parse rows [line_start, line_end) of column ``col`` as Python ints into
+    # an object ndarray; returns (result, na_count). Non-integers raise ValueError.
+    cdef:
+        int na_count = 0
+        Py_ssize_t i, lines
+        coliter_t it
+        const char *word = NULL
+        ndarray[object] result
+        object NA = na_values[np.object_]
+
+    lines = line_end - line_start
+    result = np.empty(lines, dtype=object)
+    coliter_setup(&it, parser, col, line_start)
+
+    for i in range(lines):
+        COLITER_NEXT(it, word)
+        if na_filter and kh_get_str_starts_item(na_hashset, word):
+            # word is in the NA hash table: count it and store the NA marker
+            na_count += 1
+            result[i] = NA
+            continue
+
+        # PyLong_FromString raises ValueError on failure; it never returns None.
+        result[i] = PyLong_FromString(word, NULL, 10)
+
+    return result, na_count
+
 
 # -> tuple[ndarray[bool], int]
 cdef _try_bool_flex(parser_t *parser, int64_t col,

From 1ff5245a6016bc15e247c93c539f9f5f413c5b84 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C3=81lvaro=20Kothe?= <kothe65@gmail.com>
Date: Sat, 4 Oct 2025 18:55:32 -0300
Subject: [PATCH 2/6] test(parser): update expected results for overflow tests

---
 pandas/tests/io/parser/common/test_ints.py | 31 ++++++++++++++--------
 1 file changed, 20 insertions(+), 11 deletions(-)

diff --git a/pandas/tests/io/parser/common/test_ints.py b/pandas/tests/io/parser/common/test_ints.py
index 9322e8d54f5b8..73b8536f8964e 100644
--- a/pandas/tests/io/parser/common/test_ints.py
+++ b/pandas/tests/io/parser/common/test_ints.py
@@ -144,17 +144,22 @@ def test_int64_overflow(all_parsers, conv, request):
         if parser.engine == "pyarrow":
             mark = pytest.mark.xfail(reason="parses to float64")
             request.applymarker(mark)
+        elif parser.engine == "python":
+            mark = pytest.mark.xfail(
+                reason="TODO: Python engine reads bigint as string"
+            )
+            request.applymarker(mark)
 
         result = parser.read_csv(StringIO(data))
         expected = DataFrame(
             [
-                "00013007854817840016671868",
-                "00013007854817840016749251",
-                "00013007854817840016754630",
-                "00013007854817840016781876",
-                "00013007854817840017028824",
-                "00013007854817840017963235",
-                "00013007854817840018860166",
+                13007854817840016671868,
+                13007854817840016749251,
+                13007854817840016754630,
+                13007854817840016781876,
+                13007854817840017028824,
+                13007854817840017963235,
+                13007854817840018860166,
             ],
             columns=["ID"],
         )
@@ -185,7 +190,7 @@ def test_int64_overflow(all_parsers, conv, request):
 )
 def test_int64_uint64_range(all_parsers, val):
     # These numbers fall right inside the int64-uint64
-    # range, so they should be parsed as string.
+    # range, so they should be parsed as integer.
     parser = all_parsers
     result = parser.read_csv(StringIO(str(val)), header=None)
 
@@ -197,13 +202,17 @@ def test_int64_uint64_range(all_parsers, val):
 @pytest.mark.parametrize(
     "val", [np.iinfo(np.uint64).max + 1, np.iinfo(np.int64).min - 1]
 )
-def test_outside_int64_uint64_range(all_parsers, val):
+def test_outside_int64_uint64_range(all_parsers, val, request):
     # These numbers fall just outside the int64-uint64
-    # range, so they should be parsed as string.
+    # range, so they should be parsed as Python integers (object dtype).
     parser = all_parsers
+    if parser.engine == "python":
+        mark = pytest.mark.xfail(reason="TODO: Python engine reads bigint as string")
+        request.applymarker(mark)
+
     result = parser.read_csv(StringIO(str(val)), header=None)
 
-    expected = DataFrame([str(val)])
+    expected = DataFrame([val])
     tm.assert_frame_equal(result, expected)
 
 

From 6cfc8b84e535e89bd8ed4758a519b5802aa3699f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C3=81lvaro=20Kothe?= <kothe65@gmail.com>
Date: Sat, 4 Oct 2025 19:01:18 -0300
Subject: [PATCH 3/6] docs(io): add entry in whatsnew

---
 doc/source/whatsnew/v3.0.0.rst | 1 +
 1 file changed, 1 insertion(+)

diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst
index 8c8a16af6bd34..ae4be412c056f 100644
--- a/doc/source/whatsnew/v3.0.0.rst
+++ b/doc/source/whatsnew/v3.0.0.rst
@@ -1079,6 +1079,7 @@ I/O
 - Bug in :meth:`read_csv` raising ``TypeError`` when ``index_col`` is specified and ``na_values`` is a dict containing the key ``None``. (:issue:`57547`)
 - Bug in :meth:`read_csv` raising ``TypeError`` when ``nrows`` and ``iterator`` are specified without specifying a ``chunksize``. (:issue:`59079`)
 - Bug in :meth:`read_csv` where the order of the ``na_values`` makes an inconsistency when ``na_values`` is a list non-string values. (:issue:`59303`)
+- Bug in :meth:`read_csv` with ``engine="c"`` reading integers outside the ``int64``/``uint64`` range as strings. They are now read as Python integers with ``object`` dtype. (:issue:`51295`)
 - Bug in :meth:`read_csv` with ``engine="pyarrow"`` and ``dtype="Int64"`` losing precision (:issue:`56136`)
 - Bug in :meth:`read_excel` raising ``ValueError`` when passing array of boolean values when ``dtype="boolean"``. (:issue:`58159`)
 - Bug in :meth:`read_html` where ``rowspan`` in header row causes incorrect conversion to ``DataFrame``. (:issue:`60210`)

From 3e1a991963568bf9bdedaba3577bef4c6cb66398 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C3=81lvaro=20Kothe?= <kothe65@gmail.com>
Date: Sun, 5 Oct 2025 09:27:28 -0300
Subject: [PATCH 4/6] test(parser): add test with overflow followed by str

---
 pandas/tests/io/parser/common/test_ints.py | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/pandas/tests/io/parser/common/test_ints.py b/pandas/tests/io/parser/common/test_ints.py
index 73b8536f8964e..7380026cd5d96 100644
--- a/pandas/tests/io/parser/common/test_ints.py
+++ b/pandas/tests/io/parser/common/test_ints.py
@@ -216,6 +216,19 @@ def test_outside_int64_uint64_range(all_parsers, val, request):
     tm.assert_frame_equal(result, expected)
 
 
+@skip_pyarrow  # CSV parse error: Empty CSV file or block
+@pytest.mark.parametrize(
+    "val", [np.iinfo(np.uint64).max + 1, np.iinfo(np.int64).min - 1]
+)
+def test_outside_int64_uint64_range_follow_str(all_parsers, val, request):
+    parser = all_parsers
+
+    result = parser.read_csv(StringIO(f"{val}\nabc"), header=None)
+    # A non-numeric row after the overflowing value keeps both cells as strings.
+    expected = DataFrame([str(val), "abc"])
+    tm.assert_frame_equal(result, expected)
+
+
 @xfail_pyarrow  # gets float64 dtype instead of object
 @pytest.mark.parametrize("exp_data", [[str(-1), str(2**63)], [str(2**63), str(-1)]])
 def test_numeric_range_too_wide(all_parsers, exp_data):

From 7366e644170f4269c907b66451f7ed5e68af555d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C3=81lvaro=20Kothe?= <kothe65@gmail.com>
Date: Sun, 5 Oct 2025 10:05:29 -0300
Subject: [PATCH 5/6] fix: handle string after overflow

---
 pandas/_libs/parsers.pyx | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/pandas/_libs/parsers.pyx b/pandas/_libs/parsers.pyx
index 20cc39f2760b6..ca87fce555f75 100644
--- a/pandas/_libs/parsers.pyx
+++ b/pandas/_libs/parsers.pyx
@@ -1082,8 +1082,13 @@ cdef class TextReader:
                         np.dtype("object"), i, start, end, 0,
                         0, na_hashset, na_fset)
                 except OverflowError:
-                    col_res, na_count = _try_pylong(self.parser, i, start,
-                                                    end, na_filter, na_hashset)
+                    try:
+                        col_res, na_count = _try_pylong(self.parser, i, start,
+                                                        end, na_filter, na_hashset)
+                    except ValueError:
+                        col_res, na_count = self._convert_with_dtype(
+                            np.dtype("object"), i, start, end, 0,
+                            0, na_hashset, na_fset)
 
                 if col_res is not None:
                     break

From e2083bdee4ab650d209986e929a98fd18e5f7aa7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C3=81lvaro=20Kothe?= <kothe65@gmail.com>
Date: Sun, 5 Oct 2025 10:08:38 -0300
Subject: [PATCH 6/6] chore: remove unused request

---
 pandas/tests/io/parser/common/test_ints.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/tests/io/parser/common/test_ints.py b/pandas/tests/io/parser/common/test_ints.py
index 7380026cd5d96..87db540f6293a 100644
--- a/pandas/tests/io/parser/common/test_ints.py
+++ b/pandas/tests/io/parser/common/test_ints.py
@@ -220,7 +220,7 @@ def test_outside_int64_uint64_range(all_parsers, val, request):
 @pytest.mark.parametrize(
     "val", [np.iinfo(np.uint64).max + 1, np.iinfo(np.int64).min - 1]
 )
-def test_outside_int64_uint64_range_follow_str(all_parsers, val, request):
+def test_outside_int64_uint64_range_follow_str(all_parsers, val):
     parser = all_parsers
 
     result = parser.read_csv(StringIO(f"{val}\nabc"), header=None)