From 2e3b0b3822ec3bc93f7f8cff82f03c859d9c302e Mon Sep 17 00:00:00 2001
From: u7397058 <Judy.Xie@anu.edu.au>
Date: Sun, 26 Oct 2025 14:35:28 +1100
Subject: [PATCH 01/11] Fixing make_reader so that it skips rows at the file level

---
 pandas/io/parsers/python_parser.py | 20 +++++++++++++++++---
 1 file changed, 17 insertions(+), 3 deletions(-)

diff --git a/pandas/io/parsers/python_parser.py b/pandas/io/parsers/python_parser.py
index dc7a21c859a33..e5b9375214516 100644
--- a/pandas/io/parsers/python_parser.py
+++ b/pandas/io/parsers/python_parser.py
@@ -218,6 +218,15 @@ class MyDialect(csv.Dialect):
 
             if sep is not None:
                 dia.delimiter = sep
+                # Skip rows at file level before csv.reader sees them
+                # prevents CSV parsing errors on lines that will be discarded
+                if self.skiprows is not None:
+                    while self.skipfunc(self.pos):
+                        self.pos += 1
+                        try:
+                            f.readline()
+                        except (StopIteration, AttributeError):
+                            break
             else:
                 # attempt to sniff the delimiter from the first valid line,
                 # i.e. no comment line and not in skiprows
@@ -907,7 +916,12 @@ def _next_line(self) -> list[Scalar]:
         else:
             while self.skipfunc(self.pos):
                 self.pos += 1
-                next(self.data)
+                try:
+                    next(self.data)
+                except csv.Error:
+                    # CSV parsing error on a skipped line is acceptable
+                    # The line is being discarded without using its content
+                    pass
 
             while True:
                 orig_line = self._next_iter_line(row_num=self.pos + 1)
@@ -926,7 +940,7 @@ def _next_line(self) -> list[Scalar]:
                         break
 
         # This was the first line of the file,
-        # which could contain the BOM at the
+        # which could contain the BOM at theo
         # beginning of it.
         if self.pos == 1:
             line = self._check_for_bom(line)
@@ -1494,7 +1508,7 @@ def __init__(self, f: ReadCsvBuffer[str], **kwds) -> None:
         self.infer_nrows = kwds.pop("infer_nrows")
         PythonParser.__init__(self, f, **kwds)
 
-    def _make_reader(self, f: IO[str] | ReadCsvBuffer[str]) -> FixedWidthReader:
+    def _emake_rader(self, f: IO[str] | ReadCsvBuffer[str]) -> FixedWidthReader:
         return FixedWidthReader(
             f,
             self.colspecs,

From ec55b63d823549744772328d42e678cb26124bf2 Mon Sep 17 00:00:00 2001
From: u7397058 <Judy.Xie@anu.edu.au>
Date: Sun, 26 Oct 2025 14:35:45 +1100
Subject: [PATCH 02/11] Added tests

---
 .../io/parser/test_python_parser_only.py      | 43 +++++++++++++++++++
 1 file changed, 43 insertions(+)

diff --git a/pandas/tests/io/parser/test_python_parser_only.py b/pandas/tests/io/parser/test_python_parser_only.py
index 0de65ab889be8..55c950552512d 100644
--- a/pandas/tests/io/parser/test_python_parser_only.py
+++ b/pandas/tests/io/parser/test_python_parser_only.py
@@ -599,3 +599,46 @@ def fixer(bad_line):
         )
 
     tm.assert_frame_equal(result, expected)
+
+
+def test_read_csv_leading_quote_skip(python_parser_only):
+    # GH 62739
+    tbl = """\
+    "
+a b
+1 3
+"""
+    parser = python_parser_only
+    result = parser.read_csv(
+        StringIO(tbl),
+        delimiter=" ",
+        skiprows=1,
+    )
+    expected = DataFrame({"a": [1], "b": [3]})
+    tm.assert_frame_equal(result, expected)
+
+
+def test_read_csv_unclosed_double_quote_in_data_still_errors(python_parser_only):
+    # GH 62739
+    tbl = """\
+comment line
+a b
+"
+1 3
+"""
+    parser = python_parser_only
+    with pytest.raises(ParserError, match="unexpected end of data"):
+        parser.read_csv(StringIO(tbl), delimiter=" ", skiprows=1)
+
+
+def test_read_csv_skiprows_zero(python_parser_only):
+    # GH 62739
+    tbl = """\
+"
+a b
+1 3
+"""
+    parser = python_parser_only
+    # don't skip anything
+    with pytest.raises(ParserError, match="unexpected end of data"):
+        parser.read_csv(StringIO(tbl), delimiter=" ", skiprows=0, engine="python")

From 82beb0d5c8157b66fe40294dfafb8f58809f5020 Mon Sep 17 00:00:00 2001
From: Georgina Scott <154103190+georginas05@users.noreply.github.com>
Date: Sun, 26 Oct 2025 15:21:10 +1100
Subject: [PATCH 03/11] Update v3.0.0.rst

---
 doc/source/whatsnew/v3.0.0.rst | 1 +
 1 file changed, 1 insertion(+)

diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst
index 44bc82008e718..cf6fd727c1317 100644
--- a/doc/source/whatsnew/v3.0.0.rst
+++ b/doc/source/whatsnew/v3.0.0.rst
@@ -1104,6 +1104,7 @@ I/O
 - Bug in :meth:`set_option` where setting the pandas option ``display.html.use_mathjax`` to ``False`` has no effect (:issue:`59884`)
 - Bug in :meth:`to_csv` where ``quotechar``` is not escaped when ``escapechar`` is not None (:issue:`61407`)
 - Bug in :meth:`to_excel` where :class:`MultiIndex` columns would be merged to a single row when ``merge_cells=False`` is passed (:issue:`60274`)
+- Bug in :meth: `python_parser` where :class: `MyDialect` did not appropriately skip a line when instructed, causing `EmptyDataError` (:issue: `62739`)
 
 Period
 ^^^^^^

From c6cf7f6b81cf0d4460fd78092194225ac90e0b66 Mon Sep 17 00:00:00 2001
From: Georgina Scott <154103190+georginas05@users.noreply.github.com>
Date: Sun, 26 Oct 2025 15:31:21 +1100
Subject: [PATCH 04/11] Update v3.0.0.rst minor changes

---
 doc/source/whatsnew/v3.0.0.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst
index cf6fd727c1317..b9db3b6f1d831 100644
--- a/doc/source/whatsnew/v3.0.0.rst
+++ b/doc/source/whatsnew/v3.0.0.rst
@@ -1104,7 +1104,7 @@ I/O
 - Bug in :meth:`set_option` where setting the pandas option ``display.html.use_mathjax`` to ``False`` has no effect (:issue:`59884`)
 - Bug in :meth:`to_csv` where ``quotechar``` is not escaped when ``escapechar`` is not None (:issue:`61407`)
 - Bug in :meth:`to_excel` where :class:`MultiIndex` columns would be merged to a single row when ``merge_cells=False`` is passed (:issue:`60274`)
-- Bug in :meth: `python_parser` where :class: `MyDialect` did not appropriately skip a line when instructed, causing `EmptyDataError` (:issue: `62739`)
+- Bug in :meth:`python_parser` where :class:`MyDialect` did not appropriately skip a line when instructed, causing Empty Data Error (:issue:`62739`)
 
 Period
 ^^^^^^

From 6a3e5ad7b066d4a4306afd998610ac5ef3c3b886 Mon Sep 17 00:00:00 2001
From: Georgina Scott <154103190+georginas05@users.noreply.github.com>
Date: Sun, 26 Oct 2025 15:39:26 +1100
Subject: [PATCH 05/11] Update v3.0.0.rst - alphabetical order

---
 doc/source/whatsnew/v3.0.0.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst
index b9db3b6f1d831..32633fbc2159e 100644
--- a/doc/source/whatsnew/v3.0.0.rst
+++ b/doc/source/whatsnew/v3.0.0.rst
@@ -1086,6 +1086,7 @@ I/O
 - Bug in :meth:`HDFStore.get` was failing to save data of dtype datetime64[s] correctly (:issue:`59004`)
 - Bug in :meth:`HDFStore.select` causing queries on categorical string columns to return unexpected results (:issue:`57608`)
 - Bug in :meth:`MultiIndex.factorize` incorrectly raising on length-0 indexes (:issue:`57517`)
+- Bug in :meth:`python_parser` where :class:`MyDialect` did not appropriately skip a line when instructed, causing Empty Data Error (:issue:`62739`)
 - Bug in :meth:`read_csv` causing segmentation fault when ``encoding_errors`` is not a string. (:issue:`59059`)
 - Bug in :meth:`read_csv` raising ``TypeError`` when ``index_col`` is specified and ``na_values`` is a dict containing the key ``None``. (:issue:`57547`)
 - Bug in :meth:`read_csv` raising ``TypeError`` when ``nrows`` and ``iterator`` are specified without specifying a ``chunksize``. (:issue:`59079`)
@@ -1104,7 +1105,6 @@ I/O
 - Bug in :meth:`set_option` where setting the pandas option ``display.html.use_mathjax`` to ``False`` has no effect (:issue:`59884`)
 - Bug in :meth:`to_csv` where ``quotechar``` is not escaped when ``escapechar`` is not None (:issue:`61407`)
 - Bug in :meth:`to_excel` where :class:`MultiIndex` columns would be merged to a single row when ``merge_cells=False`` is passed (:issue:`60274`)
-- Bug in :meth:`python_parser` where :class:`MyDialect` did not appropriately skip a line when instructed, causing Empty Data Error (:issue:`62739`)
 
 Period
 ^^^^^^

From 048bebaef3584ae58730394b10f52ef0ac811611 Mon Sep 17 00:00:00 2001
From: zephyrieal <Judy.Xie@anu.edu.au>
Date: Sun, 26 Oct 2025 18:24:31 +1100
Subject: [PATCH 06/11] Fixed accidental typo in code

---
 pandas/io/parsers/python_parser.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pandas/io/parsers/python_parser.py b/pandas/io/parsers/python_parser.py
index e5b9375214516..1791b8a3d221d 100644
--- a/pandas/io/parsers/python_parser.py
+++ b/pandas/io/parsers/python_parser.py
@@ -940,7 +940,7 @@ def _next_line(self) -> list[Scalar]:
                         break
 
         # This was the first line of the file,
-        # which could contain the BOM at theo
+        # which could contain the BOM at the
         # beginning of it.
         if self.pos == 1:
             line = self._check_for_bom(line)
@@ -1508,7 +1508,7 @@ def __init__(self, f: ReadCsvBuffer[str], **kwds) -> None:
         self.infer_nrows = kwds.pop("infer_nrows")
         PythonParser.__init__(self, f, **kwds)
 
-    def _emake_rader(self, f: IO[str] | ReadCsvBuffer[str]) -> FixedWidthReader:
+    def _make_reader(self, f: IO[str] | ReadCsvBuffer[str]) -> FixedWidthReader:
         return FixedWidthReader(
             f,
             self.colspecs,

From 9f5046c0541f6233a75c4712e277de38aec79dc7 Mon Sep 17 00:00:00 2001
From: zephyrieal <Judy.Xie@anu.edu.au>
Date: Sun, 26 Oct 2025 18:26:09 +1100
Subject: [PATCH 07/11] removed residue code

---
 pandas/io/parsers/python_parser.py | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

diff --git a/pandas/io/parsers/python_parser.py b/pandas/io/parsers/python_parser.py
index 1791b8a3d221d..0a95f5e9bc9f6 100644
--- a/pandas/io/parsers/python_parser.py
+++ b/pandas/io/parsers/python_parser.py
@@ -916,12 +916,7 @@ def _next_line(self) -> list[Scalar]:
         else:
             while self.skipfunc(self.pos):
                 self.pos += 1
-                try:
-                    next(self.data)
-                except csv.Error:
-                    # CSV parsing error on a skipped line is acceptable
-                    # The line is being discarded without using its content
-                    pass
+                next(self.data)
 
             while True:
                 orig_line = self._next_iter_line(row_num=self.pos + 1)

From 2d222cff5530f7ed5575078069bfe56f3b42de45 Mon Sep 17 00:00:00 2001
From: zephyrieal <Judy.Xie@anu.edu.au>
Date: Mon, 27 Oct 2025 02:57:21 +1100
Subject: [PATCH 08/11] break if end of file is reached

---
 pandas/io/parsers/python_parser.py | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/pandas/io/parsers/python_parser.py b/pandas/io/parsers/python_parser.py
index 0a95f5e9bc9f6..b08e013d7acb8 100644
--- a/pandas/io/parsers/python_parser.py
+++ b/pandas/io/parsers/python_parser.py
@@ -222,11 +222,10 @@ class MyDialect(csv.Dialect):
                 # prevents CSV parsing errors on lines that will be discarded
                 if self.skiprows is not None:
                     while self.skipfunc(self.pos):
-                        self.pos += 1
-                        try:
-                            f.readline()
-                        except (StopIteration, AttributeError):
+                        line = f.readline()
+                        if not line:
                             break
+                        self.pos += 1
             else:
                 # attempt to sniff the delimiter from the first valid line,
                 # i.e. no comment line and not in skiprows

From 820fa85845de25dc5859e2c19be9793bd4c5f388 Mon Sep 17 00:00:00 2001
From: zephyrieal <Judy.Xie@anu.edu.au>
Date: Mon, 27 Oct 2025 02:57:59 +1100
Subject: [PATCH 09/11] removed typo

---
 pandas/tests/io/parser/test_python_parser_only.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/pandas/tests/io/parser/test_python_parser_only.py b/pandas/tests/io/parser/test_python_parser_only.py
index 55c950552512d..ee6484709be63 100644
--- a/pandas/tests/io/parser/test_python_parser_only.py
+++ b/pandas/tests/io/parser/test_python_parser_only.py
@@ -621,7 +621,6 @@ def test_read_csv_leading_quote_skip(python_parser_only):
 def test_read_csv_unclosed_double_quote_in_data_still_errors(python_parser_only):
     # GH 62739
     tbl = """\
-comment line
 a b
 "
 1 3

From 79c0737df1e17cdfedd9eaef7a279b2b2b3a8131 Mon Sep 17 00:00:00 2001
From: Zephy <74450862+zephyrieal@users.noreply.github.com>
Date: Sun, 2 Nov 2025 15:41:22 +1100
Subject: [PATCH 10/11] Update v3.0.0.rst

---
 doc/source/whatsnew/v3.0.0.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst
index f44b63bd55563..f37a7c217ef7a 100644
--- a/doc/source/whatsnew/v3.0.0.rst
+++ b/doc/source/whatsnew/v3.0.0.rst
@@ -1106,7 +1106,7 @@ I/O
 - Bug in :meth:`HDFStore.get` was failing to save data of dtype datetime64[s] correctly (:issue:`59004`)
 - Bug in :meth:`HDFStore.select` causing queries on categorical string columns to return unexpected results (:issue:`57608`)
 - Bug in :meth:`MultiIndex.factorize` incorrectly raising on length-0 indexes (:issue:`57517`)
-- Bug in :meth:`python_parser` where :class:`MyDialect` did not appropriately skip a line when instructed, causing Empty Data Error (:issue:`62739`)
+- Bug in :func:`read_csv` where it did not appropriately skip a line when instructed, causing Empty Data Error (:issue:`62739`)
 - Bug in :meth:`read_csv` causing segmentation fault when ``encoding_errors`` is not a string. (:issue:`59059`)
 - Bug in :meth:`read_csv` for the ``c`` and ``python`` engines where parsing numbers with large exponents caused overflows. Now, numbers with large positive exponents are parsed as ``inf`` or ``-inf`` depending on the sign of the mantissa, while those with large negative exponents are parsed as ``0.0`` (:issue:`62617`, :issue:`38794`, :issue:`62740`)
 - Bug in :meth:`read_csv` raising ``TypeError`` when ``index_col`` is specified and ``na_values`` is a dict containing the key ``None``. (:issue:`57547`)

From 9deae3035dbf9b32231b3e682cecb3152651bce8 Mon Sep 17 00:00:00 2001
From: Zephy <74450862+zephyrieal@users.noreply.github.com>
Date: Sun, 2 Nov 2025 15:53:09 +1100
Subject: [PATCH 11/11] Update v3.0.0.rst - fix ordering

---
 doc/source/whatsnew/v3.0.0.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst
index 3e8c9e5c88ae2..d3b67af4b3061 100644
--- a/doc/source/whatsnew/v3.0.0.rst
+++ b/doc/source/whatsnew/v3.0.0.rst
@@ -1116,11 +1116,11 @@ I/O
 - Bug in :meth:`HDFStore.get` was failing to save data of dtype datetime64[s] correctly (:issue:`59004`)
 - Bug in :meth:`HDFStore.select` causing queries on categorical string columns to return unexpected results (:issue:`57608`)
 - Bug in :meth:`MultiIndex.factorize` incorrectly raising on length-0 indexes (:issue:`57517`)
-- Bug in :func:`read_csv` where it did not appropriately skip a line when instructed, causing Empty Data Error (:issue:`62739`)
 - Bug in :meth:`read_csv` causing segmentation fault when ``encoding_errors`` is not a string. (:issue:`59059`)
 - Bug in :meth:`read_csv` for the ``c`` and ``python`` engines where parsing numbers with large exponents caused overflows. Now, numbers with large positive exponents are parsed as ``inf`` or ``-inf`` depending on the sign of the mantissa, while those with large negative exponents are parsed as ``0.0`` (:issue:`62617`, :issue:`38794`, :issue:`62740`)
 - Bug in :meth:`read_csv` raising ``TypeError`` when ``index_col`` is specified and ``na_values`` is a dict containing the key ``None``. (:issue:`57547`)
 - Bug in :meth:`read_csv` raising ``TypeError`` when ``nrows`` and ``iterator`` are specified without specifying a ``chunksize``. (:issue:`59079`)
+- Bug in :meth:`read_csv` where rows skipped via ``skiprows`` were still parsed by the ``python`` engine, so an unclosed quote on a skipped row raised ``EmptyDataError`` (:issue:`62739`)
 - Bug in :meth:`read_csv` where the order of the ``na_values`` makes an inconsistency when ``na_values`` is a list non-string values. (:issue:`59303`)
 - Bug in :meth:`read_csv` with ``c`` and ``python`` engines reading big integers as strings. Now reads them as python integers. (:issue:`51295`)
 - Bug in :meth:`read_csv` with ``engine="c"`` reading large float numbers with preceding integers as strings. Now reads them as floats. (:issue:`51295`)