BUG: Fix read_csv raising TypeError when iterator and nrows are specified without chunksize (#59080)

BUG: Fix read_csv raising TypeError when iterator and nrows are specified without a chunksize
pandas-dev · Jun 24, 2024 · dfaaa39 · dfaaa39
1 parent fe785cc
commit dfaaa39
Show file tree

Hide file tree

Showing 3 changed files with 30 additions and 1 deletion.
diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst
@@ -554,6 +554,7 @@ I/O
 - Bug in :meth:`DataFrame.to_string` that raised ``StopIteration`` with nested DataFrames. (:issue:`16098`)
 - Bug in :meth:`HDFStore.get` was failing to save data of dtype datetime64[s] correctly (:issue:`59004`)
 - Bug in :meth:`read_csv` raising ``TypeError`` when ``index_col`` is specified and ``na_values`` is a dict containing the key ``None``. (:issue:`57547`)
+- Bug in :meth:`read_csv` raising ``TypeError`` when ``nrows`` and ``iterator`` are specified without a ``chunksize``. (:issue:`59079`)
 - Bug in :meth:`read_stata` raising ``KeyError`` when input file is stored in big-endian format and contains strL data. (:issue:`58638`)
 
 Period

diff --git a/pandas/io/parsers/readers.py b/pandas/io/parsers/readers.py
@@ -1534,7 +1534,10 @@ def get_chunk(self, size: int | None = None) -> DataFrame:
         if self.nrows is not None:
             if self._currow >= self.nrows:
                 raise StopIteration
-            size = min(size, self.nrows - self._currow)
+            if size is None:
+                size = self.nrows - self._currow
+            else:
+                size = min(size, self.nrows - self._currow)
         return self.read(nrows=size)
 
     def __enter__(self) -> Self:

diff --git a/pandas/tests/io/parser/common/test_iterator.py b/pandas/tests/io/parser/common/test_iterator.py
@@ -98,6 +98,31 @@ def test_iterator_stop_on_chunksize(all_parsers):
     tm.assert_frame_equal(concat(result), expected)
 
 
+def test_nrows_iterator_without_chunksize(all_parsers):
+    # GH 59079: iterator=True with nrows but no chunksize must not raise TypeError
+    parser = all_parsers
+    data = """A,B,C
+foo,1,2,3
+bar,4,5,6
+baz,7,8,9
+"""
+    if parser.engine == "pyarrow":
+        msg = "The 'iterator' option is not supported with the 'pyarrow' engine"
+        with pytest.raises(ValueError, match=msg):
+            parser.read_csv(StringIO(data), iterator=True, nrows=2)
+        return  # only the ValueError is checked for the pyarrow engine
+
+    with parser.read_csv(StringIO(data), iterator=True, nrows=2) as reader:
+        result = reader.get_chunk()  # no size passed; nrows=2 bounds the chunk
+
+    expected = DataFrame(
+        [[1, 2, 3], [4, 5, 6]],
+        index=["foo", "bar"],
+        columns=["A", "B", "C"],
+    )
+    tm.assert_frame_equal(result, expected)
+
+
 @pytest.mark.parametrize(
     "kwargs", [{"iterator": True, "chunksize": 1}, {"iterator": True}, {"chunksize": 1}]
 )