Skip to content

Commit

Permalink
feat(python): add iterrows (#5945)
Browse files Browse the repository at this point in the history
  • Loading branch information
ritchie46 committed Dec 30, 2022
1 parent 553069e commit 75b8c01
Show file tree
Hide file tree
Showing 4 changed files with 68 additions and 43 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ Manipulation/selection
DataFrame.insert_at_idx
DataFrame.interpolate
DataFrame.item
DataFrame.iterrows
DataFrame.join
DataFrame.join_asof
DataFrame.limit
Expand Down
10 changes: 10 additions & 0 deletions py-polars/polars/internals/dataframe/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -6616,6 +6616,16 @@ def rows(self) -> list[tuple[Any, ...]]:
"""
return self._df.row_tuples()

def iterrows(self) -> Iterator[tuple[Any, ...]]:
    """
    Return an iterator over the rows of the DataFrame.

    Rows are produced one at a time, each through its own ``self.row(i)``
    call, for ``i`` running from 0 up to (but not including) ``self.height``.

    This is very expensive and should not be used
    in any performance critical code.
    """
    # Delegate to a lazy map so rows are still only fetched on demand.
    yield from map(self.row, range(self.height))

def shrink_to_fit(self: DF, in_place: bool = False) -> DF:
"""
Shrink DataFrame memory usage.
Expand Down
43 changes: 0 additions & 43 deletions py-polars/tests/unit/test_df.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@
import polars as pl
from polars.datatypes import DTYPE_TEMPORAL_UNITS
from polars.dependencies import zoneinfo
from polars.exceptions import NoRowsReturned, TooManyRowsReturned
from polars.internals.construction import iterable_to_pydf
from polars.testing import assert_frame_equal, assert_series_equal
from polars.testing.parametric import columns
Expand Down Expand Up @@ -893,42 +892,6 @@ def test_df_fold() -> None:
assert df_width_one.fold(lambda s1, s2: s1).series_equal(df["a"])


def test_row_tuple() -> None:
    """Exercise ``DataFrame.row`` by index and by predicate, plus its error cases."""
    frame = pl.DataFrame(
        {"a": ["foo", "bar", "2"], "b": [1, 2, 3], "c": [1.0, 2.0, 3.0]}
    )
    bar_row = ("bar", 2, 2.0)
    a_is_bar = pl.col("a") == "bar"

    # lookup by position; -1 addresses the last row
    assert frame.row(0) == ("foo", 1, 1.0)
    assert frame.row(1) == bar_row
    assert frame.row(-1) == ("2", 3, 3.0)

    # lookup by a predicate that selects exactly one row
    assert frame.row(by_predicate=a_is_bar) == bar_row
    assert frame.row(by_predicate=pl.col("b").is_in([2, 4, 6])) == bar_row

    # a predicate selecting several rows is rejected
    with pytest.raises(TooManyRowsReturned):
        frame.row(by_predicate=pl.col("b").is_in([1, 3, 5]))

    # a predicate selecting no rows is rejected
    with pytest.raises(NoRowsReturned):
        frame.row(by_predicate=pl.col("a") == "???")

    # 'index' and 'by_predicate' cannot be combined
    with pytest.raises(ValueError):
        frame.row(0, by_predicate=a_is_bar)

    # 'by_predicate' has to be passed by keyword
    with pytest.raises(TypeError):
        frame.row(None, pl.col("a") == "bar")  # type: ignore[misc]

    # a predicate is not accepted in the 'index' position
    with pytest.raises(TypeError):
        frame.row(pl.col("a") == "bar")  # type: ignore[arg-type]

    # one of 'index' and 'by_predicate' is required
    with pytest.raises(ValueError):
        frame.row()


def test_df_apply() -> None:
df = pl.DataFrame({"a": ["foo", "bar", "2"], "b": [1, 2, 3], "c": [1.0, 2.0, 3.0]})
out = df.apply(lambda x: len(x), None).to_series()
Expand Down Expand Up @@ -1188,12 +1151,6 @@ def test_to_html(df: pl.DataFrame) -> None:
assert "<table" in html


def test_rows() -> None:
    """Check ``rows`` returns row tuples in frame order, also after ``reverse``."""
    frame = pl.DataFrame({"a": [1, 2], "b": [1, 2]})
    forward = [(1, 1), (2, 2)]
    backward = [(2, 2), (1, 1)]

    assert frame.rows() == forward
    assert frame.reverse().rows() == backward


def test_rename(df: pl.DataFrame) -> None:
out = df.rename({"strings": "bars", "int": "foos"})
# check if we can select these new columns
Expand Down
57 changes: 57 additions & 0 deletions py-polars/tests/unit/test_rows.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
import pytest

import polars as pl
from polars.exceptions import NoRowsReturned, TooManyRowsReturned


def test_iterrows() -> None:
    """Check that ``iterrows`` yields every row as a tuple, in order, then stops."""
    frame = pl.DataFrame({"a": [1, 2, 3], "b": [None, False, None]})
    row_iter = frame.iterrows()

    # rows come back one per ``next`` call, null values surfacing as None
    for expected_row in [(1, None), (2, False), (3, None)]:
        assert next(row_iter) == expected_row

    # once all three rows are consumed the iterator is exhausted
    with pytest.raises(StopIteration):
        next(row_iter)


def test_row_tuple() -> None:
    """Exercise ``DataFrame.row`` by index and by predicate, plus its error cases."""
    frame = pl.DataFrame(
        {"a": ["foo", "bar", "2"], "b": [1, 2, 3], "c": [1.0, 2.0, 3.0]}
    )
    bar_row = ("bar", 2, 2.0)
    a_is_bar = pl.col("a") == "bar"

    # lookup by position; -1 addresses the last row
    assert frame.row(0) == ("foo", 1, 1.0)
    assert frame.row(1) == bar_row
    assert frame.row(-1) == ("2", 3, 3.0)

    # lookup by a predicate that selects exactly one row
    assert frame.row(by_predicate=a_is_bar) == bar_row
    assert frame.row(by_predicate=pl.col("b").is_in([2, 4, 6])) == bar_row

    # a predicate selecting several rows is rejected
    with pytest.raises(TooManyRowsReturned):
        frame.row(by_predicate=pl.col("b").is_in([1, 3, 5]))

    # a predicate selecting no rows is rejected
    with pytest.raises(NoRowsReturned):
        frame.row(by_predicate=pl.col("a") == "???")

    # 'index' and 'by_predicate' cannot be combined
    with pytest.raises(ValueError):
        frame.row(0, by_predicate=a_is_bar)

    # 'by_predicate' has to be passed by keyword
    with pytest.raises(TypeError):
        frame.row(None, pl.col("a") == "bar")  # type: ignore[misc]

    # a predicate is not accepted in the 'index' position
    with pytest.raises(TypeError):
        frame.row(pl.col("a") == "bar")  # type: ignore[arg-type]

    # one of 'index' and 'by_predicate' is required
    with pytest.raises(ValueError):
        frame.row()


def test_rows() -> None:
    """Check ``rows`` returns row tuples in frame order, also after ``reverse``."""
    frame = pl.DataFrame({"a": [1, 2], "b": [1, 2]})
    forward = [(1, 1), (2, 2)]
    backward = [(2, 2), (1, 1)]

    assert frame.rows() == forward
    assert frame.reverse().rows() == backward

0 comments on commit 75b8c01

Please sign in to comment.