Skip to content

Commit

Permalink
additional negative indexing support, and frame-level "take_every" (#…
Browse files Browse the repository at this point in the history
  • Loading branch information
alexander-beedie committed Jul 4, 2022
1 parent 585c10c commit 25b58e2
Show file tree
Hide file tree
Showing 6 changed files with 109 additions and 6 deletions.
1 change: 1 addition & 0 deletions py-polars/docs/source/reference/dataframe.rst
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,7 @@ Manipulation/ selection
DataFrame.slice
DataFrame.sort
DataFrame.tail
DataFrame.take_every
DataFrame.to_dummies
DataFrame.to_series
DataFrame.transpose
Expand Down
1 change: 1 addition & 0 deletions py-polars/docs/source/reference/lazyframe.rst
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,7 @@ Manipulation/ selection
LazyFrame.slice
LazyFrame.sort
LazyFrame.tail
LazyFrame.take_every
LazyFrame.unique
LazyFrame.unnest
LazyFrame.with_column
Expand Down
29 changes: 29 additions & 0 deletions py-polars/polars/internals/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -1882,6 +1882,8 @@ def to_series(self, index: int = 0) -> pli.Series:
]
"""
if index < 0:
index = len(self.columns) + index
return pli.wrap_s(self._df.select_at_idx(index))

def reverse(self: DF) -> DF:
Expand Down Expand Up @@ -1974,6 +1976,8 @@ def insert_at_idx(self, index: int, series: pli.Series) -> None:
└─────┴─────┴─────┘
"""
if index < 0:
index = len(self.columns) + index
self._df.insert_at_idx(index, series._s)

def filter(self: DF, predicate: pli.Expr) -> DF:
Expand Down Expand Up @@ -2275,6 +2279,8 @@ def replace_at_idx(self, index: int, series: pli.Series) -> None:
└───────┴─────┴─────┘
"""
if index < 0:
index = len(self.columns) + index
self._df.replace_at_idx(index, series._s)

@overload
Expand Down Expand Up @@ -4079,6 +4085,8 @@ def select_at_idx(self, idx: int) -> pli.Series:
]
"""
if idx < 0:
idx = len(self.columns) + idx
return pli.wrap_s(self._df.select_at_idx(idx))

def cleared(self: DF) -> DF:
Expand Down Expand Up @@ -5462,6 +5470,27 @@ def shrink_to_fit(self: DF, in_place: bool = False) -> DF | None:
df._df.shrink_to_fit()
return df

def take_every(self: DF, n: int) -> DF:
    """
    Return a new DataFrame made up of every nth row of this one.

    Parameters
    ----------
    n
        Gap between the rows that are kept; as the example below shows,
        the first row is always included.

    Examples
    --------
    >>> df = pl.DataFrame({"a": [1, 2, 3, 4], "b": [5, 6, 7, 8]})
    >>> df.take_every(2)
    shape: (2, 2)
    ┌─────┬─────┐
    │ a   ┆ b   │
    │ --- ┆ --- │
    │ i64 ┆ i64 │
    ╞═════╪═════╡
    │ 1   ┆ 5   │
    ├╌╌╌╌╌┼╌╌╌╌╌┤
    │ 3   ┆ 7   │
    └─────┴─────┘

    """
    # Delegate to the expression-level ``take_every``, applied to all columns.
    every_nth = pli.col("*").take_every(n)
    return self.select(every_nth)

def hash_rows(
self, k0: int = 0, k1: int = 1, k2: int = 2, k3: int = 3
) -> pli.Series:
Expand Down
21 changes: 21 additions & 0 deletions py-polars/polars/internals/lazy_frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -1782,6 +1782,27 @@ def with_row_count(self: LDF, name: str = "row_nr", offset: int = 0) -> LDF:
"""
return self._from_pyldf(self._ldf.with_row_count(name, offset))

def take_every(self: LDF, n: int) -> LDF:
    """
    Return a new LazyFrame made up of every nth row of this one.

    Parameters
    ----------
    n
        Gap between the rows that are kept; as the example below shows,
        the first row is always included.

    Examples
    --------
    >>> ldf = pl.DataFrame({"a": [1, 2, 3, 4], "b": [5, 6, 7, 8]}).lazy()
    >>> ldf.take_every(2).collect()
    shape: (2, 2)
    ┌─────┬─────┐
    │ a   ┆ b   │
    │ --- ┆ --- │
    │ i64 ┆ i64 │
    ╞═════╪═════╡
    │ 1   ┆ 5   │
    ├╌╌╌╌╌┼╌╌╌╌╌┤
    │ 3   ┆ 7   │
    └─────┴─────┘

    """
    # Delegate to the expression-level ``take_every``, applied to all columns.
    every_nth = pli.col("*").take_every(n)
    return self.select(every_nth)

def fill_null(self: LDF, fill_value: int | str | pli.Expr) -> LDF:
"""
Fill missing values with a literal or Expr.
Expand Down
56 changes: 50 additions & 6 deletions py-polars/tests/test_df.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
from hypothesis import given

import polars as pl
from polars.testing import assert_series_equal, columns, dataframes
from polars.testing import assert_frame_equal, assert_series_equal, columns, dataframes

if sys.version_info >= (3, 8):
from typing import Literal
Expand All @@ -32,8 +32,7 @@ def test_repr(df: pl.DataFrame) -> None:
# print(df)


# note: *temporarily* constraining dtypes for this test until #3843 and a windows-specific
# fix for a related date bug is merged (blocking the PR to merge hypothesis code).
# note: temporarily constraining dtypes for this test (possible windows-specific date bug)
@given(df=dataframes(allowed_dtypes=[pl.Boolean, pl.UInt64, pl.Utf8]))
def test_null_count(df: pl.DataFrame) -> None:
null_count, ncols = df.null_count(), len(df.columns)
Expand Down Expand Up @@ -548,6 +547,54 @@ def test_assignment() -> None:
assert df["foo"].to_list() == [1, 9, 9]


def test_select_at_idx() -> None:
    """Negative column indices select the same Series as positive ones."""
    df = pl.DataFrame({"x": [1, 2, 3], "y": [2, 3, 4], "z": [3, 4, 5]})
    n_cols = len(df.columns)
    for pos_idx in range(n_cols):
        neg_idx = pos_idx - n_cols  # e.g. 0 -> -3, 2 -> -1
        by_positive = df.select_at_idx(pos_idx)
        by_negative = df.select_at_idx(neg_idx)
        assert_series_equal(by_positive, by_negative)


def test_insert_at_idx() -> None:
    """Insert columns at a positive and a negative index; check the order."""
    df = pl.DataFrame({"z": [3, 4, 5]})
    insertions = [
        (0, pl.Series("x", [1, 2, 3])),  # becomes the first column
        (-1, pl.Series("y", [2, 3, 4])),  # lands just before the last column
    ]
    for position, series in insertions:
        df.insert_at_idx(position, series)

    expected_df = pl.DataFrame({"x": [1, 2, 3], "y": [2, 3, 4], "z": [3, 4, 5]})
    assert_frame_equal(expected_df, df)


def test_replace_at_idx() -> None:
    """Replace every column in place, addressing two of them negatively."""
    df = pl.DataFrame({"x": [1, 2, 3], "y": [2, 3, 4], "z": [3, 4, 5]})
    replacements = [
        (0, pl.Series("a", [4, 5, 6])),
        (-2, pl.Series("b", [5, 6, 7])),  # middle column, counted from the end
        (-1, pl.Series("c", [6, 7, 8])),  # last column
    ]
    for position, series in replacements:
        df.replace_at_idx(position, series)

    expected_df = pl.DataFrame({"a": [4, 5, 6], "b": [5, 6, 7], "c": [6, 7, 8]})
    assert_frame_equal(expected_df, df)


def test_to_series() -> None:
    """``to_series`` accepts positive and negative indices; default is column 0."""
    df = pl.DataFrame({"x": [1, 2, 3], "y": [2, 3, 4], "z": [3, 4, 5]})

    # With no argument the first column is returned.
    assert_series_equal(df.to_series(), df["x"])

    n_cols = len(df.columns)
    for pos_idx, name in enumerate(df.columns):
        expected = df[name]
        assert_series_equal(df.to_series(pos_idx), expected)
        # The matching negative index must resolve to the same column.
        assert_series_equal(df.to_series(pos_idx - n_cols), expected)


def test_take_every() -> None:
    """Taking every 2nd row of a 4-row frame keeps the 1st and 3rd rows."""
    source = {"a": [1, 2, 3, 4], "b": ["w", "x", "y", "z"]}
    df = pl.DataFrame(source)
    result = df.take_every(2)

    expected_df = pl.DataFrame({"a": [1, 3], "b": ["w", "y"]})
    assert_frame_equal(expected_df, result)


def test_slice() -> None:
df = pl.DataFrame({"a": [2, 1, 3], "b": ["a", "b", "c"]})
expected = pl.DataFrame({"a": [1, 3], "b": ["b", "c"]})
Expand Down Expand Up @@ -1219,9 +1266,6 @@ def test_lazy_functions() -> None:
expected = 3
assert np.isclose(out.select_at_idx(9), expected)
assert np.isclose(pl.last(df["b"]), expected)
expected = 3
assert np.isclose(out.select_at_idx(9), expected)
assert np.isclose(pl.last(df["b"]), expected)


def test_multiple_column_sort() -> None:
Expand Down
7 changes: 7 additions & 0 deletions py-polars/tests/test_lazy.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

import polars as pl
from polars import col, lit, map_binary, when
from polars.testing import assert_frame_equal


def test_lazy() -> None:
Expand Down Expand Up @@ -50,6 +51,12 @@ def test_set_null() -> None:
assert s[2] is None


def test_take_every() -> None:
    """Lazy ``take_every(2)`` keeps the 1st and 3rd rows once collected."""
    source = {"a": [1, 2, 3, 4], "b": ["w", "x", "y", "z"]}
    df = pl.DataFrame(source).lazy()
    result = df.take_every(2).collect()

    expected_df = pl.DataFrame({"a": [1, 3], "b": ["w", "y"]})
    assert_frame_equal(expected_df, result)


def test_agg() -> None:
df = pl.DataFrame({"a": [1, 2, 3], "b": [1.0, 2.0, 3.0]})
ldf = df.lazy().min()
Expand Down

0 comments on commit 25b58e2

Please sign in to comment.