Skip to content

Commit

Permalink
Some API alignment (missing funcs) between DataFrame, LazyFrame, …
Browse files Browse the repository at this point in the history
…and `Series` (#3791)
  • Loading branch information
alexander-beedie committed Jun 24, 2022
1 parent fb598e4 commit e3daafd
Show file tree
Hide file tree
Showing 13 changed files with 131 additions and 18 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -16,3 +16,4 @@ venv/
*.iml
coverage.lcov
coverage.xml
.DS_Store
1 change: 1 addition & 0 deletions py-polars/.gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,4 @@ wheels/
!Cargo.lock
target/
venv/
.DS_Store
1 change: 1 addition & 0 deletions py-polars/docs/source/reference/dataframe.rst
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,7 @@ Manipulation/ selection
DataFrame.select_at_idx
DataFrame.replace_at_idx
DataFrame.sort
DataFrame.reverse
DataFrame.replace
DataFrame.slice
DataFrame.limit
Expand Down
1 change: 1 addition & 0 deletions py-polars/docs/source/reference/lazyframe.rst
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ Manipulation/ selection
:toctree: api/

LazyFrame.with_row_count
LazyFrame.clone
LazyFrame.inspect
LazyFrame.filter
LazyFrame.select
Expand Down
2 changes: 2 additions & 0 deletions py-polars/docs/source/reference/series.rst
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@ Descriptive stats
Series.chunk_lengths
Series.n_chunks
Series.null_count
Series.is_empty
Series.is_null
Series.is_not_null
Series.is_finite
Expand Down Expand Up @@ -160,6 +161,7 @@ Manipulation/ selection
Series.take_every
Series.sort
Series.argsort
Series.reverse
Series.take
Series.shrink_to_fit
Series.explode
Expand Down
34 changes: 30 additions & 4 deletions py-polars/polars/internals/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -1729,7 +1729,7 @@ def __getitem__(
if isinstance(item, slice):
# special case df[::-1]
if item.start is None and item.stop is None and item.step == -1:
return self.select(pli.col("*").reverse())
return self.reverse()

if getattr(item, "end", False):
raise ValueError("A slice with steps larger than 1 is not supported.")
Expand Down Expand Up @@ -1883,6 +1883,32 @@ def to_series(self, index: int = 0) -> "pli.Series":
"""
return pli.wrap_s(self._df.select_at_idx(index))

def reverse(self: DF) -> DF:
    """
    Return the DataFrame with its rows in reverse order.

    The reversal is expressed as a single wildcard expression that is
    applied to every column through ``select``.

    Examples
    --------
    >>> df = pl.DataFrame(
    ...     {
    ...         "key": ["a", "b", "c"],
    ...         "val": [1, 2, 3],
    ...     }
    ... )
    >>> df.reverse()
    shape: (3, 2)
    ┌─────┬─────┐
    │ key ┆ val │
    │ --- ┆ --- │
    │ str ┆ i64 │
    ╞═════╪═════╡
    │ c   ┆ 3   │
    ├╌╌╌╌╌┼╌╌╌╌╌┤
    │ b   ┆ 2   │
    ├╌╌╌╌╌┼╌╌╌╌╌┤
    │ a   ┆ 1   │
    └─────┴─────┘

    """
    # "*" selects all columns; each one is reversed by the same expression.
    reversed_columns = pli.col("*").reverse()
    return self.select(reversed_columns)

def rename(self: DF, mapping: Dict[str, str]) -> DF:
"""
Rename column names.
Expand Down Expand Up @@ -2443,7 +2469,7 @@ def replace(self, column: str, new_col: "pli.Series") -> None:
"""
self._df.replace(column, new_col.inner())

def slice(self: DF, offset: int, length: int) -> DF:
def slice(self: DF, offset: int, length: Optional[int] = None) -> DF:
"""
Slice this DataFrame over the rows direction.
Expand Down Expand Up @@ -2477,7 +2503,7 @@ def slice(self: DF, offset: int, length: int) -> DF:
└─────┴─────┴─────┘
"""
if length < 0:
if (length is not None) and length < 0:
length = self.height - offset + length
return self._from_pydf(self._df.slice(offset, length))

Expand Down Expand Up @@ -4044,7 +4070,7 @@ def select_at_idx(self, idx: int) -> "pli.Series":

def clone(self: DF) -> DF:
"""
Very cheap deep clone.
Cheap deepcopy/clone.
"""
return self._from_pydf(self._df.clone())

Expand Down
14 changes: 13 additions & 1 deletion py-polars/polars/internals/lazy_frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -756,6 +756,18 @@ def cache(self: LDF) -> LDF:
"""
return self._from_pyldf(self._ldf.cache())

def clone(self: LDF) -> LDF:
    """
    Cheap deepcopy/clone of this LazyFrame.

    The wrapped ``_ldf`` object is cloned and the result is re-wrapped via
    ``_from_pyldf``.
    """
    cloned_ldf = self._ldf.clone()
    return self._from_pyldf(cloned_ldf)

def __copy__(self: LDF) -> LDF:
    """
    Hook used by ``copy.copy``; delegates to ``clone``.
    """
    duplicate = self.clone()
    return duplicate

def __deepcopy__(self: LDF, memodict=None) -> LDF:  # type: ignore
    """
    Hook used by ``copy.deepcopy``; delegates to ``clone``.

    Parameters
    ----------
    memodict
        Memo dictionary passed in by ``copy.deepcopy``. It is accepted only
        to satisfy the protocol signature and is not used.
    """
    # The default is ``None`` instead of ``{}``: a dict-literal default is a
    # single object created at definition time and shared by every call.
    return self.clone()

def filter(self: LDF, predicate: Union["pli.Expr", str]) -> LDF:
"""
Filter the rows in the DataFrame based on a predicate expression.
Expand Down Expand Up @@ -1645,7 +1657,7 @@ def shift_and_fill(
fill_value = pli.lit(fill_value)
return self._from_pyldf(self._ldf.shift_and_fill(periods, fill_value._pyexpr))

def slice(self: LDF, offset: int, length: int) -> LDF:
def slice(self: LDF, offset: int, length: Optional[int] = None) -> LDF:
"""
Slice the DataFrame.
Expand Down
32 changes: 31 additions & 1 deletion py-polars/polars/internals/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -1225,7 +1225,7 @@ def limit(self, num_elements: int = 10) -> "Series":
"""
return wrap_s(self._s.limit(num_elements))

def slice(self, offset: int, length: int) -> "Series":
def slice(self, offset: int, length: Optional[int] = None) -> "Series":
"""
Get a slice of this Series.
Expand Down Expand Up @@ -1561,6 +1561,19 @@ def has_validity(self) -> bool:
"""
return self._s.has_validity()

def is_empty(self) -> bool:
    """
    Report whether the Series holds zero elements.

    Examples
    --------
    >>> s = pl.Series("a", [], dtype=pl.Float32)
    >>> s.is_empty()
    True

    """
    element_count = self.len()
    return element_count == 0

def is_null(self) -> "Series":
"""
Get mask of null values.
Expand Down Expand Up @@ -2008,6 +2021,23 @@ def rechunk(self, in_place: bool = False) -> Optional["Series"]:
else:
return wrap_s(opt_s)

def reverse(self) -> "Series":
    """
    Return Series in reverse order.

    Examples
    --------
    >>> s = pl.Series("a", [1, 2, 3], dtype=pl.Int8)
    >>> s.reverse()
    shape: (3,)
    Series: 'a' [i8]
    [
        3
        2
        1
    ]

    """
    # Reverse the wrapped series object and re-wrap the result as a Series.
    return wrap_s(self._s.reverse())

def is_numeric(self) -> bool:
"""
Check if this Series datatype is numeric.
Expand Down
6 changes: 4 additions & 2 deletions py-polars/src/dataframe.rs
Original file line number Diff line number Diff line change
Expand Up @@ -992,8 +992,10 @@ impl PyDataFrame {
Ok(())
}

pub fn slice(&self, offset: usize, length: usize) -> Self {
let df = self.df.slice(offset as i64, length);
pub fn slice(&self, offset: usize, length: Option<usize>) -> Self {
let df = self
.df
.slice(offset as i64, length.unwrap_or(self.df.height()));
df.into()
}

Expand Down
4 changes: 2 additions & 2 deletions py-polars/src/lazy/dataframe.rs
Original file line number Diff line number Diff line change
Expand Up @@ -652,9 +652,9 @@ impl PyLazyFrame {
.into()
}

pub fn slice(&self, offset: i64, len: IdxSize) -> Self {
pub fn slice(&self, offset: i64, len: Option<IdxSize>) -> Self {
let ldf = self.ldf.clone();
ldf.slice(offset, len).into()
ldf.slice(offset, len.unwrap_or(IdxSize::MAX)).into()
}

pub fn tail(&self, n: IdxSize) -> Self {
Expand Down
12 changes: 10 additions & 2 deletions py-polars/src/series.rs
Original file line number Diff line number Diff line change
Expand Up @@ -412,6 +412,10 @@ impl PySeries {
self.series.cumprod(reverse).into()
}

// Reverse the wrapped series and convert the result back into `Self`.
pub fn reverse(&self) -> Self {
    let reversed = self.series.reverse();
    reversed.into()
}

pub fn chunk_lengths(&self) -> Vec<usize> {
self.series.chunk_lengths().collect()
}
Expand Down Expand Up @@ -472,8 +476,10 @@ impl PySeries {
series.into()
}

pub fn slice(&self, offset: i64, length: usize) -> Self {
let series = self.series.slice(offset, length);
pub fn slice(&self, offset: i64, length: Option<usize>) -> Self {
let series = self
.series
.slice(offset, length.unwrap_or(self.series.len()));
series.into()
}

Expand Down Expand Up @@ -1413,9 +1419,11 @@ impl PySeries {
let out = self.series.reshape(&dims).map_err(PyPolarsErr::from)?;
Ok(out.into())
}

// Shuffle the wrapped series with the given `seed` and convert the result
// back into `Self`.
pub fn shuffle(&self, seed: u64) -> Self {
    let shuffled = self.series.shuffle(seed);
    shuffled.into()
}

pub fn extend_constant(&self, value: Wrap<AnyValue>, n: usize) -> PyResult<Self> {
let value = value.0;
let out = self
Expand Down
10 changes: 8 additions & 2 deletions py-polars/tests/test_df.py
Original file line number Diff line number Diff line change
Expand Up @@ -512,8 +512,13 @@ def test_assignment() -> None:

def test_slice() -> None:
df = pl.DataFrame({"a": [2, 1, 3], "b": ["a", "b", "c"]})
df = df.slice(1, 2)
assert df.frame_equal(pl.DataFrame({"a": [1, 3], "b": ["b", "c"]}))
expected = pl.DataFrame({"a": [1, 3], "b": ["b", "c"]})
for slice_params in (
[1, 10], # slice > len(df)
[1, 2], # slice == len(df)
[1], # optional len
):
assert df.slice(*slice_params).frame_equal(expected)


def test_null_count() -> None:
Expand Down Expand Up @@ -1289,6 +1294,7 @@ def test_to_html(df: pl.DataFrame) -> None:
def test_rows() -> None:
    """Check ``rows()`` on a small frame, both as built and after ``reverse()``."""
    frame = pl.DataFrame({"a": [1, 2], "b": [1, 2]})

    forward_rows = frame.rows()
    assert forward_rows == [(1, 1), (2, 2)]

    backward_rows = frame.reverse().rows()
    assert backward_rows == [(2, 2), (1, 1)]


def test_rename(df: pl.DataFrame) -> None:
Expand Down
31 changes: 27 additions & 4 deletions py-polars/tests/test_series.py
Original file line number Diff line number Diff line change
Expand Up @@ -233,16 +233,23 @@ def test_append_extend() -> None:

def test_various() -> None:
a = pl.Series("a", [1, 2])

assert a.is_null().sum() == 0
assert a.name == "a"

a.rename("b", in_place=True)
assert a.name == "b"
assert a.len() == 2
assert len(a) == 2
b = a.slice(1, 1)
assert b.len() == 1
assert b.series_equal(pl.Series("b", [2]))

for b in (
a.slice(1, 10),
a.slice(1, 1),
a.slice(1, None),
a.slice(1),
):
assert b.len() == 1
assert b.series_equal(pl.Series("b", [2]))

a.append(b)
assert a.series_equal(pl.Series("b", [1, 2, 2]))

Expand All @@ -259,6 +266,7 @@ def test_various() -> None:

assert a.take([2, 3]).series_equal(pl.Series("a", [1, 4]))
assert a.is_numeric()

a = pl.Series("bool", [True, False])
assert not a.is_numeric()

Expand Down Expand Up @@ -624,12 +632,19 @@ def test_iter() -> None:
def test_empty() -> None:
    """Check dtype and ``is_empty()`` for several ways of building an empty Series."""
    # Each case pairs an empty Series with the dtype it is expected to carry.
    cases = [
        (pl.Series(dtype=pl.Int8), pl.Int8),
        (pl.Series(), pl.Float32),
        (pl.Series("name", []), pl.Float32),
        (pl.Series(values=(), dtype=pl.Int8), pl.Int8),
    ]
    for series, expected_dtype in cases:
        assert series.dtype == expected_dtype
        assert series.is_empty()


def test_describe() -> None:
Expand Down Expand Up @@ -1583,3 +1598,11 @@ def test_drop_nan_ignore_null_3525() -> None:
3.0,
4.0,
]


def test_reverse() -> None:
    """Check ``Series.reverse()`` on an integer Series and a string Series with a null."""
    cases = (
        ([1, 2, 3, 4, 5], [5, 4, 3, 2, 1]),
        (["a", "b", None, "y", "x"], ["x", "y", None, "b", "a"]),
    )
    for values, expected in cases:
        reversed_values = pl.Series("values", values).reverse().to_list()
        assert reversed_values == expected

0 comments on commit e3daafd

Please sign in to comment.