Skip to content

Commit

Permalink
feat[rust, python]: Support length=None for Expr.slice (#4603)
Browse files (browse the repository at this point in the history)
  • Loading branch information
stinodego committed Aug 29, 2022
1 parent 8bc22e0 commit fbfc716
Show file tree
Hide file tree
Showing 11 changed files with 66 additions and 52 deletions.
14 changes: 9 additions & 5 deletions polars/polars-lazy/src/physical_plan/expressions/slice.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ use polars_core::prelude::*;
use polars_core::utils::{slice_offsets, CustomIterTools};
use polars_core::POOL;
use rayon::prelude::*;
use AnyValue::Null;

use crate::physical_plan::state::ExecutionState;
use crate::prelude::*;
Expand All @@ -18,7 +19,7 @@ pub struct SliceExpr {

fn extract_offset(offset: &Series) -> Result<i64> {
if offset.len() > 1 {
return Err(PolarsError::ComputeError(format!("Invalid argument to slice; expected an offset literal but got an Series of length {}", offset.len()).into()));
return Err(PolarsError::ComputeError(format!("Invalid argument to slice; expected an offset literal but got a Series of length {}", offset.len()).into()));
}
offset.get(0).extract::<i64>().ok_or_else(|| {
PolarsError::ComputeError(format!("could not get an offset from {:?}", offset).into())
Expand All @@ -27,11 +28,14 @@ fn extract_offset(offset: &Series) -> Result<i64> {

fn extract_length(length: &Series) -> Result<usize> {
if length.len() > 1 {
return Err(PolarsError::ComputeError(format!("Invalid argument to slice; expected a length literal but got an Series of length {}", length.len()).into()));
return Err(PolarsError::ComputeError(format!("Invalid argument to slice; expected a length literal but got a Series of length {}", length.len()).into()));
}
match length.get(0) {
Null => Ok(usize::MAX),
v => v.extract::<usize>().ok_or_else(|| {
PolarsError::ComputeError(format!("could not get a length from {:?}", length).into())
}),
}
length.get(0).extract::<usize>().ok_or_else(|| {
PolarsError::ComputeError(format!("could not get a length from {:?}", length).into())
})
}

fn extract_args(offset: &Series, length: &Series) -> Result<(i64, usize)> {
Expand Down
7 changes: 4 additions & 3 deletions py-polars/polars/internals/dataframe/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -2582,14 +2582,15 @@ def replace(self, column: str, new_col: pli.Series) -> None:

def slice(self: DF, offset: int, length: int | None = None) -> DF:
"""
Slice this DataFrame over the rows direction.
Get a slice of this DataFrame.
Parameters
----------
offset
Offset index.
Start index. Negative indexing is supported.
length
Length of the slice.
Length of the slice. If set to ``None``, all rows starting at the offset
will be selected.
Examples
--------
Expand Down
17 changes: 9 additions & 8 deletions py-polars/polars/internals/expr/expr.py
Original file line number Diff line number Diff line change
Expand Up @@ -1127,23 +1127,24 @@ def len(self) -> Expr:
"""
return self.count()

def slice(self, offset: int | Expr, length: int | Expr) -> Expr:
def slice(self, offset: int | Expr, length: int | Expr | None = None) -> Expr:
"""
Slice the Series.
Get a slice of this expression.
Parameters
----------
offset
Start index.
Start index. Negative indexing is supported.
length
Length of the slice.
Length of the slice. If set to ``None``, all rows starting at the offset
will be selected.
Examples
--------
>>> df = pl.DataFrame(
... {
... "a": [8, 9, 10],
... "b": [None, 4, 4],
... "a": [8, 9, 10, 11],
... "b": [None, 4, 4, 4],
... }
... )
>>> df.select(pl.all().slice(1, 2))
Expand All @@ -1159,9 +1160,9 @@ def slice(self, offset: int | Expr, length: int | Expr) -> Expr:
└─────┴─────┘
"""
if isinstance(offset, int):
if not isinstance(offset, Expr):
offset = pli.lit(offset)
if isinstance(length, int):
if not isinstance(length, Expr):
length = pli.lit(length)
return wrap_expr(self._pyexpr.slice(offset._pyexpr, length._pyexpr))

Expand Down
9 changes: 5 additions & 4 deletions py-polars/polars/internals/expr/list.py
Original file line number Diff line number Diff line change
Expand Up @@ -508,16 +508,17 @@ def shift(self, periods: int = 1) -> pli.Expr:
"""
return pli.wrap_expr(self._pyexpr.lst_shift(periods))

def slice(self, offset: int, length: int) -> pli.Expr:
def slice(self, offset: int, length: int | None = None) -> pli.Expr:
"""
Slice every sublist
Slice every sublist.
Parameters
----------
offset
Take the values from this index offset.
Start index. Negative indexing is supported.
length
The length of the slice to take.
Length of the slice. If set to ``None`` (default), the slice is taken to the
end of the list.
Examples
--------
Expand Down
16 changes: 9 additions & 7 deletions py-polars/polars/internals/expr/string.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

import polars.internals as pli
from polars.datatypes import DataType, Date, Datetime, Time, is_polars_dtype
from polars.utils import deprecated_alias

if TYPE_CHECKING:
from polars.internals.type_aliases import TransferEncoding
Expand Down Expand Up @@ -1036,22 +1037,23 @@ def replace_all(
self._pyexpr.str_replace_all(pattern._pyexpr, value._pyexpr, literal)
)

def slice(self, start: int, length: int | None = None) -> pli.Expr:
@deprecated_alias(start="offset")
def slice(self, offset: int, length: int | None = None) -> pli.Expr:
"""
Create subslices of the string values of a Utf8 Series.
Parameters
----------
start
Starting index of the slice (zero-indexed). Negative indexing
may be used.
offset
Start index. Negative indexing is supported.
length
Optional length of the slice. If None (default), the slice is taken to the
Length of the slice. If set to ``None`` (default), the slice is taken to the
end of the string.
Returns
-------
Series of Utf8 type
Expr
Series of dtype Utf8.
Examples
--------
Expand Down Expand Up @@ -1095,4 +1097,4 @@ def slice(self, start: int, length: int | None = None) -> pli.Expr:
└─────────────┴──────────┘
"""
return pli.wrap_expr(self._pyexpr.str_slice(start, length))
return pli.wrap_expr(self._pyexpr.str_slice(offset, length))
7 changes: 4 additions & 3 deletions py-polars/polars/internals/lazyframe/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -1792,14 +1792,15 @@ def shift_and_fill(

def slice(self: LDF, offset: int, length: int | None = None) -> LDF:
"""
Slice the DataFrame.
Get a slice of this DataFrame.
Parameters
----------
offset
Start index.
Start index. Negative indexing is supported.
length
Length of the slice.
Length of the slice. If set to ``None``, all rows starting at the offset
will be selected.
Examples
--------
Expand Down
9 changes: 5 additions & 4 deletions py-polars/polars/internals/series/list.py
Original file line number Diff line number Diff line change
Expand Up @@ -198,16 +198,17 @@ def shift(self, periods: int = 1) -> pli.Series:
"""

def slice(self, offset: int, length: int) -> pli.Series:
def slice(self, offset: int, length: int | None = None) -> pli.Series:
"""
Slice every sublist
Slice every sublist.
Parameters
----------
offset
Take the values from this index offset
Start index. Negative indexing is supported.
length
The length of the slice to take
Length of the slice. If set to ``None`` (default), the slice is taken to the
end of the list.
Examples
--------
Expand Down
8 changes: 4 additions & 4 deletions py-polars/polars/internals/series/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -1348,13 +1348,14 @@ def slice(self, offset: int, length: int | None = None) -> Series:
Parameters
----------
offset
Offset index.
Start index. Negative indexing is supported.
length
Length of the slice.
Length of the slice. If set to ``None``, all rows starting at the offset
will be selected.
Examples
--------
>>> s = pl.Series("a", [1, 2, 3])
>>> s = pl.Series("a", [1, 2, 3, 4])
>>> s.slice(1, 2)
shape: (2,)
Series: 'a' [i64]
Expand All @@ -1364,7 +1365,6 @@ def slice(self, offset: int, length: int | None = None) -> Series:
]
"""
return wrap_s(self._s.slice(offset, length))

def append(self, other: Series, append_chunks: bool = True) -> None:
"""
Expand Down
18 changes: 12 additions & 6 deletions py-polars/polars/internals/series/string.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import polars.internals as pli
from polars.datatypes import Date, Datetime, Time
from polars.internals.series.utils import expr_dispatch
from polars.utils import deprecated_alias

if TYPE_CHECKING:
from polars.internals.type_aliases import TransferEncoding
Expand Down Expand Up @@ -708,22 +709,23 @@ def to_lowercase(self) -> pli.Series:
def to_uppercase(self) -> pli.Series:
"""Modify the strings to their uppercase equivalent."""

def slice(self, start: int, length: int | None = None) -> pli.Series:
@deprecated_alias(start="offset")
def slice(self, offset: int, length: int | None = None) -> pli.Series:
"""
Create subslices of the string values of a Utf8 Series.
Parameters
----------
start
Starting index of the slice (zero-indexed). Negative indexing
may be used.
offset
Start index. Negative indexing is supported.
length
Optional length of the slice. If None (default), the slice is taken to the
Length of the slice. If set to ``None`` (default), the slice is taken to the
end of the string.
Returns
-------
Series of Utf8 type
Series
Series of dtype Utf8.
Examples
--------
Expand Down Expand Up @@ -751,3 +753,7 @@ def slice(self, start: int, length: int | None = None) -> pli.Series:
]
"""
s = pli.wrap_s(self._s)
return (
s.to_frame().select(pli.col(s.name).str.slice(offset, length)).to_series()
)
6 changes: 5 additions & 1 deletion py-polars/src/lazy/dsl.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1310,7 +1310,11 @@ impl PyExpr {
self.inner.clone().arr().shift(periods).into()
}

fn lst_slice(&self, offset: i64, length: usize) -> Self {
fn lst_slice(&self, offset: i64, length: Option<usize>) -> Self {
let length = match length {
Some(i) => i,
None => usize::MAX,
};
self.inner.clone().arr().slice(offset, length).into()
}

Expand Down
7 changes: 0 additions & 7 deletions py-polars/src/series.rs
Original file line number Diff line number Diff line change
Expand Up @@ -480,13 +480,6 @@ impl PySeries {
self.series.n_chunks()
}

pub fn slice(&self, offset: i64, length: Option<usize>) -> Self {
let series = self
.series
.slice(offset, length.unwrap_or_else(|| self.series.len()));
series.into()
}

pub fn append(&mut self, other: &PySeries) -> PyResult<()> {
self.series
.append(&other.series)
Expand Down

0 comments on commit fbfc716

Please sign in to comment.