Skip to content

Commit

Permalink
Quiet an unnecessary warning (tests), and minor optimisation for slices with negative stride (#3913)
Browse files Browse the repository at this point in the history
  • Loading branch information
alexander-beedie committed Jul 6, 2022
1 parent 004958f commit b1f2dc5
Show file tree
Hide file tree
Showing 3 changed files with 55 additions and 35 deletions.
63 changes: 39 additions & 24 deletions py-polars/polars/internals/functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -293,12 +293,12 @@ def __init__(self, obj: FrameOrSeries):
self.obj = obj

@staticmethod
def _as_original(lazy: "pli.LazyFrame", obj: FrameOrSeries) -> FrameOrSeries:
def _as_original(lazy: "pli.LazyFrame", original: FrameOrSeries) -> FrameOrSeries:
"""
Return lazy variant back to its original type.
"""
frame = lazy.collect()
return frame if isinstance(obj, pli.DataFrame) else frame.to_series()
return frame if isinstance(original, pli.DataFrame) else frame.to_series()

@staticmethod
def _lazify(obj: FrameOrSeries) -> "pli.LazyFrame":
Expand All @@ -311,52 +311,66 @@ def _slice_positive(self, obj: "pli.LazyFrame") -> "pli.LazyFrame":
"""
Logic for slices with positive stride.
"""
# note: at this point stride is guaranteed to be > 1
return obj.slice(self.start, self.slice_length).take_every(self.stride)

def _slice_negative(self, obj: "pli.LazyFrame") -> "pli.LazyFrame":
    """
    Logic for slices with negative stride.

    Slices ``obj`` starting at ``self.stop + 1`` for ``self.slice_length``
    rows, then reverses the result and, when the absolute stride is greater
    than one, applies ``take_every`` with that absolute stride.

    Parameters
    ----------
    obj
        Lazy frame to slice.

    Returns
    -------
    The sliced lazy frame. A single-row slice is returned as-is, without
    the reverse / ``take_every`` steps.
    """
    # apply slice before reversing (more efficient)
    # note: the window starts at 'stop + 1'; presumably 'stop' is the exclusive
    # lower bound of the normalised negative-stride slice (confirm against
    # the slice setup logic)
    stride = abs(self.stride)
    lazyslice = obj.slice(self.stop + 1, self.slice_length)

    # potential early-exit if single row (reversing one row changes nothing)
    if self.slice_length == 1:
        return lazyslice
    else:
        # reverse frame, applying 'take_every' if stride > 1
        lazyslice = lazyslice.reverse()
        return lazyslice.take_every(stride) if (stride > 1) else lazyslice

def _slice_setup(self, s: slice) -> None:
"""
Normalise slice bounds, identify unbounded and/or zero-length slices.
"""
# can normalise slice indices as we know object size
obj_len = len(self.obj)
start, stop, stride = slice(s.start, s.stop, s.step).indices(obj_len)

# check if slice is actually unbounded
if stride >= 1:
self.is_unbounded = start <= 0 and stop >= obj_len
self.is_unbounded = (start <= 0) and (stop >= obj_len)
else:
self.is_unbounded = stop is None and (
start is None or (start >= obj_len - 1)
)
self.is_unbounded = (stop == -1) and (start >= obj_len - 1)

self._positive_indices = start >= 0 and stop >= 0
self.slice_length = (
0
if self.obj.is_empty()
or (
(start == stop)
or (stride > 0 and start > stop)
or (stride < 0 and start < stop)

# determine slice length
if self.obj.is_empty():
self.slice_length = 0
elif self.is_unbounded:
self.slice_length = obj_len
else:
self.slice_length = (
0
if (
(start == stop)
or (stride > 0 and start > stop)
or (stride < 0 and start < stop)
)
else abs(stop - start)
)
else abs(stop - start)
)
self.start, self.stop, self.stride = start, stop, stride

def apply(self, s: slice) -> FrameOrSeries:
"""
Apply a slice operation, taking advantage of any potential fast paths.
"""
# normalise slice
self._slice_setup(s)

# check for fast-paths / early-exit
# check for fast-paths / single-operation calls
if self.slice_length == 0:
return self.obj.cleared()

Expand All @@ -365,11 +379,12 @@ def apply(self, s: slice) -> FrameOrSeries:

elif self._positive_indices and self.stride == 1:
return self.obj.slice(self.start, self.slice_length)

lazyobj = self._lazify(self.obj)
sliced = (
self._slice_positive(lazyobj)
if self.stride > 0
else self._slice_negative(lazyobj)
)
return self._as_original(sliced, self.obj)
else:
# multi-operation call; make lazy
lazyobj = self._lazify(self.obj)
sliced = (
self._slice_positive(lazyobj)
if self.stride > 0
else self._slice_negative(lazyobj)
)
return self._as_original(sliced, self.obj)
17 changes: 11 additions & 6 deletions py-polars/polars/testing.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,15 @@
from __future__ import annotations

import random
import warnings
from dataclasses import dataclass
from datetime import datetime
from functools import reduce
from typing import Any, Callable, Sequence

try:
from hypothesis import settings
from hypothesis.errors import InvalidArgument
from hypothesis.errors import InvalidArgument, NonInteractiveExampleWarning
from hypothesis.strategies import (
SearchStrategy,
booleans,
Expand Down Expand Up @@ -380,7 +381,7 @@ def __post_init__(self) -> None:
self.null_probability < 0 or self.null_probability > 1
):
raise InvalidArgument(
f"null_probability should be between 0.0 and 1.0 or None; found {self.null_probability}"
f"null_probability should be between 0.0 and 1.0; found {self.null_probability}"
)
if self.dtype is None and not self.strategy:
self.dtype = random.choice(strategy_dtypes)
Expand All @@ -392,10 +393,14 @@ def __post_init__(self) -> None:
else:
# given a custom strategy, but no explicit dtype. infer one
# from the first non-None value that the strategy produces.
sample_value_iter = (self.strategy.example() for _ in range(100)) # type: ignore[union-attr]
sample_value_type = type(
next(e for e in sample_value_iter if e is not None)
)
with warnings.catch_warnings():
# note: usually you should not call "example()" outside of an interactive shell, hence
# the warning. however, here it is reasonable to do so, so we catch and ignore it
warnings.simplefilter("ignore", NonInteractiveExampleWarning)
sample_value_iter = (self.strategy.example() for _ in range(100)) # type: ignore[union-attr]
sample_value_type = type(
next(e for e in sample_value_iter if e is not None)
)
if sample_value_type is not None:
self.dtype = py_type_to_dtype(sample_value_type)
else:
Expand Down
10 changes: 5 additions & 5 deletions py-polars/tests_parametric/test_testing.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,21 +21,21 @@ def test_strategy_classes(df: pl.DataFrame, lf: pl.LazyFrame, srs: pl.Series) ->

@given(
df1=dataframes(cols=5, size=5),
df2=dataframes(min_cols=10, max_cols=20, min_size=5, max_size=25),
df2=dataframes(min_cols=2, max_cols=5, min_size=3, max_size=8),
s1=series(size=5),
s2=series(min_size=5, max_size=25, name="col"),
s2=series(min_size=3, max_size=8, name="col"),
)
def test_strategy_shape(
df1: pl.DataFrame, df2: pl.DataFrame, s1: pl.Series, s2: pl.Series
) -> None:
assert df1.shape == (5, 5)
assert df1.columns == ["col0", "col1", "col2", "col3", "col4"]

assert 10 <= len(df2.columns) <= 20
assert 5 <= len(df2) <= 25
assert 2 <= len(df2.columns) <= 5
assert 3 <= len(df2) <= 8

assert s1.len() == 5
assert 5 <= s2.len() <= 25
assert 3 <= s2.len() <= 8
assert s1.name == ""
assert s2.name == "col"

Expand Down

0 comments on commit b1f2dc5

Please sign in to comment.