Quiet an unnecessary warning (tests), and minor optimisation for slices with negative stride (#3913)
pola-rs · Jul 6, 2022 · b1f2dc5 · b1f2dc5
1 parent 004958f
commit b1f2dc5
Show file tree

Hide file tree

Showing 3 changed files with 55 additions and 35 deletions.
diff --git a/py-polars/polars/internals/functions.py b/py-polars/polars/internals/functions.py
@@ -293,12 +293,12 @@ def __init__(self, obj: FrameOrSeries):
         self.obj = obj
 
     @staticmethod
-    def _as_original(lazy: "pli.LazyFrame", obj: FrameOrSeries) -> FrameOrSeries:
+    def _as_original(lazy: "pli.LazyFrame", original: FrameOrSeries) -> FrameOrSeries:
         """
         Return lazy variant back to its original type.
         """
         frame = lazy.collect()
-        return frame if isinstance(obj, pli.DataFrame) else frame.to_series()
+        return frame if isinstance(original, pli.DataFrame) else frame.to_series()
 
     @staticmethod
     def _lazify(obj: FrameOrSeries) -> "pli.LazyFrame":
@@ -311,52 +311,66 @@ def _slice_positive(self, obj: "pli.LazyFrame") -> "pli.LazyFrame":
         """
         Logic for slices with positive stride.
         """
+        # note: at this point stride is guaranteed to be > 1
         return obj.slice(self.start, self.slice_length).take_every(self.stride)
 
     def _slice_negative(self, obj: "pli.LazyFrame") -> "pli.LazyFrame":
         """
         Logic for slices with negative stride.
         """
+        # apply slice before reversing (more efficient)
         stride = abs(self.stride)
         lazyslice = obj.slice(self.stop + 1, self.slice_length)
+
+        # potential early-exit if single row
         if self.slice_length == 1:
             return lazyslice
         else:
+            # reverse frame, applying 'take_every' if stride > 1
             lazyslice = lazyslice.reverse()
             return lazyslice.take_every(stride) if (stride > 1) else lazyslice
 
     def _slice_setup(self, s: slice) -> None:
         """
         Normalise slice bounds, identify unbounded and/or zero-length slices.
         """
+        # can normalise slice indices as we know object size
         obj_len = len(self.obj)
         start, stop, stride = slice(s.start, s.stop, s.step).indices(obj_len)
+
+        # check if slice is actually unbounded
         if stride >= 1:
-            self.is_unbounded = start <= 0 and stop >= obj_len
+            self.is_unbounded = (start <= 0) and (stop >= obj_len)
         else:
-            self.is_unbounded = stop is None and (
-                start is None or (start >= obj_len - 1)
-            )
+            self.is_unbounded = (stop == -1) and (start >= obj_len - 1)
+
         self._positive_indices = start >= 0 and stop >= 0
-        self.slice_length = (
-            0
-            if self.obj.is_empty()
-            or (
-                (start == stop)
-                or (stride > 0 and start > stop)
-                or (stride < 0 and start < stop)
+
+        # determine slice length
+        if self.obj.is_empty():
+            self.slice_length = 0
+        elif self.is_unbounded:
+            self.slice_length = obj_len
+        else:
+            self.slice_length = (
+                0
+                if (
+                    (start == stop)
+                    or (stride > 0 and start > stop)
+                    or (stride < 0 and start < stop)
+                )
+                else abs(stop - start)
             )
-            else abs(stop - start)
-        )
         self.start, self.stop, self.stride = start, stop, stride
 
     def apply(self, s: slice) -> FrameOrSeries:
         """
         Apply a slice operation, taking advantage of any potential fast paths.
         """
+        # normalise slice
         self._slice_setup(s)
 
-        # check for fast-paths / early-exit
+        # check for fast-paths / single-operation calls
         if self.slice_length == 0:
             return self.obj.cleared()
 
@@ -365,11 +379,12 @@ def apply(self, s: slice) -> FrameOrSeries:
 
         elif self._positive_indices and self.stride == 1:
             return self.obj.slice(self.start, self.slice_length)
-
-        lazyobj = self._lazify(self.obj)
-        sliced = (
-            self._slice_positive(lazyobj)
-            if self.stride > 0
-            else self._slice_negative(lazyobj)
-        )
-        return self._as_original(sliced, self.obj)
+        else:
+            # multi-operation call; make lazy
+            lazyobj = self._lazify(self.obj)
+            sliced = (
+                self._slice_positive(lazyobj)
+                if self.stride > 0
+                else self._slice_negative(lazyobj)
+            )
+            return self._as_original(sliced, self.obj)
diff --git a/py-polars/polars/testing.py b/py-polars/polars/testing.py
@@ -1,14 +1,15 @@
 from __future__ import annotations
 
 import random
+import warnings
 from dataclasses import dataclass
 from datetime import datetime
 from functools import reduce
 from typing import Any, Callable, Sequence
 
 try:
     from hypothesis import settings
-    from hypothesis.errors import InvalidArgument
+    from hypothesis.errors import InvalidArgument, NonInteractiveExampleWarning
     from hypothesis.strategies import (
         SearchStrategy,
         booleans,
@@ -380,7 +381,7 @@ def __post_init__(self) -> None:
                 self.null_probability < 0 or self.null_probability > 1
             ):
                 raise InvalidArgument(
-                    f"null_probability should be between 0.0 and 1.0 or None; found {self.null_probability}"
+                    f"null_probability should be between 0.0 and 1.0; found {self.null_probability}"
                 )
             if self.dtype is None and not self.strategy:
                 self.dtype = random.choice(strategy_dtypes)
@@ -392,10 +393,14 @@ def __post_init__(self) -> None:
                 else:
                     # given a custom strategy, but no explicit dtype. infer one
                     # from the first non-None value that the strategy produces.
-                    sample_value_iter = (self.strategy.example() for _ in range(100))  # type: ignore[union-attr]
-                    sample_value_type = type(
-                        next(e for e in sample_value_iter if e is not None)
-                    )
+                    with warnings.catch_warnings():
+                        # note: usually you should not call "example()" outside of an interactive shell, hence
+                        # the warning. however, here it is reasonable to do so, so we catch and ignore it
+                        warnings.simplefilter("ignore", NonInteractiveExampleWarning)
+                        sample_value_iter = (self.strategy.example() for _ in range(100))  # type: ignore[union-attr]
+                        sample_value_type = type(
+                            next(e for e in sample_value_iter if e is not None)
+                        )
                     if sample_value_type is not None:
                         self.dtype = py_type_to_dtype(sample_value_type)
                     else:

diff --git a/py-polars/tests_parametric/test_testing.py b/py-polars/tests_parametric/test_testing.py
@@ -21,21 +21,21 @@ def test_strategy_classes(df: pl.DataFrame, lf: pl.LazyFrame, srs: pl.Series) ->
 
 @given(
     df1=dataframes(cols=5, size=5),
-    df2=dataframes(min_cols=10, max_cols=20, min_size=5, max_size=25),
+    df2=dataframes(min_cols=2, max_cols=5, min_size=3, max_size=8),
     s1=series(size=5),
-    s2=series(min_size=5, max_size=25, name="col"),
+    s2=series(min_size=3, max_size=8, name="col"),
 )
 def test_strategy_shape(
     df1: pl.DataFrame, df2: pl.DataFrame, s1: pl.Series, s2: pl.Series
 ) -> None:
     assert df1.shape == (5, 5)
     assert df1.columns == ["col0", "col1", "col2", "col3", "col4"]
 
-    assert 10 <= len(df2.columns) <= 20
-    assert 5 <= len(df2) <= 25
+    assert 2 <= len(df2.columns) <= 5
+    assert 3 <= len(df2) <= 8
 
     assert s1.len() == 5
-    assert 5 <= s2.len() <= 25
+    assert 3 <= s2.len() <= 8
     assert s1.name == ""
     assert s2.name == "col"