tests[python]: more control over parametrically-generated floats (optionally disallow +/-inf) (#5007)
pola-rs · Sep 27, 2022 · ca4996a · ca4996a
1 parent 9d8ff8e
commit ca4996a
Show file tree

Hide file tree

Showing 2 changed files with 36 additions and 5 deletions.
diff --git a/py-polars/polars/testing.py b/py-polars/polars/testing.py
@@ -5,6 +5,7 @@
 from dataclasses import dataclass
 from datetime import datetime, timedelta
 from functools import reduce
+from math import isfinite
 from typing import Any, Sequence
 
 try:
@@ -550,6 +551,7 @@ def series(
         max_size: int | None = MAX_DATA_SIZE,
         strategy: SearchStrategy[object] | None = None,
         null_probability: float = 0.0,
+        allow_infinities: bool = True,
         unique: bool = False,
         chunked: bool | None = None,
         allowed_dtypes: Sequence[PolarsDataType] | None = None,
@@ -566,22 +568,21 @@ def series(
         dtype : dtype, optional
             a valid polars DataType for the resulting series.
         size : int, optional
-            if set, will create a Series of exactly this size (and ignore min/max len
-            params).
+            if set, creates a Series of exactly this size (ignoring min/max params).
         min_size : int, optional
             if not passing an exact size, can set a minimum here (defaults to 0).
             no-op if `size` is set.
         max_size : int, optional
             if not passing an exact size, can set a maximum value here (defaults to
-            MAX_DATA_SIZE).
-            no-op if `size` is set.
-
+            MAX_DATA_SIZE). no-op if `size` is set.
         strategy : strategy, optional
             supports overriding the default strategy for the given dtype.
         null_probability : float, optional
             percentage chance (expressed between 0.0 => 1.0) that a generated value is
             None. this is applied independently of any None values generated by the
             underlying strategy.
+        allow_infinities : bool, optional
+            optionally disallow generation of +/-inf values for floating-point dtypes.
         unique : bool, optional
             indicate whether Series values should all be distinct.
         chunked : bool, optional
@@ -642,6 +643,10 @@ def draw_series(draw: DrawFn) -> pli.Series:
                 draw(sampled_from(selectable_dtypes)) if dtype is None else dtype
             )
             dtype_strategy = strategy or dtype_strategy_mapping[series_dtype]
+            if series_dtype in (Float32, Float64) and not allow_infinities:
+                dtype_strategy = dtype_strategy.filter(
+                    lambda x: not isinstance(x, float) or isfinite(x)
+                )
 
             # create/assign series size
             series_size = (
@@ -701,6 +706,7 @@ def dataframes(
         chunked: bool | None = None,
         include_cols: Sequence[column] | None = None,
         null_probability: float | dict[str, float] = 0.0,
+        allow_infinities: bool = True,
         allowed_dtypes: Sequence[PolarsDataType] | None = None,
         excluded_dtypes: Sequence[PolarsDataType] | None = None,
     ) -> SearchStrategy[pli.DataFrame | pli.LazyFrame]:
@@ -741,6 +747,8 @@ def dataframes(
             underlying strategy, and can be applied either on a per-column basis (if
             given as a ``{col:pct}`` dict), or globally. if null_probability is defined
             on a column, it takes precedence over the global value.
+        allow_infinities : bool, optional
+            optionally disallow generation of +/-inf values for floating-point dtypes.
         allowed_dtypes : {list,set}, optional
             when automatically generating data, allow only these dtypes.
         excluded_dtypes : {list,set}, optional
@@ -855,6 +863,7 @@ def draw_frames(draw: DrawFn) -> pli.DataFrame | pli.LazyFrame:
                             dtype=c.dtype,
                             size=series_size,
                             null_probability=(c.null_probability or 0.0),
+                            allow_infinities=allow_infinities,
                             strategy=c.strategy,
                             unique=c.unique,
                             chunked=(chunked is None and draw(booleans())),

diff --git a/py-polars/tests/parametric/test_testing.py b/py-polars/tests/parametric/test_testing.py
@@ -3,6 +3,8 @@
 # ------------------------------------------------
 from __future__ import annotations
 
+from typing import Any
+
 from hypothesis import given, settings
 from hypothesis.strategies import sampled_from
 
@@ -161,3 +163,23 @@ def test_chunking(
     assert s1.n_chunks() == 1
     if len(s2) > 1:
         assert s2.n_chunks() > 1
+
+
+@given(
+    df=dataframes(allowed_dtypes=[pl.Float32, pl.Float64], allow_infinities=False),
+    s=series(dtype=pl.Float64, allow_infinities=False),
+)
+def test_infinities(
+    df: pl.DataFrame,
+    s: pl.Series,
+) -> None:
+    from math import isfinite
+
+    def finite_float(value: Any) -> bool:
+        if isinstance(value, float):
+            return isfinite(value)
+        return False
+
+    assert all(finite_float(val) for val in s.to_list())
+    for col in df.columns:
+        assert all(finite_float(val) for val in df[col].to_list())