Skip to content

Commit

Permalink
tests[python]: more control over parametrically-generated floats (optionally disallow +/-inf) (#5007)
Browse files Browse the repository at this point in the history
  • Loading branch information
alexander-beedie committed Sep 27, 2022
1 parent 9d8ff8e commit ca4996a
Show file tree
Hide file tree
Showing 2 changed files with 36 additions and 5 deletions.
19 changes: 14 additions & 5 deletions py-polars/polars/testing.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from dataclasses import dataclass
from datetime import datetime, timedelta
from functools import reduce
from math import isfinite
from typing import Any, Sequence

try:
Expand Down Expand Up @@ -550,6 +551,7 @@ def series(
max_size: int | None = MAX_DATA_SIZE,
strategy: SearchStrategy[object] | None = None,
null_probability: float = 0.0,
allow_infinities: bool = True,
unique: bool = False,
chunked: bool | None = None,
allowed_dtypes: Sequence[PolarsDataType] | None = None,
Expand All @@ -566,22 +568,21 @@ def series(
dtype : dtype, optional
a valid polars DataType for the resulting series.
size : int, optional
if set, will create a Series of exactly this size (and ignore min/max len
params).
if set, creates a Series of exactly this size (ignoring min/max params).
min_size : int, optional
if not passing an exact size, can set a minimum here (defaults to 0).
no-op if `size` is set.
max_size : int, optional
if not passing an exact size, can set a maximum value here (defaults to
MAX_DATA_SIZE).
no-op if `size` is set.
MAX_DATA_SIZE). no-op if `size` is set.
strategy : strategy, optional
supports overriding the default strategy for the given dtype.
null_probability : float, optional
percentage chance (expressed between 0.0 => 1.0) that a generated value is
None. this is applied independently of any None values generated by the
underlying strategy.
allow_infinities : bool, optional
optionally disallow generation of +/-inf values for floating-point dtypes (note: values are filtered with `math.isfinite`, so NaN is excluded as well).
unique : bool, optional
indicate whether Series values should all be distinct.
chunked : bool, optional
Expand Down Expand Up @@ -642,6 +643,10 @@ def draw_series(draw: DrawFn) -> pli.Series:
draw(sampled_from(selectable_dtypes)) if dtype is None else dtype
)
dtype_strategy = strategy or dtype_strategy_mapping[series_dtype]
if series_dtype in (Float32, Float64) and not allow_infinities:
dtype_strategy = dtype_strategy.filter(
lambda x: not isinstance(x, float) or isfinite(x)
)

# create/assign series size
series_size = (
Expand Down Expand Up @@ -701,6 +706,7 @@ def dataframes(
chunked: bool | None = None,
include_cols: Sequence[column] | None = None,
null_probability: float | dict[str, float] = 0.0,
allow_infinities: bool = True,
allowed_dtypes: Sequence[PolarsDataType] | None = None,
excluded_dtypes: Sequence[PolarsDataType] | None = None,
) -> SearchStrategy[pli.DataFrame | pli.LazyFrame]:
Expand Down Expand Up @@ -741,6 +747,8 @@ def dataframes(
underlying strategy, and can be applied either on a per-column basis (if
given as a ``{col:pct}`` dict), or globally. if null_probability is defined
on a column, it takes precedence over the global value.
allow_infinities : bool, optional
optionally disallow generation of +/-inf values for floating-point dtypes.
allowed_dtypes : {list,set}, optional
when automatically generating data, allow only these dtypes.
excluded_dtypes : {list,set}, optional
Expand Down Expand Up @@ -855,6 +863,7 @@ def draw_frames(draw: DrawFn) -> pli.DataFrame | pli.LazyFrame:
dtype=c.dtype,
size=series_size,
null_probability=(c.null_probability or 0.0),
allow_infinities=allow_infinities,
strategy=c.strategy,
unique=c.unique,
chunked=(chunked is None and draw(booleans())),
Expand Down
22 changes: 22 additions & 0 deletions py-polars/tests/parametric/test_testing.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
# ------------------------------------------------
from __future__ import annotations

from typing import Any

from hypothesis import given, settings
from hypothesis.strategies import sampled_from

Expand Down Expand Up @@ -161,3 +163,23 @@ def test_chunking(
assert s1.n_chunks() == 1
if len(s2) > 1:
assert s2.n_chunks() > 1


@given(
    df=dataframes(allowed_dtypes=[pl.Float32, pl.Float64], allow_infinities=False),
    s=series(dtype=pl.Float64, allow_infinities=False),
)
def test_infinities(
    df: pl.DataFrame,
    s: pl.Series,
) -> None:
    """Check that ``allow_infinities=False`` yields only finite float values.

    Both a standalone Float64 series and every column of a generated
    Float32/Float64 frame are inspected; any value that is not a python
    float, or that is not finite, fails the test.
    """
    import math

    def finite_float(value: Any) -> bool:
        # anything that is not a float is rejected outright
        return isinstance(value, float) and math.isfinite(value)

    # check the series first, then each frame column in declaration order
    candidates = [s] + [df[name] for name in df.columns]
    for values in candidates:
        assert all(finite_float(item) for item in values.to_list())

0 comments on commit ca4996a

Please sign in to comment.