Skip to content

Commit

Permalink
Relocate hypothesis unit tests to parallel tests_parametric dir (#3899)
Browse files Browse the repository at this point in the history
  • Loading branch information
alexander-beedie committed Jul 5, 2022
1 parent 84c5170 commit 6adbff3
Show file tree
Hide file tree
Showing 11 changed files with 210 additions and 187 deletions.
7 changes: 6 additions & 1 deletion py-polars/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,13 @@ test: venv
$(PYTHON_BIN)/maturin develop
$(PYTHON) -m pytest tests

# Run `maturin develop`, then pytest over both the `tests` and `tests_parametric` directories.
test-all: venv
$(PYTHON_BIN)/maturin develop
$(PYTHON) -m pytest tests
$(PYTHON) -m pytest tests_parametric

test-with-cov: venv
@cd tests && ../$(PYTHON) -m pytest \
$(PYTHON) -m pytest \
--cov=polars \
--cov-report xml \
--cov-fail-under=85 \
Expand Down
2 changes: 1 addition & 1 deletion py-polars/tests/test_datelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,10 @@
import pyarrow as pa
import pytest
import pytz
from test_series import verify_series_and_expr_api

import polars as pl
from polars.datatypes import DTYPE_TEMPORAL_UNITS
from polars.testing import verify_series_and_expr_api


def test_fill_null() -> None:
Expand Down
22 changes: 4 additions & 18 deletions py-polars/tests/test_df.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,9 @@
import pandas as pd
import pyarrow as pa
import pytest
from hypothesis import given

import polars as pl
from polars.testing import assert_frame_equal, assert_series_equal, columns, dataframes
from polars.testing import assert_frame_equal, assert_series_equal, columns

if sys.version_info >= (3, 8):
from typing import Literal
Expand All @@ -26,22 +25,9 @@ def test_version() -> None:
_version = pl.__version__


@given(df=dataframes())
def test_repr(df: pl.DataFrame) -> None:
    # repr() of any generated frame must produce a string
    rendered = repr(df)
    assert isinstance(rendered, str)


# note: temporarily constraining dtypes for this test (possible windows-specific date bug)
@given(df=dataframes(allowed_dtypes=[pl.Boolean, pl.UInt64, pl.Utf8]))
def test_null_count(df: pl.DataFrame) -> None:
    # compare the reported per-column null counts against a manual tally
    counts = df.null_count()
    n_columns = len(df.columns)

    # a frame without columns yields an empty (0, 0) result
    if n_columns == 0:
        assert counts.shape == (0, 0)
        return

    # otherwise: a single row holding one count per column
    assert counts.shape == (1, n_columns)
    for col_idx, reported in enumerate(counts.rows()[0]):
        values = df.select_at_idx(col_idx).to_list()
        assert reported == sum(v is None for v in values)
def test_null_count() -> None:
    # null_count on a two-column frame gives one row with one entry per column
    frame = pl.DataFrame({"a": [2, 1, 3], "b": ["a", "b", None]})
    counts = frame.null_count()
    assert counts.shape == (1, 2)


def test_init_empty() -> None:
Expand Down
12 changes: 5 additions & 7 deletions py-polars/tests/test_exprs.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
from test_series import verify_series_and_expr_api

import polars as pl
from polars import testing
from polars.testing import assert_series_equal, verify_series_and_expr_api


def test_horizontal_agg(fruits_cars: pl.DataFrame) -> None:
Expand Down Expand Up @@ -54,21 +52,21 @@ def test_flatten_explode() -> None:
expected = pl.Series("a", ["H", "e", "l", "l", "o", "W", "o", "r", "l", "d"])

result: pl.Series = df.to_frame().select(pl.col("a").flatten())[:, 0] # type: ignore
testing.assert_series_equal(result, expected)
assert_series_equal(result, expected)

result: pl.Series = df.to_frame().select(pl.col("a").explode())[:, 0] # type: ignore
testing.assert_series_equal(result, expected)
assert_series_equal(result, expected)


def test_min_nulls_consistency() -> None:
df = pl.DataFrame({"a": [None, 2, 3], "b": [4, None, 6], "c": [7, 5, 0]})
out = df.select([pl.min(["a", "b", "c"])]).to_series()
expected = pl.Series("min", [4, 2, 0])
testing.assert_series_equal(out, expected)
assert_series_equal(out, expected)

out = df.select([pl.max(["a", "b", "c"])]).to_series()
expected = pl.Series("max", [7, 5, 6])
testing.assert_series_equal(out, expected)
assert_series_equal(out, expected)


def test_list_join_strings() -> None:
Expand Down
21 changes: 10 additions & 11 deletions py-polars/tests/test_lists.py
Original file line number Diff line number Diff line change
@@ -1,34 +1,33 @@
from datetime import date, datetime, time

import pandas as pd
from test_series import verify_series_and_expr_api

import polars as pl
from polars import testing
from polars.testing import assert_series_equal, verify_series_and_expr_api


def test_list_arr_get() -> None:
a = pl.Series("a", [[1, 2, 3], [4, 5], [6, 7, 8, 9]])
out = a.arr.get(0)
expected = pl.Series("a", [1, 4, 6])
testing.assert_series_equal(out, expected)
assert_series_equal(out, expected)
out = a.arr.first()
testing.assert_series_equal(out, expected)
assert_series_equal(out, expected)
out = pl.select(pl.lit(a).arr.first()).to_series()
testing.assert_series_equal(out, expected)
assert_series_equal(out, expected)

out = a.arr.get(-1)
expected = pl.Series("a", [3, 5, 9])
testing.assert_series_equal(out, expected)
assert_series_equal(out, expected)
out = a.arr.last()
testing.assert_series_equal(out, expected)
assert_series_equal(out, expected)
out = pl.select(pl.lit(a).arr.last()).to_series()
testing.assert_series_equal(out, expected)
assert_series_equal(out, expected)

a = pl.Series("a", [[1, 2, 3], [4, 5], [6, 7, 8, 9]])
out = a.arr.get(-3)
expected = pl.Series("a", [1, None, 7])
testing.assert_series_equal(out, expected)
assert_series_equal(out, expected)

assert pl.DataFrame(
{"a": [[1], [2], [3], [4, 5, 6], [7, 8, 9], [None, 11]]}
Expand All @@ -49,10 +48,10 @@ def test_contains() -> None:
a = pl.Series("a", [[1, 2, 3], [2, 5], [6, 7, 8, 9]])
out = a.arr.contains(2)
expected = pl.Series("a", [True, True, False])
testing.assert_series_equal(out, expected)
assert_series_equal(out, expected)

out = pl.select(pl.lit(a).arr.contains(2)).to_series()
testing.assert_series_equal(out, expected)
assert_series_equal(out, expected)


def test_dtype() -> None:
Expand Down
9 changes: 0 additions & 9 deletions py-polars/tests/test_series.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,15 +21,6 @@ def test_cum_agg() -> None:
verify_series_and_expr_api(s, pl.Series("a", [1, 2, 6, 12]), "cumprod")


# TODO: exclude obvious/known overflow inside the strategy before commenting back in
# @given(s=series(allowed_dtypes=_NUMERIC_COL_TYPES, name="a"))
# def test_cum_agg_extra(s: pl.Series) -> None:
# # confirm that ops on generated Series match equivalent Expr call
# # note: testing codepath-equivalence, not correctness.
# for op in ("cumsum", "cummin", "cummax", "cumprod"):
# verify_series_and_expr_api(s, None, op)


def test_init_inputs(monkeypatch: Any) -> None:
for flag in [False, True]:
monkeypatch.setattr(pl.internals.construction, "_PYARROW_AVAILABLE", flag)
Expand Down
141 changes: 1 addition & 140 deletions py-polars/tests/test_testing.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,7 @@
import pytest
from hypothesis import given, settings
from hypothesis.strategies import sampled_from

import polars as pl
from polars.testing import (
assert_frame_equal,
assert_series_equal,
column,
columns,
dataframes,
series,
strategy_dtypes,
)

TEMPORAL_DTYPES = [pl.Datetime, pl.Date, pl.Time, pl.Duration]
from polars.testing import assert_frame_equal, assert_series_equal


def test_compare_series_value_mismatch() -> None:
Expand Down Expand Up @@ -130,130 +118,3 @@ def test_assert_series_equal_int_overflow() -> None:
assert_series_equal(s0, s0, check_exact=check_exact)
with pytest.raises(AssertionError):
assert_series_equal(s1, s2, check_exact=check_exact)


@given(df=dataframes(), lf=dataframes(lazy=True), srs=series())
@settings(max_examples=10)
def test_strategy_classes(df: pl.DataFrame, lf: pl.LazyFrame, srs: pl.Series) -> None:
    # each strategy must hand back an instance of the matching polars class
    expectations = (
        (df, pl.DataFrame),
        (lf, pl.LazyFrame),
        (srs, pl.Series),
    )
    for generated, expected_type in expectations:
        assert isinstance(generated, expected_type)


@given(
    df1=dataframes(cols=5, size=5),
    df2=dataframes(min_cols=10, max_cols=20, min_size=5, max_size=25),
    s1=series(size=5),
    s2=series(min_size=5, max_size=25, name="col"),
)
def test_strategy_shape(
    df1: pl.DataFrame, df2: pl.DataFrame, s1: pl.Series, s2: pl.Series
) -> None:
    # exact frame dimensions, with the auto-generated column names
    assert df1.shape == (5, 5)
    assert df1.columns == ["col0", "col1", "col2", "col3", "col4"]

    # frame dimensions constrained by min/max bounds
    n_cols = len(df2.columns)
    n_rows = len(df2)
    assert 10 <= n_cols <= 20
    assert 5 <= n_rows <= 25

    # series lengths: exact size, then bounded size
    assert s1.len() == 5
    assert 5 <= s2.len() <= 25

    # series names: unnamed by default, explicit when given
    assert s1.name == ""
    assert s2.name == "col"


@given(
    lf=dataframes(
        # generate lazyframes with at least one row
        lazy=True,
        min_size=1,
        # test mix & match of bulk-assigned cols with custom cols
        cols=columns(["a", "b"], dtype=pl.UInt8, unique=True),
        include_cols=[
            column("c", dtype=pl.Boolean),
            column("d", strategy=sampled_from(["x", "y", "z"])),
        ],
    )
)
def test_strategy_frame_columns(lf: pl.LazyFrame) -> None:
    # schema and column order reflect both the bulk and the custom columns
    assert lf.schema == {"a": pl.UInt8, "b": pl.UInt8, "c": pl.Boolean, "d": pl.Utf8}
    assert lf.columns == ["a", "b", "c", "d"]
    collected = lf.collect()

    # uint cols: values within the UInt8 bounds
    uint8_max = (2**8) - 1
    for name in ("a", "b"):
        assert collected[name].min() >= 0
    for name in ("a", "b"):
        assert collected[name].max() <= uint8_max

    # uint cols: values are unique
    for name in ("a", "b"):
        assert collected[name].is_unique().all()

    # boolean col
    bool_values = collected["c"].to_list()
    assert all(isinstance(v, bool) for v in bool_values)

    # string col, entries selected from custom values
    allowed = {"x", "y", "z"}
    str_values = collected["d"].to_list()
    assert all(v in allowed for v in str_values)


@given(
    df=dataframes(allowed_dtypes=TEMPORAL_DTYPES, max_size=1),
    lf=dataframes(excluded_dtypes=TEMPORAL_DTYPES, max_size=1, lazy=True),
    s1=series(max_size=1),
    s2=series(dtype=pl.Boolean, max_size=1),
    s3=series(allowed_dtypes=TEMPORAL_DTYPES, max_size=1),
    s4=series(excluded_dtypes=TEMPORAL_DTYPES, max_size=1),
)
def test_strategy_dtypes(
    df: pl.DataFrame,
    lf: pl.LazyFrame,
    s1: pl.Series,
    s2: pl.Series,
    s3: pl.Series,
    s4: pl.Series,
) -> None:
    # dataframe: every column dtype comes from the allowed (temporal) set
    for tp in df.dtypes:
        assert tp in TEMPORAL_DTYPES

    # lazyframe: no column dtype comes from the excluded (temporal) set
    assert not any(tp in TEMPORAL_DTYPES for tp in lf.dtypes)

    # series: unconstrained, fixed dtype, allowed-only, excluded
    assert s1.dtype in strategy_dtypes
    assert s2.dtype == pl.Boolean
    assert s3.dtype in TEMPORAL_DTYPES
    assert s4.dtype not in TEMPORAL_DTYPES


@given(
    # set global, per-column, and overridden null-probabilities
    s=series(size=50, null_probability=0.10),
    df1=dataframes(cols=1, size=50, null_probability=0.30),
    df2=dataframes(cols=2, size=50, null_probability={"col0": 0.70}),
    df3=dataframes(
        cols=1,
        size=50,
        null_probability=1.0,
        include_cols=[column(name="colx", null_probability=0.20)],
    ),
)
def test_strategy_null_probability(
    s: pl.Series,
    df1: pl.DataFrame,
    df2: pl.DataFrame,
    df3: pl.DataFrame,
) -> None:
    # every generated object has the requested length
    for obj in (s, df1, df2, df3):
        assert len(obj) == 50  # type: ignore[arg-type]

    # total null count strictly increases from s through df1, df2 and df3
    series_nulls = s.null_count()
    df1_nulls = df1.null_count().fold(sum).sum()
    df2_nulls = df2.null_count().fold(sum).sum()
    df3_nulls = df3.null_count().fold(sum).sum()
    assert series_nulls < df1_nulls
    assert df1_nulls < df2_nulls
    assert df2_nulls < df3_nulls

    # df2: first column has more nulls than the second, but is not all-null
    df2_col0_nulls, df2_col1_nulls = df2.null_count().rows()[0]
    assert df2_col0_nulls > df2_col1_nulls
    assert df2_col0_nulls < 50

    # df3: first column is entirely null and has more nulls than "colx"
    df3_col0_nulls, df3_colx_nulls = df3.null_count().rows()[0]
    assert df3_col0_nulls > df3_colx_nulls
    assert df3_col0_nulls == 50
24 changes: 24 additions & 0 deletions py-polars/tests_parametric/test_dataframe.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# --------------------------------------------------
# Validate DataFrame behaviour with parametric tests
# --------------------------------------------------
from hypothesis import given

import polars as pl
from polars.testing import dataframes


@given(df=dataframes())
def test_repr(df: pl.DataFrame) -> None:
    # repr() of any generated frame must produce a string
    rendered = repr(df)
    assert isinstance(rendered, str)


@given(df=dataframes(allowed_dtypes=[pl.Boolean, pl.UInt64, pl.Utf8, pl.Time]))
def test_null_count(df: pl.DataFrame) -> None:
    # compare the reported per-column null counts against a manual tally
    counts = df.null_count()
    n_columns = len(df.columns)

    # a frame without columns yields an empty (0, 0) result
    if n_columns == 0:
        assert counts.shape == (0, 0)
        return

    # otherwise: a single row holding one count per column
    assert counts.shape == (1, n_columns)
    for col_idx, reported in enumerate(counts.rows()[0]):
        values = df.select_at_idx(col_idx).to_list()
        assert reported == sum(v is None for v in values)
1 change: 1 addition & 0 deletions py-polars/tests_parametric/test_lazyframe.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
# TODO: add parametric tests for LazyFrame
20 changes: 20 additions & 0 deletions py-polars/tests_parametric/test_series.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# -------------------------------------------------
# Validate Series behaviour with parametric tests
# -------------------------------------------------

# from hypothesis import given
#
# import polars as pl
# from polars.testing import (
# series,
# verify_series_and_expr_api,
# )
#
#
# # TODO: exclude obvious/known overflow inside the strategy before commenting back in
# @given(s=series(allowed_dtypes=_NUMERIC_COL_TYPES, name="a"))
# def test_cum_agg_extra(s: pl.Series) -> None:
# # confirm that ops on generated Series match equivalent Expr call
# # note: testing codepath-equivalence, not correctness.
# for op in ("cumsum", "cummin", "cummax", "cumprod"):
# verify_series_and_expr_api(s, None, op)

0 comments on commit 6adbff3

Please sign in to comment.