Relocate hypothesis unit tests to parallel tests_parametric dir (#3899)
pola-rs · Jul 5, 2022 · 6adbff3 · 6adbff3
1 parent 84c5170
commit 6adbff3
Show file tree

Hide file tree

Showing 11 changed files with 210 additions and 187 deletions.
diff --git a/py-polars/Makefile b/py-polars/Makefile
@@ -27,8 +27,13 @@ test: venv
 	$(PYTHON_BIN)/maturin develop
 	$(PYTHON) -m pytest tests
 
+test-all: venv
+	$(PYTHON_BIN)/maturin develop
+	$(PYTHON) -m pytest tests
+	$(PYTHON) -m pytest tests_parametric
+
 test-with-cov: venv
-	@cd tests && ../$(PYTHON) -m pytest \
+	$(PYTHON) -m pytest \
 		--cov=polars \
 		--cov-report xml \
 		--cov-fail-under=85 \

diff --git a/py-polars/tests/test_datelike.py b/py-polars/tests/test_datelike.py
@@ -7,10 +7,10 @@
 import pyarrow as pa
 import pytest
 import pytz
-from test_series import verify_series_and_expr_api
 
 import polars as pl
 from polars.datatypes import DTYPE_TEMPORAL_UNITS
+from polars.testing import verify_series_and_expr_api
 
 
 def test_fill_null() -> None:

diff --git a/py-polars/tests/test_df.py b/py-polars/tests/test_df.py
@@ -11,10 +11,9 @@
 import pandas as pd
 import pyarrow as pa
 import pytest
-from hypothesis import given
 
 import polars as pl
-from polars.testing import assert_frame_equal, assert_series_equal, columns, dataframes
+from polars.testing import assert_frame_equal, assert_series_equal, columns
 
 if sys.version_info >= (3, 8):
     from typing import Literal
@@ -26,22 +25,9 @@ def test_version() -> None:
     _version = pl.__version__
 
 
-@given(df=dataframes())
-def test_repr(df: pl.DataFrame) -> None:
-    assert isinstance(repr(df), str)
-    # print(df)
-
-
-# note: temporarily constraining dtypes for this test (possible windows-specific date bug)
-@given(df=dataframes(allowed_dtypes=[pl.Boolean, pl.UInt64, pl.Utf8]))
-def test_null_count(df: pl.DataFrame) -> None:
-    null_count, ncols = df.null_count(), len(df.columns)
-    if ncols == 0:
-        assert null_count.shape == (0, 0)
-    else:
-        assert null_count.shape == (1, ncols)
-        for idx, count in enumerate(null_count.rows()[0]):
-            assert count == sum(v is None for v in df.select_at_idx(idx).to_list())
+def test_null_count() -> None:
+    df = pl.DataFrame({"a": [2, 1, 3], "b": ["a", "b", None]})
+    assert df.null_count().shape == (1, 2)
 
 
 def test_init_empty() -> None:

diff --git a/py-polars/tests/test_exprs.py b/py-polars/tests/test_exprs.py
@@ -1,7 +1,5 @@
-from test_series import verify_series_and_expr_api
-
 import polars as pl
-from polars import testing
+from polars.testing import assert_series_equal, verify_series_and_expr_api
 
 
 def test_horizontal_agg(fruits_cars: pl.DataFrame) -> None:
@@ -54,21 +52,21 @@ def test_flatten_explode() -> None:
     expected = pl.Series("a", ["H", "e", "l", "l", "o", "W", "o", "r", "l", "d"])
 
     result: pl.Series = df.to_frame().select(pl.col("a").flatten())[:, 0]  # type: ignore
-    testing.assert_series_equal(result, expected)
+    assert_series_equal(result, expected)
 
     result: pl.Series = df.to_frame().select(pl.col("a").explode())[:, 0]  # type: ignore
-    testing.assert_series_equal(result, expected)
+    assert_series_equal(result, expected)
 
 
 def test_min_nulls_consistency() -> None:
     df = pl.DataFrame({"a": [None, 2, 3], "b": [4, None, 6], "c": [7, 5, 0]})
     out = df.select([pl.min(["a", "b", "c"])]).to_series()
     expected = pl.Series("min", [4, 2, 0])
-    testing.assert_series_equal(out, expected)
+    assert_series_equal(out, expected)
 
     out = df.select([pl.max(["a", "b", "c"])]).to_series()
     expected = pl.Series("max", [7, 5, 6])
-    testing.assert_series_equal(out, expected)
+    assert_series_equal(out, expected)
 
 
 def test_list_join_strings() -> None:

diff --git a/py-polars/tests/test_lists.py b/py-polars/tests/test_lists.py
@@ -1,34 +1,33 @@
 from datetime import date, datetime, time
 
 import pandas as pd
-from test_series import verify_series_and_expr_api
 
 import polars as pl
-from polars import testing
+from polars.testing import assert_series_equal, verify_series_and_expr_api
 
 
 def test_list_arr_get() -> None:
     a = pl.Series("a", [[1, 2, 3], [4, 5], [6, 7, 8, 9]])
     out = a.arr.get(0)
     expected = pl.Series("a", [1, 4, 6])
-    testing.assert_series_equal(out, expected)
+    assert_series_equal(out, expected)
     out = a.arr.first()
-    testing.assert_series_equal(out, expected)
+    assert_series_equal(out, expected)
     out = pl.select(pl.lit(a).arr.first()).to_series()
-    testing.assert_series_equal(out, expected)
+    assert_series_equal(out, expected)
 
     out = a.arr.get(-1)
     expected = pl.Series("a", [3, 5, 9])
-    testing.assert_series_equal(out, expected)
+    assert_series_equal(out, expected)
     out = a.arr.last()
-    testing.assert_series_equal(out, expected)
+    assert_series_equal(out, expected)
     out = pl.select(pl.lit(a).arr.last()).to_series()
-    testing.assert_series_equal(out, expected)
+    assert_series_equal(out, expected)
 
     a = pl.Series("a", [[1, 2, 3], [4, 5], [6, 7, 8, 9]])
     out = a.arr.get(-3)
     expected = pl.Series("a", [1, None, 7])
-    testing.assert_series_equal(out, expected)
+    assert_series_equal(out, expected)
 
     assert pl.DataFrame(
         {"a": [[1], [2], [3], [4, 5, 6], [7, 8, 9], [None, 11]]}
@@ -49,10 +48,10 @@ def test_contains() -> None:
     a = pl.Series("a", [[1, 2, 3], [2, 5], [6, 7, 8, 9]])
     out = a.arr.contains(2)
     expected = pl.Series("a", [True, True, False])
-    testing.assert_series_equal(out, expected)
+    assert_series_equal(out, expected)
 
     out = pl.select(pl.lit(a).arr.contains(2)).to_series()
-    testing.assert_series_equal(out, expected)
+    assert_series_equal(out, expected)
 
 
 def test_dtype() -> None:

diff --git a/py-polars/tests/test_series.py b/py-polars/tests/test_series.py
@@ -21,15 +21,6 @@ def test_cum_agg() -> None:
     verify_series_and_expr_api(s, pl.Series("a", [1, 2, 6, 12]), "cumprod")
 
 
-# TODO: exclude obvious/known overflow inside the strategy before commenting back in
-# @given(s=series(allowed_dtypes=_NUMERIC_COL_TYPES, name="a"))
-# def test_cum_agg_extra(s: pl.Series) -> None:
-#     # confirm that ops on generated Series match equivalent Expr call
-#     # note: testing codepath-equivalence, not correctness.
-#     for op in ("cumsum", "cummin", "cummax", "cumprod"):
-#         verify_series_and_expr_api(s, None, op)
-
-
 def test_init_inputs(monkeypatch: Any) -> None:
     for flag in [False, True]:
         monkeypatch.setattr(pl.internals.construction, "_PYARROW_AVAILABLE", flag)

diff --git a/py-polars/tests/test_testing.py b/py-polars/tests/test_testing.py
@@ -1,19 +1,7 @@
 import pytest
-from hypothesis import given, settings
-from hypothesis.strategies import sampled_from
 
 import polars as pl
-from polars.testing import (
-    assert_frame_equal,
-    assert_series_equal,
-    column,
-    columns,
-    dataframes,
-    series,
-    strategy_dtypes,
-)
-
-TEMPORAL_DTYPES = [pl.Datetime, pl.Date, pl.Time, pl.Duration]
+from polars.testing import assert_frame_equal, assert_series_equal
 
 
 def test_compare_series_value_mismatch() -> None:
@@ -130,130 +118,3 @@ def test_assert_series_equal_int_overflow() -> None:
         assert_series_equal(s0, s0, check_exact=check_exact)
         with pytest.raises(AssertionError):
             assert_series_equal(s1, s2, check_exact=check_exact)
-
-
-@given(df=dataframes(), lf=dataframes(lazy=True), srs=series())
-@settings(max_examples=10)
-def test_strategy_classes(df: pl.DataFrame, lf: pl.LazyFrame, srs: pl.Series) -> None:
-    assert isinstance(df, pl.DataFrame)
-    assert isinstance(lf, pl.LazyFrame)
-    assert isinstance(srs, pl.Series)
-
-
-@given(
-    df1=dataframes(cols=5, size=5),
-    df2=dataframes(min_cols=10, max_cols=20, min_size=5, max_size=25),
-    s1=series(size=5),
-    s2=series(min_size=5, max_size=25, name="col"),
-)
-def test_strategy_shape(
-    df1: pl.DataFrame, df2: pl.DataFrame, s1: pl.Series, s2: pl.Series
-) -> None:
-    assert df1.shape == (5, 5)
-    assert df1.columns == ["col0", "col1", "col2", "col3", "col4"]
-
-    assert 10 <= len(df2.columns) <= 20
-    assert 5 <= len(df2) <= 25
-
-    assert s1.len() == 5
-    assert 5 <= s2.len() <= 25
-    assert s1.name == ""
-    assert s2.name == "col"
-
-
-@given(
-    lf=dataframes(
-        # generate lazyframes with at least one row
-        lazy=True,
-        min_size=1,
-        # test mix & match of bulk-assigned cols with custom cols
-        cols=columns(["a", "b"], dtype=pl.UInt8, unique=True),
-        include_cols=[
-            column("c", dtype=pl.Boolean),
-            column("d", strategy=sampled_from(["x", "y", "z"])),
-        ],
-    )
-)
-def test_strategy_frame_columns(lf: pl.LazyFrame) -> None:
-    assert lf.schema == {"a": pl.UInt8, "b": pl.UInt8, "c": pl.Boolean, "d": pl.Utf8}
-    assert lf.columns == ["a", "b", "c", "d"]
-    df = lf.collect()
-
-    # confirm uint cols bounds
-    uint8_max = (2**8) - 1
-    assert df["a"].min() >= 0
-    assert df["b"].min() >= 0
-    assert df["a"].max() <= uint8_max
-    assert df["b"].max() <= uint8_max
-
-    # confirm uint cols uniqueness
-    assert df["a"].is_unique().all()
-    assert df["b"].is_unique().all()
-
-    # boolean col
-    assert all(isinstance(v, bool) for v in df["c"].to_list())
-
-    # string col, entries selected from custom values
-    xyz = {"x", "y", "z"}
-    assert all(v in xyz for v in df["d"].to_list())
-
-
-@given(
-    df=dataframes(allowed_dtypes=TEMPORAL_DTYPES, max_size=1),
-    lf=dataframes(excluded_dtypes=TEMPORAL_DTYPES, max_size=1, lazy=True),
-    s1=series(max_size=1),
-    s2=series(dtype=pl.Boolean, max_size=1),
-    s3=series(allowed_dtypes=TEMPORAL_DTYPES, max_size=1),
-    s4=series(excluded_dtypes=TEMPORAL_DTYPES, max_size=1),
-)
-def test_strategy_dtypes(
-    df: pl.DataFrame,
-    lf: pl.LazyFrame,
-    s1: pl.Series,
-    s2: pl.Series,
-    s3: pl.Series,
-    s4: pl.Series,
-) -> None:
-    # dataframe, lazyframe
-    assert all(tp in TEMPORAL_DTYPES for tp in df.dtypes)
-    assert all(tp not in TEMPORAL_DTYPES for tp in lf.dtypes)
-
-    # series
-    assert s1.dtype in strategy_dtypes
-    assert s2.dtype == pl.Boolean
-    assert s3.dtype in TEMPORAL_DTYPES
-    assert s4.dtype not in TEMPORAL_DTYPES
-
-
-@given(
-    # set global, per-column, and overridden null-probabilities
-    s=series(size=50, null_probability=0.10),
-    df1=dataframes(cols=1, size=50, null_probability=0.30),
-    df2=dataframes(cols=2, size=50, null_probability={"col0": 0.70}),
-    df3=dataframes(
-        cols=1,
-        size=50,
-        null_probability=1.0,
-        include_cols=[column(name="colx", null_probability=0.20)],
-    ),
-)
-def test_strategy_null_probability(
-    s: pl.Series,
-    df1: pl.DataFrame,
-    df2: pl.DataFrame,
-    df3: pl.DataFrame,
-) -> None:
-    for obj in (s, df1, df2, df3):
-        assert len(obj) == 50  # type: ignore[arg-type]
-
-    assert s.null_count() < df1.null_count().fold(sum).sum()
-    assert df1.null_count().fold(sum).sum() < df2.null_count().fold(sum).sum()
-    assert df2.null_count().fold(sum).sum() < df3.null_count().fold(sum).sum()
-
-    nulls_col0, nulls_col1 = df2.null_count().rows()[0]
-    assert nulls_col0 > nulls_col1
-    assert nulls_col0 < 50
-
-    nulls_col0, nulls_colx = df3.null_count().rows()[0]
-    assert nulls_col0 > nulls_colx
-    assert nulls_col0 == 50
diff --git a/py-polars/tests_parametric/test_dataframe.py b/py-polars/tests_parametric/test_dataframe.py
@@ -0,0 +1,24 @@
+# -------------------------------------------------
+# Validate DataFrame behaviour with parametric tests
+# -------------------------------------------------
+from hypothesis import given
+
+import polars as pl
+from polars.testing import dataframes
+
+
+@given(df=dataframes())
+def test_repr(df: pl.DataFrame) -> None:
+    assert isinstance(repr(df), str)
+    # print(df)
+
+
+@given(df=dataframes(allowed_dtypes=[pl.Boolean, pl.UInt64, pl.Utf8, pl.Time]))
+def test_null_count(df: pl.DataFrame) -> None:
+    null_count, ncols = df.null_count(), len(df.columns)
+    if ncols == 0:
+        assert null_count.shape == (0, 0)
+    else:
+        assert null_count.shape == (1, ncols)
+        for idx, count in enumerate(null_count.rows()[0]):
+            assert count == sum(v is None for v in df.select_at_idx(idx).to_list())
diff --git a/py-polars/tests_parametric/test_lazyframe.py b/py-polars/tests_parametric/test_lazyframe.py
@@ -0,0 +1 @@
+# TODO: add parametric (hypothesis-based) tests for LazyFrame behaviour
diff --git a/py-polars/tests_parametric/test_series.py b/py-polars/tests_parametric/test_series.py
@@ -0,0 +1,20 @@
+# -------------------------------------------------
+# Validate Series behaviour with parametric tests
+# -------------------------------------------------
+
+# from hypothesis import given
+#
+# import polars as pl
+# from polars.testing import (
+#     series,
+#     verify_series_and_expr_api,
+# )
+#
+#
+# # TODO: exclude obvious/known overflow inside the strategy before commenting back in
+# @given(s=series(allowed_dtypes=_NUMERIC_COL_TYPES, name="a"))
+# def test_cum_agg_extra(s: pl.Series) -> None:
+#     # confirm that ops on generated Series match equivalent Expr call
+#     # note: testing codepath-equivalence, not correctness.
+#     for op in ("cumsum", "cummin", "cummax", "cumprod"):
+#         verify_series_and_expr_api(s, None, op)