From 326a501c52d395d0cb18f0991a0e640bf3d1832e Mon Sep 17 00:00:00 2001 From: mansenfranzen Date: Sun, 7 Jul 2019 14:32:45 +0200 Subject: [PATCH] Add `abstractmethod` for `BaseWrangler` to explicitly enforce implementation of all required methods before wrangler instantiation. Add `concretize_abstract_wrangler` helper function for testing classes which do not implement all abstract methods. --- src/pywrangler/base.py | 9 ++++++- src/pywrangler/util/testing.py | 43 +++++++++++++++++++++++++++++++++ tests/dask/test_base.py | 3 ++- tests/dask/test_benchmark.py | 5 ++-- tests/pandas/test_base.py | 5 ++-- tests/pandas/test_benchmark.py | 3 ++- tests/pyspark/test_base.py | 3 ++- tests/pyspark/test_benchmark.py | 3 ++- tests/test_base.py | 14 +++++++---- tests/test_interfaces.py | 26 ++++++++++++-------- tests/util/test_testing.py | 24 ++++++++++++++++++ 11 files changed, 114 insertions(+), 24 deletions(-) create mode 100644 src/pywrangler/util/testing.py create mode 100644 tests/util/test_testing.py diff --git a/src/pywrangler/base.py b/src/pywrangler/base.py index 57350c6..08f747b 100644 --- a/src/pywrangler/base.py +++ b/src/pywrangler/base.py @@ -3,11 +3,13 @@ """ +from abc import ABC, abstractmethod + from pywrangler.util import _pprint from pywrangler.util.helper import get_param_names -class BaseWrangler: +class BaseWrangler(ABC): """Defines the basic interface common to all data wranglers. In analogy to sklearn transformers (see link below), all wranglers have to @@ -42,10 +44,12 @@ class BaseWrangler: """ @property + @abstractmethod def preserves_sample_size(self) -> bool: raise NotImplementedError @property + @abstractmethod def computation_engine(self) -> str: raise NotImplementedError @@ -91,12 +95,15 @@ def set_params(self, **params): return self + @abstractmethod def fit(self, *args, **kwargs): raise NotImplementedError + @abstractmethod def transform(self, *args, **kwargs): raise NotImplementedError + @abstractmethod def fit_transform(self, *args, **kwargs): raise NotImplementedError diff --git a/src/pywrangler/util/testing.py b/src/pywrangler/util/testing.py new file mode 100644 index 0000000..8af0133 --- /dev/null +++ b/src/pywrangler/util/testing.py @@ -0,0 +1,43 @@ +"""This module contains testing utility. + +""" + +from typing import Type + + +def concretize_abstract_wrangler(wrangler_class: Type) -> Type: + """Makes abstract wrangler classes instantiable for testing purposes by + implementing abstract methods of `BaseWrangler`. + + Parameters + ---------- + wrangler_class: Type + Class object to inherit from while overriding abstract methods. + + Returns + ------- + concrete_class: Type + Concrete class usable for testing. + + """ + + class ConcreteWrangler(wrangler_class): + + @property + def preserves_sample_size(self): + return super().preserves_sample_size + + @property + def computation_engine(self): + return super().computation_engine + + def fit(self, *args, **kwargs): + return super().fit(*args, **kwargs) + + def fit_transform(self, *args, **kwargs): + return super().fit_transform(*args, **kwargs) + + def transform(self, *args, **kwargs): + return super().transform(*args, **kwargs) + + return ConcreteWrangler diff --git a/tests/dask/test_base.py b/tests/dask/test_base.py index 0e545eb..933d867 100644 --- a/tests/dask/test_base.py +++ b/tests/dask/test_base.py @@ -9,9 +9,10 @@ dask = pytest.importorskip("dask") # noqa: E402 from pywrangler.dask.base import DaskWrangler +from pywrangler.util.testing import concretize_abstract_wrangler def test_dask_base_wrangler_engine(): - wrangler = DaskWrangler() + wrangler = concretize_abstract_wrangler(DaskWrangler)() assert wrangler.computation_engine == "dask" diff --git a/tests/dask/test_benchmark.py b/tests/dask/test_benchmark.py index 1de8416..4a0ad87 100644 --- a/tests/dask/test_benchmark.py +++ b/tests/dask/test_benchmark.py @@ -21,6 +21,7 @@ DaskBaseProfiler ) from pywrangler.dask.base import DaskSingleNoFit +from pywrangler.util.testing import concretize_abstract_wrangler @pytest.fixture @@ -29,7 +30,7 @@ class DummyWrangler(DaskSingleNoFit): def transform(self, df): return df.mean() - return DummyWrangler() + return concretize_abstract_wrangler(DummyWrangler)() @pytest.fixture @@ -66,7 +67,7 @@ def transform(self, df): time.sleep(sleep) return df_out - return DummyWrangler() + return concretize_abstract_wrangler(DummyWrangler)() return create_wrangler diff --git a/tests/pandas/test_base.py b/tests/pandas/test_base.py index c7643aa..9f57a4f 100644 --- a/tests/pandas/test_base.py +++ b/tests/pandas/test_base.py @@ -7,12 +7,13 @@ import pandas as pd from pywrangler.pandas.base import PandasWrangler +from pywrangler.util.testing import concretize_abstract_wrangler pytestmark = pytest.mark.pandas def test_pandas_base_wrangler_engine(): - wrangler = PandasWrangler() + wrangler = concretize_abstract_wrangler(PandasWrangler)() assert wrangler.computation_engine == "pandas" @@ -24,7 +25,7 @@ class DummyWrangler(PandasWrangler): def preserves_sample_size(self): return preserves_sample_size - wrangler = DummyWrangler() + wrangler = concretize_abstract_wrangler(DummyWrangler)() df1 = pd.DataFrame([0] * 10) df2 = pd.DataFrame([0] * 20) diff --git a/tests/pandas/test_benchmark.py b/tests/pandas/test_benchmark.py index 605e225..0e8a275 100644 --- a/tests/pandas/test_benchmark.py +++ b/tests/pandas/test_benchmark.py @@ -15,6 +15,7 @@ PandasMemoryProfiler, PandasTimeProfiler ) +from pywrangler.util.testing import concretize_abstract_wrangler pytestmark = pytest.mark.pandas @@ -52,7 +53,7 @@ def transform(self, df): time.sleep(sleep) return df_out - return DummyWrangler() + return concretize_abstract_wrangler(DummyWrangler)() return create_wrangler diff --git a/tests/pyspark/test_base.py b/tests/pyspark/test_base.py index 277f301..23c1815 100644 --- a/tests/pyspark/test_base.py +++ b/tests/pyspark/test_base.py @@ -9,9 +9,10 @@ pyspark = pytest.importorskip("pyspark") # noqa: E402 from pywrangler.pyspark.base import PySparkWrangler +from pywrangler.util.testing import concretize_abstract_wrangler def test_spark_base_wrangler_engine(): - wrangler = PySparkWrangler() + wrangler = concretize_abstract_wrangler(PySparkWrangler)() assert wrangler.computation_engine == "pyspark" diff --git a/tests/pyspark/test_benchmark.py b/tests/pyspark/test_benchmark.py index c3e0712..4db9f79 100644 --- a/tests/pyspark/test_benchmark.py +++ b/tests/pyspark/test_benchmark.py @@ -13,6 +13,7 @@ from pywrangler.pyspark.base import PySparkSingleNoFit from pywrangler.pyspark.benchmark import PySparkTimeProfiler, \ PySparkBaseProfiler +from pywrangler.util.testing import concretize_abstract_wrangler SLEEP = 0.0001 @@ -24,7 +25,7 @@ def transform(self, df): time.sleep(SLEEP) return df - return DummyWrangler + return concretize_abstract_wrangler(DummyWrangler) def test_spark_time_profiler_fastest(spark, wrangler_sleeps): diff --git a/tests/test_base.py b/tests/test_base.py index ca2efff..40eee93 100644 --- a/tests/test_base.py +++ b/tests/test_base.py @@ -5,9 +5,10 @@ import pytest from pywrangler import base +from pywrangler.util.testing import concretize_abstract_wrangler -@pytest.fixture(scope="module") +@pytest.fixture(scope="session") def dummy_wrangler(): """Create DummyWrangler for testing BaseWrangler. @@ -26,23 +27,26 @@ def preserves_sample_size(self): def computation_engine(self): return "DummyEngine" - return DummyWrangler("arg_val", "kwarg_val") + return concretize_abstract_wrangler(DummyWrangler)("arg_val", "kwarg_val") def test_base_wrangler_not_implemented(): - wrangler = base.BaseWrangler() + with pytest.raises(TypeError): + base.BaseWrangler() + + empty_wrangler = concretize_abstract_wrangler(base.BaseWrangler)() test_attributes = ("preserves_sample_size", "computation_engine") test_methods = ("fit", "transform", "fit_transform") for test_attribute in test_attributes: with pytest.raises(NotImplementedError): - getattr(wrangler, test_attribute) + getattr(empty_wrangler, test_attribute) for test_method in test_methods: with pytest.raises(NotImplementedError): - getattr(wrangler, test_method)() + getattr(empty_wrangler, test_method)() def test_base_wrangler_get_params(dummy_wrangler): diff --git a/tests/test_interfaces.py b/tests/test_interfaces.py index 37adc75..027a7d7 100644 --- a/tests/test_interfaces.py +++ b/tests/test_interfaces.py @@ -5,10 +5,11 @@ import pytest from pywrangler import wranglers +from pywrangler.util.testing import concretize_abstract_wrangler @pytest.fixture(scope="module") -def interval_ident_kwargs(): +def ii_kwargs(): return {"marker_column": "marker_col", "marker_start": "start", @@ -19,26 +20,31 @@ def interval_ident_kwargs(): "target_column_name": "abc"} -def test_base_interval_identifier_init(interval_ident_kwargs): +def test_base_interval_identifier_init(ii_kwargs): - bii = wranglers.IntervalIdentifier(**interval_ident_kwargs) + wrangler = concretize_abstract_wrangler(wranglers.IntervalIdentifier) + bii = wrangler(**ii_kwargs) - assert bii.get_params() == interval_ident_kwargs + assert bii.get_params() == ii_kwargs -def test_base_interval_identifier_sort_length_exc(interval_ident_kwargs): +def test_base_interval_identifier_sort_length_exc(ii_kwargs): - incorrect_length = interval_ident_kwargs.copy() + incorrect_length = ii_kwargs.copy() incorrect_length["ascending"] = (True, ) + wrangler = concretize_abstract_wrangler(wranglers.IntervalIdentifier) + with pytest.raises(ValueError): - wranglers.IntervalIdentifier(**incorrect_length) + wrangler(**incorrect_length) -def test_base_interval_identifier_sort_keyword_exc(interval_ident_kwargs): +def test_base_interval_identifier_sort_keyword_exc(ii_kwargs): - incorrect_keyword = interval_ident_kwargs.copy() + incorrect_keyword = ii_kwargs.copy() incorrect_keyword["ascending"] = ("wrong keyword", "wrong keyword too") + wrangler = concretize_abstract_wrangler(wranglers.IntervalIdentifier) + with pytest.raises(ValueError): - wranglers.IntervalIdentifier(**incorrect_keyword) + wrangler(**incorrect_keyword) diff --git a/tests/util/test_testing.py b/tests/util/test_testing.py new file mode 100644 index 0000000..3d1d080 --- /dev/null +++ b/tests/util/test_testing.py @@ -0,0 +1,24 @@ +"""This module contains tests for testing utilities. + +""" + +import pytest + +from pywrangler.base import BaseWrangler +from pywrangler.util.testing import concretize_abstract_wrangler + + +def test_concretize_abstract_wrangler(): + + class Dummy(BaseWrangler): + @property + def computation_engine(self) -> str: + return "engine" + + concrete_class = concretize_abstract_wrangler(Dummy) + instance = concrete_class() + + assert instance.computation_engine == "engine" + + with pytest.raises(NotImplementedError): + instance.preserves_sample_size