Skip to content

Commit

Permalink
Add abstractmethod for BaseWrangler to explicitly enforce implementation of all required methods before wrangler instantiation.
Browse files Browse the repository at this point in the history
Add `concretize_abstract_wrangler` helper function for testing classes
which do not implement all abstract methods.
  • Loading branch information
mansenfranzen committed Jul 7, 2019
1 parent fbb404b commit 326a501
Show file tree
Hide file tree
Showing 11 changed files with 114 additions and 24 deletions.
9 changes: 8 additions & 1 deletion src/pywrangler/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,13 @@
"""

from abc import ABC, abstractmethod

from pywrangler.util import _pprint
from pywrangler.util.helper import get_param_names


class BaseWrangler:
class BaseWrangler(ABC):
"""Defines the basic interface common to all data wranglers.
In analogy to sklearn transformers (see link below), all wranglers have to
Expand Down Expand Up @@ -42,10 +44,12 @@ class BaseWrangler:
"""

@property
@abstractmethod
def preserves_sample_size(self) -> bool:
raise NotImplementedError

@property
@abstractmethod
def computation_engine(self) -> str:
raise NotImplementedError

Expand Down Expand Up @@ -91,12 +95,15 @@ def set_params(self, **params):

return self

@abstractmethod
def fit(self, *args, **kwargs):
raise NotImplementedError

@abstractmethod
def transform(self, *args, **kwargs):
raise NotImplementedError

@abstractmethod
def fit_transform(self, *args, **kwargs):
raise NotImplementedError

Expand Down
43 changes: 43 additions & 0 deletions src/pywrangler/util/testing.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
"""This module contains testing utility.
"""

from typing import Type


def concretize_abstract_wrangler(wrangler_class: Type) -> Type:
    """Make an abstract wrangler class instantiable for testing purposes.

    A subclass of `wrangler_class` is created which overrides every abstract
    member declared by `BaseWrangler` (`preserves_sample_size`,
    `computation_engine`, `fit`, `transform` and `fit_transform`). Each
    override only delegates to the parent implementation. Members already
    implemented by `wrangler_class` therefore keep their behaviour, while
    members left unimplemented still execute whatever the parent defines.

    Parameters
    ----------
    wrangler_class: Type
        Class object to inherit from while overriding abstract methods.

    Returns
    -------
    concrete_class: Type
        Concrete class usable for testing.

    """

    class ConcreteWrangler(wrangler_class):
        # Redefining a name in a subclass without `abstractmethod` removes it
        # from the abstract members tracked by `ABC`, which is what permits
        # instantiation. The bodies merely forward to the parent class.

        @property
        def preserves_sample_size(self):
            return super().preserves_sample_size

        @property
        def computation_engine(self):
            return super().computation_engine

        def fit(self, *args, **kwargs):
            return super().fit(*args, **kwargs)

        def fit_transform(self, *args, **kwargs):
            return super().fit_transform(*args, **kwargs)

        def transform(self, *args, **kwargs):
            return super().transform(*args, **kwargs)

    return ConcreteWrangler
3 changes: 2 additions & 1 deletion tests/dask/test_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,10 @@
dask = pytest.importorskip("dask") # noqa: E402

from pywrangler.dask.base import DaskWrangler
from pywrangler.util.testing import concretize_abstract_wrangler


def test_dask_base_wrangler_engine():
wrangler = DaskWrangler()
wrangler = concretize_abstract_wrangler(DaskWrangler)()

assert wrangler.computation_engine == "dask"
5 changes: 3 additions & 2 deletions tests/dask/test_benchmark.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
DaskBaseProfiler
)
from pywrangler.dask.base import DaskSingleNoFit
from pywrangler.util.testing import concretize_abstract_wrangler


@pytest.fixture
Expand All @@ -29,7 +30,7 @@ class DummyWrangler(DaskSingleNoFit):
def transform(self, df):
return df.mean()

return DummyWrangler()
return concretize_abstract_wrangler(DummyWrangler)()


@pytest.fixture
Expand Down Expand Up @@ -66,7 +67,7 @@ def transform(self, df):
time.sleep(sleep)
return df_out

return DummyWrangler()
return concretize_abstract_wrangler(DummyWrangler)()

return create_wrangler

Expand Down
5 changes: 3 additions & 2 deletions tests/pandas/test_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,13 @@
import pandas as pd

from pywrangler.pandas.base import PandasWrangler
from pywrangler.util.testing import concretize_abstract_wrangler

pytestmark = pytest.mark.pandas


def test_pandas_base_wrangler_engine():
wrangler = PandasWrangler()
wrangler = concretize_abstract_wrangler(PandasWrangler)()

assert wrangler.computation_engine == "pandas"

Expand All @@ -24,7 +25,7 @@ class DummyWrangler(PandasWrangler):
def preserves_sample_size(self):
return preserves_sample_size

wrangler = DummyWrangler()
wrangler = concretize_abstract_wrangler(DummyWrangler)()

df1 = pd.DataFrame([0] * 10)
df2 = pd.DataFrame([0] * 20)
Expand Down
3 changes: 2 additions & 1 deletion tests/pandas/test_benchmark.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
PandasMemoryProfiler,
PandasTimeProfiler
)
from pywrangler.util.testing import concretize_abstract_wrangler

pytestmark = pytest.mark.pandas

Expand Down Expand Up @@ -52,7 +53,7 @@ def transform(self, df):
time.sleep(sleep)
return df_out

return DummyWrangler()
return concretize_abstract_wrangler(DummyWrangler)()

return create_wrangler

Expand Down
3 changes: 2 additions & 1 deletion tests/pyspark/test_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,10 @@
pyspark = pytest.importorskip("pyspark") # noqa: E402

from pywrangler.pyspark.base import PySparkWrangler
from pywrangler.util.testing import concretize_abstract_wrangler


def test_spark_base_wrangler_engine():
wrangler = PySparkWrangler()
wrangler = concretize_abstract_wrangler(PySparkWrangler)()

assert wrangler.computation_engine == "pyspark"
3 changes: 2 additions & 1 deletion tests/pyspark/test_benchmark.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
from pywrangler.pyspark.base import PySparkSingleNoFit
from pywrangler.pyspark.benchmark import PySparkTimeProfiler, \
PySparkBaseProfiler
from pywrangler.util.testing import concretize_abstract_wrangler

SLEEP = 0.0001

Expand All @@ -24,7 +25,7 @@ def transform(self, df):
time.sleep(SLEEP)
return df

return DummyWrangler
return concretize_abstract_wrangler(DummyWrangler)


def test_spark_time_profiler_fastest(spark, wrangler_sleeps):
Expand Down
14 changes: 9 additions & 5 deletions tests/test_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,10 @@
import pytest

from pywrangler import base
from pywrangler.util.testing import concretize_abstract_wrangler


@pytest.fixture(scope="module")
@pytest.fixture(scope="session")
def dummy_wrangler():
"""Create DummyWrangler for testing BaseWrangler.
Expand All @@ -26,23 +27,26 @@ def preserves_sample_size(self):
def computation_engine(self):
return "DummyEngine"

return DummyWrangler("arg_val", "kwarg_val")
return concretize_abstract_wrangler(DummyWrangler)("arg_val", "kwarg_val")


def test_base_wrangler_not_implemented():

wrangler = base.BaseWrangler()
with pytest.raises(TypeError):
base.BaseWrangler()

empty_wrangler = concretize_abstract_wrangler(base.BaseWrangler)()

test_attributes = ("preserves_sample_size", "computation_engine")
test_methods = ("fit", "transform", "fit_transform")

for test_attribute in test_attributes:
with pytest.raises(NotImplementedError):
getattr(wrangler, test_attribute)
getattr(empty_wrangler, test_attribute)

for test_method in test_methods:
with pytest.raises(NotImplementedError):
getattr(wrangler, test_method)()
getattr(empty_wrangler, test_method)()


def test_base_wrangler_get_params(dummy_wrangler):
Expand Down
26 changes: 16 additions & 10 deletions tests/test_interfaces.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,11 @@
import pytest

from pywrangler import wranglers
from pywrangler.util.testing import concretize_abstract_wrangler


@pytest.fixture(scope="module")
def interval_ident_kwargs():
def ii_kwargs():

return {"marker_column": "marker_col",
"marker_start": "start",
Expand All @@ -19,26 +20,31 @@ def interval_ident_kwargs():
"target_column_name": "abc"}


def test_base_interval_identifier_init(interval_ident_kwargs):
def test_base_interval_identifier_init(ii_kwargs):

bii = wranglers.IntervalIdentifier(**interval_ident_kwargs)
wrangler = concretize_abstract_wrangler(wranglers.IntervalIdentifier)
bii = wrangler(**ii_kwargs)

assert bii.get_params() == interval_ident_kwargs
assert bii.get_params() == ii_kwargs


def test_base_interval_identifier_sort_length_exc(interval_ident_kwargs):
def test_base_interval_identifier_sort_length_exc(ii_kwargs):

incorrect_length = interval_ident_kwargs.copy()
incorrect_length = ii_kwargs.copy()
incorrect_length["ascending"] = (True, )

wrangler = concretize_abstract_wrangler(wranglers.IntervalIdentifier)

with pytest.raises(ValueError):
wranglers.IntervalIdentifier(**incorrect_length)
wrangler(**incorrect_length)


def test_base_interval_identifier_sort_keyword_exc(interval_ident_kwargs):
def test_base_interval_identifier_sort_keyword_exc(ii_kwargs):

incorrect_keyword = interval_ident_kwargs.copy()
incorrect_keyword = ii_kwargs.copy()
incorrect_keyword["ascending"] = ("wrong keyword", "wrong keyword too")

wrangler = concretize_abstract_wrangler(wranglers.IntervalIdentifier)

with pytest.raises(ValueError):
wranglers.IntervalIdentifier(**incorrect_keyword)
wrangler(**incorrect_keyword)
24 changes: 24 additions & 0 deletions tests/util/test_testing.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
"""This module contains tests for testing utilities.
"""

import pytest

from pywrangler.base import BaseWrangler
from pywrangler.util.testing import concretize_abstract_wrangler


def test_concretize_abstract_wrangler():
    """Concretized classes are instantiable and delegate to their parent."""

    class Dummy(BaseWrangler):
        @property
        def computation_engine(self) -> str:
            return "engine"

    # Precondition: Dummy still has unimplemented abstract members, so it
    # cannot be instantiated without the helper.
    with pytest.raises(TypeError):
        Dummy()

    concrete_class = concretize_abstract_wrangler(Dummy)
    instance = concrete_class()

    # Implemented members keep the behaviour defined on Dummy.
    assert instance.computation_engine == "engine"

    # Unimplemented members fall through to the BaseWrangler body.
    with pytest.raises(NotImplementedError):
        instance.preserves_sample_size

0 comments on commit 326a501

Please sign in to comment.