diff --git a/src/sghi/etl/core.py b/src/sghi/etl/core.py
index 7b01860..9b1e594 100644
--- a/src/sghi/etl/core.py
+++ b/src/sghi/etl/core.py
@@ -5,6 +5,8 @@
 from abc import ABCMeta, abstractmethod
 from typing import TYPE_CHECKING, Generic, TypeVar
 
+from typing_extensions import deprecated
+
 from sghi.disposable import Disposable
 
 if TYPE_CHECKING:
@@ -71,11 +73,11 @@ class Processor(Disposable, Generic[_RDT, _PDT], metaclass=ABCMeta):
 
     This class defines a blueprint for processing raw data and converting it
     into processed data ready for further consumption downstream. Subclasses
-    implementing this interface should override the :meth:`process` method to
+    implementing this interface should override the :meth:`apply` method to
     specify how the data processing occurs.
 
     In a typical ETL workflow, the `Transform` phase is functionally equivalent
-    to the ``process`` method of this class. Accordingly, a ``Processor`` is
+    to the ``apply`` method of this class. Accordingly, a ``Processor`` is
     thus executed immediately after the :class:`~sghi.etl.core.Source` finishes
     in an SGHI ETL workflow. The raw data obtained from the ``Source`` is taken
     as input. The output of the ``Processor`` is then passed to a
@@ -93,17 +95,17 @@ def __call__(self, raw_data: _RDT) -> _PDT:
         """Transform raw data into processed, clean data and return it.
 
         Call this ``Processor`` as a callable. Delegate actual call to
-        :meth:`process`.
+        :meth:`apply`.
 
         :param raw_data: The unprocessed data drawn from a `Source`.
 
         :return: The processed, cleaned data that is ready for further
             consumption downstream.
         """
-        return self.process(raw_data)
+        return self.apply(raw_data)
 
     @abstractmethod
-    def process(self, raw_data: _RDT) -> _PDT:
+    def apply(self, raw_data: _RDT) -> _PDT:
         """Transform raw data into processed, clean data and return it.
 
         :param raw_data: The unprocessed data drawn from a `Source`.
@@ -113,6 +115,23 @@ def process(self, raw_data: _RDT) -> _PDT:
         """
         ...
 
+    @deprecated('Use "apply" instead. Will be removed in 2.0', stacklevel=1)
+    def process(self, raw_data: _RDT) -> _PDT:
+        """Transform raw data into processed, clean data and return it.
+
+        .. warning::
+
+            This method is deprecated and will be removed in version 2.0.
+            Clients of this class should use the :meth:`apply` method
+            instead, which this method delegates to.
+
+        :param raw_data: The unprocessed data drawn from a `Source`.
+
+        :return: The processed, cleaned data that is ready for further
+            consumption downstream.
+        """
+        return self.apply(raw_data)
+
 
 class Sink(Disposable, Generic[_PDT], metaclass=ABCMeta):
     """An entity that consumes processed data.
diff --git a/test/sghi/etl/core_tests.py b/test/sghi/etl/core_tests.py
index 60e2c15..cafb038 100644
--- a/test/sghi/etl/core_tests.py
+++ b/test/sghi/etl/core_tests.py
@@ -6,6 +6,7 @@
 from dataclasses import dataclass, field
 from unittest import TestCase
 
+import pytest
 from typing_extensions import override
 
 from sghi.disposable import not_disposed
@@ -46,7 +47,7 @@ class IntsToStrings(Processor[Iterable[int], Iterable[str]]):
 
     @not_disposed
     @override
-    def process(self, raw_data: Iterable[int]) -> Iterable[str]:
+    def apply(self, raw_data: Iterable[int]) -> Iterable[str]:
         yield from map(str, raw_data)
 
     @property
@@ -101,8 +102,7 @@ def test_invoking_source_as_a_callable_returns_expected_value(
 
         In short, ensure that invoking a ``Source`` instance as a callable
         delegates the actual call to :meth:`~sghi.etl.core.Source.draw`.
-        """  # noqa: D202, D205
-
+        """  # noqa: D205
         instance1: IntsSupplier
         instance2: IntsSupplier
         max_ints: int = 4
@@ -138,10 +138,8 @@ def test_invoking_processor_as_a_callable_returns_expected_value(
         value when invoked as a callable.
 
         In short, ensure that invoking a ``Processor`` instance as a callable
-        delegates the actual call to
-        :meth:`~sghi.etl.core.Processor.process`.
-        """  # noqa: D202, D205
-
+        delegates the actual call to :meth:`~sghi.etl.core.Processor.apply`.
+        """  # noqa: D205
         raw_values = tuple(self._source())
 
         instance1: IntsToStrings
@@ -149,11 +147,46 @@ def test_invoking_processor_as_a_callable_returns_expected_value(
 
         with IntsToStrings() as instance1, IntsToStrings() as instance2:
             assert (
-                tuple(instance1.process(raw_values))
+                tuple(instance1.apply(raw_values))
                 == tuple(instance2(raw_values))
                 == ("0", "1", "2", "3", "4")
             )
 
+    def test_invoking_the_process_method_returns_expected_value(self) -> None:
+        """:meth:`~sghi.etl.core.Processor.process` should return the expected
+        value.
+
+        That is, invoking the ``process`` method of a ``Processor`` instance
+        should return the same value as invoking the
+        :meth:`~sghi.etl.core.Processor.apply` method of the same instance.
+        """  # noqa: D205
+        raw_values = tuple(self._source())
+
+        instance1: IntsToStrings
+        instance2: IntsToStrings
+
+        with IntsToStrings() as instance1, IntsToStrings() as instance2:
+            assert (
+                tuple(instance1.apply(raw_values))
+                == tuple(instance2.process(raw_values))  # type: ignore
+                == ("0", "1", "2", "3", "4")
+            )
+
+    def test_invoking_the_process_method_raises_a_deprecation_warning(
+        self,
+    ) -> None:
+        """:meth:`~sghi.etl.core.Processor.process` is deprecated for removal.
+
+        Ensure that invoking the ``process`` method raises a
+        ``DeprecationWarning``.
+        """
+        raw_values = tuple(self._source())
+        instance: IntsToStrings
+        # Dispose the instance even when the expected warning is not raised.
+        with IntsToStrings() as instance:
+            with pytest.warns(DeprecationWarning, match='Use "apply" instead'):
+                instance.process(raw_values)  # type: ignore
+
 
 class TestSink(TestCase):
     """Tests for the :class:`sghi.etl.core.Processor` interface.
@@ -180,8 +213,7 @@ def test_invoking_sink_as_a_callable_returns_expected_value(self) -> None:
 
         In short, ensure that invoking a ``Sink`` instance as a callable
         delegates the actual call to :meth:`~sghi.etl.core.Sink.drain`.
-        """  # noqa: D202, D205
-
+        """  # noqa: D205
         processed_data: tuple[str, ...]
         processed_data = tuple(self._processor(self._source()))
 