In [2]:
%load_ext nb_mypy

Version 1.0.5


# Problem with TypeAliases

In [6]:
from typing import Generic, TypeVar, TypeAlias, Any, final
from abc import abstractmethod

from sagemaker.processing import Processor
from sagemaker.estimator import EstimatorBase

from sagemaker.workflow.steps import ProcessingStep, TrainingStep, CreateModelStep, TransformStep, \
    TuningStep, ConfigurableRetryStep

# todo: decide how stringent this should be while still allowing user to add new step types
StepType = TypeVar("StepType", bound=ConfigurableRetryStep)
StepActor: TypeAlias = Processor |  EstimatorBase  # todo: add more types as needed
ConcreteStep: TypeAlias = ProcessingStep | TrainingStep | CreateModelStep | TransformStep | TuningStep


class StepFactoryInterface(Generic[StepType]):
    # @abstractmethod
    # def __init__(self, step: StepType):
    #     self.step_cls = StepType

    @property
    @abstractmethod
    # Note: This needs to return a *concrete* subclass of ConfigurableRetryStep
    def step_cls(self) -> ConcreteStep:
        ...

    def step_type(self) -> str:
        map = {
            ProcessingStep: 'processor',
        }

        return map[self.step_cls]

    @abstractmethod
    def _instantiate_step_actor(self) -> StepActor:
        """Note: It is consistent with LSP for implementation to return a more *specific* type."""
        ...

    @abstractmethod
    def _construct_run_args(self) -> dict[str, Any]:  # todo: create dataclass for return types
        """Note: It is consistent with LSP for implementation to return a more *specific* type."""
        ...

    def type(self):
        return StepType

    @final
    def create_step(self) -> StepType:
        step_actor: StepActor = self._instantiate_step_actor()
        run_args: dict[str, Any] = self._construct_run_args()
        # Type checker complains because you can't instantiate a ConfigurableRetryStep directly
        # (it's abstract). Ignore for now, as I don't think it's easily possible to specify we're
        # passing a concrete subclass.
        return self.step_cls(
            **{step: step_actor},
            **run_args
        )

ProcessingStepFactoryInterface = StepFactoryInterface[ProcessingStep]
psf = ProcessingStepFactoryInterface()

<cell>32: [1m[31merror:[m Invalid index type [m[1m"ProcessingStep | TrainingStep | CreateModelStep | TransformStep | TuningStep"[m for [m[1m"dict[type[ProcessingStep], str]"[m; expected type [m[1m"type[ProcessingStep]"[m  [m[33m[index][m
<cell>54: [1m[31merror:[m [m[1m"ProcessingStep"[m not callable  [m[33m[operator][m
<cell>54: [1m[31merror:[m [m[1m"TrainingStep"[m not callable  [m[33m[operator][m
<cell>54: [1m[31merror:[m [m[1m"CreateModelStep"[m not callable  [m[33m[operator][m
<cell>54: [1m[31merror:[m [m[1m"TransformStep"[m not callable  [m[33m[operator][m
<cell>54: [1m[31merror:[m [m[1m"TuningStep"[m not callable  [m[33m[operator][m
<cell>55: [1m[31merror:[m Name [m[1m"step"[m is not defined  [m[33m[name-defined][m
<cell>60: [1m[31merror:[m Cannot instantiate abstract class [m[1m"BaseStepFactoryInterface"[m with abstract attributes [m[1m"_construct_run_args"[m, [m[1m"_instantiate_step_actor"[m and [

# Trying virtual subclasses


In [13]:
from typing import Generic, TypeVar, TypeAlias, Any, final
from abc import ABC, abstractmethod

from sagemaker.processing import Processor
from sagemaker.estimator import EstimatorBase

from sagemaker.workflow.steps import ProcessingStep, TrainingStep, CreateModelStep, TransformStep, \
    TuningStep, ConfigurableRetryStep

StepActor: TypeAlias = Processor |  EstimatorBase  # todo: add more types as needed
class ConcreteStep(ABC):
    ...

ConcreteStep.register(ProcessingStep)
ConcreteStep.register(TrainingStep)
ConcreteStep.register(CreateModelStep)
ConcreteStep.register(TransformStep)
ConcreteStep.register(TuningStep)

# todo: decide how stringent this should be while still allowing user to add new step types
StepType = TypeVar("StepType", bound=ConcreteStep)
class StepFactoryInterface(Generic[StepType]):
    @abstractmethod
    def _instantiate_step_actor(self) -> StepActor:
        """Note: It is consistent with LSP for implementation to return a more *specific* type."""
        ...

    @abstractmethod
    def construct_run_args(self) -> dict[str, Any]:  # todo: create dataclass for return types
        """Note: It is consistent with LSP for implementation to return a more *specific* type."""
        # Changed: needs to include stepactor, e.g. processor=processor
        ...

    @final
    def create_step(self) -> StepType:
        step_actor: StepActor = self._instantiate_step_actor()
        run_args: dict[str, Any] = self.construct_run_args()
        # Type checker complains because you can't instantiate a ConfigurableRetryStep directly
        # (it's abstract). Ignore for now, as I don't think it's easily possible to specify we're
        # passing a concrete subclass.
        return StepType(
            **run_args
        )

ProcessingStepFactoryInterface = StepFactoryInterface[ProcessingStep]
# psf = ProcessingStepFactoryInterface()

<cell>41: [1m[31merror:[m [m[1m"StepType"[m is a type variable and only valid in type context  [m[33m[misc][m
<cell>45: [1m[31merror:[m Type argument [m[1m"ProcessingStep"[m of [m[1m"BaseStepFactoryInterface"[m must be a subtype of [m[1m"ConcreteStep"[m  [m[33m[type-var][m
<cell>45: [1m[31merror:[m Value of type variable [m[1m"StepType"[m of [m[1m"BaseStepFactoryInterface"[m cannot be [m[1m"ProcessingStep"[m  [m[33m[type-var][m
<cell>46: [1m[31merror:[m Value of type variable [m[1m"StepType"[m of [m[1m"BaseStepFactoryInterface"[m cannot be [m[1m"ProcessingStep"[m  [m[33m[type-var][m
<cell>46: [1m[31merror:[m Cannot instantiate abstract class [m[1m"BaseStepFactoryInterface"[m with abstract attributes [m[1m"_instantiate_step_actor"[m and [m[1m"construct_run_args"[m  [m[33m[abstract][m


# Only create basic interface

In [16]:
from typing import Generic, TypeVar, TypeAlias, Any, final
from abc import abstractmethod

from sagemaker.processing import Processor
from sagemaker.estimator import EstimatorBase

from sagemaker.workflow.steps import ProcessingStep, TrainingStep, CreateModelStep, TransformStep, \
    TuningStep, ConfigurableRetryStep

# todo: decide how stringent this should be while still allowing user to add new step types
StepType = TypeVar("StepType", bound=ConfigurableRetryStep)


class StepFactoryInterface(Generic[StepType]):
    @abstractmethod
    def create_step(self) -> StepType:
        ...

ProcessingStepFactoryInterface = StepFactoryInterface[ProcessingStep]


In [17]:
from sagemaker.workflow.steps import ProcessingStep


class FrameworkProcessorFactory(ProcessingStepFactoryInterface):
    def create_step(self) -> ProcessingStep:
        ...

<cell>5: [1m[31merror:[m Missing return statement  [m[33m[empty-body][m
