diff --git a/mlrun/frameworks/_common/__init__.py b/mlrun/frameworks/_common/__init__.py
index 93b2866920b..4ea71b6c01a 100644
--- a/mlrun/frameworks/_common/__init__.py
+++ b/mlrun/frameworks/_common/__init__.py
@@ -1,6 +1,7 @@
 # flake8: noqa - this is until we take care of the F401 violations with respect to __all__ & sphinx
-from .artifacts_library import ArtifactsLibrary, get_plans
-from .mlrun_interface import MLRunInterface, RestorationInformation
+from .artifacts_library import ArtifactsLibrary
+from .mlrun_interface import MLRunInterface
 from .model_handler import ModelHandler, with_mlrun_interface, without_mlrun_interface
 from .plan import Plan
-from .utils import ExtraDataType, IOSampleType, ModelType, PathType, TrackableType
+from .producer import Producer
+from .utils import CommonTypes, CommonUtils, LoggingMode
diff --git a/mlrun/frameworks/_common/artifacts_library.py b/mlrun/frameworks/_common/artifacts_library.py
index 25d5fd73dd4..c8efa1aa7ec 100644
--- a/mlrun/frameworks/_common/artifacts_library.py
+++ b/mlrun/frameworks/_common/artifacts_library.py
@@ -18,80 +18,165 @@ class 'init_artifact' class method:
         some_artifact = SomeArtifactPlan
     """
 
+    # A constant name for the context parameter to use for passing a plans configuration:
+    CONTEXT_PARAMETER = "_artifacts"
+
+    # TODO: Finish support for custom plans.
     @classmethod
-    def from_dict(cls, plans_dictionary: Dict[str, dict]) -> List[Plan]:
+    def get_plans(
+        cls,
+        artifacts: Union[List[Plan], Dict[str, dict], List[str]] = None,
+        context: mlrun.MLClientCtx = None,
+        include_default: bool = True,
+        # custom_plans: dict = None, :param custom_plans: Custom user plans objects to initialize from.
+        **default_kwargs,
+    ) -> List[Plan]:
+        """
+        Get plans for a run. The plans will be taken from the provided artifacts / configuration via code, from a
+        provided configuration via the MLRun context and, if 'include_default' is True, from the framework artifact
+        library's defaults.
+
+        :param artifacts:       The artifacts parameter passed to the function. Can be passed as a configuration
+                                dictionary or an initialized plans list that will simply be returned.
+        :param context:         A context to look in if the configuration was passed as a parameter.
+        :param include_default: Whether to include the default in addition to the provided plans. Defaulted to True.
+        :param default_kwargs:  Additional keyword arguments to pass to the 'default' method of the given artifact
+                                library class.
+
+        :return: The plans list.
+
+        :raise MLRunInvalidArgumentError: If the plans were not passed in a list or a dictionary.
+        """
+        # Generate the available plans dictionary:
+        available_plans = cls._get_library_plans()
+        # if custom_plans is not None:
+        #     available_plans = {**available_plans, **custom_plans}
+
+        # Initialize the plans list:
+        parsed_plans = []  # type: List[Plan]
+
+        # Get the user input plans:
+        artifacts_from_context = None
+        if context is not None:
+            artifacts_from_context = context.parameters.get(cls.CONTEXT_PARAMETER, None)
+        for user_input in [artifacts, artifacts_from_context]:
+            if user_input is not None:
+                if isinstance(user_input, dict):
+                    parsed_plans += cls._from_dict(
+                        requested_plans=user_input, available_plans=available_plans
+                    )
+                elif isinstance(user_input, list):
+                    parsed_plans += cls._from_list(
+                        requested_plans=user_input, available_plans=available_plans
+                    )
+                else:
+                    raise mlrun.errors.MLRunInvalidArgumentError(
+                        f"Artifacts plans are expected to be given in a list or a dictionary, "
+                        f"got: '{type(user_input)}'."
+ ) + + # Get the library's default: + if include_default: + parsed_plans += cls.default(**default_kwargs) + + return parsed_plans + + @classmethod + @abstractmethod + def default(cls, **kwargs) -> List[Plan]: + """ + Get the default artifacts plans list of this framework's library. + + :return: The default artifacts plans list. + """ + pass + + @classmethod + def _get_library_plans(cls) -> Dict[str, Type[Plan]]: + """ + Get all the supported plans in this library. + + :return: The library's plans. + """ + return { # type: Dict[str, Type[Plan]] + plan_name: plan_class + for plan_name, plan_class in cls.__dict__.items() + if isinstance(plan_class, type) and not plan_name.startswith("_") + } + + @staticmethod + def _from_dict( + requested_plans: Dict[str, dict], available_plans: Dict[str, Type[Plan]] + ) -> List[Plan]: """ Initialize a list of plans from a given configuration dictionary. The configuration is expected to be a dictionary of plans and their initialization parameters in the following format: + { PLAN_NAME: { PARAMETER_NAME: PARAMETER_VALUE, - ... }, - ... } - :param plans_dictionary: The configurations of plans. + :param requested_plans: The configurations of plans to initialize. + :param available_plans: The available plans to initialize from. :return: The initialized plans list. :raise MLRunInvalidArgumentError: If the configuration was incorrect due to unsupported plan or miss use of parameters in the plan initializer. """ - # Get all of the supported plans in this library: - library_plans = cls._get_plans() - - # Go through the given configuration an initialize the plans accordingly: + # Go through the given configuration and initialize the plans accordingly: plans = [] # type: List[Plan] - for plan_name, plan_parameters in plans_dictionary.items(): + for plan_name, plan_parameters in requested_plans.items(): # Validate the plan is in the library: - if plan_name not in library_plans: + if plan_name not in available_plans: raise mlrun.errors.MLRunInvalidArgumentError( - f"The given artifact '{plan_name}' is not supported in this artifacts library. The supported" - f"artifacts are: {list(library_plans.keys())}." + f"The given artifact '{plan_name}' is not known in this artifacts library. The known artifacts " + f"are: {list(available_plans.keys())}." ) # Try to create the plan with the given parameters: try: - plans.append(library_plans[plan_name](**plan_parameters)) + plans.append(available_plans[plan_name](**plan_parameters)) except TypeError as error: - # A TypeError was raised, that means there was a miss use of parameters in the plan's '__init__' method: + # A TypeError was raised, that means there was a misuse of parameters in the plan's '__init__' method: raise mlrun.MLRunInvalidArgumentError( - f"The following artifact: '{plan_name}' cannot be parsed due to miss use of parameters: {error}" + f"The following artifact: '{plan_name}' cannot be parsed due to misuse of parameters: {error}" ) return plans - @classmethod - def from_list(cls, plans_list: List[str]): + @staticmethod + def _from_list( + requested_plans: List[str], available_plans: Dict[str, Type[Plan]] + ) -> List[Plan]: """ Initialize a list of plans from a given configuration list. The configuration is expected to be a list of plans names to be initialized with their default configuration. - :param plans_list: The list of plans names to initialize. + :param requested_plans: The plans to initialize. + :param available_plans: The available plans to initialize from. :return: The initialized plans list. 
:raise MLRunInvalidArgumentError: If the configuration was incorrect due to unsupported plan. """ - # Get all of the supported plans in this library: - library_plans = cls._get_plans() - - # Go through the given configuration an initialize the plans accordingly: + # Go through the given configuration and initialize the plans accordingly: plans = [] # type: List[Plan] - for plan in plans_list: + for plan in requested_plans: # Initialized plan: if isinstance(plan, Plan): plans.append(plan) # Plan name that needed to be parsed: elif isinstance(plan, str): # Validate the plan is in the library: - if plan not in library_plans: + if plan not in available_plans: raise mlrun.errors.MLRunInvalidArgumentError( - f"The given artifact '{plan}' is not supported in this artifacts library. The supported" - f"artifacts are: {list(library_plans.keys())}." + f"The given artifact '{plan}' is not known in this artifacts library. The known artifacts " + f"are: {list(available_plans.keys())}." ) # Create the plan and collect it: - plans.append(library_plans[plan]()) + plans.append(available_plans[plan]()) # Unsupported type: else: raise mlrun.errors.MLRunInvalidArgumentError( @@ -100,81 +185,3 @@ def from_list(cls, plans_list: List[str]): ) return plans - - @classmethod - @abstractmethod - def default(cls, **kwargs) -> List[Plan]: - """ - Get the default artifacts plans list of this framework's library. - - :return: The default artifacts plans list. - """ - pass - - @classmethod - def _get_plans(cls) -> Dict[str, Type[Plan]]: - """ - Get all of the supported plans in this library. - - :return: The library's plans. - """ - return { # type: Dict[str, Type[Plan]] - plan_name: plan_class - for plan_name, plan_class in cls.__dict__.items() - if isinstance(plan_class, type) and not plan_name.startswith("_") - } - - -# A constant name for the context parameter to use for passing a plans configuration: -ARTIFACTS_CONTEXT_PARAMETER = "_artifacts" - - -def get_plans( - artifacts_library: Type[ArtifactsLibrary], - artifacts: Union[List[Plan], Dict[str, dict], List[str]] = None, - context: mlrun.MLClientCtx = None, - include_default: bool = True, - **default_kwargs, -) -> List[Plan]: - """ - Get plans for a run. The plans will be taken from the provided artifacts / configuration via code, from provided - configuration via MLRun context and if the 'include_default' is True, from the framework artifact library's - defaults. - - :param artifacts_library: The framework's artifacts library class to get its defaults. - :param artifacts: The artifacts parameter passed to the function. Can be passed as a configuration - dictionary or an initialized plans list that will simply be returned. - :param context: A context to look in if the configuration was passed as a parameter. - :param include_default: Whether to include the default in addition to the provided plans. Defaulted to True. - :param default_kwargs: Additional key word arguments to pass to the 'default' method of the given artifact - library class. - - :return: The plans list. - - :raise MLRunInvalidArgumentError: If the plans were not passed in a list or a dictionary. 
- """ - # Setup the plans list: - parsed_plans = [] # type: List[Plan] - - # Get the user input plans: - artifacts_from_context = None - if context is not None: - artifacts_from_context = context.parameters.get( - ARTIFACTS_CONTEXT_PARAMETER, None - ) - for user_input in [artifacts, artifacts_from_context]: - if user_input is not None: - if isinstance(user_input, dict): - parsed_plans += artifacts_library.from_dict(plans_dictionary=user_input) - elif isinstance(user_input, list): - parsed_plans += artifacts_library.from_list(plans_list=user_input) - else: - raise mlrun.errors.MLRunInvalidArgumentError( - f"Artifacts plans are expected to be given in a list or a dictionary, got: '{type(user_input)}'." - ) - - # Get the library's default: - if include_default: - parsed_plans += artifacts_library.default(**default_kwargs) - - return parsed_plans diff --git a/mlrun/frameworks/_common/mlrun_interface.py b/mlrun/frameworks/_common/mlrun_interface.py index 69a94585dad..0a5369c3b65 100644 --- a/mlrun/frameworks/_common/mlrun_interface.py +++ b/mlrun/frameworks/_common/mlrun_interface.py @@ -2,19 +2,13 @@ import functools import inspect from abc import ABC -from types import MethodType -from typing import Any, Dict, Generic, List, Tuple +from types import FunctionType, MethodType +from typing import Any, Dict, Generic, List, Tuple, Union -from .utils import MLRunInterfaceableType +from .utils import CommonTypes -RestorationInformation = Tuple[ - Dict[str, Any], # Interface properties. - Dict[str, Any], # Replaced properties. - List[str], # Replaced methods and functions. -] - -class MLRunInterface(ABC, Generic[MLRunInterfaceableType]): +class MLRunInterface(ABC, Generic[CommonTypes.MLRunInterfaceableType]): """ An abstract class for enriching an object interface with the properties, methods and functions written below. @@ -23,8 +17,8 @@ class MLRunInterface(ABC, Generic[MLRunInterfaceableType]): In order to replace object's attributes, the attributes to replace are needed to be added to the attributes: '_REPLACED_PROPERTIES', '_REPLACED_METHODS' and '_REPLACED_FUNCTIONS'. The original attribute will be kept in a - backup attribute with the prefix noted in '_ORIGINAL_ATTRIBUTE_NAME'. Replacing functions / methods will be the one - located by looking for the prefix noted in '_REPLACING_ATTRIBUTE_NAME'. The replacing function / method can be a + backup attribute with the prefix noted in '_ORIGINAL_ATTRIBUTE_NAME'. The replacing functions / methods will be + located by looking for the prefix noted in '_REPLACING_ATTRIBUTE_NAME'. The replacing function / method can be an MLRunInterface class method that return a function / method. For example: if "x" is in the list then the method "object.x" will be stored as "object.original_x" and "object.x" @@ -42,6 +36,8 @@ class MLRunInterface(ABC, Generic[MLRunInterfaceableType]): _REPLACED_METHODS = [] # type: List[str] _REPLACED_FUNCTIONS = [] # type: List[str] + # TODO: Add _OPTIONALLY_REPLACED_PROPERTIES, _OPTIONALLY_REPLACED_METHODS and _OPTIONALLY_REPLACED_FUNCTIONS + # Name template for the replaced attribute to be stored as in the object. 
    _ORIGINAL_ATTRIBUTE_NAME = "original_{}"
@@ -51,38 +47,40 @@ class MLRunInterface(ABC, Generic[MLRunInterfaceableType]):
     @classmethod
     def add_interface(
         cls,
-        obj: MLRunInterfaceableType,
-        restoration_information: RestorationInformation = None,
+        obj: CommonTypes.MLRunInterfaceableType,
+        restoration: CommonTypes.MLRunInterfaceRestorationType = None,
     ):
         """
         Enrich the object with this interface properties, methods and functions so it will have this framework MLRun's
         features.
 
-        :param obj:                     The object to enrich his interface.
-        :param restoration_information: Restoration information tuple as returned from 'remove_interface' in order to
-                                        add the interface in a certain state.
+        :param obj:         The object to enrich its interface.
+        :param restoration: Restoration information tuple as returned from 'remove_interface' in order to add the
+                            interface in a certain state.
         """
         # Set default value to the restoration data:
-        if restoration_information is None:
-            restoration_information = (None, None, None)
+        if restoration is None:
+            restoration = (None, None, None)
 
         # Add the MLRun properties:
         cls._insert_properties(
             obj=obj,
-            properties=restoration_information[0],
+            properties=restoration[0],
         )
 
         # Replace the object's properties in MLRun's properties:
-        cls._replace_properties(obj=obj, properties=restoration_information[1])
+        cls._replace_properties(obj=obj, properties=restoration[1])
 
         # Add the MLRun functions:
         cls._insert_functions(obj=obj)
 
         # Replace the object's functions / methods in MLRun's functions / methods:
-        cls._replace_functions(obj=obj, functions=restoration_information[2])
+        cls._replace_functions(obj=obj, functions=restoration[2])
 
     @classmethod
-    def remove_interface(cls, obj: MLRunInterfaceableType) -> RestorationInformation:
+    def remove_interface(
+        cls, obj: CommonTypes.MLRunInterfaceableType
+    ) -> CommonTypes.MLRunInterfaceRestorationType:
         """
         Remove the MLRun features from the given object. The properties and replaced attributes found in the object
         will be returned.
@@ -134,7 +132,7 @@ def remove_interface(cls, obj: MLRunInterfaceableType) -> RestorationInformation:
         return properties, replaced_properties, replaced_functions
 
     @classmethod
-    def is_applied(cls, obj: MLRunInterfaceableType) -> bool:
+    def is_applied(cls, obj: CommonTypes.MLRunInterfaceableType) -> bool:
         """
         Check if the given object has MLRun interface attributes in it. Interface is applied if all of its attributes
         are found in the object. If only replaced attributes are configured in the interface, then the interface is
@@ -160,7 +158,7 @@ def is_applied(cls, obj: MLRunInterfaceableType) -> bool:
     @classmethod
     def _insert_properties(
         cls,
-        obj: MLRunInterfaceableType,
+        obj: CommonTypes.MLRunInterfaceableType,
         properties: Dict[str, Any] = None,
     ):
         """
@@ -201,7 +199,7 @@ def _insert_properties(
         setattr(obj, property_name, value)
 
     @classmethod
-    def _insert_functions(cls, obj: MLRunInterfaceableType):
+    def _insert_functions(cls, obj: CommonTypes.MLRunInterfaceableType):
         """
         Insert the functions / methods of the interface to the object.
 
@@ -226,7 +224,7 @@ def _insert_functions(cls, obj: MLRunInterfaceableType):
 
     @classmethod
     def _replace_properties(
-        cls, obj: MLRunInterfaceableType, properties: Dict[str, Any] = None
+        cls, obj: CommonTypes.MLRunInterfaceableType, properties: Dict[str, Any] = None
     ):
         """
         Replace the properties of the given object according to the configuration in the MLRun interface.
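A minimal sketch of how the renamed `restoration` flow is meant to be used. The `ExampleInterface` subclass and the `model` object are illustrative assumptions (not part of this diff), and the `mlrun_` replacing-prefix is assumed from `_REPLACING_ATTRIBUTE_NAME`:

    from mlrun.frameworks._common import MLRunInterface

    class ExampleInterface(MLRunInterface):
        # 'fit' will be backed up as 'original_fit' and replaced by the 'mlrun_fit' result:
        _REPLACED_METHODS = ["fit"]

        @classmethod
        def mlrun_fit(cls):
            # A replacing method returned from an interface class method:
            def wrapper(self, *args, **kwargs):
                # Framework-specific MLRun logic would go here.
                return self.original_fit(*args, **kwargs)

            return wrapper

    ExampleInterface.add_interface(obj=model)  # 'model' is any framework object (hypothetical).
    restoration = ExampleInterface.remove_interface(obj=model)  # -> (properties, replaced properties, replaced methods)
    ExampleInterface.add_interface(obj=model, restoration=restoration)  # Re-apply in the same state.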
@@ -270,7 +268,7 @@
 
     @classmethod
     def _replace_functions(
-        cls, obj: MLRunInterfaceableType, functions: List[str] = None
+        cls, obj: CommonTypes.MLRunInterfaceableType, functions: List[str] = None
     ):
         """
         Replace the functions / methods of the given object according to the configuration in the MLRun interface.
@@ -309,7 +307,7 @@ def _replace_functions(
     @classmethod
     def _replace_property(
         cls,
-        obj: MLRunInterfaceableType,
+        obj: CommonTypes.MLRunInterfaceableType,
         property_name: str,
         property_value: Any = None,
         include_none: bool = False,
@@ -340,7 +338,9 @@ def _replace_property(
         setattr(obj, property_name, property_value)
 
     @classmethod
-    def _replace_function(cls, obj: MLRunInterfaceableType, function_name: str):
+    def _replace_function(
+        cls, obj: CommonTypes.MLRunInterfaceableType, function_name: str
+    ):
         """
         Replace the method / function in the object with the configured method / function in this interface. The
         original method / function will be stored in a backup attribute with the prefix noted in
@@ -378,7 +378,9 @@ def _replace_function(cls, obj: MLRunInterfaceableType, function_name: str):
         setattr(obj, function_name, replacing_function)
 
     @classmethod
-    def _restore_attribute(cls, obj: MLRunInterfaceableType, attribute_name: str):
+    def _restore_attribute(
+        cls, obj: CommonTypes.MLRunInterfaceableType, attribute_name: str
+    ):
         """
         Restore the replaced attribute (property, method or function) in the object, removing the backup attribute as
         well.
@@ -396,3 +398,56 @@ def _restore_attribute(cls, obj: MLRunInterfaceableType, attribute_name: str):
         # Remove the original backup attribute:
         setattr(obj, original_attribute_name, None)
         delattr(obj, original_attribute_name)
+
+    @staticmethod
+    def _get_function_argument(
+        func: FunctionType,
+        argument_name: str,
+        passed_args: tuple = None,
+        passed_kwargs: dict = None,
+        default_value: Any = None,
+    ) -> Tuple[Any, Union[str, int, None]]:
+        """
+        Get a passed argument (from *args or **kwargs) to a function. If the argument was not found, the default value
+        will be returned. In addition, the keyword of the argument in `kwargs` or the index of the argument in `args`
+        will be returned as well.
+
+        :param func:          The function that is being called.
+        :param argument_name: The argument name to get.
+        :param passed_args:   The passed arguments to the function (*args).
+        :param passed_kwargs: The passed keyword arguments to the function (**kwargs).
+        :param default_value: The default value to use in case it was not passed.
+
+        :return: A tuple of:
+                 [0] = The argument value or the default value if it was not found in any of the arguments.
+                 [1] = If it was found in `kwargs` - the keyword of the argument. If it was found in `args` - the index
+                       of the argument. If it was not found, None.
+        """
+        # Set default values for arguments data structures:
+        if passed_args is None:
+            passed_args = []
+        if passed_kwargs is None:
+            passed_kwargs = {}
+
+        # Check in the keyword arguments first:
+        if argument_name in passed_kwargs:
+            return passed_kwargs[argument_name], argument_name
+
+        # Check in the arguments, inspecting the function's parameters to get the right index:
+        func_parameters = {
+            parameter_name: i
+            for i, parameter_name in enumerate(
+                inspect.signature(func).parameters.keys()
+            )
+        }
+        if (
+            argument_name in func_parameters
+            and len(passed_args) >= func_parameters[argument_name] + 1
+        ):
+            return (
+                passed_args[func_parameters[argument_name]],
+                func_parameters[argument_name],
+            )
+
+        # The argument name was not found:
+        return default_value, None
diff --git a/mlrun/frameworks/_common/model_handler.py b/mlrun/frameworks/_common/model_handler.py
index 64cd58ac78d..9df8d7372dd 100644
--- a/mlrun/frameworks/_common/model_handler.py
+++ b/mlrun/frameworks/_common/model_handler.py
@@ -12,20 +12,20 @@
 import mlrun
 from mlrun.artifacts import Artifact, ModelArtifact
-from mlrun.data_types import ValueType
 from mlrun.execution import MLClientCtx
 from mlrun.features import Feature
 
 from .mlrun_interface import MLRunInterface
-from .utils import ExtraDataType, IOSampleType, ModelType, PathType
+from .utils import CommonTypes, CommonUtils
 
 
-class ModelHandler(ABC, Generic[ModelType, IOSampleType]):
+class ModelHandler(ABC, Generic[CommonTypes.ModelType, CommonTypes.IOSampleType]):
     """
-    An abstract interface for handling a model of the supported frameworks.
+    An abstract interface for handling a model of the supported frameworks. The handler will support loading, saving
+    and logging a model with all the required modules, custom objects and collected information about it.
     """
 
-    # Framework name:
+    # Framework name (Must be set when inheriting the class):
    FRAMEWORK_NAME = None  # type: str
 
     # Constant artifact names:
@@ -39,18 +39,22 @@ class ModelHandler(ABC, Generic[ModelType, IOSampleType]):
 
     def __init__(
         self,
-        model: ModelType = None,
-        model_path: PathType = None,
+        model: CommonTypes.ModelType = None,
+        model_path: CommonTypes.PathType = None,
         model_name: str = None,
-        modules_map: Union[Dict[str, Union[None, str, List[str]]], PathType] = None,
-        custom_objects_map: Union[Dict[str, Union[str, List[str]]], PathType] = None,
-        custom_objects_directory: PathType = None,
+        modules_map: Union[
+            Dict[str, Union[None, str, List[str]]], CommonTypes.PathType
+        ] = None,
+        custom_objects_map: Union[
+            Dict[str, Union[str, List[str]]], CommonTypes.PathType
+        ] = None,
+        custom_objects_directory: CommonTypes.PathType = None,
         context: MLClientCtx = None,
         **kwargs,
     ):
         """
-        Initialize the handler. The model can be set here so it won't require loading. Note you must provide at least
-        one of 'model' and 'model_path'. If a model is not given, the files in the model path will be collected
+        Initialize the handler. The model can be set here, so it won't require loading. Note you must provide at least
+        one of `model` and `model_path`. If a model is not given, the files in the model path will be collected
         automatically to be ready for loading.
 
         :param model:                    Model to handle or None in case a loading parameters were supplied.
         :param model_path:               Path to the directory with the model files. Can be passed as a model object
                                          path in the following format:
                                          'store://models/<PROJECT_NAME>/<MODEL_NAME>:<VERSION>'
         :param model_name:               The model name for saving and logging the model:
+
+                                         * Mandatory for loading the model from a local path.
* If given a logged model (store model path) it will be read from the artifact. * If given a loaded model object and the model name is None, the name will be set to the model's object name / class. + :param modules_map: A dictionary of all the modules required for loading the model. Each key is a path to a module and its value is the object name to import from it. All the modules will be imported globally. If multiple objects needed to be imported from the same module a list can be given. The map can be passed as a path to a json file as well. For example: + { "module1": None, # => import module1 "module2": ["func1", "func2"], # => from module2 import func1, func2 "module3.sub_module": "func3", # => from module3.sub_module import func3 } + If the model path given is of a store object, the modules map will be read from the logged modules map artifact of the model. :param custom_objects_map: A dictionary of all the custom objects required for loading the model. Each key @@ -79,10 +87,12 @@ def __init__( from it. If multiple objects needed to be imported from the same py file a list can be given. The map can be passed as a path to a json file as well. For example: + { "/.../custom_optimizer.py": "optimizer", "/.../custom_layers.py": ["layer1", "layer2"] } + All the paths will be accessed from the given 'custom_objects_directory', meaning each py file will be read from 'custom_objects_directory/'. If the model path given is of a store object, the custom objects map will be @@ -135,7 +145,7 @@ def __init__( # If the model path is of a store model object, this will be the extra data as DataItems ready to be downloaded. self._extra_data = kwargs.get( "extra_data", {} - ) # type: Dict[str, ExtraDataType] + ) # type: Dict[str, CommonTypes.ExtraDataType] # If the model key is passed, override the default: self._model_key = kwargs.get("model_key", "model") @@ -146,6 +156,7 @@ def __init__( self._outputs = None # type: List[Feature] self._labels = {} # type: Dict[str, Union[str, int, float]] self._parameters = {} # type: Dict[str, Union[str, int, float]] + self._metrics = {} # type: Dict[str, float] self._registered_artifacts = {} # type: Dict[str, Artifact] # Set a flag to know if the user logged the model so its artifact is cached: @@ -165,7 +176,7 @@ def model_name(self) -> str: return self._model_name @property - def model(self) -> ModelType: + def model(self) -> CommonTypes.ModelType: """ Get the handled model. Will return None in case the model is not initialized. @@ -237,7 +248,9 @@ def parameters(self) -> Dict[str, str]: """ return self._parameters - def get_artifacts(self, committed_only: bool = False) -> Dict[str, ExtraDataType]: + def get_artifacts( + self, committed_only: bool = False + ) -> Dict[str, CommonTypes.ExtraDataType]: """ Get the registered artifacts of this model's artifact. By default all the artifacts (logged and to be logged - committed only) will be returned. To get only the artifacts registered in the current run whom are committed and @@ -279,7 +292,10 @@ def set_tag(self, tag: str): self._tag = tag def set_inputs( - self, from_sample: IOSampleType = None, features: List[Feature] = None, **kwargs + self, + from_sample: CommonTypes.IOSampleType = None, + features: List[Feature] = None, + **kwargs, ): """ Read the inputs property of this model to be logged along with it. 
The inputs can be set directly by passing the
@@ -305,7 +321,10 @@ def set_inputs(
         )
 
     def set_outputs(
-        self, from_sample: IOSampleType = None, features: List[Feature] = None, **kwargs
+        self,
+        from_sample: CommonTypes.IOSampleType = None,
+        features: List[Feature] = None,
+        **kwargs,
     ):
         """
         Read the outputs property of this model to be logged along with it. The outputs can be set directly by passing
@@ -370,9 +389,29 @@ def set_parameters(
         for label in to_remove:
             self._parameters.pop(label)
 
+    def set_metrics(
+        self,
+        to_add: Dict[str, CommonTypes.ExtraDataType] = None,
+        to_remove: List[str] = None,
+    ):
+        """
+        Update the metrics dictionary of this model artifact.
+
+        :param to_add:    The metrics to add.
+        :param to_remove: A list of metrics keys to remove.
+        """
+        # Update the metrics:
+        if to_add is not None:
+            self._metrics.update(to_add)
+
+        # Remove metrics:
+        if to_remove is not None:
+            for metric in to_remove:
+                self._metrics.pop(metric)
+
     def set_extra_data(
         self,
-        to_add: Dict[str, ExtraDataType] = None,
+        to_add: Dict[str, CommonTypes.ExtraDataType] = None,
         to_remove: List[str] = None,
     ):
         """
@@ -388,13 +427,13 @@ def set_extra_data(
         # Remove extra data:
         if to_remove is not None:
             for label in to_remove:
-                self._parameters.pop(label)
+                self._extra_data.pop(label)
 
     def register_artifacts(
         self, artifacts: Union[Artifact, List[Artifact], Dict[str, Artifact]]
     ):
         """
-        Register the given artifacts so they will be logged as extra data with the model of this handler. Notice: The
+        Register the given artifacts, so they will be logged as extra data with the model of this handler. Notice: The
         artifacts will be logged only when either 'log' or 'update' are called.
 
         :param artifacts: The artifacts to register. Can be passed as a single artifact, a list of artifacts or an
@@ -414,7 +453,7 @@ def register_artifacts(
 
     @abstractmethod
     def save(
-        self, output_path: PathType = None, **kwargs
+        self, output_path: CommonTypes.PathType = None, **kwargs
     ) -> Union[Dict[str, Artifact], None]:
         """
         Save the handled model at the given output path.
@@ -480,7 +519,7 @@ def log(
         outputs: List[Feature] = None,
         metrics: Dict[str, Union[int, float]] = None,
         artifacts: Dict[str, Artifact] = None,
-        extra_data: Dict[str, ExtraDataType] = None,
+        extra_data: Dict[str, CommonTypes.ExtraDataType] = None,
         **kwargs,
     ):
         """
@@ -533,9 +572,10 @@ def log(
         if tag != "":
             self.set_tag(tag=tag)
 
-        # Update labels and parameters:
+        # Update labels, parameters and metrics:
         self.set_labels(to_add=labels)
         self.set_parameters(to_add=parameters)
+        self.set_metrics(to_add=metrics)
 
         # Update the extra data:
         self._extra_data = {
@@ -560,7 +600,7 @@ def log(
             framework=self.FRAMEWORK_NAME,
             labels=self._labels,
             parameters=self._parameters,
-            metrics=metrics,
+            metrics=self._metrics,
             extra_data={
                 k: v
                 for k, v in self._extra_data.items()
@@ -568,7 +608,7 @@ def log(
             },
             algorithm=kwargs.get("algorithm", None),
             training_set=kwargs.get("sample_set", None),
-            label_column=kwargs.get("y_columns", None),
+            label_column=kwargs.get("target_columns", None),
             feature_vector=kwargs.get("feature_vector", None),
             feature_weights=kwargs.get("feature_weights", None),
         )
@@ -584,7 +624,7 @@ def update(
         outputs: List[Feature] = None,
         metrics: Dict[str, Union[int, float]] = None,
         artifacts: Dict[str, Artifact] = None,
-        extra_data: Dict[str, ExtraDataType] = None,
+        extra_data: Dict[str, CommonTypes.ExtraDataType] = None,
         **kwargs,
    ):
         """
@@ -666,97 +706,6 @@ def update(
             self._model_artifact
         )  # Update the cached model to the database.
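A short usage sketch of the updated handler logging flow. The concrete `SomeFrameworkModelHandler` name and the `model` / `context` objects are assumptions for illustration:

    handler = SomeFrameworkModelHandler(model=model, context=context)
    handler.set_labels(to_add={"framework": handler.FRAMEWORK_NAME})
    handler.set_metrics(to_add={"accuracy": 0.98})  # Metrics are now kept on the handler and logged with the model.
    handler.log(
        tag="v1",
        target_columns="label",  # Read from the 'target_columns' kwarg instead of 'y_columns' after this change.
    )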
- @staticmethod - def convert_value_type_to_np_dtype( - value_type: str, - ) -> np.dtype: # TODO: Move to utils - """ - Get the 'tensorflow.DType' equivalent to the given MLRun value type. - - :param value_type: The MLRun value type to convert to numpy data type. - - :return: The 'numpy.dtype' equivalent to the given MLRun data type. - - :raise MLRunInvalidArgumentError: If numpy is not supporting the given data type. - """ - # Initialize the mlrun to numpy data type conversion map: - conversion_map = { - ValueType.BOOL: np.bool, - ValueType.INT8: np.int8, - ValueType.INT16: np.int16, - ValueType.INT32: np.int32, - ValueType.INT64: np.int64, - ValueType.UINT8: np.uint8, - ValueType.UINT16: np.uint16, - ValueType.UINT32: np.uint32, - ValueType.UINT64: np.uint64, - ValueType.FLOAT16: np.float16, - ValueType.FLOAT: np.float32, - ValueType.DOUBLE: np.float64, - } - - # Convert and return: - if value_type in conversion_map: - return conversion_map[value_type] - raise mlrun.errors.MLRunInvalidArgumentError( - f"The ValueType given is not supported in numpy: '{value_type}'." - ) - - @staticmethod - def convert_np_dtype_to_value_type( - np_dtype: Union[np.dtype, type, str] - ) -> str: # TODO: Move to utils - """ - Convert the given numpy data type to MLRun value type. It is better to use explicit bit namings (for example: - instead of using 'np.double', use 'np.float64'). - - :param np_dtype: The numpy data type to convert to MLRun's value type. Expected to be a 'numpy.dtype', 'type' or - 'str'. - - :return: The MLRun value type converted from the given data type. - - :raise MLRunInvalidArgumentError: If the numpy data type is not supported by MLRun. - """ - # Initialize the numpy to mlrun data type conversion map: - conversion_map = { - np.bool.__name__: ValueType.BOOL, - np.byte.__name__: ValueType.INT8, - np.int8.__name__: ValueType.INT8, - np.short.__name__: ValueType.INT16, - np.int16.__name__: ValueType.INT16, - np.int32.__name__: ValueType.INT32, - np.int.__name__: ValueType.INT64, - np.long.__name__: ValueType.INT64, - np.int64.__name__: ValueType.INT64, - np.ubyte.__name__: ValueType.UINT8, - np.uint8.__name__: ValueType.UINT8, - np.ushort.__name__: ValueType.UINT16, - np.uint16.__name__: ValueType.UINT16, - np.uint32.__name__: ValueType.UINT32, - np.uint.__name__: ValueType.UINT64, - np.uint64.__name__: ValueType.UINT64, - np.half.__name__: ValueType.FLOAT16, - np.float16.__name__: ValueType.FLOAT16, - np.single.__name__: ValueType.FLOAT, - np.float32.__name__: ValueType.FLOAT, - np.double.__name__: ValueType.DOUBLE, - np.float.__name__: ValueType.DOUBLE, - np.float64.__name__: ValueType.DOUBLE, - } - - # Parse the given numpy data type to string: - if isinstance(np_dtype, np.dtype): - np_dtype = np_dtype.name - elif isinstance(np_dtype, type): - np_dtype = np_dtype.__name__ - - # Convert and return: - if np_dtype in conversion_map: - return conversion_map[np_dtype] - raise mlrun.errors.MLRunInvalidArgumentError( - f"MLRun value type is not supporting the given numpy data type: '{np_dtype}'." 
- ) - def _collect_files_from_store_object(self): """ If the model path given is of a store object, collect the needed model files into this handler for later loading @@ -1041,7 +990,7 @@ def _log_custom_objects(self) -> Dict[str, Artifact]: def _read_io_samples( self, - samples: Union[IOSampleType, List[IOSampleType]], + samples: Union[CommonTypes.IOSampleType, List[CommonTypes.IOSampleType]], ) -> List[Feature]: """ Read the given inputs / output sample to / from the model into a list of MLRun Features (ports) to log in @@ -1057,7 +1006,7 @@ def _read_io_samples( return [self._read_sample(sample=sample) for sample in samples] - def _read_sample(self, sample: IOSampleType) -> Feature: + def _read_sample(self, sample: CommonTypes.IOSampleType) -> Feature: """ Read the sample into a MLRun Feature. This abstract class is reading samples of 'numpy.ndarray'. For further types of samples, please inherit this method. @@ -1071,7 +1020,9 @@ def _read_sample(self, sample: IOSampleType) -> Feature: # Supported types: if isinstance(sample, np.ndarray): return Feature( - value_type=self.convert_np_dtype_to_value_type(np_dtype=sample.dtype), + value_type=CommonUtils.convert_np_dtype_to_value_type( + np_dtype=sample.dtype + ), dims=list(sample.shape), ) @@ -1082,7 +1033,7 @@ def _read_sample(self, sample: IOSampleType) -> Feature: ) @staticmethod - def _validate_model_parameters(model_path: str, model: ModelType): + def _validate_model_parameters(model_path: str, model: CommonTypes.ModelType): """ Validate the given model parameters. @@ -1229,7 +1180,7 @@ def _import_module( for object_name in objects_names } - # Update the globals dictionary with the module improts: + # Update the globals dictionary with the module imports: globals().update(module_imports) return module_imports @@ -1324,7 +1275,7 @@ def wrapper(model_handler: ModelHandler, *args, **kwargs): if is_applied: interface.add_interface( obj=model_handler.model, - restoration_information=restoration_information, + restoration=restoration_information, ) return returned_value diff --git a/mlrun/frameworks/_common/plan.py b/mlrun/frameworks/_common/plan.py index 6c64b2953b8..f872946b72e 100644 --- a/mlrun/frameworks/_common/plan.py +++ b/mlrun/frameworks/_common/plan.py @@ -29,7 +29,7 @@ def artifacts(self) -> Dict[str, Artifact]: def is_reproducible(self, *args, **kwargs) -> bool: """ - Check whether or not the plan should be used to produce multiple times or only once. Defaulted to return False. + Check whether the plan should be used to produce multiple times or only once. Defaulted to return False. :return: True if the plan is reproducible and False otherwise. """ @@ -38,7 +38,7 @@ def is_reproducible(self, *args, **kwargs) -> bool: @abstractmethod def is_ready(self, *args, **kwargs) -> bool: """ - Check whether or not the plan is fit for production in the current time this method is called. + Check whether the plan is fit for production in the current time this method is called. :return: True if the plan is producible and False otherwise. """ diff --git a/mlrun/frameworks/_common/producer.py b/mlrun/frameworks/_common/producer.py new file mode 100644 index 00000000000..b0c48e701d3 --- /dev/null +++ b/mlrun/frameworks/_common/producer.py @@ -0,0 +1,159 @@ +from typing import Dict, List + +import mlrun +from mlrun.artifacts import Artifact + +from .plan import Plan +from .utils import LoggingMode + + +class Producer: + """ + Class for handling production of artifact plans during a run. 
+    """
+
+    def __init__(
+        self,
+        context: mlrun.MLClientCtx = None,
+        plans: List[Plan] = None,
+    ):
+        """
+        Initialize a producer with the given plans. The producer will log the produced artifacts using the given
+        context.
+
+        :param context: The context to log with.
+        :param plans:   The plans the producer will manage.
+        """
+        # Store the context and plans:
+        self._context = context
+        self._plans = plans if plans is not None else []
+
+        # Set up the producer's mode (defaulted to Training):
+        self._mode = LoggingMode.TRAINING
+
+        # Prepare the dictionaries to hold the artifacts. Once they are logged they will be moved from one to another:
+        self._logged_artifacts = {}  # type: Dict[str, Artifact]
+        self._not_logged_artifacts = {}  # type: Dict[str, Artifact]
+
+    @property
+    def mode(self) -> LoggingMode:
+        """
+        Get the producer's mode.
+
+        :return: The producer's mode.
+        """
+        return self._mode
+
+    @property
+    def context(self) -> mlrun.MLClientCtx:
+        """
+        Get the producer's MLRun context.
+
+        :return: The producer's MLRun context.
+        """
+        return self._context
+
+    @property
+    def artifacts(self) -> Dict[str, Artifact]:
+        """
+        Get the logged artifacts.
+
+        :return: The logged artifacts.
+        """
+        return self._logged_artifacts
+
+    def set_mode(self, mode: LoggingMode):
+        """
+        Set the producer's mode.
+
+        :param mode: The mode to set.
+        """
+        self._mode = mode
+
+    def set_context(self, context: mlrun.MLClientCtx):
+        """
+        Set the context this producer will log with.
+
+        :param context: The MLRun context to set.
+        """
+        self._context = context
+
+    def set_plans(self, plans: List[Plan]):
+        """
+        Update the plans of this producer to the given list of plans.
+
+        :param plans: The list of plans to override the current one.
+        """
+        self._plans = plans
+
+    def produce_stage(self, stage, **kwargs):
+        """
+        Produce the artifacts ready at the given stage and log them.
+
+        :param stage:  The current stage to log at.
+        :param kwargs: All the required produce arguments to pass onto the plans.
+        """
+        # Produce all the artifacts according to the given stage:
+        self._produce_artifacts(stage=stage, **kwargs)
+
+        # Log if a context is available:
+        if self._context is not None:
+            # Log the artifacts in queue:
+            self._log_artifacts()
+            # Commit:
+            self._context.commit(completed=False)
+
+    def _produce_artifacts(self, stage, **kwargs):
+        """
+        Go through the plans and check if they are ready to be produced in the given stage of the run. If they are,
+        the producer will pass all the arguments to the 'plan.produce' method and collect the returned artifact.
+
+        :param stage:  The stage to produce the artifacts at, used to check if each plan is ready.
+        :param kwargs: All the required produce arguments to pass onto the plans.
+        """
+        # Initialize a new list of plans for all the plans that will still need to be produced:
+        plans = []
+
+        # Go over the plans to produce their artifacts:
+        for plan in self._plans:
+            # Check if the plan is ready:
+            if plan.is_ready(stage=stage):
+                # Produce the artifact:
+                self._not_logged_artifacts = {
+                    **self._not_logged_artifacts,
+                    **plan.produce(**kwargs),
+                }
+                # If the plan should not be produced again, continue to the next one so it won't be collected:
+                if not plan.is_reproducible():
+                    continue
+            # Collect the plan to produce it later (or again if reproducible):
+            plans.append(plan)
+
+        # Clear the old plans:
+        self._plans = plans
+
+        # Add evaluation prefix if in Evaluation mode:
+        if self._mode == LoggingMode.EVALUATION:
+            self._not_logged_artifacts = {
+                f"evaluation-{key}": value
+                for key, value in self._not_logged_artifacts.items()
+            }
+            for artifact in self._not_logged_artifacts.values():
+                artifact.key = f"evaluation-{artifact.key}"
+
+    def _log_artifacts(self):
+        """
+        Log the produced plans artifacts using the producer's context.
+        """
+        # Use the context to log each artifact:
+        for artifact in self._not_logged_artifacts.values():
+            self._context.log_artifact(artifact)
+
+        # Collect the logged artifacts:
+        self._logged_artifacts = {
+            **self._logged_artifacts,
+            **self._not_logged_artifacts,
+        }
+
+        # Clean the not logged artifacts dictionary:
+        self._not_logged_artifacts = {}
diff --git a/mlrun/frameworks/_common/utils.py b/mlrun/frameworks/_common/utils.py
index 919931a3a46..129626622dd 100644
--- a/mlrun/frameworks/_common/utils.py
+++ b/mlrun/frameworks/_common/utils.py
@@ -1,23 +1,237 @@
+import re
+from abc import ABC
+from enum import Enum
 from pathlib import Path
-from typing import TypeVar, Union
+from typing import Any, Dict, List, Tuple, TypeVar, Union
+
+import numpy as np
+import pandas as pd
+
+import mlrun
 from mlrun.artifacts import Artifact
+from mlrun.data_types import ValueType
 from mlrun.datastore import DataItem
 
-# Generic types:
-ModelType = TypeVar(
-    "ModelType"
-)  # A generic model type in a handler / interface (examples: tf.keras.Model, torch.Module).
-IOSampleType = TypeVar(
-    "IOSampleType"
-)  # A generic inout / output samples for reading the inputs / outputs properties.
-MLRunInterfaceableType = TypeVar(
-    "MLRunInterfaceableType"
-)  # A generic object type for what can be wrapped with a framework MLRun interface (examples: xgb, xgb.XGBModel).
-
-# Common types:
-PathType = Union[str, Path]  # For receiving a path from 'pathlib' or 'os.path'.
-TrackableType = Union[str, bool, float, int]  # All trackable values types for a logger.
-ExtraDataType = Union[
-    str, bytes, Artifact, DataItem
-]  # Types available in the extra data dictionary of an artifact.
+
+class CommonTypes(ABC):
+    """
+    Common type hints to all frameworks.
+    """
+
+    # A generic model type in a handler / interface (examples: tf.keras.Model, torch.Module):
+    ModelType = TypeVar("ModelType")
+
+    # A generic input / output sample type for reading the inputs / outputs properties:
+    IOSampleType = TypeVar("IOSampleType")
+
+    # A generic object type for what can be wrapped with a framework MLRun interface (examples: xgb, xgb.XGBModel):
+    MLRunInterfaceableType = TypeVar("MLRunInterfaceableType")
+
+    # Type for an MLRun Interface restoration tuple as returned from 'remove_interface':
+    MLRunInterfaceRestorationType = Tuple[
+        Dict[str, Any],  # Interface properties.
+        Dict[str, Any],  # Replaced properties.
+        List[str],  # Replaced methods and functions.
+ ] + + # Common dataset type to all frameworks: + DatasetType = Union[ + list, + tuple, + dict, + np.ndarray, + pd.DataFrame, + pd.Series, + "scipy.sparse.base.spmatrix", # noqa: F821 + ] + + # A joined type for receiving a path from 'pathlib' or 'os.path': + PathType = Union[str, Path] + + # A joined type for all trackable values (for logging): + TrackableType = Union[str, bool, float, int] + + # Types available in the extra data dictionary of an artifact: + ExtraDataType = Union[str, bytes, Artifact, DataItem] + + +class LoggingMode(Enum): + """ + The logging mode options. + """ + + TRAINING = "Training" + EVALUATION = "Evaluation" + + +class CommonUtils(ABC): + """ + Common utilities functions to all frameworks. + """ + + @staticmethod + def to_array(dataset: CommonTypes.DatasetType) -> np.ndarray: + """ + Convert the given dataset to np.ndarray. + + :param dataset: The dataset to convert. Must be one of {pd.DataFrame, pd.Series, scipy.sparse.base.spmatrix, + list, tuple, dict}. + + :return: The dataset as a ndarray. + + :raise MLRunInvalidArgumentError: If the dataset type is not supported. + """ + if isinstance(dataset, np.ndarray): + return dataset + if isinstance(dataset, (pd.DataFrame, pd.Series)): + return dataset.to_numpy() + if isinstance(dataset, (list, tuple)): + return np.array(dataset) + if isinstance(dataset, dict): + return np.array(list(dataset.values())) + try: + # SciPy is not in MLRun's requirements but common to all frameworks. + import scipy.sparse.base as sp + + if isinstance(dataset, sp.spmatrix): + return dataset.toarray() + except ModuleNotFoundError: + # SciPy is not installed. + pass + + raise mlrun.errors.MLRunInvalidArgumentError( + f"Could not convert the given dataset into a numpy ndarray. Supporting conversion from: " + f"{CommonUtils.get_union_typehint_string(CommonTypes.DatasetType)}. " + f"The given dataset was of type: '{type(dataset)}'" + ) + + @staticmethod + def to_dataframe(dataset: CommonTypes.DatasetType) -> pd.DataFrame: + """ + Convert the given dataset to pd.DataFrame. + + :param dataset: The dataset to convert. Must be one of {np.ndarray, pd.Series, scipy.sparse.base.spmatrix, list, + tuple, dict}. + + :return: The dataset as a DataFrame. + + :raise MLRunInvalidArgumentError: If the dataset type is not supported. + """ + if isinstance(dataset, pd.DataFrame): + return dataset + if isinstance(dataset, (np.ndarray, pd.Series, list, tuple, dict)): + return pd.DataFrame(dataset) + try: + # SciPy is not in MLRun's requirements but common to all frameworks. + import scipy.sparse.base as sp + + if isinstance(dataset, sp.spmatrix): + return pd.DataFrame.sparse.from_spmatrix(dataset) + except ModuleNotFoundError: + # SciPy is not installed. + pass + raise mlrun.errors.MLRunInvalidArgumentError( + f"Could not convert the given dataset into a pandas DataFrame. Supporting conversion from: " + f"{CommonUtils.get_union_typehint_string(CommonTypes.DatasetType)}. " + f"The given dataset was of type: '{type(dataset)}'" + ) + + @staticmethod + def convert_value_type_to_np_dtype( + value_type: str, + ) -> np.dtype: + """ + Get the 'numpy.dtype' equivalent to the given MLRun value type. + + :param value_type: The MLRun value type to convert to numpy data type. + + :return: The 'numpy.dtype' equivalent to the given MLRun data type. + + :raise MLRunInvalidArgumentError: If numpy is not supporting the given data type. 
+ """ + # Initialize the mlrun to numpy data type conversion map: + conversion_map = { + ValueType.BOOL: np.bool, + ValueType.INT8: np.int8, + ValueType.INT16: np.int16, + ValueType.INT32: np.int32, + ValueType.INT64: np.int64, + ValueType.UINT8: np.uint8, + ValueType.UINT16: np.uint16, + ValueType.UINT32: np.uint32, + ValueType.UINT64: np.uint64, + ValueType.FLOAT16: np.float16, + ValueType.FLOAT: np.float32, + ValueType.DOUBLE: np.float64, + } + + # Convert and return: + if value_type in conversion_map: + return conversion_map[value_type] + raise mlrun.errors.MLRunInvalidArgumentError( + f"The ValueType given is not supported in numpy: '{value_type}'" + ) + + @staticmethod + def convert_np_dtype_to_value_type(np_dtype: Union[np.dtype, type, str]) -> str: + """ + Convert the given numpy data type to MLRun value type. It is better to use explicit bit namings (for example: + instead of using 'np.double', use 'np.float64'). + + :param np_dtype: The numpy data type to convert to MLRun's value type. Expected to be a 'numpy.dtype', 'type' or + 'str'. + + :return: The MLRun value type converted from the given data type. + + :raise MLRunInvalidArgumentError: If the numpy data type is not supported by MLRun. + """ + # Initialize the numpy to mlrun data type conversion map: + conversion_map = { + np.bool.__name__: ValueType.BOOL, + np.byte.__name__: ValueType.INT8, + np.int8.__name__: ValueType.INT8, + np.short.__name__: ValueType.INT16, + np.int16.__name__: ValueType.INT16, + np.int32.__name__: ValueType.INT32, + np.int.__name__: ValueType.INT64, + np.long.__name__: ValueType.INT64, + np.int64.__name__: ValueType.INT64, + np.ubyte.__name__: ValueType.UINT8, + np.uint8.__name__: ValueType.UINT8, + np.ushort.__name__: ValueType.UINT16, + np.uint16.__name__: ValueType.UINT16, + np.uint32.__name__: ValueType.UINT32, + np.uint.__name__: ValueType.UINT64, + np.uint64.__name__: ValueType.UINT64, + np.half.__name__: ValueType.FLOAT16, + np.float16.__name__: ValueType.FLOAT16, + np.single.__name__: ValueType.FLOAT, + np.float32.__name__: ValueType.FLOAT, + np.double.__name__: ValueType.DOUBLE, + np.float.__name__: ValueType.DOUBLE, + np.float64.__name__: ValueType.DOUBLE, + } + + # Parse the given numpy data type to string: + if isinstance(np_dtype, np.dtype): + np_dtype = np_dtype.name + elif isinstance(np_dtype, type): + np_dtype = np_dtype.__name__ + + # Convert and return: + if np_dtype in conversion_map: + return conversion_map[np_dtype] + raise mlrun.errors.MLRunInvalidArgumentError( + f"MLRun value type is not supporting the given numpy data type: '{np_dtype}'" + ) + + @staticmethod + def get_union_typehint_string(union_typehint) -> str: + """ + Get the string representation of a types.Union typehint object. + + :param union_typehint: The union typehint to get its string representation. + + :return: The union typehint's string. 
+ """ + return re.sub(r"typing.Union|[\[\]'\"()]|ForwardRef", "", str(union_typehint)) diff --git a/mlrun/frameworks/_dl_common/__init__.py b/mlrun/frameworks/_dl_common/__init__.py index c978d30f610..fc35bc3f7ae 100644 --- a/mlrun/frameworks/_dl_common/__init__.py +++ b/mlrun/frameworks/_dl_common/__init__.py @@ -1,2 +1,3 @@ # flake8: noqa - this is until we take care of the F401 violations with respect to __all__ & sphinx from .model_handler import DLModelHandler +from .utils import DLTypes, DLUtils diff --git a/mlrun/frameworks/_dl_common/loggers/__init__.py b/mlrun/frameworks/_dl_common/loggers/__init__.py index f2f4e37e58d..0611c7e1057 100644 --- a/mlrun/frameworks/_dl_common/loggers/__init__.py +++ b/mlrun/frameworks/_dl_common/loggers/__init__.py @@ -1,4 +1,4 @@ # flake8: noqa - this is until we take care of the F401 violations with respect to __all__ & sphinx -from .logger import Logger, LoggerMode +from .logger import Logger from .mlrun_logger import MLRunLogger from .tensorboard_logger import TensorboardLogger diff --git a/mlrun/frameworks/_dl_common/loggers/logger.py b/mlrun/frameworks/_dl_common/loggers/logger.py index 363aac17dd9..4d48bb66db3 100644 --- a/mlrun/frameworks/_dl_common/loggers/logger.py +++ b/mlrun/frameworks/_dl_common/loggers/logger.py @@ -1,18 +1,9 @@ -from enum import Enum from typing import Dict, List import mlrun -from ..._common import TrackableType - - -class LoggerMode(Enum): - """ - The logger's mode, can be training or evaluation. - """ - - TRAINING = "Training" - EVALUATION = "Evaluation" +from ..._common import LoggingMode +from ..utils import DLTypes class Logger: @@ -29,26 +20,28 @@ def __init__(self, context: mlrun.MLClientCtx = None): # Save the context: self._context = context - # Setup the logger's mode (defaulted to Training): - self._mode = LoggerMode.TRAINING + # Set up the logger's mode (defaulted to Training): + self._mode = LoggingMode.TRAINING - # Setup the results dictionaries - a dictionary of metrics for all the iteration results by their epochs: + # Set up the results dictionaries - a dictionary of metrics for all the iteration results by their epochs: # [Metric: str] -> [Epoch: int] -> [Iteration: int] -> [value: float] self._training_results = {} # type: Dict[str, List[List[float]]] self._validation_results = {} # type: Dict[str, List[List[float]]] - # Setup the metrics summary dictionaries - a dictionary of all metrics averages by epochs: + # Set up the metrics summary dictionaries - a dictionary of all metrics averages by epochs: # [Metric: str] -> [Epoch: int] -> [value: float]: self._training_summaries = {} # type: Dict[str, List[float]] self._validation_summaries = {} # type: Dict[str, List[float]] # Store the static hyperparameters given - a dictionary of parameters and their values to note: # [Parameter: str] -> [value: Union[str, bool, float, int]] - self._static_hyperparameters = {} # type: Dict[str, TrackableType] + self._static_hyperparameters = {} # type: Dict[str, DLTypes.TrackableType] # Setup the dynamic hyperparameters dictionary - a dictionary of all tracked hyperparameters by epochs: # [Hyperparameter: str] -> [Epoch: int] -> [value: Union[str, bool, float, int]] - self._dynamic_hyperparameters = {} # type: Dict[str, List[TrackableType]] + self._dynamic_hyperparameters = ( + {} + ) # type: Dict[str, List[DLTypes.TrackableType]] # Setup the iterations counter: self._epochs = 0 @@ -65,7 +58,7 @@ def context(self) -> mlrun.MLClientCtx: return self._context @property - def mode(self) -> LoggerMode: + def mode(self) -> 
LoggingMode:
         """
         Get the logger's mode.
 
@@ -116,7 +109,7 @@ def validation_summaries(self) -> Dict[str, List[float]]:
         return self._validation_summaries
 
     @property
-    def static_hyperparameters(self) -> Dict[str, TrackableType]:
+    def static_hyperparameters(self) -> Dict[str, DLTypes.TrackableType]:
         """
         Get the static hyperparameters logged. The hyperparameters will be stored in a dictionary where each key is the
         hyperparameter name and the value is his logged value.
@@ -126,7 +119,7 @@ def static_hyperparameters(self) -> Dict[str, TrackableType]:
         return self._static_hyperparameters
 
     @property
-    def dynamic_hyperparameters(self) -> Dict[str, List[TrackableType]]:
+    def dynamic_hyperparameters(self) -> Dict[str, List[DLTypes.TrackableType]]:
         """
         Get the dynamic hyperparameters logged. The hyperparameters will be stored in a dictionary where each key is the
         hyperparameter name and the value is a list of his logged values per epoch.
@@ -162,7 +155,7 @@ def validation_iterations(self) -> int:
         """
         return self._validation_iterations
 
-    def set_mode(self, mode: LoggerMode):
+    def set_mode(self, mode: LoggingMode):
         """
         Set the logger's mode.
 
@@ -241,7 +234,9 @@ def log_validation_summary(self, metric_name: str, result: float):
         """
         self._validation_summaries[metric_name].append(result)
 
-    def log_static_hyperparameter(self, parameter_name: str, value: TrackableType):
+    def log_static_hyperparameter(
+        self, parameter_name: str, value: DLTypes.TrackableType
+    ):
         """
         Log the given parameter value in the static hyperparameters dictionary.
 
@@ -250,7 +245,9 @@ def log_static_hyperparameter(self, parameter_name: str, value: TrackableType):
         """
         self._static_hyperparameters[parameter_name] = value
 
-    def log_dynamic_hyperparameter(self, parameter_name: str, value: TrackableType):
+    def log_dynamic_hyperparameter(
+        self, parameter_name: str, value: DLTypes.TrackableType
+    ):
         """
         Log the given parameter value in the dynamic hyperparameters dictionary at the current epoch (if its a new
         parameter it will be epoch 0). If the parameter appears in the static hyperparameters dictionary, it will be
@@ -269,6 +266,7 @@ def log_dynamic_hyperparameter(self, parameter_name: str, value: TrackableType):
         else:
             self._dynamic_hyperparameters[parameter_name].append(value)
 
+    # TODO: Move to MLRun logger
     def log_context_parameters(self):
         """
         Log the context given parameters as static hyperparameters. Should be called once as the context parameters do
@@ -276,19 +274,17 @@ def log_context_parameters(self):
         """
         for parameter_name, parameter_value in self._context.parameters.items():
             # Check if the parameter is a trackable value:
-            if (
-                isinstance(parameter_value, str)
-                or isinstance(parameter_value, bool)
-                or isinstance(parameter_value, float)
-                or isinstance(parameter_value, int)
-            ):
+            if isinstance(parameter_value, (str, bool, float, int)):
                 self.log_static_hyperparameter(
                     parameter_name=parameter_name, value=parameter_value
                 )
             else:
                 # See if its string representation length is below the maximum value length:
                 string_value = str(parameter_value)
-                if len(string_value) < 30:
+                if (
+                    len(string_value) < 30
+                ):  # Temporary, to not log too long variables into the UI.
+                    # TODO: Make the user specify the parameters and take them all by default.
self.log_static_hyperparameter( parameter_name=parameter_name, value=parameter_value ) diff --git a/mlrun/frameworks/_dl_common/loggers/mlrun_logger.py b/mlrun/frameworks/_dl_common/loggers/mlrun_logger.py index e2cddcb4478..4d60d566140 100644 --- a/mlrun/frameworks/_dl_common/loggers/mlrun_logger.py +++ b/mlrun/frameworks/_dl_common/loggers/mlrun_logger.py @@ -7,9 +7,10 @@ import mlrun from mlrun.artifacts import Artifact, PlotlyArtifact -from ..._common import TrackableType +from ..._common import LoggingMode from ..model_handler import DLModelHandler -from .logger import Logger, LoggerMode +from ..utils import DLTypes +from .logger import Logger class MLRunLogger(Logger): @@ -25,7 +26,7 @@ class MLRunLogger(Logger): * At the end of the run: * Per epoch chart artifacts for the validation summaries and dynamic hyperparameters. - * Model is logged with all of the files and artifacts. + * Model is logged with all the files and artifacts. """ class _Loops: @@ -60,7 +61,7 @@ def log_epoch_to_context( """ Log the last epoch. The last epoch information recorded in the given tracking dictionaries will be logged, meaning the epoch index will not be taken from the given 'epoch' parameter, but the '-1' index will be used in - all of the dictionaries. Each epoch will log the following information: + all the dictionaries. Each epoch will log the following information: * Results table: @@ -76,10 +77,10 @@ def log_epoch_to_context( :param epoch: The epoch number that has just ended. """ - # Log the collected hyperparameters and values as results to the epoch's child context: + # Log the collected hyperparameters and values as results (the most recent value collected (-1 index)): for static_parameter, value in self._static_hyperparameters.items(): self._context.log_result(static_parameter, value) - if self._mode == LoggerMode.TRAINING: + if self._mode == LoggingMode.TRAINING: for dynamic_parameter, values in self._dynamic_hyperparameters.items(): self._context.log_result(dynamic_parameter, values[-1]) for metric, results in self._training_summaries.items(): @@ -89,7 +90,7 @@ def log_epoch_to_context( for metric, results in self._validation_summaries.items(): self._context.log_result( f"{self._Loops.EVALUATION}_{metric}" - if self._mode == LoggerMode.EVALUATION + if self._mode == LoggingMode.EVALUATION else f"{self._Loops.VALIDATION}_{metric}", results[-1], ) @@ -97,12 +98,12 @@ def log_epoch_to_context( # Log the epochs metrics results as chart artifacts: loops = ( [self._Loops.EVALUATION] - if self._mode == LoggerMode.EVALUATION + if self._mode == LoggingMode.EVALUATION else [self._Loops.TRAINING, self._Loops.VALIDATION] ) metrics_dictionaries = ( [self._validation_results] - if self._mode == LoggerMode.EVALUATION + if self._mode == LoggingMode.EVALUATION else [self._training_results, self._validation_results] ) for loop, metrics_dictionary in zip(loops, metrics_dictionaries): @@ -129,9 +130,9 @@ def log_run( self, model_handler: DLModelHandler, tag: str = "", - labels: Dict[str, TrackableType] = None, - parameters: Dict[str, TrackableType] = None, - extra_data: Dict[str, Union[TrackableType, Artifact]] = None, + labels: Dict[str, DLTypes.TrackableType] = None, + parameters: Dict[str, DLTypes.TrackableType] = None, + extra_data: Dict[str, Union[DLTypes.TrackableType, Artifact]] = None, ): """ Log the run, summarizing the validation metrics and dynamic hyperparameters across all epochs. If 'update' is @@ -153,7 +154,7 @@ def log_run( :param extra_data: Extra data to log with the model. 
""" # If in training mode, log the summaries and hyperparameters artifacts: - if self._mode == LoggerMode.TRAINING: + if self._mode == LoggingMode.TRAINING: # Create chart artifacts for summaries: for metric_name in self._training_summaries: # Create the plotly artifact: @@ -191,7 +192,7 @@ def log_run( # Log or update: model_handler.set_context(context=self._context) - if self._mode == LoggerMode.EVALUATION: + if self._mode == LoggingMode.EVALUATION: model_handler.update( labels=labels, parameters=parameters, @@ -209,7 +210,7 @@ def log_run( artifacts=self._artifacts, ) - # Commit: + # Commit to update the changes, so they will be available in the MLRun UI: self._context.commit(completed=False) def _generate_metrics_summary(self) -> Dict[str, float]: @@ -219,7 +220,7 @@ def _generate_metrics_summary(self) -> Dict[str, float]: :return: The metrics summary. """ # If in training mode, return both training and validation metrics: - if self._mode == LoggerMode.TRAINING: + if self._mode == LoggingMode.TRAINING: return { **{ f"{self._Loops.TRAINING}_{name}": values[-1] diff --git a/mlrun/frameworks/_dl_common/loggers/tensorboard_logger.py b/mlrun/frameworks/_dl_common/loggers/tensorboard_logger.py index fbe33be0f29..bdc57740513 100644 --- a/mlrun/frameworks/_dl_common/loggers/tensorboard_logger.py +++ b/mlrun/frameworks/_dl_common/loggers/tensorboard_logger.py @@ -1,21 +1,18 @@ import os from abc import abstractmethod from datetime import datetime -from typing import Any, Callable, Dict, Generic, List, TypeVar, Union +from typing import Any, Callable, Dict, Generic, List, Union import yaml import mlrun from mlrun.config import config -from ..._common import TrackableType +from ..utils import DLTypes from .logger import Logger -# Define a type variable for the different tensor type objects of the supported frameworks: -Weight = TypeVar("Weight") - -class TensorboardLogger(Logger, Generic[Weight]): +class TensorboardLogger(Logger, Generic[DLTypes.WeightType]): """ An abstract tensorboard logger class for logging the information collected during training / evaluation of the base logger to tensorboard. Each framework has its own way of logging to tensorboard, but each must implement the entire @@ -49,7 +46,9 @@ class _Sections: def __init__( self, - statistics_functions: List[Callable[[Weight], Union[float, Weight]]], + statistics_functions: List[ + Callable[[DLTypes.WeightType], Union[float, DLTypes.WeightType]] + ], context: mlrun.MLClientCtx = None, tensorboard_directory: str = None, run_name: str = None, @@ -114,7 +113,7 @@ def __init__( # Setup the weights dictionaries - a dictionary of all required weight parameters: # [Weight: str] -> [value: WeightType] - self._weights = {} # type: Dict[str, Weight] + self._weights = {} # type: Dict[str, DLTypes.WeightType] # Setup the statistics dictionaries - a dictionary of statistics for the required weights per epoch: # [Statistic: str] -> [Weight: str] -> [epoch: int] -> [value: float] @@ -125,7 +124,7 @@ def __init__( ] = {} # type: Dict[str, List[float]] @property - def weights(self) -> Dict[str, Weight]: + def weights(self) -> Dict[str, DLTypes.WeightType]: """ Get the logged weights dictionary. Each of the logged weight will be found by its name. 
@@ -143,7 +142,7 @@ def weight_statistics(self) -> Dict[str, Dict[str, List[float]]]: """ return self._weights_statistics - def log_weight(self, weight_name: str, weight_holder: Weight): + def log_weight(self, weight_name: str, weight_holder: DLTypes.WeightType): """ Log the weight into the weights dictionary so it will be tracked and logged during the epochs. For each logged weight the key for it in the statistics logged will be initialized as well. @@ -388,7 +387,7 @@ def _write_scalar_to_tensorboard(self, name: str, value: float, step: int): @abstractmethod def _write_weight_histogram_to_tensorboard( - self, name: str, weight: Weight, step: int + self, name: str, weight: DLTypes.WeightType, step: int ): """ Write the current state of the weights as histograms to tensorboard. @@ -400,7 +399,9 @@ def _write_weight_histogram_to_tensorboard( pass @abstractmethod - def _write_weight_image_to_tensorboard(self, name: str, weight: Weight, step: int): + def _write_weight_image_to_tensorboard( + self, name: str, weight: DLTypes.WeightType, step: int + ): """ Log the current state of the weights as images to tensorboard. @@ -585,7 +586,7 @@ def _generate_run_end_text(self) -> str: def _extract_epoch_results( self, epoch: int = -1 - ) -> Dict[str, Dict[str, TrackableType]]: + ) -> Dict[str, Dict[str, DLTypes.TrackableType]]: """ Extract the given epoch results from all the collected values and results. diff --git a/mlrun/frameworks/_dl_common/utils.py b/mlrun/frameworks/_dl_common/utils.py new file mode 100644 index 00000000000..68f1549165d --- /dev/null +++ b/mlrun/frameworks/_dl_common/utils.py @@ -0,0 +1,21 @@ +from abc import ABC +from typing import TypeVar + +from .._common import CommonTypes, CommonUtils + + +class DLTypes(CommonTypes, ABC): + """ + Deep learning frameworks type hints. + """ + + # A generic type variable for the different tensor type objects of the supported frameworks: + WeightType = TypeVar("WeightType") + + +class DLUtils(CommonUtils, ABC): + """ + Deep learning frameworks utilities. 
+ """ + + pass diff --git a/mlrun/frameworks/_ml_common/__init__.py b/mlrun/frameworks/_ml_common/__init__.py index 88ffdbf57db..cbf89120a61 100644 --- a/mlrun/frameworks/_ml_common/__init__.py +++ b/mlrun/frameworks/_ml_common/__init__.py @@ -1,9 +1,7 @@ # flake8: noqa - this is until we take care of the F401 violations with respect to __all__ & sphinx from .artifacts_library import MLArtifactsLibrary -from .metric import Metric -from .metrics_library import MetricsLibrary, get_metrics -from .mlrun_interface import MLMLRunInterface from .model_handler import MLModelHandler from .pkl_model_server import PickleModelServer from .plan import MLPlan, MLPlanStages, MLPlotPlan -from .utils import DatasetType, MetricEntry +from .producer import MLProducer +from .utils import AlgorithmFunctionality, MLTypes, MLUtils diff --git a/mlrun/frameworks/_ml_common/artifacts_library.py b/mlrun/frameworks/_ml_common/artifacts_library.py index 3daa92c7dd0..db0cf1d31fc 100644 --- a/mlrun/frameworks/_ml_common/artifacts_library.py +++ b/mlrun/frameworks/_ml_common/artifacts_library.py @@ -1,7 +1,6 @@ from abc import ABC from typing import List -from .._common import ModelType from .._common.artifacts_library import ArtifactsLibrary, Plan from .plans import ( CalibrationCurvePlan, @@ -10,7 +9,7 @@ FeatureImportancePlan, ROCCurvePlan, ) -from .utils import AlgorithmFunctionality, DatasetType +from .utils import AlgorithmFunctionality, MLTypes class MLArtifactsLibrary(ArtifactsLibrary, ABC): @@ -33,7 +32,7 @@ class 'init_artifact' class method: @classmethod def default( - cls, model: ModelType, y: DatasetType = None, *args, **kwargs + cls, model: MLTypes.ModelType, y: MLTypes.DatasetType = None, *args, **kwargs ) -> List[Plan]: """ Get the default artifacts plans list of this framework's library. diff --git a/mlrun/frameworks/_ml_common/logger.py b/mlrun/frameworks/_ml_common/logger.py deleted file mode 100644 index b15e39695c3..00000000000 --- a/mlrun/frameworks/_ml_common/logger.py +++ /dev/null @@ -1,312 +0,0 @@ -from enum import Enum -from typing import Dict, List, Union - -import numpy as np -import pandas as pd - -import mlrun -from mlrun.artifacts import Artifact - -from .metric import Metric -from .model_handler import MLModelHandler -from .plan import MLPlan, MLPlanStages -from .utils import to_array - - -class LoggerMode(Enum): - """ - The logger's mode, can be training or evaluation. - """ - - TRAINING = "training" - EVALUATION = "evaluation" - - -class Logger: - """ - Class for handling production of artifact plans and metrics calculations during a run. - """ - - def __init__( - self, - context: mlrun.MLClientCtx = None, - plans: List[MLPlan] = None, - metrics: List[Metric] = None, - ): - """ - Initialize a planner with the given plans. The planner will log the produced artifacts using the given context. - - :param context: The context to log with. - :param plans: The plans the planner will manage. - :param metrics: The metrics - """ - # Store the context and plans: - self._context = context - self._plans = plans if plans is not None else [] - self._metrics = metrics if metrics is not None else [] - - # Setup the logger's mode (defaulted to Training): - self._mode = LoggerMode.TRAINING - - # Prepare the dictionaries to hold the artifacts. Once they are logged they will be moved from one to another: - self._logged_artifacts = {} # type: Dict[str, Artifact] - self._not_logged_artifacts = {} # type: Dict[str, Artifact] - - # Prepare the dictionaries to hold the results. 
Once they are logged they will be moved from one to another: - self._logged_results = {} # type: Dict[str, float] - self._not_logged_results = {} # type: Dict[str, float] - - @property - def mode(self) -> LoggerMode: - """ - Get the logger's mode. - - :return: The logger mode. - """ - return self._mode - - @property - def context(self) -> mlrun.MLClientCtx: - """ - Get the logger's MLRun context. - - :return: The logger's MLRun context. - """ - return self._context - - @property - def artifacts(self) -> Dict[str, Artifact]: - """ - Get the logged artifacts. - - :return: The logged artifacts. - """ - return self._logged_artifacts - - @property - def results(self) -> Dict[str, float]: - """ - Get the logged results. - - :return: The logged results. - """ - return self._logged_results - - def set_mode(self, mode: LoggerMode): - """ - Set the logger's mode. - - :param mode: The mode to set. One of Logger.LoggerMode options. - """ - self._mode = mode - - def set_context(self, context: mlrun.MLClientCtx): - """ - Set the context this logger will log with. - - :param context: The to be set MLRun context. - """ - self._context = context - - def set_plans(self, plans: List[MLPlan]): - """ - Update the plans of this logger to the given list of plans here. - - :param plans: The list of plans to override the current one. - """ - self._plans = plans - - def set_metrics(self, metrics: List[Metric]): - """ - Update the metrics of this logger to the given list of metrics here. - - :param metrics: The list of metrics to override the current one. - """ - self._metrics = metrics - - def is_probabilities_required(self) -> bool: - """ - Check if probabilities are required in order to produce and calculate some of the artifacts and metrics. - - :return: True if probabilities are required by at least one plan or metric and False otherwise. - """ - probabilities_for_plans = any(plan.need_probabilities for plan in self._plans) - probabilities_for_metrics = any( - metric.need_probabilities for metric in self._metrics - ) - return probabilities_for_plans or probabilities_for_metrics - - def log_stage(self, stage: MLPlanStages, is_probabilities: bool = False, **kwargs): - """ - Produce the artifacts ready at the given stage and log them. - - :param stage: The current stage to log at. - :param is_probabilities: True if the 'y_pred' is a prediction of probabilities (from 'predict_proba') and False - if not. Defaulted to False. - :param kwargs: All of the required produce arguments to pass onto the plans. - """ - # Produce all the artifacts according to the given stage: - self._produce_artifacts( - stage=stage, is_probabilities=is_probabilities, **kwargs - ) - - # Log if a context is available: - if self._context is not None: - # Log the artifacts in queue: - self._log_artifacts() - # Commit: - self._context.commit(completed=False) - - def log_results( - self, - y_true: Union[np.ndarray, pd.DataFrame, pd.Series], - y_pred: Union[np.ndarray, pd.DataFrame, pd.Series], - is_probabilities: bool = False, - ): - """ - Calculate the results according to the 'is_probabilities' flag and log them. - - :param y_true: The ground truth values to send for the metrics functions. - :param y_pred: The predictions to send for the metrics functions. - :param is_probabilities: True if the 'y_pred' is a prediction of probabilities (from 'predict_proba') and False - if not. Defaulted to False. 
- """ - # Calculate the metrics results: - self._calculate_results( - y_true=y_true, y_pred=y_pred, is_probabilities=is_probabilities - ) - - # Log if a context is available: - if self._context is not None: - # Log the results in queue: - self._log_results() - # Commit: - self._context.commit(completed=False) - - def log_run( - self, - model_handler: MLModelHandler, - ): - """ - End the logger's run, logging the collected artifacts and metrics results with the model. The model will be - updated if the logger is in evaluation mode or logged as a new artifact if in training mode. - - :param model_handler: The model handler object holding the model to save and log. - """ - # In case of training, log the model as a new model artifact and in case of evaluation - update the current - # model artifact: - if self._mode == LoggerMode.TRAINING: - model_handler.log( - metrics=self._logged_results, - artifacts=self._logged_artifacts, - ) - else: - model_handler.update( - metrics=self._logged_results, - artifacts=self._logged_artifacts, - ) - - # Commit: - self._context.commit(completed=False) - - def _produce_artifacts( - self, stage: MLPlanStages, is_probabilities: bool = False, **kwargs - ): - """ - Go through the plans and check if they are ready to be produced in the given stage of the run. If they are, - the logger will pass all the arguments to the 'plan.produce' method and collect the returned artifact. - - :param stage: The stage to produce the artifact to check if its ready. - :param is_probabilities: True if the 'y_pred' that will be sent to 'produce' is a prediction of probabilities - (from 'predict_proba') and False if not. Defaulted to False. - :param kwargs: All of the required produce arguments to pass onto the plans. - """ - # Initialize a new list of plans for all the plans that will still need to be produced: - plans = [] - - # Go ver the plans to produce their artifacts: - for plan in self._plans: - # Check if the plan is ready: - if plan.is_ready(stage=stage, is_probabilities=is_probabilities): - # Produce the artifact: - self._not_logged_artifacts = { - **self._not_logged_artifacts, - **plan.produce(**kwargs), - } - # If the plan should not be produced again, continue to the next one so it won't be collected: - if not plan.is_reproducible(): - continue - # Collect the plan to produce it later (or again if reproducible): - plans.append(plan) - - # Clear the old plans: - self._plans = plans - - # Add evaluation prefix if in Evaluation mode: - if self._mode == LoggerMode.EVALUATION: - self._not_logged_artifacts = { - f"evaluation-{key}": value - for key, value in self._not_logged_artifacts.items() - } - for artifact in self._not_logged_artifacts.values(): - artifact.key = f"evaluation-{artifact.key}" - - def _calculate_results( - self, - y_true: Union[np.ndarray, pd.DataFrame, pd.Series], - y_pred: Union[np.ndarray, pd.DataFrame, pd.Series], - is_probabilities: bool, - ): - """ - Calculate the results from all the metrics in the logger. - - :param y_true: The ground truth values to send for the metrics functions. - :param y_pred: The predictions to send for the metrics functions. - :param is_probabilities: True if the 'y_pred' is a prediction of probabilities (from 'predict_proba') and False - if not. 
- """ - # Use squeeze to remove redundant dimensions: - y_true = np.squeeze(to_array(dataset=y_true)) - y_pred = np.squeeze(to_array(dataset=y_pred)) - - # Calculate the metrics: - for metric in self._metrics: - if metric.need_probabilities == is_probabilities: - self._not_logged_results[metric.name] = metric(y_true, y_pred) - - # Add evaluation prefix if in Evaluation mode: - if self._mode == LoggerMode.EVALUATION: - self._not_logged_results = { - f"evaluation_{key}": value - for key, value in self._not_logged_results.items() - } - - def _log_artifacts(self): - """ - Log the produced plans artifacts using the logger's context. - """ - # Use the context to log each artifact: - for artifact in self._not_logged_artifacts.values(): - self._context.log_artifact(artifact) - - # Collect the logged artifacts: - self._logged_artifacts = { - **self._logged_artifacts, - **self._not_logged_artifacts, - } - - # Clean the not logged artifacts dictionary: - self._not_logged_artifacts = {} - - def _log_results(self): - """ - Log the calculated metrics results using the logger's context. - """ - # Use the context to log each metric result: - self._context.log_results(self._not_logged_results) - - # Collect the logged results: - self._logged_results = {**self._logged_results, **self._not_logged_results} - - # Clean the not logged results dictionary: - self._not_logged_results = {} diff --git a/mlrun/frameworks/_ml_common/loggers/__init__.py b/mlrun/frameworks/_ml_common/loggers/__init__.py new file mode 100644 index 00000000000..b32116b19fd --- /dev/null +++ b/mlrun/frameworks/_ml_common/loggers/__init__.py @@ -0,0 +1,3 @@ +# flake8: noqa - this is until we take care of the F401 violations with respect to __all__ & sphinx +from .logger import Logger +from .mlrun_logger import MLRunLogger diff --git a/mlrun/frameworks/_ml_common/loggers/logger.py b/mlrun/frameworks/_ml_common/loggers/logger.py new file mode 100644 index 00000000000..27b24b5d0e5 --- /dev/null +++ b/mlrun/frameworks/_ml_common/loggers/logger.py @@ -0,0 +1,128 @@ +from typing import Dict, List + +from ..utils import MLTypes + + +class Logger: + """ + Logger for tracking hyperparamters and metrics results during training of some framework. + """ + + def __init__(self): + """ + Initialize a generic logger for collecting training data. + """ + # Set up the results dictionaries - a dictionary of metrics for all the iteration results by their epochs: + # [Validation Set: str] -> [Metric: str] -> [Iteration: int] -> [value: float] + self._results = {} # type: Dict[str, Dict[str, List[float]]] + + # Store the static hyperparameters given - a dictionary of parameters and their values to note: + # [Parameter: str] -> [value: Union[str, bool, float, int]] + self._static_hyperparameters = {} # type: Dict[str, MLTypes.TrackableType] + + # Set up the dynamic hyperparameters dictionary - a dictionary of all tracked hyperparameters by epochs: + # [Hyperparameter: str] -> [Epoch: int] -> [value: Union[str, bool, float, int]] + self._dynamic_hyperparameters = ( + {} + ) # type: Dict[str, List[MLTypes.TrackableType]] + + # Set up the iterations counter: + self._iterations = 0 + + @property + def results(self) -> Dict[str, Dict[str, List[float]]]: + """ + Get the results logged. The results will be stored in a dictionary where each key is the validation set name + and the value is a dictionary of metrics to their list of iterations values. + + :return: The results. 
+ """ + return self._results + + @property + def static_hyperparameters(self) -> Dict[str, MLTypes.TrackableType]: + """ + Get the static hyperparameters logged. The hyperparameters will be stored in a dictionary where each key is the + hyperparameter name and the value is his logged value. + + :return: The static hyperparameters. + """ + return self._static_hyperparameters + + @property + def dynamic_hyperparameters(self) -> Dict[str, List[MLTypes.TrackableType]]: + """ + Get the dynamic hyperparameters logged. The hyperparameters will be stored in a dictionary where each key is the + hyperparameter name and the value is a list of his logged values per epoch. + + :return: The dynamic hyperparameters. + """ + return self._dynamic_hyperparameters + + @property + def iterations(self) -> int: + """ + Get the overall iterations. + + :return: The overall iterations. + """ + return self._iterations + + def log_iteration(self): + """ + Log a new iteration. + """ + self._iterations += 1 + + def log_result(self, validation_set_name: str, metric_name: str, result: float): + """ + Log the given metric result in the training results dictionary at the current epoch. + + :param validation_set_name: Name of the validation set used. + :param metric_name: The metric name. + :param result: The metric result to log. + """ + # Get the validation set's metrics (will set a new dictionary in case it's a new validation set): + if validation_set_name not in self._results: + self._results[validation_set_name] = {} + validation_set_metrics = self._results[validation_set_name] + + # Get the metric's results list (will set a new list in case it's a new metric): + if metric_name not in validation_set_metrics: + validation_set_metrics[metric_name] = [] + metric_results = validation_set_metrics.setdefault(metric_name, []) + + # Log the metric's result: + metric_results.append(result) + + def log_static_hyperparameter( + self, parameter_name: str, value: MLTypes.TrackableType + ): + """ + Log the given parameter value in the static hyperparameters dictionary. + + :param parameter_name: The parameter name. + :param value: The parameter value to log. + """ + self._static_hyperparameters[parameter_name] = value + + def log_dynamic_hyperparameter( + self, parameter_name: str, value: MLTypes.TrackableType + ): + """ + Log the given parameter value in the dynamic hyperparameters dictionary at the current iteration (if it's a new + parameter it will be iteration 0). If the parameter appears in the static hyperparameters dictionary, it will be + removed from there as it is now dynamic. + + :param parameter_name: The parameter name. + :param value: The parameter value to log. 
+ """ + # Check if it's a new hyperparameter being tracked: + if parameter_name not in self._dynamic_hyperparameters: + # Look in the static hyperparameters: + if parameter_name in self._static_hyperparameters: + self._static_hyperparameters.pop(parameter_name) + # Add it as a dynamic hyperparameter: + self._dynamic_hyperparameters[parameter_name] = [value] + else: + self._dynamic_hyperparameters[parameter_name].append(value) diff --git a/mlrun/frameworks/_ml_common/loggers/mlrun_logger.py b/mlrun/frameworks/_ml_common/loggers/mlrun_logger.py new file mode 100644 index 00000000000..790604920c7 --- /dev/null +++ b/mlrun/frameworks/_ml_common/loggers/mlrun_logger.py @@ -0,0 +1,166 @@ +import re +from typing import Dict, List + +import numpy as np +import plotly.graph_objects as go + +import mlrun +from mlrun.artifacts import Artifact, PlotlyArtifact + +from .logger import Logger + + +class MLRunLogger(Logger): + """ + MLRun logger is logging the information collected during training of the base logger and logging it to MLRun using + an MLRun context. + """ + + def __init__( + self, + context: mlrun.MLClientCtx, + ): + """ + Initialize the MLRun logging interface to work with the given context. + + :param context: MLRun context to log to. The context parameters can be logged as static hyperparameters. + """ + super(MLRunLogger, self).__init__() + + # An MLRun context to log to: + self._context = context + + # Prepare the artifacts dictionary: + self._artifacts = {} # type: Dict[str, Artifact] + + def get_artifacts(self) -> Dict[str, Artifact]: + """ + Get the artifacts created by this logger. + + :return: The artifacts dictionary. + """ + return self._artifacts + + def get_metrics(self) -> Dict[str, float]: + """ + Generate a metrics summary to log along the model. + + :return: The metrics summary. + """ + return { + f"{validation_set}_{metric_name}": results[-1] + for validation_set, metrics in self._results.items() + for metric_name, results in metrics.items() + } + + def log_context_parameters(self): + """ + Log the context given parameters as static hyperparameters. Should be called once as the context parameters do + not change. + """ + for parameter_name, parameter_value in self._context.parameters.items(): + # Check if the parameter is a trackable value: + if isinstance(parameter_value, (str, bool, float, int)): + self.log_static_hyperparameter( + parameter_name=parameter_name, value=parameter_value + ) + else: + # See if its string representation length is below the maximum value length: + string_value = str(parameter_value) + if ( + len(string_value) < 30 + ): # Temporary to no log to long variables into the UI. + # TODO: Make the user specify the parameters and take them all by default. + self.log_static_hyperparameter( + parameter_name=parameter_name, value=parameter_value + ) + + def log_iteration_to_context(self): + """ + Log the information of the last iteration and produce the updated artifacts. Each call will log the following + information: + + * Results table: + + * Static hyperparameters. + * Dynamic hyperparameters. + * Metric results. + + * Plot artifacts: + + * A chart for each of the metrics iteration results. + * A chart for each of the dynamic hyperparameters values. 
+ """ + # Log the collected hyperparameters: + for static_parameter, value in self._static_hyperparameters.items(): + self._context.log_result(static_parameter, value) + for dynamic_parameter, values in self._dynamic_hyperparameters.items(): + # Log as a result to the context (take the most recent result in the training history (-1 index): + self._context.log_result(dynamic_parameter, values[-1]) + # Create the plotly artifact: + artifact = self._produce_convergence_plot_artifact( + name=dynamic_parameter, + values=values, + ) + # Log the artifact: + self._context.log_artifact(artifact) + # Collect it for later adding it to the model logging as extra data: + self._artifacts[artifact.metadata.key] = artifact + + # Log the metrics: + for metric_name, metric_results in { + f"{validation_set}_{metric_name}": results + for validation_set, metrics in self._results.items() + for metric_name, results in metrics.items() + }.items(): + # Log as a result to the context: + self._context.log_result(metric_name, metric_results[-1]) + # Create the plotly artifact: + artifact = self._produce_convergence_plot_artifact( + name=f"{metric_name}_plot", + values=metric_results, + ) + # Log the artifact: + self._context.log_artifact(artifact) + # Collect it for later adding it to the model logging as extra data: + self._artifacts[artifact.metadata.key] = artifact + + # Commit to update the changes, so they will be available in the MLRun UI: + self._context.commit(completed=False) + + @staticmethod + def _produce_convergence_plot_artifact( + name: str, values: List[float] + ) -> PlotlyArtifact: + """ + Produce the convergences for the provided metric according. + + :param name: The name of the metric / hyperparameter. + :param values: The values per iteration of the metric / hyperparameter. + + :return: Plotly artifact of the convergence plot. + """ + # Initialize a plotly figure: + metric_figure = go.Figure() + + # Add titles: + metric_figure.update_layout( + title=f"{re.sub('_', ' ', name).capitalize()}", + xaxis_title="Iterations", + yaxis_title="Values", + ) + + # Draw: + metric_figure.add_trace( + go.Scatter( + x=list(np.arange(len(values))), + y=values, + mode="lines", + ) + ) + + # Creating the artifact: + return PlotlyArtifact( + key=name, + figure=metric_figure, + ) diff --git a/mlrun/frameworks/_ml_common/mlrun_interface.py b/mlrun/frameworks/_ml_common/mlrun_interface.py deleted file mode 100644 index 737e4d7ecbe..00000000000 --- a/mlrun/frameworks/_ml_common/mlrun_interface.py +++ /dev/null @@ -1,306 +0,0 @@ -from abc import ABC -from typing import List - -import mlrun - -from .._common import MLRunInterface, ModelType, RestorationInformation -from .logger import Logger, LoggerMode -from .metrics_library import Metric -from .model_handler import MLModelHandler -from .plan import MLPlan, MLPlanStages -from .utils import DatasetType, concatenate_x_y - - -class MLMLRunInterface(MLRunInterface, ABC): - """ - Interface for adding MLRun features for machine learning common API. - """ - - # MLRun's context default name: - DEFAULT_CONTEXT_NAME = "mlrun-ml" - - # Attributes to be inserted so the MLRun interface will be fully enabled. 
- _PROPERTIES = { - # A model handler instance with the model for logging / updating the model (if not provided the model won't be - # logged / updated at the end of training / testing): - "_model_handler": None, # > type: MLModelHandler - # The logger that is logging this model's training / evaluation: - "_logger": None, # > type: Logger - # The test set (For validation post training or evaluation post prediction): - "_x_test": None, # > type: DatasetType - "_y_test": None, # > type: DatasetType - } - _METHODS = [ - "set_model_handler", - "configure_logger", - "_pre_fit", - "_post_fit", - "_pre_predict", - "_post_predict", - ] - - # Attributes to replace so the MLRun interface will be fully enabled. - _REPLACED_METHODS = ["fit", "predict", "predict_proba"] - - @classmethod - def add_interface( - cls, - obj: ModelType, - restoration_information: RestorationInformation = None, - ): - """ - Enrich the object with this interface properties, methods and functions so it will have this framework MLRun's - features. - - :param obj: The model object to enrich his interface. - :param restoration_information: Restoration information tuple as returned from 'remove_interface' in order to - add the interface in a certain state. - """ - # Check if the given model has the 'predict_proba' method to replace: - if not hasattr(obj, "predict_proba"): - cls._REPLACED_METHODS.remove("predict_proba") - - # Add the interface to the model: - super(MLMLRunInterface, cls).add_interface( - obj=obj, restoration_information=restoration_information - ) - - # Restore the '_REPLACED_METHODS' list for next models: - if "predict_proba" not in cls._REPLACED_METHODS: - cls._REPLACED_METHODS.append("predict_proba") - - # Setup a default logger: - if obj._logger is None: - obj._logger = Logger() - - @classmethod - def mlrun_fit(cls): - """ - MLRun's common ML API fit wrapper. It will run a validation post training if provided the required attributes. - - Unsupervised learning algorithms won't be using 'y' in their 'fit' method, but for consistency reasons (as - written in SciKit-Learn's API documentation): the signature of their 'fit' method will still have 'y'. For that - we do not need another wrapper that support unsupervised learning models. - """ - - def wrapper( - self: ModelType, X: DatasetType, y: DatasetType = None, *args, **kwargs - ): - # Restore the prediction methods as fit will use them: - cls._restore_attribute(obj=self, attribute_name="predict") - if hasattr(self, "predict_proba"): - cls._restore_attribute(obj=self, attribute_name="predict_proba") - - # Call the pre fit method: - self._pre_fit(x=X, y=y) - - # Call the original fit method: - result = self.original_fit(X, y, *args, **kwargs) - - # Call the post fit method: - self._post_fit(x=X, y=y) - - # Replace the prediction methods again: - cls._replace_function(obj=self, function_name="predict") - if hasattr(self, "predict_proba"): - cls._replace_function(obj=self, function_name="predict_proba") - return result - - return wrapper - - def mlrun_predict(self, X: DatasetType, *args, **kwargs): - """ - MLRun's wrapper for the common ML API predict method. - """ - self._pre_predict(x=X, y=self._y_test) - - y_pred = self.original_predict(X, *args, **kwargs) - - self._post_predict(x=X, y=self._y_test, y_pred=y_pred, is_predict_proba=False) - - return y_pred - - def mlrun_predict_proba(self, X: DatasetType, *args, **kwargs): - """ - MLRun's wrapper for the common ML API predict_proba method. 
- """ - self._pre_predict(x=X, y=self._y_test) - - y_pred = self.original_predict_proba(X, *args, **kwargs) - - self._post_predict(x=X, y=self._y_test, y_pred=y_pred, is_predict_proba=True) - - return y_pred - - def set_model_handler(self, model_handler: MLModelHandler): - """ - Set this model's MLRun handler for logging the model as a model artifact post training (post calling 'fit') or - update the existing model artifact post testing (calling 'predict' / 'predict_proba'). If the logger's context - is None, it will set its context to the handler's context. - - :param model_handler: The ML model handler with a loaded model. - """ - # Store the given model handler: - self._model_handler = model_handler - - # Update the logger's context to the handler's context if its None: - if self._logger.context is None: - self._logger.set_context(context=model_handler.context) - - def configure_logger( - self, - context: mlrun.MLClientCtx = None, - plans: List[MLPlan] = None, - metrics: List[Metric] = None, - x_test: DatasetType = None, - y_test: DatasetType = None, - ): - """ - Initialize the MLRun logger for this model using the provided context and artifacts plans, metrics and model - logging attributes. - - :param context: A MLRun context to log to. By default, uses `mlrun.get_or_create_ctx` - :param plans: A list of plans to produce. - :param metrics: A list of metrics to calculate. - :param x_test: The testing data for producing and calculating artifacts and metrics post training or post - prediction. Without this, training validation will not be performed. - :param y_test: The test data for producing and calculating artifacts and metrics post training (calling 'fit') - or post testing (calling 'predict' / 'predict_proba'). - """ - # Update the MLRun logger: - if context is None and self._logger.context is None: - context = mlrun.get_or_create_ctx( - name=MLMLRunInterface.DEFAULT_CONTEXT_NAME - ) - if context is not None: - self._logger.set_context(context=context) - self._logger.set_plans(plans=plans) - self._logger.set_metrics(metrics=metrics) - - # Validate that if the prediction probabilities are required, this model has the 'predict_proba' method: - if self._logger.is_probabilities_required() and not hasattr( - self, "predict_proba" - ): - raise mlrun.errors.MLRunInvalidArgumentError( - f"Some of the metrics and or artifacts required to be calculated and produced require prediction " - f"probabilities yet this model: '{type(self)}' do not has the 'predict_proba' method." - ) - - # Store the given dataset: - self._x_test = x_test - self._y_test = y_test - - def _pre_fit(self, x: DatasetType, y: DatasetType = None): - """ - Method for creating the artifacts before the fit method. - - :param x: The input dataset to the fit method ('x_train'). - :param y: The input dataset to the fit method ('y_train'). - """ - self._logger.log_stage(stage=MLPlanStages.PRE_FIT, model=self, x=x, y=y) - - def _post_fit(self, x: DatasetType, y: DatasetType = None): - """ - Method for creating the artifacts after the fit method. If a validation set is available, the method will start - a validation process calling predict - creating and calculating validation artifacts and metrics. - - :param x: The input dataset to the fit method ('x_train'). - :param y: The input dataset to the fit method ('y_train'). 
- """ - # The model is done training, log all artifacts post fit: - self._logger.log_stage(stage=MLPlanStages.POST_FIT, model=self, x=x, y=y) - - # If there is a validation set, run validation: - if self._x_test is not None: - self._logger.log_stage( - stage=MLPlanStages.PRE_PREDICT, - model=self, - x=self._x_test, - y=self._y_test, - ) - y_pred = self.predict(self._x_test) - self._post_predict( - x=self._x_test, - y=self._y_test, - y_pred=y_pred, - is_predict_proba=False, - ) - - # Log the model with the given attributes: - if self._model_handler is not None: - # Set the sample set to the training set if None: - if self._model_handler.sample_set is None: - sample_set, y_columns = concatenate_x_y( - x=x, y=y, y_columns=self._model_handler.y_columns - ) - self._model_handler.set_y_columns(y_columns=y_columns) - self._model_handler.set_sample_set(sample_set=sample_set) - # Log the model: - self._logger.log_run(model_handler=self._model_handler) - - def _pre_predict(self, x: DatasetType, y: DatasetType): - """ - Method for creating the artifacts before the predict method. - - :param x: The input dataset to the predict / predict_proba method ('x_test'). - :param y: The input dataset to the predict / predict_proba method ('y_test'). - """ - # This function is only called for evaluation, then set the logger's mode: - self._logger.set_mode(mode=LoggerMode.EVALUATION) - - # Produce and log all the artifacts pre prediction: - self._logger.log_stage(stage=MLPlanStages.PRE_PREDICT, model=self, x=x, y=y) - - def _post_predict( - self, - x: DatasetType, - y: DatasetType, - y_pred: DatasetType, - is_predict_proba: bool, - ): - """ - Method for creating and calculating the artifacts and metrics after the predict method. This method can be - called after a user call to predict as part of testing or as part of validation after training (calling fit). - - :param x: The input dataset to the predict / predict_proba method ('x_test' / 'x_validation'). - :param y: The input dataset to the predict / predict_proba method ('y_test' / 'y_validation'). - :param y_pred: The prediction outcome. - :param is_predict_proba: Whether the prediction returned from predict or predict_proba. 
- """ - # Produce and log all the artifacts post prediction: - self._logger.log_stage( - stage=MLPlanStages.POST_PREDICT, - model=self, - x=x, - y=y, - y_pred=y_pred, - is_probabilities=is_predict_proba, - ) - - # Calculate and log the metrics results: - self._logger.log_results( - y_true=y, y_pred=y_pred, is_probabilities=is_predict_proba - ) - - # If some metrics and / or plans require probabilities, run 'predict_proba': - if not is_predict_proba and self._logger.is_probabilities_required(): - y_pred_proba = self.predict_proba(x) - self._logger.log_stage( - stage=MLPlanStages.POST_PREDICT, - is_probabilities=True, - model=self, - x=x, - y=y, - y_pred=y_pred_proba, - ) - self._logger.log_results( - y_true=y, y_pred=y_pred_proba, is_probabilities=True - ) - - # If its part of validation post training, return: - if self._logger.mode == LoggerMode.TRAINING: - return - - # Update the model with the testing artifacts and results: - if self._model_handler is not None: - self._logger.log_run(model_handler=self._model_handler) diff --git a/mlrun/frameworks/_ml_common/model_handler.py b/mlrun/frameworks/_ml_common/model_handler.py index 357f180f91f..451ecf510af 100644 --- a/mlrun/frameworks/_ml_common/model_handler.py +++ b/mlrun/frameworks/_ml_common/model_handler.py @@ -6,8 +6,8 @@ from mlrun.datastore import is_store_uri from mlrun.features import Feature -from .._common import ExtraDataType, ModelHandler, ModelType, PathType -from .utils import DatasetType, YColumnsType, to_dataframe +from .._common import ModelHandler +from .utils import MLTypes, MLUtils class MLModelHandler(ModelHandler, ABC): @@ -17,12 +17,16 @@ class MLModelHandler(ModelHandler, ABC): def __init__( self, - model: ModelType = None, - model_path: PathType = None, + model: MLTypes.ModelType = None, + model_path: MLTypes.PathType = None, model_name: str = None, - modules_map: Union[Dict[str, Union[None, str, List[str]]], PathType] = None, - custom_objects_map: Union[Dict[str, Union[str, List[str]]], PathType] = None, - custom_objects_directory: PathType = None, + modules_map: Union[ + Dict[str, Union[None, str, List[str]]], MLTypes.PathType + ] = None, + custom_objects_map: Union[ + Dict[str, Union[str, List[str]]], MLTypes.PathType + ] = None, + custom_objects_directory: MLTypes.PathType = None, context: mlrun.MLClientCtx = None, **kwargs, ): @@ -81,8 +85,8 @@ def __init__( """ # Setup additional properties for logging a ml model into a ModelArtifact: self._algorithm = None # type: str - self._sample_set = None # type: DatasetType - self._y_columns = None # type: YColumnsType + self._sample_set = None # type: MLTypes.DatasetType + self._target_columns = None # type: MLTypes.TargetColumnsNamesType self._feature_vector = None # type: str self._feature_weights = None # type: List[float] @@ -108,7 +112,7 @@ def algorithm(self) -> str: return self._algorithm @property - def sample_set(self) -> DatasetType: + def sample_set(self) -> MLTypes.DatasetType: """ Get the sample dataset set in this handler. @@ -117,13 +121,13 @@ def sample_set(self) -> DatasetType: return self._sample_set @property - def y_columns(self) -> YColumnsType: + def target_columns(self) -> MLTypes.TargetColumnsNamesType: """ - Get the sample dataset y columns set in this handler. + Get the sample dataset target columns set in this handler. - :return: The handler's sample dataset y columns. + :return: The handler's sample dataset target columns names. 
""" - return self._y_columns + return self._target_columns @property def feature_vector(self) -> str: @@ -151,7 +155,9 @@ def set_algorithm(self, algorithm: str): """ self._algorithm = algorithm - def set_sample_set(self, sample_set: Union[DatasetType, mlrun.DataItem, str]): + def set_sample_set( + self, sample_set: Union[MLTypes.DatasetType, mlrun.DataItem, str] + ): """ Set the sample set this model will be logged with. The sample set will be casted to a pd.DataFrame. Can be sent as a DataItem and as a store object string. @@ -174,15 +180,15 @@ def set_sample_set(self, sample_set: Union[DatasetType, mlrun.DataItem, str]): sample_set = sample_set.as_df() # Set the sample set casting it to a DataFrame: - self._sample_set = to_dataframe(sample_set) + self._sample_set = MLUtils.to_dataframe(dataset=sample_set) - def set_y_columns(self, y_columns: YColumnsType): + def set_target_columns(self, target_columns: MLTypes.TargetColumnsNamesType): """ Set the ground truth column names of the sample set this model will be logged with. - :param y_columns: The ground truth (y) columns to set. + :param target_columns: The ground truth (y) columns names to set. """ - self._y_columns = y_columns + self._target_columns = target_columns def set_feature_vector(self, feature_vector: str): """ @@ -210,10 +216,10 @@ def log( outputs: List[Feature] = None, metrics: Dict[str, Union[int, float]] = None, artifacts: Dict[str, Artifact] = None, - extra_data: Dict[str, ExtraDataType] = None, + extra_data: Dict[str, MLTypes.ExtraDataType] = None, algorithm: str = None, - sample_set: DatasetType = None, - y_columns: YColumnsType = None, + sample_set: MLTypes.DatasetType = None, + target_columns: MLTypes.TargetColumnsNamesType = None, feature_vector: str = None, feature_weights: List[float] = None, ): @@ -235,7 +241,7 @@ def log( :param algorithm: The algorithm of this model. If None it will be read as the model's class name. :param sample_set: Sample set to use for getting the model's inputs, outputs and base stats for model monitoring. Do not pass both sample set and inputs / outputs. - :param y_columns: The ground truth (y) labels names. + :param target_columns: The ground truth (y) labels names. :param feature_vector: Feature store feature vector uri (store://feature-vectors//[:tag]) :param feature_weights: List of feature weights, one per input column. 
@@ -251,8 +257,8 @@ def log( # Update the sample set: if sample_set is not None: self.set_sample_set(sample_set=sample_set) - if y_columns is not None: - self.set_y_columns(y_columns=y_columns) + if target_columns is not None: + self.set_target_columns(target_columns=target_columns) # Update the feature parameters: if feature_vector is not None: @@ -272,7 +278,7 @@ def log( extra_data=extra_data, algorithm=self._algorithm, sample_set=self._sample_set, - y_columns=self._y_columns, + target_columns=self._target_columns, feature_vector=self._feature_vector, feature_weights=self._feature_weights, ) @@ -285,7 +291,7 @@ def update( outputs: List[Feature] = None, metrics: Dict[str, Union[int, float]] = None, artifacts: Dict[str, Artifact] = None, - extra_data: Dict[str, ExtraDataType] = None, + extra_data: Dict[str, MLTypes.ExtraDataType] = None, feature_vector: str = None, feature_weights: List[float] = None, ): diff --git a/mlrun/frameworks/_ml_common/plan.py b/mlrun/frameworks/_ml_common/plan.py index 2aa392e479b..df1b000c527 100644 --- a/mlrun/frameworks/_ml_common/plan.py +++ b/mlrun/frameworks/_ml_common/plan.py @@ -6,9 +6,8 @@ import mlrun -from .._common import ModelType -from .._common.artifacts_library import Plan -from .utils import DatasetType +from .._common import Plan +from .utils import MLTypes class MLPlanStages(Enum): @@ -16,11 +15,18 @@ class MLPlanStages(Enum): Stages for a machine learning plan to be produced. """ + # SciKit-Learn's API: PRE_FIT = "pre_fit" POST_FIT = "post_fit" PRE_PREDICT = "pre_predict" POST_PREDICT = "post_predict" + # Boosting API: + PRE_TRAIN = "pre_train" + POST_TRAIN = "post_train" + PRE_ITERATION = "pre_iteration" + POST_ITERATION = "post_iteration" + class MLPlan(Plan, ABC): """ @@ -87,8 +93,11 @@ def _gui_display(self): display(HTML(artifact.get_body())) def _calculate_predictions( - self, y_pred: DatasetType = None, model: ModelType = None, x: DatasetType = None - ) -> DatasetType: + self, + y_pred: MLTypes.DatasetType = None, + model: MLTypes.ModelType = None, + x: MLTypes.DatasetType = None, + ) -> MLTypes.DatasetType: """ Calculate the predictions using the model and input dataset only if the predictions (y_pred) were not provided. 
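The stage gating that the plans below implement can be summarized with a short, self-contained sketch. The 'ExamplePostPredictPlan' class is hypothetical; only the stage values are copied from the diff above:

from enum import Enum

class MLPlanStages(Enum):
    # SciKit-Learn's API stages (a subset, as in the diff above):
    PRE_FIT = "pre_fit"
    POST_FIT = "post_fit"
    PRE_PREDICT = "pre_predict"
    POST_PREDICT = "post_predict"

class ExamplePostPredictPlan:
    # A hypothetical plan that is produced once predictions (not probabilities) exist:
    def is_ready(self, stage: MLPlanStages, is_probabilities: bool) -> bool:
        return stage == MLPlanStages.POST_PREDICT and not is_probabilities

plan = ExamplePostPredictPlan()
assert not plan.is_ready(MLPlanStages.PRE_FIT, is_probabilities=False)
assert plan.is_ready(MLPlanStages.POST_PREDICT, is_probabilities=False)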
diff --git a/mlrun/frameworks/_ml_common/plans/calibration_curve_plan.py b/mlrun/frameworks/_ml_common/plans/calibration_curve_plan.py index 1c5e5283079..b6f7fc380f1 100644 --- a/mlrun/frameworks/_ml_common/plans/calibration_curve_plan.py +++ b/mlrun/frameworks/_ml_common/plans/calibration_curve_plan.py @@ -5,9 +5,8 @@ from mlrun.artifacts import Artifact, PlotlyArtifact -from ..._common import ModelType from ..plan import MLPlanStages, MLPlotPlan -from ..utils import DatasetType +from ..utils import MLTypes class CalibrationCurvePlan(MLPlotPlan): @@ -59,10 +58,10 @@ def is_ready(self, stage: MLPlanStages, is_probabilities: bool) -> bool: def produce( self, - y: DatasetType, - y_pred: DatasetType = None, - model: ModelType = None, - x: DatasetType = None, + y: MLTypes.DatasetType, + y_pred: MLTypes.DatasetType = None, + model: MLTypes.ModelType = None, + x: MLTypes.DatasetType = None, **kwargs ) -> Dict[str, Artifact]: """ diff --git a/mlrun/frameworks/_ml_common/plans/confusion_matrix_plan.py b/mlrun/frameworks/_ml_common/plans/confusion_matrix_plan.py index 5796f7ce676..8efceaf95ef 100644 --- a/mlrun/frameworks/_ml_common/plans/confusion_matrix_plan.py +++ b/mlrun/frameworks/_ml_common/plans/confusion_matrix_plan.py @@ -7,9 +7,8 @@ from mlrun.artifacts import Artifact, PlotlyArtifact -from ..._common import ModelType from ..plan import MLPlanStages, MLPlotPlan -from ..utils import DatasetType, to_dataframe +from ..utils import MLTypes, MLUtils class ConfusionMatrixPlan(MLPlotPlan): @@ -48,8 +47,8 @@ def __init__( def is_ready(self, stage: MLPlanStages, is_probabilities: bool) -> bool: """ - Check whether or not the plan is fit for production by the given stage and prediction probabilities. The - confusion matrix is ready only post prediction. + Check whether the plan is fit for production by the given stage and prediction probabilities. The confusion + matrix is ready only post prediction. :param stage: The stage to check if the plan is ready. 
:param is_probabilities: True if the 'y_pred' that will be sent to 'produce' is a prediction of probabilities @@ -61,10 +60,10 @@ def is_ready(self, stage: MLPlanStages, is_probabilities: bool) -> bool: def produce( self, - y: DatasetType, - y_pred: DatasetType = None, - model: ModelType = None, - x: DatasetType = None, + y: MLTypes.DatasetType, + y_pred: MLTypes.DatasetType = None, + model: MLTypes.ModelType = None, + x: MLTypes.DatasetType = None, **kwargs ) -> Dict[str, Artifact]: """ @@ -82,8 +81,8 @@ def produce( y_pred = self._calculate_predictions(y_pred=y_pred, model=model, x=x) # Convert to DataFrame: - y = to_dataframe(dataset=y) - y_pred = to_dataframe(dataset=y_pred) + y = MLUtils.to_dataframe(dataset=y) + y_pred = MLUtils.to_dataframe(dataset=y_pred) # Set the labels array it not set: if self._labels is None: diff --git a/mlrun/frameworks/_ml_common/plans/dataset_plan.py b/mlrun/frameworks/_ml_common/plans/dataset_plan.py index 7ffb42f1dea..608416ec0dc 100644 --- a/mlrun/frameworks/_ml_common/plans/dataset_plan.py +++ b/mlrun/frameworks/_ml_common/plans/dataset_plan.py @@ -1,14 +1,11 @@ from enum import Enum -from typing import Dict, List, Union - -import numpy as np -import pandas as pd +from typing import Dict, Union import mlrun.errors from mlrun.artifacts import Artifact, DatasetArtifact from ..plan import MLPlan, MLPlanStages -from ..utils import concatenate_x_y +from ..utils import MLTypes, MLUtils class DatasetPlan(MLPlan): @@ -19,7 +16,7 @@ class DatasetPlan(MLPlan): class Purposes(Enum): """ - All of the dataset split purposes. + All the dataset split purposes. """ OTHER = "other" @@ -111,24 +108,26 @@ def is_ready(self, stage: MLPlanStages, is_probabilities: bool) -> bool: def produce( self, - x: Union[list, dict, np.ndarray, pd.DataFrame, pd.Series], - y: Union[list, dict, np.ndarray, pd.DataFrame, pd.Series] = None, - y_columns: Union[List[str], List[int]] = None, + x: MLTypes.DatasetType, + y: MLTypes.DatasetType = None, + target_columns_names: MLTypes.TargetColumnsNamesType = None, **kwargs, ) -> Dict[str, Artifact]: """ Produce the dataset artifact according to this plan. - :param x: A collection of inputs to a model. - :param y: A collection of ground truth labels corresponding to the inputs. - :param y_columns: List of names or indices to give the columns of the ground truth labels. + :param x: A collection of inputs to a model. + :param y: A collection of ground truth labels corresponding to the inputs. + :param target_columns_names: List of names or indices to give the columns of the ground truth labels. :return: The produced dataset artifact. :raise MLRunInvalidArgumentError: If no dataset parameters were passed. 
""" # Merge x and y into a single dataset: - dataset, y_columns = concatenate_x_y(x=x, y=y, y_columns=y_columns) + dataset, y_columns = MLUtils.concatenate_x_y( + x=x, y=y, target_columns_names=target_columns_names + ) # Create the dataset artifact: dataset_artifact = DatasetArtifact( @@ -143,7 +142,7 @@ def produce( if self._purpose != self.Purposes.OTHER: dataset_artifact.labels["Purpose"] = self._purpose.value - # TODO: Add the y columns as an additional artifact (save as a json for example) + # TODO: Add the target columns names as an additional artifact (save as a json for example) # Store it: self._artifacts[self._name] = dataset_artifact diff --git a/mlrun/frameworks/_ml_common/plans/feature_importance_plan.py b/mlrun/frameworks/_ml_common/plans/feature_importance_plan.py index bcd75682e40..e170fea4839 100644 --- a/mlrun/frameworks/_ml_common/plans/feature_importance_plan.py +++ b/mlrun/frameworks/_ml_common/plans/feature_importance_plan.py @@ -6,9 +6,8 @@ import mlrun from mlrun.artifacts import Artifact, PlotlyArtifact -from ..._common import ModelType from ..plan import MLPlanStages, MLPlotPlan -from ..utils import DatasetType, to_dataframe +from ..utils import MLTypes, MLUtils class FeatureImportancePlan(MLPlotPlan): @@ -29,8 +28,8 @@ def __init__(self): def is_ready(self, stage: MLPlanStages, is_probabilities: bool) -> bool: """ - Check whether or not the plan is fit for production by the given stage and prediction probabilities. The - feature importance is ready post training. + Check whether the plan is fit for production by the given stage and prediction probabilities. The feature + importance is ready post training. :param stage: The stage to check if the plan is ready. :param is_probabilities: True if the 'y_pred' that will be sent to 'produce' is a prediction of probabilities @@ -41,7 +40,7 @@ def is_ready(self, stage: MLPlanStages, is_probabilities: bool) -> bool: return stage == MLPlanStages.POST_FIT def produce( - self, model: ModelType, x: DatasetType, **kwargs + self, model: MLTypes.ModelType, x: MLTypes.DatasetType, **kwargs ) -> Dict[str, Artifact]: """ Produce the feature importance according to the given model and dataset ('x'). 
@@ -68,7 +67,7 @@ def produce( # Create a table of features and their importance: df = pd.DataFrame( { - "features": to_dataframe(x).columns, + "features": MLUtils.to_dataframe(dataset=x).columns, "feature_importance": importance_score, } ).sort_values(by="feature_importance", ascending=False) diff --git a/mlrun/frameworks/_ml_common/plans/roc_curve_plan.py b/mlrun/frameworks/_ml_common/plans/roc_curve_plan.py index 4a27437f925..26bbb92f7e8 100644 --- a/mlrun/frameworks/_ml_common/plans/roc_curve_plan.py +++ b/mlrun/frameworks/_ml_common/plans/roc_curve_plan.py @@ -7,9 +7,8 @@ from mlrun.artifacts import Artifact, PlotlyArtifact -from ..._common import ModelType from ..plan import MLPlanStages, MLPlotPlan -from ..utils import DatasetType, to_dataframe +from ..utils import MLTypes, MLUtils class ROCCurvePlan(MLPlotPlan): @@ -79,10 +78,10 @@ def is_ready(self, stage: MLPlanStages, is_probabilities: bool) -> bool: def produce( self, - y: DatasetType, - y_pred: DatasetType = None, - model: ModelType = None, - x: DatasetType = None, + y: MLTypes.DatasetType, + y_pred: MLTypes.DatasetType = None, + model: MLTypes.ModelType = None, + x: MLTypes.DatasetType = None, **kwargs, ) -> Dict[str, Artifact]: """ @@ -100,8 +99,8 @@ def produce( y_pred = self._calculate_predictions(y_pred=y_pred, model=model, x=x) # Convert to DataFrame: - y = to_dataframe(dataset=y) - y_pred = to_dataframe(dataset=y_pred) + y = MLUtils.to_dataframe(dataset=y) + y_pred = MLUtils.to_dataframe(dataset=y_pred) # One hot encode the labels in order to plot them y_one_hot = pd.get_dummies(y, columns=y.columns.to_list()) diff --git a/mlrun/frameworks/_ml_common/plots.py b/mlrun/frameworks/_ml_common/plots.py deleted file mode 100644 index 86fae2c9fed..00000000000 --- a/mlrun/frameworks/_ml_common/plots.py +++ /dev/null @@ -1,645 +0,0 @@ -from itertools import cycle -from typing import List - -import matplotlib.pyplot as plt -import numpy as np -import pandas as pd - -try: - import seaborn as sns -except ModuleNotFoundError: - pass - -try: - from scikitplot.metrics import plot_calibration_curve -except ModuleNotFoundError: - pass - -try: - from scipy import interp -except ModuleNotFoundError: - pass - -from sklearn import metrics -from sklearn.base import is_classifier, is_regressor -from sklearn.calibration import calibration_curve -from sklearn.metrics import confusion_matrix as sklearn_confusion_matrix -from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score -from sklearn.preprocessing import LabelBinarizer - -from mlrun.artifacts import PlotArtifact - - -def gcf_clear(plt): - """Utility to clear matplotlib figure - Run this inside every plot method before calling any matplotlib - methods - :param plot: matloblib figure object - """ - plt.cla() - plt.clf() - plt.close() - - -def feature_importances(model, header): - """Display estimated feature importances - Only works for models with attribute 'feature_importances_` - :param model: fitted model - :param header: feature labels - """ - if not hasattr(model, "feature_importances_"): - raise Exception( - "feature importances are only available for some models, if you got " - "here then please make sure to check your estimated model for a " - "`feature_importances_` attribute before calling this method" - ) - - # create a feature importance table with desired labels - zipped = zip(model.feature_importances_, header) - feature_imp = pd.DataFrame(sorted(zipped), columns=["freq", "feature"]).sort_values( - by="freq", ascending=False - ) - - plt.clf() # 
gcf_clear(plt) - plt.figure() - sns.barplot(x="freq", y="feature", data=feature_imp) - plt.title("features") - plt.tight_layout() - - return ( - PlotArtifact( - "feature-importances", body=plt.gcf(), title="Feature Importances" - ), - feature_imp, - ) - - -def plot_importance( - context, model, key: str = "feature-importances", plots_dest: str = "plots" -): - """Display estimated feature importances - Only works for models with attribute 'feature_importances_` - - **legacy version please deprecate in functions and demos** - - :param context: function context - :param model: fitted model - :param key: key of feature importances plot and table in artifact - store - :param plots_dest: subfolder in artifact store - """ - if not hasattr(model, "feature_importances_"): - raise Exception("feature importaces are only available for some models") - - # create a feature importance table with desired labels - zipped = zip(model.feature_importances_, context.header) - feature_imp = pd.DataFrame(sorted(zipped), columns=["freq", "feature"]).sort_values( - by="freq", ascending=False - ) - - gcf_clear(plt) - plt.figure(figsize=(20, 10)) - sns.barplot(x="freq", y="feature", data=feature_imp) - plt.title("features") - plt.tight_layout() - - fname = f"{plots_dest}/{key}.html" - context.log_artifact(PlotArtifact(key, body=plt.gcf()), local_path=fname) - - # feature importances are also saved as a csv table (generally small): - fname = key + "-tbl.csv" - return context.log_dataset(key + "-tbl", df=feature_imp, local_path=fname) - - -def learning_curves(model): - """model class dependent - - WIP - - get training history plots for xgboost, lightgbm - - returns list of PlotArtifacts, can be empty if no history - is found - """ - plots = [] - - # do this here and not in the call to learning_curve plots, - # this is default approach for xgboost and lightgbm - if hasattr(model, "evals_result"): - results = model.evals_result() - train_set = list(results.items())[0] - valid_set = list(results.items())[1] - - learning_curves = pd.DataFrame( - { - "train_error": train_set[1]["error"], - "train_auc": train_set[1]["auc"], - "valid_error": valid_set[1]["error"], - "valid_auc": valid_set[1]["auc"], - } - ) - - plt.clf() # gcf_clear(plt) - fig, ax = plt.subplots() - plt.xlabel("# training examples") - plt.ylabel("auc") - plt.title("learning curve - auc") - ax.plot(learning_curves.train_auc, label="train") - ax.plot(learning_curves.valid_auc, label="valid") - ax.legend(loc="lower left") - plots.append(PlotArtifact("learning curve - auc", body=plt.gcf())) - - plt.clf() # gcf_clear(plt) - fig, ax = plt.subplots() - plt.xlabel("# training examples") - plt.ylabel("error rate") - plt.title("learning curve - error") - ax.plot(learning_curves.train_error, label="train") - ax.plot(learning_curves.valid_error, label="valid") - ax.legend(loc="lower left") - plots.append(PlotArtifact("learning curve - taoot", body=plt.gcf())) - - # elif some other model history api... 
- - return plots - - -def confusion_matrix(model, xtest, ytest, cmap="Blues"): - cmd = metrics.plot_confusion_matrix( - model, - xtest, - ytest, - normalize="all", - values_format=".2g", - cmap=plt.get_cmap(cmap), - ) - # for now only 1, add different views to this array for display in UI - cmd.plot() - return PlotArtifact( - "confusion-matrix-normalized", - body=cmd.figure_, - title="Confusion Matrix - Normalized Plot", - ) - - -def precision_recall_multi(ytest_b, yprob, labels, scoring="micro"): - """""" - n_classes = len(labels) - - precision = dict() - recall = dict() - avg_prec = dict() - for i in range(n_classes): - precision[i], recall[i], _ = metrics.precision_recall_curve( - ytest_b[:, i], yprob[:, i] - ) - avg_prec[i] = metrics.average_precision_score(ytest_b[:, i], yprob[:, i]) - precision["micro"], recall["micro"], _ = metrics.precision_recall_curve( - ytest_b.ravel(), yprob.ravel() - ) - avg_prec["micro"] = metrics.average_precision_score(ytest_b, yprob, average="micro") - ap_micro = avg_prec["micro"] - # model_metrics.update({'precision-micro-avg-classes': ap_micro}) - - # gcf_clear(plt) - colors = cycle(["navy", "turquoise", "darkorange", "cornflowerblue", "teal"]) - plt.figure() - f_scores = np.linspace(0.2, 0.8, num=4) - lines = [] - labels = [] - for f_score in f_scores: - x = np.linspace(0.01, 1) - y = f_score * x / (2 * x - f_score) - (l,) = plt.plot(x[y >= 0], y[y >= 0], color="gray", alpha=0.2) - plt.annotate(f"f1={f_score:0.1f}", xy=(0.9, y[45] + 0.02)) - - lines.append(l) - labels.append("iso-f1 curves") - (l,) = plt.plot(recall["micro"], precision["micro"], color="gold", lw=10) - lines.append(l) - labels.append(f"micro-average precision-recall (area = {ap_micro:0.2f})") - - for i, color in zip(range(n_classes), colors): - (l,) = plt.plot(recall[i], precision[i], color=color, lw=2) - lines.append(l) - labels.append(f"precision-recall for class {i} (area = {avg_prec[i]:0.2f})") - - # fig = plt.gcf() - # fig.subplots_adjust(bottom=0.25) - plt.xlim([0.0, 1.0]) - plt.ylim([0.0, 1.05]) - plt.xlabel("recall") - plt.ylabel("precision") - plt.title("precision recall - multiclass") - plt.legend(lines, labels, loc=(0, -0.41), prop=dict(size=10)) - - return PlotArtifact( - "precision-recall-multiclass", - body=plt.gcf(), - title="Multiclass Precision Recall", - ) - - -def roc_multi(ytest_b, yprob, labels): - """""" - n_classes = len(labels) - - # Compute ROC curve and ROC area for each class - fpr = dict() - tpr = dict() - roc_auc = dict() - for i in range(n_classes): - fpr[i], tpr[i], _ = metrics.roc_curve(ytest_b[:, i], yprob[:, i]) - roc_auc[i] = metrics.auc(fpr[i], tpr[i]) - - # Compute micro-average ROC curve and ROC area - fpr["micro"], tpr["micro"], _ = metrics.roc_curve(ytest_b.ravel(), yprob.ravel()) - roc_auc["micro"] = metrics.auc(fpr["micro"], tpr["micro"]) - - # First aggregate all false positive rates - all_fpr = np.unique(np.concatenate([fpr[i] for i in range(n_classes)])) - - # Then interpolate all ROC curves at this points - mean_tpr = np.zeros_like(all_fpr) - for i in range(n_classes): - mean_tpr += interp(all_fpr, fpr[i], tpr[i]) - - # Finally average it and compute AUC - mean_tpr /= n_classes - - fpr["macro"] = all_fpr - tpr["macro"] = mean_tpr - roc_auc["macro"] = metrics.auc(fpr["macro"], tpr["macro"]) - - # Plot all ROC curves - gcf_clear(plt) - plt.figure() - plt.plot( - fpr["micro"], - tpr["micro"], - label=f"micro-average ROC curve (area = {roc_auc['micro']:0.2f})", - color="deeppink", - linestyle=":", - linewidth=4, - ) - - plt.plot( - fpr["macro"], 
- tpr["macro"], - label=f"macro-average ROC curve (area = {roc_auc['macro']:0.2f})", - color="navy", - linestyle=":", - linewidth=4, - ) - - colors = cycle(["aqua", "darkorange", "cornflowerblue"]) - for i, color in zip(range(n_classes), colors): - plt.plot( - fpr[i], - tpr[i], - color=color, - lw=2, - label=f"ROC curve of class {i} (area = {roc_auc[i]:0.2f})", - ) - - plt.plot([0, 1], [0, 1], "k--", lw=2) - plt.xlim([0.0, 1.0]) - plt.ylim([0.0, 1.05]) - plt.xlabel("False Positive Rate") - plt.ylabel("True Positive Rate") - plt.title("receiver operating characteristic - multiclass") - plt.legend(loc=(0, -0.68), prop=dict(size=10)) - - return PlotArtifact("roc-multiclass", body=plt.gcf(), title="Multiclass ROC Curve") - - -def roc_bin(ytest, yprob, clear: bool = False): - """""" - # ROC plot - if clear: - gcf_clear(plt) - fpr, tpr, _ = metrics.roc_curve(ytest, yprob) - plt.figure() - plt.plot([0, 1], [0, 1], "k--") - plt.plot(fpr, tpr, label="a label") - plt.xlabel("false positive rate") - plt.ylabel("true positive rate") - plt.title("roc curve") - plt.legend(loc="best") - - return PlotArtifact("roc-binary", body=plt.gcf(), title="Binary ROC Curve") - - -def precision_recall_bin(model, xtest, ytest, yprob, clear=False): - """""" - if clear: - gcf_clear(plt) - disp = metrics.plot_precision_recall_curve(model, xtest, ytest) - disp.ax_.set_title( - f"precision recall: AP={metrics.average_precision_score(ytest, yprob):0.2f}" - ) - - return PlotArtifact( - "precision-recall-binary", body=disp.figure_, title="Binary Precision Recall" - ) - - -def plot_roc( - context, - y_labels, - y_probs, - key="roc", - plots_dir: str = "plots", - fmt="png", - fpr_label: str = "false positive rate", - tpr_label: str = "true positive rate", - title: str = "roc curve", - legend_loc: str = "best", - clear: bool = True, -): - """plot roc curves - - **legacy version please deprecate in functions and demos** - - :param context: the function context - :param y_labels: ground truth labels, hot encoded for multiclass - :param y_probs: model prediction probabilities - :param key: ("roc") key of plot in artifact store - :param plots_dir: ("plots") destination folder relative path to artifact path - :param fmt: ("png") plot format - :param fpr_label: ("false positive rate") x-axis labels - :param tpr_label: ("true positive rate") y-axis labels - :param title: ("roc curve") title of plot - :param legend_loc: ("best") location of plot legend - :param clear: (True) clear the matplotlib figure before drawing - """ - # clear matplotlib current figure - if clear: - gcf_clear(plt) - - # draw 45 degree line - plt.plot([0, 1], [0, 1], "k--") - - # labelling - plt.xlabel(fpr_label) - plt.ylabel(tpr_label) - plt.title(title) - plt.legend(loc=legend_loc) - - # single ROC or multiple - if y_labels.shape[1] > 1: - - # data accumulators by class - fpr = dict() - tpr = dict() - roc_auc = dict() - for i in range(y_labels[:, :-1].shape[1]): - fpr[i], tpr[i], _ = metrics.roc_curve( - y_labels[:, i], y_probs[:, i], pos_label=1 - ) - roc_auc[i] = metrics.auc(fpr[i], tpr[i]) - plt.plot(fpr[i], tpr[i], label=f"class {i}") - else: - fpr, tpr, _ = metrics.roc_curve(y_labels, y_probs[:, 1], pos_label=1) - plt.plot(fpr, tpr, label="positive class") - - fname = f"{plots_dir}/{key}.html" - return context.log_artifact(PlotArtifact(key, body=plt.gcf()), local_path=fname) - - -# move to the individual framework -def eval_model_v2( - context, - xtest, - ytest, - model, - pcurve_bins: int = 10, - pcurve_names: List[str] = ["my classifier"], - 
plots_artifact_path: str = "", - pred_params: dict = {}, - cmap="Blues", - is_xgb=False, -): - """generate predictions and validation stats - - pred_params are non-default, scikit-learn api prediction-function - parameters. For example, a tree-type of model may have a tree depth - limit for its prediction function. - - :param xtest: features array type Union(DataItem, DataFrame, - numpy array) - :param ytest: ground-truth labels Union(DataItem, DataFrame, - Series, numpy array, List) - :param model: estimated model - :param pcurve_bins: (10) subdivide [0,1] interval into n bins, x-axis - :param pcurve_names: label for each calibration curve - :param pred_params: (None) dict of predict function parameters - :param cmap: ('Blues') matplotlib color map - :param is_xgb - """ - - if hasattr(model, "get_xgb_params"): - is_xgb = True - - def df_blob(df): - return bytes(df.to_csv(index=False), encoding="utf-8") - - if isinstance(ytest, np.ndarray): - unique_labels = np.unique(ytest) - elif isinstance(ytest, list): - unique_labels = set(ytest) - else: - try: - ytest = ytest.values - unique_labels = np.unique(ytest) - except Exception as exc: - raise Exception(f"unrecognized data type for ytest {exc}") - - n_classes = len(unique_labels) - is_multiclass = True if n_classes > 2 else False - - # INIT DICT...OR SOME OTHER COLLECTOR THAT CAN BE ACCESSED - plots_path = plots_artifact_path or context.artifact_subpath("plots") - extra_data = {} - - ypred = model.predict(xtest, **pred_params) - - if isinstance(ypred.flat[0], np.floating): - accuracy = mean_absolute_error(ytest, ypred) - - else: - accuracy = float(metrics.accuracy_score(ytest, ypred)) - - context.log_results( - {"accuracy": accuracy, "test-error": np.sum(ytest != ypred) / ytest.shape[0]} - ) - - # PROBABILITIES - if hasattr(model, "predict_proba"): - yprob = model.predict_proba(xtest, **pred_params) - if not is_multiclass: - fraction_of_positives, mean_predicted_value = calibration_curve( - ytest, yprob[:, -1], n_bins=pcurve_bins, strategy="uniform" - ) - cmd = plot_calibration_curve(ytest, [yprob], pcurve_names) - calibration = context.log_artifact( - PlotArtifact( - "probability-calibration", - body=cmd.get_figure(), - title="probability calibration plot", - ), - artifact_path=plots_path, - db_key=False, - ) - extra_data["probability calibration"] = calibration - - # CONFUSION MATRIX - if is_classifier(model): - cm = sklearn_confusion_matrix(ytest, ypred, normalize="all") - df = pd.DataFrame(data=cm) - extra_data["confusion matrix table.csv"] = df_blob(df) - - cmd = metrics.plot_confusion_matrix( - model, - xtest, - ytest, - normalize="all", - values_format=".2g", - cmap=plt.get_cmap(cmap), - ) - confusion = context.log_artifact( - PlotArtifact( - "confusion-matrix", - body=cmd.figure_, - title="Confusion Matrix - Normalized Plot", - ), - artifact_path=plots_path, - db_key=False, - ) - extra_data["confusion matrix"] = confusion - - # LEARNING CURVES - if hasattr(model, "evals_result") and is_xgb is False: - results = model.evals_result() - train_set = list(results.items())[0] - valid_set = list(results.items())[1] - - learning_curves_df = None - if is_multiclass: - if hasattr(train_set[1], "merror"): - learning_curves_df = pd.DataFrame( - { - "train_error": train_set[1]["merror"], - "valid_error": valid_set[1]["merror"], - } - ) - else: - if hasattr(train_set[1], "error"): - learning_curves_df = pd.DataFrame( - { - "train_error": train_set[1]["error"], - "valid_error": valid_set[1]["error"], - } - ) - - if learning_curves_df: - 
extra_data["learning curve table.csv"] = df_blob(learning_curves_df) - - _, ax = plt.subplots() - plt.xlabel("# training examples") - plt.ylabel("error rate") - plt.title("learning curve - error") - ax.plot(learning_curves_df["train_error"], label="train") - ax.plot(learning_curves_df["valid_error"], label="valid") - learning = context.log_artifact( - PlotArtifact( - "learning-curve", body=plt.gcf(), title="Learning Curve - error" - ), - artifact_path=plots_path, - db_key=False, - ) - extra_data["learning curve"] = learning - - # FEATURE IMPORTANCES - if hasattr(model, "feature_importances_"): - (fi_plot, fi_tbl) = feature_importances(model, xtest.columns) - extra_data["feature importances"] = context.log_artifact( - fi_plot, db_key=False, artifact_path=plots_path - ) - extra_data["feature importances table.csv"] = df_blob(fi_tbl) - - # AUC - ROC - PR CURVES - if is_multiclass and is_classifier(model): - lb = LabelBinarizer() - ytest_b = lb.fit_transform(ytest) - - extra_data["precision_recall_multi"] = context.log_artifact( - precision_recall_multi(ytest_b, yprob, unique_labels), - artifact_path=plots_path, - db_key=False, - ) - extra_data["roc_multi"] = context.log_artifact( - roc_multi(ytest_b, yprob, unique_labels), - artifact_path=plots_path, - db_key=False, - ) - - # AUC multiclass - aucmicro = metrics.roc_auc_score( - ytest_b, yprob, multi_class="ovo", average="micro" - ) - aucweighted = metrics.roc_auc_score( - ytest_b, yprob, multi_class="ovo", average="weighted" - ) - - context.log_results({"auc-micro": aucmicro, "auc-weighted": aucweighted}) - - # others (todo - macro, micro...) - f1 = metrics.f1_score(ytest, ypred, average="macro") - ps = metrics.precision_score(ytest, ypred, average="macro") - rs = metrics.recall_score(ytest, ypred, average="macro") - context.log_results({"f1-score": f1, "precision_score": ps, "recall_score": rs}) - - elif is_classifier(model): - yprob_pos = yprob[:, 1] - extra_data["precision_recall_bin"] = context.log_artifact( - precision_recall_bin(model, xtest, ytest, yprob_pos), - artifact_path=plots_path, - db_key=False, - ) - extra_data["roc_bin"] = context.log_artifact( - roc_bin(ytest, yprob_pos, clear=True), - artifact_path=plots_path, - db_key=False, - ) - - rocauc = metrics.roc_auc_score(ytest, yprob_pos) - brier_score = metrics.brier_score_loss(ytest, yprob_pos, pos_label=ytest.max()) - f1 = metrics.f1_score(ytest, ypred) - ps = metrics.precision_score(ytest, ypred) - rs = metrics.recall_score(ytest, ypred) - context.log_results( - { - "rocauc": rocauc, - "brier_score": brier_score, - "f1-score": f1, - "precision_score": ps, - "recall_score": rs, - } - ) - - elif is_regressor(model): - r_squared = r2_score(ytest, ypred) - rmse = mean_squared_error(ytest, ypred, squared=False) - mse = mean_squared_error(ytest, ypred, squared=True) - mae = mean_absolute_error(ytest, ypred) - context.log_results( - { - "R2": r_squared, - "root_mean_squared_error": rmse, - "mean_squared_error": mse, - "mean_absolute_error": mae, - } - ) - # return all model metrics and plots - return extra_data diff --git a/mlrun/frameworks/_ml_common/producer.py b/mlrun/frameworks/_ml_common/producer.py new file mode 100644 index 00000000000..4f795489df5 --- /dev/null +++ b/mlrun/frameworks/_ml_common/producer.py @@ -0,0 +1,81 @@ +from .._common import LoggingMode, Producer +from .plan import MLPlanStages + + +class MLProducer(Producer): + """ + Class for handling production of artifact plans during a run. 
+    """
+
+    def is_probabilities_required(self) -> bool:
+        """
+        Check if probabilities are required in order to produce some of the artifacts.
+
+        :return: True if probabilities are required by at least one plan and False otherwise.
+        """
+        return any(plan.need_probabilities for plan in self._plans)
+
+    def produce_stage(
+        self, stage: MLPlanStages, is_probabilities: bool = False, **kwargs
+    ):
+        """
+        Produce the artifacts ready at the given stage and log them.
+
+        :param stage:            The current stage to log at.
+        :param is_probabilities: True if the 'y_pred' is a prediction of probabilities (from 'predict_proba') and False
+                                 if not. Defaulted to False.
+        :param kwargs:           All of the required produce arguments to pass on to the plans.
+        """
+        # Produce all the artifacts according to the given stage:
+        self._produce_artifacts(
+            stage=stage, is_probabilities=is_probabilities, **kwargs
+        )
+
+        # Log if a context is available:
+        if self._context is not None:
+            # Log the artifacts in queue:
+            self._log_artifacts()
+            # Commit:
+            self._context.commit(completed=False)
+
+    def _produce_artifacts(
+        self, stage: MLPlanStages, is_probabilities: bool = False, **kwargs
+    ):
+        """
+        Go through the plans and check if they are ready to be produced in the given stage of the run. If they are,
+        the logger will pass all the arguments to the 'plan.produce' method and collect the returned artifact.
+
+        :param stage:            The stage to check whether each plan is ready to produce its artifact.
+        :param is_probabilities: True if the 'y_pred' that will be sent to 'produce' is a prediction of probabilities
+                                 (from 'predict_proba') and False if not. Defaulted to False.
+        :param kwargs:           All of the required produce arguments to pass on to the plans.
+        """
+        # Initialize a new list of plans for all the plans that will still need to be produced:
+        plans = []
+
+        # Go over the plans to produce their artifacts:
+        for plan in self._plans:
+            # Check if the plan is ready:
+            if plan.is_ready(stage=stage, is_probabilities=is_probabilities):
+                # Produce the artifact:
+                self._not_logged_artifacts = {
+                    **self._not_logged_artifacts,
+                    **plan.produce(**kwargs),
+                }
+                # If the plan should not be produced again, continue to the next one so it won't be collected:
+                if not plan.is_reproducible():
+                    continue
+            # Collect the plan to produce it later (or again if reproducible):
+            plans.append(plan)
+
+        # Clear the old plans:
+        self._plans = plans
+
+        # Add evaluation prefix if in Evaluation mode:
+        if self._mode == LoggingMode.EVALUATION:
+            self._not_logged_artifacts = {
+                f"evaluation-{key}": value
+                for key, value in self._not_logged_artifacts.items()
+            }
+            for artifact in self._not_logged_artifacts.values():
+                artifact.key = f"evaluation-{artifact.key}"
diff --git a/mlrun/frameworks/_ml_common/utils.py b/mlrun/frameworks/_ml_common/utils.py
index 353fa7dce2d..db54a76d3d0 100644
--- a/mlrun/frameworks/_ml_common/utils.py
+++ b/mlrun/frameworks/_ml_common/utils.py
@@ -1,128 +1,86 @@
+from abc import ABC
 from enum import Enum
 from typing import Callable, List, Tuple, Union
 
-import numpy as np
 import pandas as pd
-import scipy.sparse.base
 from sklearn.base import is_classifier, is_regressor
 
 import mlrun
 
-from .._common import ModelType
+from .._common import CommonTypes, CommonUtils
 
-# A type for all the supported dataset types:
-DatasetType = Union[
-    list, dict, np.ndarray, pd.DataFrame, pd.Series, scipy.sparse.base.spmatrix
-]
 
-# Type for a metric entry, can be passed as the metric function itself, as a callable object, a string of the name of
-# the function and
the full module path to the function to import. Arguments to use when calling the metric can be -# joined by wrapping it as a tuple: -MetricEntry = Union[Tuple[Union[Callable, str], dict], Callable, str] - -# Type for the 'y_columns' parameter - a list of indices or column names that are the ground truth (y) of a dataset. -YColumnsType = Union[List[str], List[int]] - - -def to_array(dataset: DatasetType) -> np.ndarray: - """ - Convert the given dataset to np.ndarray. - - :param dataset: The dataset to convert. Must be one of {pd.DataFrame, pd.Series, scipy.sparse.base.spmatrix, list, - dict}. - - :return: The dataset as a ndarray. - - :raise MLRunInvalidArgumentError: If the dataset type is not supported. +class MLTypes(CommonTypes, ABC): """ - if isinstance(dataset, np.ndarray): - return dataset - if isinstance(dataset, (pd.DataFrame, pd.Series)): - return dataset.to_numpy() - if isinstance(dataset, scipy.sparse.base.spmatrix): - return dataset.toarray() - if isinstance(dataset, list): - return np.array(dataset) - if isinstance(dataset, dict): - return np.array(list(dataset.values())) - raise mlrun.errors.MLRunInvalidArgumentError( - f"Could not convert the given dataset into a numpy ndarray. Supporting conversion from: " - f"'pandas.DataFrame', 'pandas.Series', 'scipy.sparse.base.spmatrix', list and dict. The given dataset was of " - f"type: '{type(dataset)}'" - ) - - -def to_dataframe(dataset: DatasetType) -> pd.DataFrame: + Machine learning frameworks type hints. """ - Convert the given dataset to pd.DataFrame. - :param dataset: The dataset to convert. Must be one of {np.ndarray, pd.Series, scipy.sparse.base.spmatrix, list, - dict}. + # Type for a metric entry, can be passed as the metric function itself, as a callable object, a string of the name + # of the function and the full module path to the function to import. Arguments to use when calling the metric can + # be joined by wrapping it as a tuple: + # TODO: will be moved to SKLearn's framework once LightGBM and XGBoost are fully supported. + MetricEntryType = Union[Tuple[Union[Callable, str], dict], Callable, str] - :return: The dataset as a DataFrame. + # Type for the target column name - a list of indices or column names that are the ground truth (y) of a dataset. + TargetColumnsNamesType = Union[List[str], List[int]] - :raise MLRunInvalidArgumentError: If the dataset type is not supported. - """ - if isinstance(dataset, pd.DataFrame): - return dataset - if isinstance(dataset, (np.ndarray, pd.Series, list, dict)): - return pd.DataFrame(dataset) - if isinstance(dataset, scipy.sparse.base.spmatrix): - return pd.DataFrame.sparse.from_spmatrix(dataset) - raise mlrun.errors.MLRunInvalidArgumentError( - f"Could not convert the given dataset into a pandas DataFrame. Supporting conversion from: " - f"'numpy.ndarray', 'pandas.Series', 'scipy.sparse.base.spmatrix' list and dict. The given dataset was of type: " - f"'{type(dataset)}'" - ) - - -def concatenate_x_y( - x: DatasetType, - y: DatasetType = None, - y_columns: YColumnsType = None, - default_y_column_prefix: str = "y_", -) -> Tuple[pd.DataFrame, Union[YColumnsType, None]]: + +class MLUtils(CommonUtils, ABC): """ - Concatenating the provided x and y data into a single pd.DataFrame, casting from np.ndarray and renaming y's - original columns if 'y_columns' was not provided. The concatenated dataset index level will be reset to 0 - (multi-level indexes will be dropped using pandas 'reset_index' method). - - :param x: A collection of inputs to a model. 
-    :param y:                       A collection of ground truth labels corresponding to the inputs.
-    :param y_columns:               List of names or indices to give the columns of the ground truth labels.
-    :param default_y_column_prefix: A default value to join the y columns in case one of them is found in x (so there
-                                    won't be any duplicates). Defaulted to: "y_".
-
-    :return: A tuple of:
-             [0] = The concatenated x and y as a single DataFrame.
-             [1] = The y columns names / indices.
+    Machine learning frameworks utilities.
     """
-    # Cast x to a DataFrame (from np.ndarray and pd.Series):
-    x = to_dataframe(dataset=x)
-    if y is None:
-        # Reset the indices levels:
-        x = x.reset_index(drop=True)
-        return x, None
-
-    # Cast y to a DataFrame (from np.ndarray and pd.Series):
-    y = to_dataframe(dataset=y)
-
-    # Check if y's columns are given, if not set the default avoiding duplicates with x's columns:
-    if y_columns is None:
-        y_columns = [
-            column if column not in x.columns else f"{default_y_column_prefix}{column}"
-            for column in list(y.columns)
-        ]
 
-    # Override the columns with the names the user provided:
-    y.columns = y_columns
-
-    # Concatenate the x and y into a single dataset:
-    dataset = pd.concat([x, y], axis=1)
+    @staticmethod
+    def concatenate_x_y(
+        x: CommonTypes.DatasetType,
+        y: CommonTypes.DatasetType = None,
+        target_columns_names: MLTypes.TargetColumnsNamesType = None,
+        default_target_column_prefix: str = "y_",
+    ) -> Tuple[pd.DataFrame, Union[MLTypes.TargetColumnsNamesType, None]]:
+        """
+        Concatenate the provided x and y data into a single pd.DataFrame, casting from np.ndarray and renaming y's
+        original columns if 'target_columns_names' was not provided. The concatenated dataset index level will be
+        reset to 0 (multi-level indexes will be dropped using pandas 'reset_index' method).
+
+        :param x:                            A collection of inputs to a model.
+        :param y:                            A collection of ground truth labels corresponding to the inputs.
+        :param target_columns_names:         List of names or indices to give the columns of the ground truth labels.
+        :param default_target_column_prefix: A default value to join the y columns in case one of them is found in x
+                                             (so there won't be any duplicates). Defaulted to: "y_".
+
+        :return: A tuple of:
+                 [0] = The concatenated x and y as a single DataFrame.
+                 [1] = The target columns names / indices.
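+
+        example (a minimal sketch with toy data; `np` stands for an imported numpy)::
+
+            x = np.arange(6).reshape(3, 2)  # Becomes a DataFrame with columns 0 and 1.
+            y = [0, 1, 0]                   # Becomes column 0, renamed to "y_0" to avoid duplicating x's column 0.
+
+            dataset, target_columns = MLUtils.concatenate_x_y(x=x, y=y)
+            # dataset columns: [0, 1, "y_0"] | target_columns: ["y_0"]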
+ """ + # Cast x to a DataFrame (from np.ndarray and pd.Series): + x = MLUtils.to_dataframe(dataset=x) + if y is None: + # Reset the indices levels: + x = x.reset_index(drop=True) + return x, None + + # Cast y to a DataFrame (from np.ndarray and pd.Series): + y = MLUtils.to_dataframe(dataset=y) + + # Check if y's columns are given, if not set the default avoiding duplicates with x's columns: + if target_columns_names is None: + target_columns_names = [ + column + if column not in x.columns + else f"{default_target_column_prefix}{column}" + for column in list(y.columns) + ] + + # Override the columns with the names the user provided: + y.columns = target_columns_names + + # Concatenate the x and y into a single dataset: + dataset = pd.concat([x, y], axis=1) - # Reset the indices levels: - dataset.reset_index(drop=True, inplace=True) + # Reset the indices levels: + dataset.reset_index(drop=True, inplace=True) - return dataset, y_columns + return dataset, target_columns_names class AlgorithmFunctionality(Enum): @@ -156,7 +114,7 @@ class AlgorithmFunctionality(Enum): @classmethod def get_algorithm_functionality( - cls, model: ModelType, y: DatasetType = None + cls, model: MLTypes.ModelType, y: MLTypes.DatasetType = None ) -> "AlgorithmFunctionality": """ Get the algorithm functionality according to the provided model and ground truth labels. @@ -170,7 +128,7 @@ def get_algorithm_functionality( """ # Convert the provided ground truths to DataFrame: if y is not None: - y = to_dataframe(dataset=y) + y = MLUtils.to_dataframe(dataset=y) # Check for classification: if is_classifier(model): diff --git a/mlrun/frameworks/auto_mlrun/auto_mlrun.py b/mlrun/frameworks/auto_mlrun/auto_mlrun.py index 3de30bf33fa..e2d4b8b7b5b 100644 --- a/mlrun/frameworks/auto_mlrun/auto_mlrun.py +++ b/mlrun/frameworks/auto_mlrun/auto_mlrun.py @@ -4,10 +4,10 @@ import mlrun from mlrun.artifacts import get_model -from .._common import ModelHandler, ModelType +from .._common import CommonTypes, ModelHandler -def get_framework_by_instance(model: ModelType) -> str: +def get_framework_by_instance(model: CommonTypes.ModelType) -> str: """ Get the framework name of the given model by its instance. @@ -103,7 +103,7 @@ def get_framework_by_instance(model: ModelType) -> str: ) -def get_framework_by_class_name(model: ModelType) -> str: +def get_framework_by_class_name(model: CommonTypes.ModelType) -> str: """ Get the framework name of the given model by its class name. @@ -247,7 +247,7 @@ class AutoMLRun: @staticmethod def _get_framework( - model: ModelType = None, model_path: str = None + model: CommonTypes.ModelType = None, model_path: str = None ) -> Union[Tuple[str, dict]]: """ Try to get the framework from the model or model path provided. 
The framework can be read from the model path @@ -397,7 +397,7 @@ def load_model( @staticmethod def apply_mlrun( - model: ModelType = None, + model: CommonTypes.ModelType = None, model_name: str = None, tag: str = "", model_path: str = None, diff --git a/mlrun/frameworks/lgbm/__init__.py b/mlrun/frameworks/lgbm/__init__.py index 08f61a99f8a..673d665e6b7 100644 --- a/mlrun/frameworks/lgbm/__init__.py +++ b/mlrun/frameworks/lgbm/__init__.py @@ -1,32 +1,171 @@ -import warnings -from typing import Dict, List, Union +# flake8: noqa - this is until we take care of the F401 violations with respect to __all__ & sphinx +from typing import Any, Dict, List, Union import lightgbm as lgb import mlrun - -from .._common import ExtraDataType, get_plans -from .._ml_common import ( - DatasetType, - Metric, - MetricEntry, - MetricsLibrary, - MLArtifactsLibrary, - MLPlan, - PickleModelServer, - get_metrics, +from mlrun.frameworks.lgbm.mlrun_interfaces import ( + LGBMBoosterMLRunInterface, + LGBMMLRunInterface, + LGBMModelMLRunInterface, ) -from .mlrun_interface import LGBMModelMLRunInterface + +from .._ml_common import MLArtifactsLibrary, MLPlan +from ..sklearn import Metric, MetricsLibrary from .model_handler import LGBMModelHandler +from .model_server import LGBMModelServer +from .utils import LGBMTypes, LGBMUtils -# Placeholders as the SciKit-Learn API is commonly used among all of the ML frameworks: +# Placeholders as the SciKit-Learn API is commonly used among all ML frameworks: LGBMArtifactsLibrary = MLArtifactsLibrary -LGBMMetricsLibrary = MetricsLibrary -LGBMModelServer = PickleModelServer + + +def _apply_mlrun_on_module( + model_name: str = "model", + tag: str = "", + modules_map: Union[Dict[str, Union[None, str, List[str]]], str] = None, + custom_objects_map: Union[Dict[str, Union[str, List[str]]], str] = None, + custom_objects_directory: str = None, + context: mlrun.MLClientCtx = None, + model_format: str = LGBMModelHandler.ModelFormats.PKL, + sample_set: Union[LGBMTypes.DatasetType, mlrun.DataItem, str] = None, + y_columns: Union[List[str], List[int]] = None, + feature_vector: str = None, + feature_weights: List[float] = None, + labels: Dict[str, Union[str, int, float]] = None, + parameters: Dict[str, Union[str, int, float]] = None, + extra_data: Dict[str, LGBMTypes.ExtraDataType] = None, + auto_log: bool = True, + mlrun_logging_callback_kwargs: Dict[str, Any] = None, +): + # Apply MLRun's interface on the LightGBM module: + LGBMMLRunInterface.add_interface(obj=lgb) + + # If automatic logging is required, set the future logging callbacks that will be applied to the training functions: + if auto_log: + lgb.configure_logging( + context=context, + model_handler_kwargs={ + "model_name": model_name, + "modules_map": modules_map, + "custom_objects_map": custom_objects_map, + "custom_objects_directory": custom_objects_directory, + "model_format": model_format, + }, + log_model_kwargs={ + "tag": tag, + "sample_set": sample_set, + "target_columns": y_columns, + "feature_vector": feature_vector, + "feature_weights": feature_weights, + "labels": labels, + "parameters": parameters, + "extra_data": extra_data, + }, + mlrun_logging_callback_kwargs=mlrun_logging_callback_kwargs, + ) + + +def _apply_mlrun_on_model( + model: LGBMTypes.ModelType = None, + model_name: str = "model", + tag: str = "", + model_path: str = None, + modules_map: Union[Dict[str, Union[None, str, List[str]]], str] = None, + custom_objects_map: Union[Dict[str, Union[str, List[str]]], str] = None, + custom_objects_directory: str = None, + 
context: mlrun.MLClientCtx = None, + model_format: str = LGBMModelHandler.ModelFormats.PKL, + artifacts: Union[List[MLPlan], List[str], Dict[str, dict]] = None, + metrics: Union[ + List[Metric], + List[LGBMTypes.MetricEntryType], + Dict[str, LGBMTypes.MetricEntryType], + ] = None, + x_test: LGBMTypes.DatasetType = None, + y_test: LGBMTypes.DatasetType = None, + sample_set: Union[LGBMTypes.DatasetType, mlrun.DataItem, str] = None, + y_columns: Union[List[str], List[int]] = None, + feature_vector: str = None, + feature_weights: List[float] = None, + labels: Dict[str, Union[str, int, float]] = None, + parameters: Dict[str, Union[str, int, float]] = None, + extra_data: Dict[str, LGBMTypes.ExtraDataType] = None, + auto_log: bool = True, + **kwargs +): + # Create a model handler: + model_handler_kwargs = ( + kwargs.pop("model_handler_kwargs") if "model_handler_kwargs" in kwargs else {} + ) + handler = LGBMModelHandler( + model_name=model_name, + model_path=model_path, + model=model, + context=context, + model_format=model_format, + modules_map=modules_map, + custom_objects_map=custom_objects_map, + custom_objects_directory=custom_objects_directory, + **model_handler_kwargs, + ) + + # Set the handler's logging attributes: + handler.set_tag(tag=tag) + if sample_set is not None: + handler.set_sample_set(sample_set=sample_set) + if y_columns is not None: + handler.set_target_columns(target_columns=y_columns) + if feature_vector is not None: + handler.set_feature_vector(feature_vector=feature_vector) + if feature_weights is not None: + handler.set_feature_weights(feature_weights=feature_weights) + if labels is not None: + handler.set_labels(to_add=labels) + if parameters is not None: + handler.set_parameters(to_add=parameters) + if extra_data is not None: + handler.set_extra_data(to_add=extra_data) + + # Load the model if it was not provided: + if handler.model is None: + handler.load() + model = handler.model + + # Add MLRun's interface to the model according to the model type (LGBMModel or Booster): + if isinstance(model, lgb.LGBMModel): + # Apply MLRun's interface on the `LGBMModel`: + LGBMModelMLRunInterface.add_interface(obj=model) + # Configure the logger: + model.configure_logging( + context=context, + plans=LGBMArtifactsLibrary.get_plans( + artifacts=artifacts, + context=context, + include_default=auto_log, + model=model, + y=y_test, + ), + metrics=MetricsLibrary.get_metrics( + metrics=metrics, + context=context, + include_default=auto_log, + model=model, + y=y_test, + ), + x_test=x_test, + y_test=y_test, + model_handler=handler, + ) + else: # lgb.Booster + LGBMBoosterMLRunInterface.add_interface(obj=model) + model.model_handler = handler + + return handler def apply_mlrun( - model: lgb.LGBMModel = None, + model: LGBMTypes.ModelType = None, model_name: str = "model", tag: str = "", model_path: str = None, @@ -34,22 +173,29 @@ def apply_mlrun( custom_objects_map: Union[Dict[str, Union[str, List[str]]], str] = None, custom_objects_directory: str = None, context: mlrun.MLClientCtx = None, + model_format: str = LGBMModelHandler.ModelFormats.PKL, artifacts: Union[List[MLPlan], List[str], Dict[str, dict]] = None, - metrics: Union[List[Metric], List[MetricEntry], Dict[str, MetricEntry]] = None, - x_test: DatasetType = None, - y_test: DatasetType = None, - sample_set: Union[DatasetType, mlrun.DataItem, str] = None, + metrics: Union[ + List[Metric], + List[LGBMTypes.MetricEntryType], + Dict[str, LGBMTypes.MetricEntryType], + ] = None, + x_test: LGBMTypes.DatasetType = None, + y_test: 
LGBMTypes.DatasetType = None,
+    sample_set: Union[LGBMTypes.DatasetType, mlrun.DataItem, str] = None,
     y_columns: Union[List[str], List[int]] = None,
     feature_vector: str = None,
     feature_weights: List[float] = None,
     labels: Dict[str, Union[str, int, float]] = None,
     parameters: Dict[str, Union[str, int, float]] = None,
-    extra_data: Dict[str, ExtraDataType] = None,
+    extra_data: Dict[str, LGBMTypes.ExtraDataType] = None,
     auto_log: bool = True,
+    mlrun_logging_callback_kwargs: Dict[str, Any] = None,
     **kwargs
-) -> LGBMModelHandler:
+) -> Union[LGBMModelHandler, None]:
     """
-    Wrap the given model with MLRun's interface providing it with mlrun's additional features.
+    Apply MLRun's interface on top of LightGBM by wrapping the module itself or the given model, providing both with
+    MLRun's quality of life features.
 
     :param model:                    The model to wrap. Can be loaded from the model path given as well.
     :param model_name:               The model name to use for storing the model artifact. Defaulted to "model".
@@ -114,94 +260,63 @@ def apply_mlrun(
     :param labels:                   Labels to log with the model.
     :param parameters:               Parameters to log with the model.
     :param extra_data:               Extra data to log with the model.
-    :param auto_log:                 Whether or not to apply MLRun's auto logging on the model. Auto logging will add
-                                     the default artifacts and metrics to the lists of artifacts and metrics. Defaulted
-                                     to True.
+    :param auto_log:                 Whether to apply MLRun's auto logging on the model. Auto logging will add the
+                                     default artifacts and metrics to the lists of artifacts and metrics. Defaulted to
+                                     True.
+    :param mlrun_logging_callback_kwargs: Keyword arguments for the MLRun callback. For further information see the
+                                     documentation of the class 'MLRunLoggingCallback'. Note that 'context' is already
+                                     given here.
 
-    :return: The model handler initialized with the provided model.
-    """
-    if "X_test" in kwargs:
-        warnings.warn(
-            "The attribute 'X_test' was changed to 'x_test' and will be removed next version.",
-            # TODO: Remove in mlrun 1.0.0
-            PendingDeprecationWarning,
-        )
-        x_test = kwargs["X_test"]
-    if "X_train" in kwargs or "y_train" in kwargs:
-        warnings.warn(
-            "The attributes 'X_train' and 'y_train' are no longer required and will be removed next version.",
-            # TODO: Remove in mlrun 1.0.0
-            PendingDeprecationWarning,
-        )
+    :return: If a model was provided via `model` or `model_path`, the model handler initialized with the provided
+             model will be returned. Otherwise, None.
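+
+    example (a minimal sketch of the module-wrapping flow, assuming a prepared `lgb.Dataset` named `train_set`)::
+
+        import lightgbm as lgb
+        from mlrun.frameworks.lgbm import apply_mlrun
+
+        # No model or model path is given, so the LightGBM module itself is wrapped:
+        apply_mlrun(model_name="my-model")
+
+        # `lgb.train` is now wrapped and will log the training and the trained booster to MLRun:
+        booster = lgb.train({"objective": "binary"}, train_set)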
+ """ # Get the default context: if context is None: - context = mlrun.get_or_create_ctx(LGBMModelMLRunInterface.DEFAULT_CONTEXT_NAME) + context = mlrun.get_or_create_ctx(LGBMMLRunInterface.DEFAULT_CONTEXT_NAME) - # Create a model handler: - model_handler_kwargs = ( - kwargs.pop("model_handler_kwargs") if "model_handler_kwargs" in kwargs else {} - ) - handler = LGBMModelHandler( + # If a model or a model path were provided, apply on the provided model, otherwise on the LightGBM module: + if model is None and model_path is None: + _apply_mlrun_on_module( + model_name=model_name, + tag=tag, + modules_map=modules_map, + custom_objects_map=custom_objects_map, + custom_objects_directory=custom_objects_directory, + context=context, + model_format=model_format, + sample_set=sample_set, + y_columns=y_columns, + feature_vector=feature_vector, + feature_weights=feature_weights, + labels=labels, + parameters=parameters, + extra_data=extra_data, + auto_log=auto_log, + mlrun_logging_callback_kwargs=mlrun_logging_callback_kwargs, + ) + return + return _apply_mlrun_on_model( + model=model, model_name=model_name, + tag=tag, model_path=model_path, - model=model, - context=context, modules_map=modules_map, custom_objects_map=custom_objects_map, custom_objects_directory=custom_objects_directory, - **model_handler_kwargs, - ) - - # Load the model if it was not provided: - if model is None: - handler.load() - model = handler.model - - # Set the handler's logging attributes: - handler.set_tag(tag=tag) - if sample_set is not None: - handler.set_sample_set(sample_set=sample_set) - if y_columns is not None: - handler.set_y_columns(y_columns=y_columns) - if feature_vector is not None: - handler.set_feature_vector(feature_vector=feature_vector) - if feature_weights is not None: - handler.set_feature_weights(feature_weights=feature_weights) - if labels is not None: - handler.set_labels(to_add=labels) - if parameters is not None: - handler.set_parameters(to_add=parameters) - if extra_data is not None: - handler.set_extra_data(to_add=extra_data) - - # Add MLRun's interface to the model: - LGBMModelMLRunInterface.add_interface(obj=model) - - # Set the handler to the model: - model.set_model_handler(model_handler=handler) - - # Configure the logger: - model.configure_logger( context=context, - plans=get_plans( - artifacts_library=LGBMArtifactsLibrary, - artifacts=artifacts, - context=context, - include_default=auto_log, - model=model, - y=y_test, - ), - metrics=get_metrics( - metrics_library=LGBMMetricsLibrary, - metrics=metrics, - context=context, - include_default=auto_log, - model=model, - y=y_test, - ), + model_format=model_format, + artifacts=artifacts, + metrics=metrics, x_test=x_test, y_test=y_test, + sample_set=sample_set, + y_columns=y_columns, + feature_vector=feature_vector, + feature_weights=feature_weights, + labels=labels, + parameters=parameters, + extra_data=extra_data, + auto_log=auto_log, + **kwargs, ) - - return handler diff --git a/mlrun/frameworks/lgbm/callbacks/__init__.py b/mlrun/frameworks/lgbm/callbacks/__init__.py new file mode 100644 index 00000000000..124d2856ca3 --- /dev/null +++ b/mlrun/frameworks/lgbm/callbacks/__init__.py @@ -0,0 +1,6 @@ +# flake8: noqa - this is until we take care of the F401 violations with respect to __all__ & sphinx +from .callback import Callback +from .logging_callback import LoggingCallback +from .mlrun_logging_callback import MLRunLoggingCallback + +# TODO: Implement a tensorboard logging callback. 
diff --git a/mlrun/frameworks/lgbm/callbacks/callback.py b/mlrun/frameworks/lgbm/callbacks/callback.py
new file mode 100644
index 00000000000..681b7659779
--- /dev/null
+++ b/mlrun/frameworks/lgbm/callbacks/callback.py
@@ -0,0 +1,92 @@
+from abc import ABC
+
+from lightgbm.callback import CallbackEnv
+
+
+class Callback(ABC):
+    """
+    An abstract callback class for writing callbacks usable by LightGBM's training. LightGBM expects a callable
+    object, so the `__call__` method will be used.
+
+    There are two configurable class properties:
+
+    * order: int = 10 - The priority of the callback to be called first. Lower value means higher priority. Defaulted
+      to 10.
+    * before_iteration: bool = False - Whether to call this callback before each iteration or after. Defaulted to after
+      (False).
+
+    LightGBM will pass a `CallbackEnv` object to the callbacks - a `NamedTuple` with the following items:
+
+    * model: Booster - The model's Booster object.
+    * params: Dict - The training parameters passed to 'lgb.train'.
+    * iteration: int - The current iteration.
+    * begin_iteration: int - The first iteration.
+    * end_iteration: int - The last iteration.
+    * evaluation_result_list: List[Tuple[str, str, float, bool, Optional[float]]] - A list of metric tuples. Each
+      metric tuple is constructed by:
+
+      * [0] str - The validation set name the metric was calculated on.
+      * [1] str - The metric name.
+      * [2] float - The metric score (mean score in case of `lightgbm.cv`).
+      * [3] bool - The aim of the metric, True means bigger score is better and False means smaller score is better.
+      * [4] Optional[float] - The metric stdv score (only in case of `lightgbm.cv`)
+
+    In addition, if the LightGBM module or model is wrapped with MLRun, the methods `on_train_begin` and `on_train_end`
+    will be called as well.
+
+    example::
+
+        class ExampleCallback(Callback):
+            def __init__(self, name: str):
+                self.name = name
+
+            def __call__(self, env: CallbackEnv):
+                print(f"{self.name}: current iteration: {env.iteration}")
+
+            def on_train_begin(self):
+                print(f"{self.name}: Starting training...")
+
+            def on_train_end(self):
+                print(f"{self.name}: Done training!")
+
+        apply_mlrun()
+        lgb.train(
+            ...,
+            callbacks=[ExampleCallback(name="Example")]
+        )
+    """
+
+    def __init__(self, order: int = 10, before_iteration: bool = False):
+        """
+        Initialize a new callback to use in LightGBM's training.
+
+        :param order:            The priority of the callback to be called first. Lower value means higher priority.
+                                 Defaulted to 10.
+        :param before_iteration: Whether to call this callback before each iteration or after. Defaulted to after
+                                 (False).
+        """
+        self.order = order
+        self.before_iteration = before_iteration
+
+    def __call__(self, env: CallbackEnv):
+        """
+        The method to be called during training of a LightGBM model. It will be called at the end of each iteration,
+        after validating on all the given validation datasets.
+
+        :param env: The CallbackEnv representing the current iteration of the training.
+        """
+        pass
+
+    def on_train_begin(self):
+        """
+        Method to be called before the training starts. Will only be called if the model is wrapped with an MLRun
+        interface.
+        """
+        pass
+
+    def on_train_end(self):
+        """
+        Method to be called after the training ends. Will only be called if the model is wrapped with an MLRun
+        interface.
+        """
+        pass
diff --git a/mlrun/frameworks/lgbm/callbacks/logging_callback.py b/mlrun/frameworks/lgbm/callbacks/logging_callback.py
new file mode 100644
index 00000000000..8dc9af0543a
--- /dev/null
+++ b/mlrun/frameworks/lgbm/callbacks/logging_callback.py
@@ -0,0 +1,109 @@
+from typing import List
+
+from ..._ml_common.loggers import Logger
+from ..utils import LGBMTypes
+from .callback import Callback, CallbackEnv
+
+
+class LoggingCallback(Callback):
+    """
+    A logging callback to collect training data.
+    """
+
+    def __init__(
+        self,
+        dynamic_hyperparameters: List[str] = None,
+        static_hyperparameters: List[str] = None,
+    ):
+        """
+        Initialize the logging callback with the given configuration. All the metrics data will be collected but the
+        hyperparameters to log must be given. The hyperparameters will be taken from the `params` of the model in each
+        iteration.
+
+        :param dynamic_hyperparameters: If needed to track a hyperparameter dynamically (sample it each iteration) it
+                                        should be passed here. The parameter expects a list of all the hyperparameters
+                                        names to track out of the `params` dictionary.
+        :param static_hyperparameters:  If needed to track a hyperparameter one time per run it should be passed here.
+                                        The parameter expects a list of all the hyperparameters names to track out of
+                                        the `params` dictionary.
+        """
+        super(LoggingCallback, self).__init__()
+        self._logger = Logger()
+        self._dynamic_hyperparameters_keys = (
+            dynamic_hyperparameters if dynamic_hyperparameters is not None else []
+        )
+        self._static_hyperparameters_keys = (
+            static_hyperparameters if static_hyperparameters is not None else []
+        )
+
+    @property
+    def logger(self) -> Logger:
+        """
+        Get the logger of the callback. In the logger you may access the collected training data.
+
+        :return: The logger.
+        """
+        return self._logger
+
+    def __call__(self, env: CallbackEnv):
+        """
+        Log the iteration that ended and all the results it calculated.
+
+        :param env: A named tuple passed at the end of each iteration containing the metrics results. For more
+                    information check the `Callback` doc string.
+        """
+        # Log the iteration:
+        self._logger.log_iteration()
+
+        # Log the metrics results out of the `evaluation_result_list` field:
+        self._log_results(evaluation_result_list=env.evaluation_result_list)
+
+        # Log the hyperparameters out of the `params` field:
+        self._log_hyperparameters(parameters=env.params)
+
+    def _log_results(
+        self, evaluation_result_list: List[LGBMTypes.EvaluationResultType]
+    ):
+        """
+        Log the callback environment results data into the logger.
+
+        :param evaluation_result_list: The metrics results as provided by the callback environment of LightGBM.
+        """
+        for evaluation_result in evaluation_result_list:
+            # Check what results were given, from `lightgbm.train` or `lightgbm.cv`:
+            if len(evaluation_result) == 4:
+                # `lightgbm.train` is used:
+                self._logger.log_result(
+                    validation_set_name=evaluation_result[0],
+                    metric_name=evaluation_result[1],
+                    result=evaluation_result[2],
+                )
+            else:
+                # `lightgbm.cv` is used, unpack both mean and stdv scores:
+                self._logger.log_result(
+                    validation_set_name=evaluation_result[0],
+                    metric_name=f"{evaluation_result[1]}_mean",
+                    result=evaluation_result[2],
+                )
+                self._logger.log_result(
+                    validation_set_name=evaluation_result[0],
+                    metric_name=f"{evaluation_result[1]}_stdv",
+                    result=evaluation_result[4],
+                )
+
+    def _log_hyperparameters(self, parameters: dict):
+        """
+        Log the callback environment parameters into the logger.
+
+        :param parameters: The parameters as provided by the callback environment of LightGBM.
+        """
+        for parameter_name, value in parameters.items():
+            if parameter_name in self._dynamic_hyperparameters_keys:
+                self._logger.log_dynamic_hyperparameter(
+                    parameter_name=parameter_name, value=value
+                )
+                continue
+            if parameter_name in self._static_hyperparameters_keys:
+                self._logger.log_static_hyperparameter(
+                    parameter_name=parameter_name, value=value
+                )
diff --git a/mlrun/frameworks/lgbm/callbacks/mlrun_logging_callback.py b/mlrun/frameworks/lgbm/callbacks/mlrun_logging_callback.py
new file mode 100644
index 00000000000..2a33fd5c88e
--- /dev/null
+++ b/mlrun/frameworks/lgbm/callbacks/mlrun_logging_callback.py
@@ -0,0 +1,80 @@
+from typing import List
+
+import mlrun
+
+from ..._ml_common.loggers import MLRunLogger
+from .callback import CallbackEnv
+from .logging_callback import LoggingCallback
+
+
+class MLRunLoggingCallback(LoggingCallback):
+    """
+    A logging callback to collect training data into MLRun. The logging includes:
+
+    * Per iteration chart artifacts for the metrics results.
+    * Per iteration chart artifacts for the dynamic hyperparameters values.
+    * Results table of the training including the static hyperparameters, and the last iteration dynamic
+      hyperparameters values and metrics results.
+    """
+
+    def __init__(
+        self,
+        context: mlrun.MLClientCtx,
+        dynamic_hyperparameters: List[str] = None,
+        static_hyperparameters: List[str] = None,
+        logging_frequency: int = 100,
+    ):
+        """
+        Initialize an MLRun logging callback with the given configuration. All the metrics data will be collected but
+        the hyperparameters to log must be given. The hyperparameters will be taken from the `params` of the model in
+        each iteration.
+
+        :param context:                 MLRun context to log to. The context parameters can be logged as static
+                                        hyperparameters.
+        :param dynamic_hyperparameters: If needed to track a hyperparameter dynamically (sample it each iteration) it
+                                        should be passed here. The parameter expects a list of all the hyperparameters
+                                        names to track out of the `params` dictionary.
+        :param static_hyperparameters:  If needed to track a hyperparameter one time per run it should be passed here.
+                                        The parameter expects a list of all the hyperparameters names to track out of
+                                        the `params` dictionary.
+        :param logging_frequency:       Per how many iterations to write the logs to MLRun (create the plots and log
+                                        them and the results to MLRun). Too low a frequency may slow the training
+                                        time. Defaulted to 100.
+        """
+        super(MLRunLoggingCallback, self).__init__(
+            dynamic_hyperparameters=dynamic_hyperparameters,
+            static_hyperparameters=static_hyperparameters,
+        )
+
+        # Replace the logger with an MLRun logger:
+        del self._logger
+        self._logger = MLRunLogger(context=context)
+
+        # Store the logging frequency, it will be compared with the iteration received in the `CallbackEnv` tuple.
+        self._logging_frequency = logging_frequency
+
+    def __call__(self, env: CallbackEnv):
+        """
+        Log the iteration that ended and all the results it calculated.
+
+        :param env: A named tuple passed at the end of each iteration containing the metrics results. For more
+                    information check the `Callback` doc string.
+        """
+        # Log the results and parameters:
+        super(MLRunLoggingCallback, self).__call__(env=env)
+
+        # Produce the artifacts (post iteration stage):
+        if env.iteration % self._logging_frequency == 0:
+            self._logger.log_iteration_to_context()
+
+    def on_train_begin(self):
+        """
+        Log the context parameters when training begins.
+        """
+        self._logger.log_context_parameters()
+
+    def on_train_end(self):
+        """
+        Log the last iteration training data into MLRun.
+        """
+        self._logger.log_iteration_to_context()
diff --git a/mlrun/frameworks/lgbm/mlrun_interface.py b/mlrun/frameworks/lgbm/mlrun_interface.py
deleted file mode 100644
index 795beb6cc6d..00000000000
--- a/mlrun/frameworks/lgbm/mlrun_interface.py
+++ /dev/null
@@ -1,12 +0,0 @@
-from abc import ABC
-
-from .._ml_common import MLMLRunInterface
-
-
-class LGBMModelMLRunInterface(MLMLRunInterface, ABC):
-    """
-    Interface for adding MLRun features for LightGBM models (SciKit-Learn API models).
-    """
-
-    # MLRun's context default name:
-    DEFAULT_CONTEXT_NAME = "mlrun-lgbm"
diff --git a/mlrun/frameworks/lgbm/mlrun_interfaces/__init__.py b/mlrun/frameworks/lgbm/mlrun_interfaces/__init__.py
new file mode 100644
index 00000000000..9d099d54467
--- /dev/null
+++ b/mlrun/frameworks/lgbm/mlrun_interfaces/__init__.py
@@ -0,0 +1,4 @@
+# flake8: noqa - this is until we take care of the F401 violations with respect to __all__ & sphinx
+from .booster_mlrun_interface import LGBMBoosterMLRunInterface
+from .mlrun_interface import LGBMMLRunInterface
+from .model_mlrun_interface import LGBMModelMLRunInterface
diff --git a/mlrun/frameworks/lgbm/mlrun_interfaces/booster_mlrun_interface.py b/mlrun/frameworks/lgbm/mlrun_interfaces/booster_mlrun_interface.py
new file mode 100644
index 00000000000..2f61485c067
--- /dev/null
+++ b/mlrun/frameworks/lgbm/mlrun_interfaces/booster_mlrun_interface.py
@@ -0,0 +1,35 @@
+from abc import ABC
+
+import lightgbm as lgb
+
+from ..._common import MLRunInterface
+from ..._ml_common import MLModelHandler
+from ..utils import LGBMTypes
+
+
+class LGBMBoosterMLRunInterface(MLRunInterface, ABC):
+    """
+    Interface for adding MLRun features for LightGBM models (Booster API).
+    """
+
+    _PROPERTIES = {
+        "model_handler": None,  # type: MLModelHandler
+    }
+
+    @classmethod
+    def add_interface(
+        cls,
+        obj: lgb.Booster,
+        restoration: LGBMTypes.MLRunInterfaceRestorationType = None,
+    ):
+        """
+        Enrich the object with this interface's properties, methods and functions, so it will have these LightGBM
+        MLRun features.
+
+        :param obj:         The object whose interface is to be enriched.
+        :param restoration: Restoration information tuple as returned from 'remove_interface' in order to add the
+                            interface in a certain state.
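+
+        example (a minimal sketch, assuming a trained `lgb.Booster` named `booster` and an MLRun context `context`)::
+
+            # Enrich the booster, then attach a handler the same way `_post_train` does:
+            LGBMBoosterMLRunInterface.add_interface(obj=booster)
+            booster.model_handler = LGBMModelHandler(model=booster, context=context)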
+        """
+        super(LGBMBoosterMLRunInterface, cls).add_interface(
+            obj=obj, restoration=restoration
+        )
diff --git a/mlrun/frameworks/lgbm/mlrun_interfaces/mlrun_interface.py b/mlrun/frameworks/lgbm/mlrun_interfaces/mlrun_interface.py
new file mode 100644
index 00000000000..2b8ed220b75
--- /dev/null
+++ b/mlrun/frameworks/lgbm/mlrun_interfaces/mlrun_interface.py
@@ -0,0 +1,267 @@
+from abc import ABC
+from types import ModuleType
+from typing import Callable, List
+
+import lightgbm as lgb
+
+import mlrun
+
+from ..._common import MLRunInterface
+from ..callbacks import Callback, MLRunLoggingCallback
+from ..model_handler import LGBMModelHandler
+from ..utils import LGBMTypes, LGBMUtils
+from .booster_mlrun_interface import LGBMBoosterMLRunInterface
+
+
+class LGBMMLRunInterface(MLRunInterface, ABC):
+    """
+    Interface for adding MLRun features for the LightGBM module (Training API).
+    """
+
+    # Attributes to be inserted so the MLRun interface will be fully enabled.
+    _PROPERTIES = {
+        # A list for collecting all the mlrun API callbacks (collected automatically):
+        "_mlrun_callbacks": [],  # type: List[Callback]
+        # Context to use for logging:
+        "_context": None,  # type: mlrun.MLClientCtx
+        # Model logging configurations:
+        "_log_model": False,
+        "_model_handler_kwargs": {},
+        "_log_model_kwargs": {},  # Keyword arguments for the model handler's log method.
+        # Training logging configurations:
+        "_log_training": False,
+        "_mlrun_logging_callback_kwargs": {},
+    }
+    _FUNCTIONS = [
+        "configure_logging",
+        "_parse_parameters",
+        "_parse_callbacks",
+        "_pre_train",
+        "_post_train",
+    ]
+
+    # Attributes to replace so the MLRun interface will be fully enabled.
+    _REPLACED_FUNCTIONS = [
+        "train",
+        # TODO: Wrap `cv` as well.
+    ]
+
+    # MLRun's context default name:
+    DEFAULT_CONTEXT_NAME = "mlrun-lightgbm"
+
+    @classmethod
+    def add_interface(
+        cls,
+        obj: ModuleType = None,
+        restoration: LGBMTypes.MLRunInterfaceRestorationType = None,
+    ):
+        """
+        Enrich the object with this interface's properties, methods and functions, so it will have LightGBM MLRun's
+        features.
+
+        :param obj:         The object whose interface is to be enriched.
+        :param restoration: Restoration information tuple as returned from 'remove_interface' in order to add the
+                            interface in a certain state.
+        """
+        # If the lightgbm module was not provided:
+        if obj is None:
+            # Set it to the module imported here:
+            obj = lgb
+            # See if lightgbm was imported outside this file's scope:
+            if all(lgb_import not in globals() for lgb_import in ["lightgbm", "lgb"]):
+                # Import lightgbm globally:
+                globals().update({"lightgbm": lgb, "lgb": lgb})
+
+        # Add the interface to the provided lightgbm module:
+        super(LGBMMLRunInterface, cls).add_interface(obj=obj, restoration=restoration)
+
+    @staticmethod
+    def mlrun_train(*args, **kwargs):
+        """
+        MLRun's `lightgbm.train` wrapper. It will parse the parameters and run the training supervised by MLRun.
+        """
+        # Get the training parameters (must be given) and parse them:
+        parameters, _ = MLRunInterface._get_function_argument(
+            func=lgb.original_train,
+            argument_name="params",
+            passed_args=args,
+            passed_kwargs=kwargs,
+            default_value={},
+        )
+        lgb._parse_parameters(parameters=parameters)
+
+        # Get the training set (must be given):
+        train_set, _ = MLRunInterface._get_function_argument(
+            func=lgb.original_train,
+            argument_name="train_set",
+            passed_args=args,
+            passed_kwargs=kwargs,
+            default_value=None,
+        )
+        x_train = train_set.data
+        y_train = train_set.label
+
+        # Collect the mlrun callbacks from the provided callbacks:
+        callbacks, is_given = MLRunInterface._get_function_argument(
+            func=lgb.original_train,
+            argument_name="callbacks",
+            passed_args=args,
+            passed_kwargs=kwargs,
+            default_value=[],
+        )
+        lgb._parse_callbacks(callbacks=callbacks)
+        if is_given is None:
+            kwargs["callbacks"] = callbacks
+
+        # Call the pre train function:
+        lgb._pre_train()
+
+        # Call the original train function:
+        booster = lgb.original_train(*args, **kwargs)
+
+        # Call the post train function:
+        lgb._post_train(booster=booster, x_train=x_train, y_train=y_train)
+
+        return booster
+
+    @staticmethod
+    def configure_logging(
+        context: mlrun.MLClientCtx = None,
+        log_model: bool = True,
+        model_handler_kwargs: dict = None,
+        log_model_kwargs: dict = None,
+        log_training: bool = True,
+        mlrun_logging_callback_kwargs: dict = None,
+    ):
+        """
+        Configure the logging of the training API in LightGBM to log the training and model into MLRun. Each `train`
+        call will use these configurations to initialize callbacks and log the model at the end of training.
+
+        :param context:                       MLRun context to log to.
+        :param log_model:                     Whether to log the model at the end of training. Defaulted to True.
+        :param model_handler_kwargs:          Keyword arguments to use for initializing the model handler for the newly
+                                              trained model.
+        :param log_model_kwargs:              Keyword arguments to use for calling the handler's `log` method.
+        :param log_training:                  Whether to log the training metrics and hyperparameters to MLRun.
+        :param mlrun_logging_callback_kwargs: Keyword arguments to use for initializing the `MLRunLoggingCallback` for
+                                              logging the training results during and post training.
+
+        :raise MLRunInvalidArgumentError: In case the 'model' keyword argument was provided in the
+                                          `model_handler_kwargs`.
+        """
+        # Store the context:
+        lgb._context = context
+
+        # Store the given model logging configurations:
+        lgb._log_model = log_model
+        if model_handler_kwargs is not None:
+            if "model" in model_handler_kwargs:
+                raise mlrun.errors.MLRunInvalidArgumentError(
+                    "The 'model' keyword cannot be passed in the model handler keyword arguments as it will be used "
+                    "to set the booster that was trained."
+                )
+            lgb._model_handler_kwargs = model_handler_kwargs
+        if log_model_kwargs is not None:
+            lgb._log_model_kwargs = log_model_kwargs
+
+        # Store the given training logging configurations:
+        lgb._log_training = log_training
+        if mlrun_logging_callback_kwargs is not None:
+            lgb._mlrun_logging_callback_kwargs = mlrun_logging_callback_kwargs
+
+    @staticmethod
+    def _parse_parameters(parameters: dict):
+        """
+        Parse the parameters passed to the training API functions of LightGBM for enabling MLRun quality of life
+        features.
+
+        Notice: The parameters dictionary should be edited and not returned as it is passed by reference.
+
+        :param parameters: The `params` argument that was passed.
+ """ + # TODO: Parse `input_model` - if it's set and is a store path, download it locally and set the parameter to the + # temp path. + pass + + @staticmethod + def _parse_callbacks(callbacks: List[Callable]): + """ + Parse the callbacks passed to the training API functions of LightGBM for adding logging and enabling the MLRun + callbacks API. + + Notice: The callbacks list should be edited and not returned as it is passed by reference. + + :param callbacks: The `callbacks` argument that was passed. + """ + # Check if needed to log training: + if lgb._log_training: + # Add only if it was not given already by the user: + if MLRunLoggingCallback not in [type(callback) for callback in callbacks]: + callbacks.append( + MLRunLoggingCallback( + context=lgb._context, **lgb._mlrun_logging_callback_kwargs + ) + ) + + # Collect all the MLRun API callbacks: + lgb._mlrun_callbacks.clear() + for callback in callbacks: + if isinstance(callback, Callback): + lgb._mlrun_callbacks.append(callback) + + @staticmethod + def _pre_train(): + """ + Called pre-training to call the mlrun callbacks `on_train_begin` method. + """ + for callback in lgb._mlrun_callbacks: + callback.on_train_begin() + + @staticmethod + def _post_train( + booster: lgb.Booster, + x_train: LGBMTypes.DatasetType, + y_train: LGBMTypes.DatasetType, + ): + """ + Called post training to call the mlrun callbacks `on_train_end` method and to log the model. + + :param booster: The booster to log. + :param train_set: The training set that was used to train the given booster. + """ + # Call the `on_train_end` method of the callbacks while collecting extra data from the mlrun logging callback: + extra_data = {} + metrics = {} + for callback in lgb._mlrun_callbacks: + callback.on_train_end() + if isinstance(callback, MLRunLoggingCallback): + extra_data = {**extra_data, **callback.logger.get_artifacts()} + metrics = {**metrics, **callback.logger.get_metrics()} + + # Apply the booster MLRun interface: + LGBMBoosterMLRunInterface.add_interface(obj=booster) + + # Set the handler to the booster: + booster.model_handler = LGBMModelHandler( + model=booster, context=lgb._context, **lgb._model_handler_kwargs + ) + + # Register found extra data and metrics: + booster.model_handler.set_extra_data(to_add=extra_data) + booster.model_handler.set_metrics(to_add=metrics) + + # Set the sample set to the training set if None: + if lgb._log_model_kwargs.get("sample_set", None) is None: + sample_set, target_columns = LGBMUtils.concatenate_x_y( + x=x_train, + y=y_train, + target_columns_names=lgb._log_model_kwargs.get("target_columns", None), + ) + booster.model_handler.set_target_columns(target_columns=target_columns) + booster.model_handler.set_sample_set(sample_set=sample_set) + + # Check if needed to log the model: + if lgb._log_model: + booster.model_handler.log(**lgb._log_model_kwargs) + + lgb._context.commit(completed=False) diff --git a/mlrun/frameworks/lgbm/mlrun_interfaces/model_mlrun_interface.py b/mlrun/frameworks/lgbm/mlrun_interfaces/model_mlrun_interface.py new file mode 100644 index 00000000000..c40ce854d75 --- /dev/null +++ b/mlrun/frameworks/lgbm/mlrun_interfaces/model_mlrun_interface.py @@ -0,0 +1,17 @@ +from abc import ABC + +from ...sklearn import SKLearnMLRunInterface + + +class LGBMModelMLRunInterface(SKLearnMLRunInterface, ABC): + """ + Interface for adding MLRun features for LightGBM models (SciKit-Learn API). + """ + + # TODO: Should be changed from SKLearn's interface to its own, it has the same `params` and callbacks passed to + # `train`. 
+ # TODO: Add to `apply_mlrun` a "use_dask": bool = None argument. A boolean value that will replace the object of a + # SciKit-Learn API `LGBMModel` to its Dask version (`LGBMClassifier` to `DaskLGBMClassifier`). None will look + # for dask parameters in the given context and turn on and off accordingly. + # MLRun's context default name: + DEFAULT_CONTEXT_NAME = "mlrun-lgbm" diff --git a/mlrun/frameworks/lgbm/model_handler.py b/mlrun/frameworks/lgbm/model_handler.py index f6daaeac343..daca43ea840 100644 --- a/mlrun/frameworks/lgbm/model_handler.py +++ b/mlrun/frameworks/lgbm/model_handler.py @@ -8,8 +8,10 @@ import mlrun from .._common import without_mlrun_interface -from .._ml_common import DatasetType, MLModelHandler -from .mlrun_interface import LGBMModelMLRunInterface +from .._ml_common import MLModelHandler +from .mlrun_interfaces.booster_mlrun_interface import LGBMBoosterMLRunInterface +from .mlrun_interfaces.model_mlrun_interface import LGBMModelMLRunInterface +from .utils import LGBMTypes class LGBMModelHandler(MLModelHandler): @@ -28,11 +30,18 @@ class ModelFormats: PKL = "pkl" TXT = "txt" + class _LabelKeys: + """ + Required labels keys to log with the model. + """ + + MODEL_FORMAT = "model_format" + def __init__( self, model_name: str = None, model_path: str = None, - model: lgb.LGBMModel = None, + model: LGBMTypes.ModelType = None, modules_map: Union[Dict[str, Union[None, str, List[str]]], str] = None, custom_objects_map: Union[Dict[str, Union[str, List[str]]], str] = None, custom_objects_directory: str = None, @@ -41,7 +50,7 @@ def __init__( **kwargs, ): """ - Initialize the handler. The model can be set here so it won't require loading. Note you must provide at least + Initialize the handler. The model can be set here, so it won't require loading. Note you must provide at least one of 'model' and 'model_path'. If a model is not given, the files in the model path will be collected automatically to be ready for loading. @@ -53,17 +62,21 @@ def __init__( * Mandatory for loading the model from a local path. * If given a logged model (store model path) it will be read from the artifact. * If given a loaded model object and the model name is None, the name will be - set to the model's object name / class. - :param modules_map: A dictionary of all the modules required for loading the model. Each key - is a path to a module and its value is the object name to import from it. All - the modules will be imported globally. If multiple objects needed to be - imported from the same module a list can be given. The map can be passed as a - path to a json file as well. For example: - { - "module1": None, # => import module1 - "module2": ["func1", "func2"], # => from module2 import func1, func2 - "module3.sub_module": "func3", # => from module3.sub_module import func3 - } + set to the model's object class. + :param modules_map: A dictionary of all the modules required for loading the model. Each key is a + path to a module and its value is the object name to import from it. All the + modules will be imported globally. If multiple objects needed to be imported + from the same module a list can be given. The map can be passed as a path to a + json file as well. For example: + + .. 
code-block:: python + + { + "module1": None, # import module1 + "module2": ["func1", "func2"], # from module2 import func1, func2 + "module3.sub_module": "func3", # from module3.sub_module import func3 + } + If the model path given is of a store object, the modules map will be read from the logged modules map artifact of the model. :param custom_objects_map: A dictionary of all the custom objects required for loading the model. Each key @@ -71,10 +84,14 @@ def __init__( from it. If multiple objects needed to be imported from the same py file a list can be given. The map can be passed as a path to a json file as well. For example: - { - "/.../custom_optimizer.py": "optimizer", - "/.../custom_layers.py": ["layer1", "layer2"] - } + + .. code-block:: python + + { + "/.../custom_model.py": "MyModel", + "/.../custom_objects.py": ["object1", "object2"] + } + All the paths will be accessed from the given 'custom_objects_directory', meaning each py file will be read from 'custom_objects_directory/'. If the model path given is of a store object, the custom objects map will be @@ -89,7 +106,8 @@ def __init__( model. :param context: MLRun context to work with for logging the model. :param model_format: The format to use for saving and loading the model. Should be passed as a - member of the class 'ModelFormats'. Defaulted to 'ModelFormats.PKL'. + member of the class 'LGBMModelHandler.ModelFormats'. Defaulted to + 'LGBMModelHandler.ModelFormats.PKL'. :raise MLRunInvalidArgumentError: In case one of the given parameters are invalid. """ @@ -102,14 +120,11 @@ def __init__( f"Unrecognized model format: '{model_format}'. Please use one of the class members of " "'TFKerasModelHandler.ModelFormats'" ) - if model_format == LGBMModelHandler.ModelFormats.TXT: - raise NotImplementedError( - "TXT model format is not yet implemented for LightGBM." - ) # Store the configuration: self._model_format = model_format + # Set up the base handler class: super(LGBMModelHandler, self).__init__( model=model, model_path=model_path, @@ -121,6 +136,23 @@ def __init__( **kwargs, ) + def set_labels( + self, + to_add: Dict[str, Union[str, int, float]] = None, + to_remove: List[str] = None, + ): + """ + Update the labels dictionary of this model artifact. There are required labels that cannot be edited or removed. + + :param to_add: The labels to add. + :param to_remove: A list of labels keys to remove. + """ + # Update the user's labels: + super(LGBMModelHandler, self).set_labels(to_add=to_add, to_remove=to_remove) + + # Set the required labels: + self._labels[self._LabelKeys.MODEL_FORMAT] = self._model_format + def _collect_files_from_local_path(self): """ If the model path given is of a local path, search for the needed model files and collect them into this handler @@ -128,16 +160,15 @@ def _collect_files_from_local_path(self): :raise MLRunNotFoundError: If the model file was not found. 
""" - # ModelFormats.PKL - Get the pickle model file: - if self._model_format == LGBMModelHandler.ModelFormats.PKL: - self._model_file = os.path.join(self._model_path, f"{self._model_name}.pkl") - if not os.path.exists(self._model_file): - raise mlrun.errors.MLRunNotFoundError( - f"The model file '{self._model_name}.pkl' was not found within the given 'model_path': " - f"'{self._model_path}'" - ) + self._model_file = os.path.join( + self._model_path, f"{self._model_name}.{self._model_format}" + ) + if not os.path.exists(self._model_file): + raise mlrun.errors.MLRunNotFoundError( + f"The model file '{self._model_name}.{self._model_format}' was not found within the given " + f"'model_path': '{self._model_path}'" + ) - @without_mlrun_interface(interface=LGBMModelMLRunInterface) def save(self, output_path: str = None, **kwargs): """ Save the handled model at the given output path. If a MLRun context is available, the saved model files will be @@ -150,13 +181,9 @@ def save(self, output_path: str = None, **kwargs): """ super(LGBMModelHandler, self).save(output_path=output_path) - # ModelFormats.PICKLE - Save from a pkl file: - if self._model_format == LGBMModelHandler.ModelFormats.PKL: - self._model_file = f"{self._model_name}.pkl" - with open(self._model_file, "wb") as pickle_file: - cloudpickle.dump(self._model, pickle_file) - - return None + if isinstance(self._model, lgb.LGBMModel): + return self._save_lgbmmodel() + return self._save_booster() def load(self, **kwargs): """ @@ -165,16 +192,20 @@ def load(self, **kwargs): """ super(LGBMModelHandler, self).load() - # ModelFormats.PICKLE - Load from a pkl file: + # ModelFormats.PKL - Load from a pkl file: if self._model_format == LGBMModelHandler.ModelFormats.PKL: with open(self._model_file, "rb") as pickle_file: self._model = pickle.load(pickle_file) + return + + # ModelFormats.TXT: + self._model = lgb.Booster(model_file=self._model_file) def to_onnx( self, model_name: str = None, optimize: bool = True, - input_sample: DatasetType = None, + input_sample: LGBMTypes.DatasetType = None, log: bool = None, ): """ @@ -191,7 +222,7 @@ def to_onnx( :return: The converted ONNX model (onnx.ModelProto). - :raise MLRunMissingDependencyError: If some of the ONNX packages are missing. + :raise MLRunMissingDependencyError: If some ONNX packages are missing. """ # Import onnx related modules: try: @@ -206,3 +237,41 @@ def to_onnx( ) raise NotImplementedError # TODO: Finish ONNX conversion + + @without_mlrun_interface(interface=LGBMModelMLRunInterface) + def _save_lgbmmodel(self): + """ + Save the model handled in this handler (must be a type of `lgb.LGBMModel`). + """ + # ModelFormats.PKL: + if self._model_format == LGBMModelHandler.ModelFormats.PKL: + self._model_file = f"{self._model_name}.pkl" + with open(self._model_file, "wb") as pickle_file: + cloudpickle.dump(self._model, pickle_file) + return + + # Other model formats require saving directly from the booster object: + return self._save_booster(booster=self.model.booster_) + + @without_mlrun_interface(interface=LGBMBoosterMLRunInterface) + def _save_booster(self, booster: lgb.Booster = None): + """ + Save the booster object in the format saved in this handler. If not booster passed, the model handled in this + handler will be used (must be a type of `lgb.Booster` object). + + :param booster: The booster object to save. 
+ """ + # Set the model as the booster to save in case no booster was provided: + if booster is None: + booster = self._model + + # ModelFormats.PICKLE: + if self._model_format == LGBMModelHandler.ModelFormats.PKL: + self._model_file = f"{self._model_name}.pkl" + with open(self._model_file, "wb") as pickle_file: + cloudpickle.dump(booster, pickle_file) + return + + # ModelFormats.TXT: + self._model_file = f"{self._model_name}.txt" + booster.save_model(self._model_file) diff --git a/mlrun/frameworks/lgbm/model_server.py b/mlrun/frameworks/lgbm/model_server.py new file mode 100644 index 00000000000..ffb3c1ad89d --- /dev/null +++ b/mlrun/frameworks/lgbm/model_server.py @@ -0,0 +1,148 @@ +from typing import Any, Dict, List, Union + +import numpy as np + +import mlrun +from mlrun.serving.v2_serving import V2ModelServer + +from .model_handler import LGBMModelHandler +from .utils import LGBMTypes + + +class LGBMModelServer(V2ModelServer): + """ + LightGBM Model serving class, inheriting the V2ModelServer class for being initialized automatically by the model + server and be able to run locally as part of a nuclio serverless function, or as part of a real-time pipeline. + """ + + def __init__( + self, + context: mlrun.MLClientCtx, + name: str, + model: LGBMTypes.ModelType = None, + model_path: LGBMTypes.PathType = None, + model_name: str = None, + model_format: str = LGBMModelHandler.ModelFormats.PKL, + modules_map: Union[Dict[str, Union[None, str, List[str]]], str] = None, + custom_objects_map: Union[Dict[str, Union[str, List[str]]], str] = None, + custom_objects_directory: str = None, + to_list: bool = True, + protocol: str = None, + **class_args, + ): + """ + Initialize a serving class for a torch model. + + :param context: The mlrun context to work with. + :param name: The name of this server to be initialized. + :param model: Model to handle or None in case a loading parameters were supplied. + :param model_path: Path to the model's directory with the saved '.pt' file. The file must start + with the given model name. The model path can be also passed as a model object + path in the following format: + 'store://models//:'. + :param model_name: The model name for saving and logging the model: + * Mandatory for loading the model from a local path. + * If given a logged model (store model path) it will be read from the artifact. + * If given a loaded model object and the model name is None, the name will be + set to the model's object name / class. + :param model_class: The model's class type object. Can be passed as the class's name (string) as + well. The model class must appear in the custom objects / modules map + dictionary / json. If the model path given is of a store object, this model + class name will be read from the logged label of the model. + :param modules_map: A dictionary of all the modules required for loading the model. Each key + is a path to a module and its value is the object name to import from it. All + the modules will be imported globally. If multiple objects needed to be + imported from the same module a list can be given. The map can be passed as a + path to a json file as well. For example: + { + "module1": None, # => import module1 + "module2": ["func1", "func2"], # => from module2 import func1, func2 + "module3.sub_module": "func3", # => from module3.sub_module import func3 + } + If the model path given is of a store object, the modules map will be read from + the logged modules map artifact of the model. 
+ :param custom_objects_map: A dictionary of all the custom objects required for loading the model. Each key + is a path to a python file and its value is the custom object name to import + from it. If multiple objects needed to be imported from the same py file a list + can be given. The map can be passed as a path to a json file as well. For + example: + { + "/.../custom_optimizer.py": "optimizer", + "/.../custom_layers.py": ["layer1", "layer2"] + } + All the paths will be accessed from the given 'custom_objects_directory', + meaning each py file will be read from 'custom_objects_directory/'. + If the model path given is of a store object, the custom objects map will be + read from the logged custom object map artifact of the model. + Notice: The custom objects will be imported in the order they came in this + dictionary (or json). If a custom object depends on another, make sure to + put it below the one it relies on. + :param custom_objects_directory: Path to the directory with all the python files required for the custom + objects. Can be passed as a zip file as well (will be extracted during the run + before loading the model). If the model path given is of a store object, the + custom objects files will be read from the logged custom object artifact of the + model. + :param to_list: Whether to return a list instead of a `np.ndarray`. Defaulted to True. + :param protocol: - + :param class_args: - + """ + super(LGBMModelServer, self).__init__( + context=context, + name=name, + model_path=model_path, + model=model, + protocol=protocol, + **class_args, + ) + + # Set up a model handler: + self._model_handler = LGBMModelHandler( + model_path=model_path, + model=model, + model_name=model_name, + modules_map=modules_map, + custom_objects_map=custom_objects_map, + custom_objects_directory=custom_objects_directory, + context=self.context, + model_format=model_format, + ) + + # Store preferences: + self._to_list = to_list + + def load(self): + """ + Use the model handler to load the model. + """ + if self._model_handler.model is None: + self._model_handler.load() + self.model = self._model_handler.model + + def predict(self, request: Dict[str, Any]) -> Union[np.ndarray, list]: + """ + Infer the inputs through the model and return its output. The inferred data will + be read from the "inputs" key of the request. + + :param request: The request to the model. The input to the model will be read from the "inputs" key. + + :return: The model's prediction on the given input. If 'to_list' was set to True in initialization, a list will + be returned instead of a `np.ndarray`. + """ + # Get the inputs: + inputs = request["inputs"] + + # Predict: + predictions = self.model.predict(inputs) + + # Return as list if required: + return predictions if not self._to_list else predictions.tolist() + + def explain(self, request: Dict[str, Any]) -> str: + """ + Return a string explaining what model is being served in this serving function and the function name. + + :param request: A given request. + + :return: Explanation string.
+ """ + return f"The '{type(self.model)}' model serving function named '{self.name}'" diff --git a/mlrun/frameworks/lgbm/utils.py b/mlrun/frameworks/lgbm/utils.py new file mode 100644 index 00000000000..ee94ddeb08f --- /dev/null +++ b/mlrun/frameworks/lgbm/utils.py @@ -0,0 +1,92 @@ +from typing import List, Tuple, Union + +import lightgbm as lgb +import numpy as np +import pandas as pd + +import mlrun.errors + +from .._ml_common import MLTypes, MLUtils + + +class LGBMTypes(MLTypes): + """ + Typing hints for the LightGBM framework. + """ + + # A union of all LightGBM model base classes: + ModelType = Union[lgb.LGBMModel, lgb.Booster] + + # A type for all the supported dataset types: + DatasetType = Union[MLTypes.DatasetType, lgb.Dataset] + + # An evaluation result as packaged by the training in LightGBM: + EvaluationResultType = Union[ + Tuple[str, str, float, bool], # As packaged in `lightgbm.train` + Tuple[str, str, float, bool, float], # As packaged in `lightgbm.cv` + ] + + # Detailed type for the named tuple `CallbackEnv` passed during LightGBM's training for the callbacks: + CallbackEnvType = Tuple[ + lgb.Booster, dict, int, int, int, List[EvaluationResultType] + ] + + +class LGBMUtils(MLUtils): + """ + Utilities functions for the LightGBM framework. + """ + + @staticmethod + def to_array(dataset: LGBMTypes.DatasetType) -> np.ndarray: + """ + Convert the given dataset to np.ndarray. + + :param dataset: The dataset to convert. Must be one of {lgb.Dataset, pd.DataFrame, pd.Series, + scipy.sparse.base.spmatrix, list, tuple, dict}. + + :return: The dataset as a ndarray. + + :raise MLRunInvalidArgumentError: If the dataset type is not supported. + """ + if isinstance(dataset, lgb.Dataset): + x = LGBMUtils.to_array(dataset=dataset.data) + if dataset.label is None: + return x + y = LGBMUtils.to_array(dataset=dataset.label) + return LGBMUtils.to_array(LGBMUtils.concatenate_x_y(x=x, y=y)[0]) + try: + return MLUtils.to_array(dataset=dataset) + except mlrun.errors.MLRunInvalidArgumentError: + raise mlrun.errors.MLRunInvalidArgumentError( + f"Could not convert the given dataset into a numpy ndarray. Supporting conversion from: " + f"{LGBMUtils.get_union_typehint_string(LGBMTypes.DatasetType)}. The given dataset was of type: " + f"'{type(dataset)}'" + ) + + @staticmethod + def to_dataframe(dataset: LGBMTypes.DatasetType) -> pd.DataFrame: + """ + Convert the given dataset to pd.DataFrame. + + :param dataset: The dataset to convert. Must be one of {lgb.Dataset, np.ndarray, pd.Series, + scipy.sparse.base.spmatrix, list, tuple, dict}. + + :return: The dataset as a DataFrame. + + :raise MLRunInvalidArgumentError: If the dataset type is not supported. + """ + if isinstance(dataset, lgb.Dataset): + x = LGBMUtils.to_dataframe(dataset=dataset.data) + if dataset.label is None: + return x + y = LGBMUtils.to_dataframe(dataset=dataset.label) + return LGBMUtils.concatenate_x_y(x=x, y=y)[0] + try: + return MLUtils.to_dataframe(dataset=dataset) + except mlrun.errors.MLRunInvalidArgumentError: + raise mlrun.errors.MLRunInvalidArgumentError( + f"Could not convert the given dataset into a pandas DataFrame. Supporting conversion from: " + f"{LGBMUtils.get_union_typehint_string(LGBMTypes.DatasetType)}. 
The given dataset was of type: " + f"'{type(dataset)}'" + ) diff --git a/mlrun/frameworks/onnx/model_server.py b/mlrun/frameworks/onnx/model_server.py index f2c4f6242e6..60bbdd4423c 100644 --- a/mlrun/frameworks/onnx/model_server.py +++ b/mlrun/frameworks/onnx/model_server.py @@ -32,16 +32,16 @@ def __init__( :param context: The mlrun context to work with. :param name: The model name to be served. - :param model: Model to handle or None in case a loading parameters were supplied. - :param model_path: Path to the model's directory to load it from. The onnx file must start with the given model - name and the directory must contain the onnx file. The model path can be also passed as a - model object path in the following format: - 'store://models//:'. - :param model_name: The model name for saving and logging the model: - * Mandatory for loading the model from a local path. - * If given a logged model (store model path) it will be read from the artifact. - * If given a loaded model object and the model name is None, the name will be set to the - model's object name / class. + :param model: Model to handle or None in case a loading parameters were supplied. + :param model_path: Path to the model's directory to load it from. The onnx file must start with the + given model name and the directory must contain the onnx file. The model path can be + also passed as a model object path in the following format: + 'store://models//:'. + :param model_name: The model name for saving and logging the model: + * Mandatory for loading the model from a local path. + * If given a logged model (store model path) it will be read from the artifact. + * If given a loaded model object and the model name is None, the name will be set to + the model's object name / class. :param execution_providers: List of the execution providers. The first provider in the list will be the most preferred. 
For example, a CUDA execution provider with configurations and a CPU execution provider: diff --git a/mlrun/frameworks/pytorch/__init__.py b/mlrun/frameworks/pytorch/__init__.py index 2db799752c7..52f443b3453 100644 --- a/mlrun/frameworks/pytorch/__init__.py +++ b/mlrun/frameworks/pytorch/__init__.py @@ -7,11 +7,12 @@ import mlrun -from .callbacks import Callback, MetricFunctionType, MetricValueType +from .callbacks import Callback from .callbacks_handler import CallbacksHandler from .mlrun_interface import PyTorchMLRunInterface from .model_handler import PyTorchModelHandler from .model_server import PyTorchModelServer +from .utils import PyTorchTypes, PyTorchUtils def train( @@ -20,7 +21,7 @@ def train( loss_function: Module, optimizer: Optimizer, validation_set: DataLoader = None, - metric_functions: List[MetricFunctionType] = None, + metric_functions: List[PyTorchTypes.MetricFunctionType] = None, scheduler=None, scheduler_step_frequency: Union[int, float, str] = "epoch", epochs: int = 1, @@ -190,7 +191,7 @@ def evaluate( dataset: DataLoader, model: Module = None, loss_function: Module = None, - metric_functions: List[MetricFunctionType] = None, + metric_functions: List[PyTorchTypes.MetricFunctionType] = None, iterations: int = None, callbacks_list: List[Callback] = None, use_cuda: bool = True, @@ -202,7 +203,7 @@ def evaluate( custom_objects_directory: str = None, mlrun_callback_kwargs: Dict[str, Any] = None, context: mlrun.MLClientCtx = None, -) -> Tuple[PyTorchModelHandler, List[MetricValueType]]: +) -> Tuple[PyTorchModelHandler, List[PyTorchTypes.MetricValueType]]: """ Use MLRun's PyTorch interface to evaluate the model with the given parameters. For more information and further options regarding the auto logging, see 'PyTorchMLRunInterface' documentation. 
Notice for auto-logging: In order to diff --git a/mlrun/frameworks/pytorch/callbacks/__init__.py b/mlrun/frameworks/pytorch/callbacks/__init__.py index 45005e8cfbd..677e6bceee0 100644 --- a/mlrun/frameworks/pytorch/callbacks/__init__.py +++ b/mlrun/frameworks/pytorch/callbacks/__init__.py @@ -1,5 +1,5 @@ # flake8: noqa - this is until we take care of the F401 violations with respect to __all__ & sphinx -from .callback import Callback, MetricFunctionType, MetricValueType -from .logging_callback import HyperparametersKeys, LoggingCallback, TrackableType +from .callback import Callback +from .logging_callback import HyperparametersKeys, LoggingCallback from .mlrun_logging_callback import MLRunLoggingCallback from .tensorboard_logging_callback import TensorboardLoggingCallback diff --git a/mlrun/frameworks/pytorch/callbacks/callback.py b/mlrun/frameworks/pytorch/callbacks/callback.py index 4c0c7b013c9..e3c28d5c0e8 100644 --- a/mlrun/frameworks/pytorch/callbacks/callback.py +++ b/mlrun/frameworks/pytorch/callbacks/callback.py @@ -1,17 +1,12 @@ from abc import ABC, abstractmethod -from typing import Callable, List, Union +from typing import List -import numpy as np from torch import Tensor from torch.nn import Module from torch.optim import Optimizer from torch.utils.data import DataLoader -# Supported types of loss and metrics values: -MetricValueType = Union[int, float, np.ndarray, Tensor] - -# Supported types of metrics: -MetricFunctionType = Union[Callable[[Tensor, Tensor], MetricValueType], Module] +from ..utils import PyTorchTypes class Callback(ABC): @@ -59,7 +54,7 @@ def on_setup( validation_set: DataLoader = None, loss_function: Module = None, optimizer: Optimizer = None, - metric_functions: List[MetricFunctionType] = None, + metric_functions: List[PyTorchTypes.MetricFunctionType] = None, scheduler=None, ): """ @@ -132,7 +127,7 @@ def on_validation_begin(self): pass def on_validation_end( - self, loss_value: MetricValueType, metric_values: List[float] + self, loss_value: PyTorchTypes.MetricValueType, metric_values: List[float] ) -> bool: """ Before the validation (in a training case it will be per epoch) ends, this method will be called. @@ -221,7 +216,7 @@ def on_train_loss_begin(self): """ pass - def on_train_loss_end(self, loss_value: MetricValueType): + def on_train_loss_end(self, loss_value: PyTorchTypes.MetricValueType): """ After the training calculation of the loss, this method will be called. @@ -235,7 +230,7 @@ def on_validation_loss_begin(self): """ pass - def on_validation_loss_end(self, loss_value: MetricValueType): + def on_validation_loss_end(self, loss_value: PyTorchTypes.MetricValueType): """ After the validating calculation of the loss, this method will be called. @@ -249,7 +244,7 @@ def on_train_metrics_begin(self): """ pass - def on_train_metrics_end(self, metric_values: List[MetricValueType]): + def on_train_metrics_end(self, metric_values: List[PyTorchTypes.MetricValueType]): """ After the training calculation of the metrics, this method will be called. @@ -263,7 +258,9 @@ def on_validation_metrics_begin(self): """ pass - def on_validation_metrics_end(self, metric_values: List[MetricValueType]): + def on_validation_metrics_end( + self, metric_values: List[PyTorchTypes.MetricValueType] + ): """ After the validating calculation of the metrics, this method will be called. 
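For reference, a minimal sketch of a metric that satisfies the relocated `PyTorchTypes.MetricFunctionType` hint (a `Callable[[Tensor, Tensor], MetricValueType]` or a `torch.nn.Module`); the `accuracy` function here is illustrative and not part of this diff:

.. code-block:: python

    import torch
    from torch import Tensor


    def accuracy(y_pred: Tensor, y_true: Tensor) -> float:
        # Compare the predicted class (argmax over the last dimension) to the labels:
        return (y_pred.argmax(dim=-1) == y_true).float().mean().item()


    # A torch.nn.Module (such as a loss) is a valid metric function as well:
    metric_functions = [accuracy, torch.nn.MSELoss()]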
diff --git a/mlrun/frameworks/pytorch/callbacks/logging_callback.py b/mlrun/frameworks/pytorch/callbacks/logging_callback.py index 61ed2765fa2..54939eb834b 100644 --- a/mlrun/frameworks/pytorch/callbacks/logging_callback.py +++ b/mlrun/frameworks/pytorch/callbacks/logging_callback.py @@ -6,9 +6,10 @@ import mlrun -from ..._common import TrackableType -from ..._dl_common.loggers import Logger, LoggerMode -from .callback import Callback, MetricFunctionType, MetricValueType +from ..._common import LoggingMode +from ..._dl_common.loggers import Logger +from ..utils import PyTorchTypes +from .callback import Callback class HyperparametersKeys: @@ -45,10 +46,14 @@ def __init__( self, context: mlrun.MLClientCtx = None, dynamic_hyperparameters: Dict[ - str, Tuple[str, Union[List[Union[str, int]], Callable[[], TrackableType]]] + str, + Tuple[ + str, + Union[List[Union[str, int]], Callable[[], PyTorchTypes.TrackableType]], + ], ] = None, static_hyperparameters: Dict[ - str, Union[TrackableType, Tuple[str, List[Union[str, int]]]] + str, Union[PyTorchTypes.TrackableType, Tuple[str, List[Union[str, int]]]] ] = None, auto_log: bool = False, ): @@ -118,7 +123,7 @@ def get_validation_results(self) -> Dict[str, List[List[float]]]: """ return self._logger.validation_results - def get_static_hyperparameters(self) -> Dict[str, TrackableType]: + def get_static_hyperparameters(self) -> Dict[str, PyTorchTypes.TrackableType]: """ Get the static hyperparameters logged. The hyperparameters will be stored in a dictionary where each key is the hyperparameter name and the value is his logged value. @@ -127,7 +132,9 @@ def get_static_hyperparameters(self) -> Dict[str, TrackableType]: """ return self._logger.static_hyperparameters - def get_dynamic_hyperparameters(self) -> Dict[str, List[TrackableType]]: + def get_dynamic_hyperparameters( + self, + ) -> Dict[str, List[PyTorchTypes.TrackableType]]: """ Get the dynamic hyperparameters logged. The hyperparameters will be stored in a dictionary where each key is the hyperparameter name and the value is a list of his logged values per epoch. @@ -283,10 +290,10 @@ def on_validation_begin(self): """ if self._is_training is None: self._is_training = False - self._logger.set_mode(mode=LoggerMode.EVALUATION) + self._logger.set_mode(mode=LoggingMode.EVALUATION) def on_validation_end( - self, loss_value: MetricValueType, metric_values: List[float] + self, loss_value: PyTorchTypes.MetricValueType, metric_values: List[float] ): """ Before the validation (in a training case it will be per epoch) ends, this method will be called to log the @@ -336,7 +343,7 @@ def on_validation_batch_begin(self, batch: int, x: Tensor, y_true: Tensor): """ self._logger.log_validation_iteration() - def on_train_loss_end(self, loss_value: MetricValueType): + def on_train_loss_end(self, loss_value: PyTorchTypes.MetricValueType): """ After the training calculation of the loss, this method will be called to log the loss value. @@ -350,7 +357,7 @@ def on_train_loss_end(self, loss_value: MetricValueType): result=float(loss_value), ) - def on_validation_loss_end(self, loss_value: MetricValueType): + def on_validation_loss_end(self, loss_value: PyTorchTypes.MetricValueType): """ After the validating calculation of the loss, this method will be called to log the loss value. 
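The updated type hints above spell out the hyperparameter configuration shape the callback expects. A minimal sketch (the hyperparameter names are illustrative; the `lr` key chain mirrors the one used by `_add_auto_hyperparameters` in the next hunk):

.. code-block:: python

    from mlrun.frameworks.pytorch.callbacks import HyperparametersKeys, LoggingCallback

    callback = LoggingCallback(
        # Each dynamic hyperparameter maps a name to a (source object, key chain) tuple,
        # or to a callable returning the current trackable value:
        dynamic_hyperparameters={
            "lr": (HyperparametersKeys.OPTIMIZER, ["param_groups", 0, "lr"])
        },
        # Static hyperparameters are plain trackable values:
        static_hyperparameters={"epochs": 10},
    )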
@@ -364,7 +371,7 @@ def on_validation_loss_end(self, loss_value: MetricValueType): result=float(loss_value), ) - def on_train_metrics_end(self, metric_values: List[MetricValueType]): + def on_train_metrics_end(self, metric_values: List[PyTorchTypes.MetricValueType]): """ After the training calculation of the metrics, this method will be called to log the metrics values. @@ -380,7 +387,9 @@ def on_train_metrics_end(self, metric_values: List[MetricValueType]): result=float(metric_value), ) - def on_validation_metrics_end(self, metric_values: List[MetricValueType]): + def on_validation_metrics_end( + self, metric_values: List[PyTorchTypes.MetricValueType] + ): """ After the validating calculation of the metrics, this method will be called to log the metrics values. @@ -407,45 +416,48 @@ def _add_auto_hyperparameters(self): self._logger.log_context_parameters() # Add batch size: - bath_size_key = "batch_size" - if bath_size_key not in self._static_hyperparameters_keys: + if "batch_size" not in self._static_hyperparameters_keys: if self._objects[self._ObjectKeys.TRAINING_SET] is not None and hasattr( self._objects[self._ObjectKeys.TRAINING_SET], "batch_size" ): - self._static_hyperparameters_keys[bath_size_key] = getattr( + self._static_hyperparameters_keys["batch_size"] = getattr( self._objects[self._ObjectKeys.TRAINING_SET], "batch_size" ) elif self._objects[self._ObjectKeys.VALIDATION_SET] is not None and hasattr( self._objects[self._ObjectKeys.VALIDATION_SET], "batch_size" ): - self._static_hyperparameters_keys[bath_size_key] = getattr( + self._static_hyperparameters_keys["batch_size"] = getattr( self._objects[self._ObjectKeys.VALIDATION_SET], "batch_size" ) # Add learning rate: - learning_rate_key = "lr" learning_rate_key_chain = ( HyperparametersKeys.OPTIMIZER, ["param_groups", 0, "lr"], ) - if learning_rate_key not in self._dynamic_hyperparameters_keys: + if ( + "lr" not in self._dynamic_hyperparameters_keys + and "learning_rate" not in self._dynamic_hyperparameters_keys + ): if self._objects[self._ObjectKeys.OPTIMIZER] is not None: try: + # Try to get the learning rate value: self._get_hyperparameter( source=learning_rate_key_chain[0], key_chain=learning_rate_key_chain[1], ) - self._dynamic_hyperparameters_keys[ - learning_rate_key - ] = learning_rate_key_chain + # If it passes without raising an exception, store its key chain: + self._dynamic_hyperparameters_keys["lr"] = learning_rate_key_chain except (TypeError, KeyError, IndexError, ValueError): pass def _get_hyperparameter( self, source: str, - key_chain: Union[List[Union[str, int]], Callable[[], TrackableType]], - ) -> TrackableType: + key_chain: Union[ + List[Union[str, int]], Callable[[], PyTorchTypes.TrackableType] + ], + ) -> PyTorchTypes.TrackableType: """ Access the hyperparameter from the source using the given key chain. @@ -476,11 +488,7 @@ def _get_hyperparameter( value = self._objects[source] for key in key_chain: try: - if ( - isinstance(value, dict) - or isinstance(value, list) - or isinstance(value, tuple) - ): + if isinstance(value, (dict, list, tuple)): value = value[key] else: value = getattr(value, key) @@ -491,7 +499,7 @@ def _get_hyperparameter( ) # Parse the value: - if isinstance(value, Tensor) or isinstance(value, Parameter): + if isinstance(value, (Tensor, Parameter)): if value.numel() == 1: value = float(value) else: @@ -507,12 +515,7 @@ def _get_hyperparameter( f"The parameter with the following key chain: {key_chain} is a numpy.ndarray with {value.size} " f"elements. 
numpy arrays are trackable only if they have 1 element." ) - elif not ( - isinstance(value, float) - or isinstance(value, int) - or isinstance(value, str) - or isinstance(value, bool) - ): + elif not isinstance(value, (float, int, str, bool)): raise mlrun.errors.MLRunInvalidArgumentError( f"The parameter with the following key chain: {key_chain} is of type '{type(value)}'. " f"The only trackable types are: float, int, str and bool." @@ -520,7 +523,7 @@ def _get_hyperparameter( return value @staticmethod - def _get_metric_name(metric_function: MetricFunctionType): + def _get_metric_name(metric_function: PyTorchTypes.MetricFunctionType): """ Get the given metric name. diff --git a/mlrun/frameworks/pytorch/callbacks/mlrun_logging_callback.py b/mlrun/frameworks/pytorch/callbacks/mlrun_logging_callback.py index 3fd5bdfe589..15be4098045 100644 --- a/mlrun/frameworks/pytorch/callbacks/mlrun_logging_callback.py +++ b/mlrun/frameworks/pytorch/callbacks/mlrun_logging_callback.py @@ -6,9 +6,10 @@ import mlrun from mlrun.artifacts import Artifact -from ..._common import TrackableType -from ..._dl_common.loggers import LoggerMode, MLRunLogger +from ..._common import LoggingMode +from ..._dl_common.loggers import MLRunLogger from ..model_handler import PyTorchModelHandler +from ..utils import PyTorchTypes from .logging_callback import LoggingCallback @@ -38,14 +39,20 @@ def __init__( context: mlrun.MLClientCtx, model_handler: PyTorchModelHandler, log_model_tag: str = "", - log_model_labels: Dict[str, TrackableType] = None, - log_model_parameters: Dict[str, TrackableType] = None, - log_model_extra_data: Dict[str, Union[TrackableType, Artifact]] = None, + log_model_labels: Dict[str, PyTorchTypes.TrackableType] = None, + log_model_parameters: Dict[str, PyTorchTypes.TrackableType] = None, + log_model_extra_data: Dict[ + str, Union[PyTorchTypes.TrackableType, Artifact] + ] = None, dynamic_hyperparameters: Dict[ - str, Tuple[str, Union[List[Union[str, int]], Callable[[], TrackableType]]] + str, + Tuple[ + str, + Union[List[Union[str, int]], Callable[[], PyTorchTypes.TrackableType]], + ], ] = None, static_hyperparameters: Dict[ - str, Union[TrackableType, Tuple[str, List[Union[str, int]]]] + str, Union[PyTorchTypes.TrackableType, Tuple[str, List[Union[str, int]]]] ] = None, auto_log: bool = False, ): @@ -114,7 +121,7 @@ def on_run_end(self): Before the run ends, this method will be called to log the model and the run summaries charts. 
""" # Check if the logger is in evaluation mode, if so, log the last epoch - if self._logger.mode == LoggerMode.EVALUATION: + if self._logger.mode == LoggingMode.EVALUATION: self._logger.log_epoch_to_context(epoch=1) # Set the inputs and outputs: diff --git a/mlrun/frameworks/pytorch/callbacks/tensorboard_logging_callback.py b/mlrun/frameworks/pytorch/callbacks/tensorboard_logging_callback.py index c84e13e3e4a..76f1550f0d8 100644 --- a/mlrun/frameworks/pytorch/callbacks/tensorboard_logging_callback.py +++ b/mlrun/frameworks/pytorch/callbacks/tensorboard_logging_callback.py @@ -11,9 +11,9 @@ import mlrun -from ..._common import TrackableType from ..._dl_common.loggers import TensorboardLogger -from .logging_callback import LoggingCallback, MetricFunctionType, MetricValueType +from ..utils import PyTorchTypes +from .logging_callback import LoggingCallback class _MLRunSummaryWriter(SummaryWriter): @@ -240,10 +240,14 @@ def __init__( Callable[[Union[Parameter, Tensor]], Union[float, Tensor]] ] = None, dynamic_hyperparameters: Dict[ - str, Tuple[str, Union[List[Union[str, int]], Callable[[], TrackableType]]] + str, + Tuple[ + str, + Union[List[Union[str, int]], Callable[[], PyTorchTypes.TrackableType]], + ], ] = None, static_hyperparameters: Dict[ - str, Union[TrackableType, Tuple[str, List[Union[str, int]]]] + str, Union[PyTorchTypes.TrackableType, Tuple[str, List[Union[str, int]]]] ] = None, update_frequency: Union[int, str] = "epoch", auto_log: bool = False, @@ -363,7 +367,7 @@ def on_setup( validation_set: DataLoader = None, loss_function: Module = None, optimizer: Optimizer = None, - metric_functions: List[MetricFunctionType] = None, + metric_functions: List[PyTorchTypes.MetricFunctionType] = None, scheduler=None, ): """ @@ -455,7 +459,7 @@ def on_run_end(self): super(TensorboardLoggingCallback, self).on_run_end() def on_validation_end( - self, loss_value: MetricValueType, metric_values: List[float] + self, loss_value: PyTorchTypes.MetricValueType, metric_values: List[float] ): """ Before the validation (in a training case it will be per epoch) ends, this method will be called to log the diff --git a/mlrun/frameworks/pytorch/callbacks_handler.py b/mlrun/frameworks/pytorch/callbacks_handler.py index 5e5acee1341..5dc5e049c43 100644 --- a/mlrun/frameworks/pytorch/callbacks_handler.py +++ b/mlrun/frameworks/pytorch/callbacks_handler.py @@ -5,7 +5,8 @@ from torch.optim import Optimizer from torch.utils.data import DataLoader -from .callbacks import Callback, MetricFunctionType, MetricValueType +from .callbacks import Callback +from .utils import PyTorchTypes class _CallbackInterface: @@ -99,7 +100,7 @@ def on_setup( validation_set: DataLoader, loss_function: Module, optimizer: Optimizer, - metric_functions: List[MetricFunctionType], + metric_functions: List[PyTorchTypes.MetricFunctionType], scheduler, callbacks: List[str] = None, ) -> bool: @@ -234,7 +235,7 @@ def on_validation_begin(self, callbacks: List[str] = None) -> bool: def on_validation_end( self, - loss_value: MetricValueType, + loss_value: PyTorchTypes.MetricValueType, metric_values: List[float], callbacks: List[str] = None, ) -> bool: @@ -415,7 +416,7 @@ def on_train_loss_begin(self, callbacks: List[str] = None) -> bool: ) def on_train_loss_end( - self, loss_value: MetricValueType, callbacks: List[str] = None + self, loss_value: PyTorchTypes.MetricValueType, callbacks: List[str] = None ) -> bool: """ Call the 'on_train_loss_end' method of every callback in the callbacks list. 
If the list is 'None' (not given), @@ -447,7 +448,7 @@ def on_validation_loss_begin(self, callbacks: List[str] = None) -> bool: ) def on_validation_loss_end( - self, loss_value: MetricValueType, callbacks: List[str] = None + self, loss_value: PyTorchTypes.MetricValueType, callbacks: List[str] = None ) -> bool: """ Call the 'on_validation_loss_end' method of every callback in the callbacks list. If the list is 'None' @@ -479,7 +480,9 @@ def on_train_metrics_begin(self, callbacks: List[str] = None) -> bool: ) def on_train_metrics_end( - self, metric_values: List[MetricValueType], callbacks: List[str] = None + self, + metric_values: List[PyTorchTypes.MetricValueType], + callbacks: List[str] = None, ) -> bool: """ Call the 'on_train_metrics_end' method of every callback in the callbacks list. If the list is 'None' @@ -511,7 +514,9 @@ def on_validation_metrics_begin(self, callbacks: List[str] = None) -> bool: ) def on_validation_metrics_end( - self, metric_values: List[MetricValueType], callbacks: List[str] = None + self, + metric_values: List[PyTorchTypes.MetricValueType], + callbacks: List[str] = None, ) -> bool: """ Call the 'on_validation_metrics_end' method of every callback in the callbacks list. If the list is 'None' @@ -635,7 +640,7 @@ def _run_callbacks( :param method_name: The name of the method to run. Should be given from the 'CallbackInterface'. :param callbacks: List of all the callbacks names to run the method. - :return: True if all of the callbacks called returned True and False if not. + :return: True if all the callbacks called returned True and False if not. """ all_result = True for callback in callbacks: diff --git a/mlrun/frameworks/pytorch/mlrun_interface.py b/mlrun/frameworks/pytorch/mlrun_interface.py index f20abad2705..2a4b11acb87 100644 --- a/mlrun/frameworks/pytorch/mlrun_interface.py +++ b/mlrun/frameworks/pytorch/mlrun_interface.py @@ -17,12 +17,11 @@ from .callbacks import ( Callback, HyperparametersKeys, - MetricFunctionType, - MetricValueType, MLRunLoggingCallback, TensorboardLoggingCallback, ) from .callbacks_handler import CallbacksHandler +from .utils import PyTorchTypes class PyTorchMLRunInterface: @@ -56,7 +55,7 @@ def __init__(self, model: Module, context: mlrun.MLClientCtx = None): self._loss_function = None # type: Module self._optimizer = None # type: Optimizer self._validation_set = None # type: DataLoader - self._metric_functions = None # type: List[MetricFunctionType] + self._metric_functions = None # type: List[PyTorchTypes.MetricFunctionType] self._scheduler = None self._scheduler_step_frequency = None # type: int self._epochs = None # type: int @@ -96,7 +95,7 @@ def train( loss_function: Module, optimizer: Optimizer, validation_set: DataLoader = None, - metric_functions: List[MetricFunctionType] = None, + metric_functions: List[PyTorchTypes.MetricFunctionType] = None, scheduler=None, scheduler_step_frequency: Union[int, float, str] = "epoch", epochs: int = 1, @@ -127,10 +126,9 @@ def train( :param validation_iterations: Amount of iterations (batches) to perform on each epoch's validation. If 'None' the entire validation set will be used. :param callbacks: The callbacks to use on this run. - :param use_cuda: Whether or not to use cuda. Only relevant if cuda is available. Defaulted to - True. - :param use_horovod: Whether or not to use horovod - a distributed training framework. Defaulted to - None, meaning it will be read from context if available and if not - False. + :param use_cuda: Whether to use cuda. Only relevant if cuda is available. 
Defaulted to True. + :param use_horovod: Whether to use horovod - a distributed training framework. Defaulted to None, + meaning it will be read from context if available and if not - False. """ # Load the input: self._parse_and_store( @@ -149,7 +147,7 @@ def train( use_horovod=use_horovod, ) - # Setup the inner attributes (initializing horovod and creating the callbacks handler): + # Set up the inner attributes (initializing horovod and creating the callbacks handler): self._setup() # Beginning of run callbacks: @@ -209,12 +207,12 @@ def evaluate( self, dataset: DataLoader, loss_function: Module = None, - metric_functions: List[MetricFunctionType] = None, + metric_functions: List[PyTorchTypes.MetricFunctionType] = None, iterations: int = None, callbacks: List[Callback] = None, use_cuda: bool = True, use_horovod: bool = None, - ) -> List[MetricValueType]: + ) -> List[PyTorchTypes.MetricValueType]: """ Initiate an evaluation process on this interface configuration. @@ -390,7 +388,7 @@ def _parse_and_store( loss_function: Module = None, optimizer: Optimizer = None, validation_set: DataLoader = None, - metric_functions: List[MetricFunctionType] = None, + metric_functions: List[PyTorchTypes.MetricFunctionType] = None, scheduler=None, scheduler_step_frequency: Union[int, float, str] = "epoch", epochs: int = 1, @@ -722,7 +720,7 @@ def _train(self): def _validate( self, is_evaluation: bool = False - ) -> Tuple[MetricValueType, List[MetricValueType]]: + ) -> Tuple[PyTorchTypes.MetricValueType, List[PyTorchTypes.MetricValueType]]: """ Initiate a single epoch validation. @@ -871,7 +869,7 @@ def _clear(self): self._loss_function = None # type: Module self._optimizer = None # type: Optimizer self._validation_set = None # type: DataLoader - self._metric_functions = None # type: List[MetricFunctionType] + self._metric_functions = None # type: List[PyTorchTypes.MetricFunctionType] self._scheduler = None self._scheduler_step_frequency = None # type: int self._epochs = None # type: int @@ -968,7 +966,7 @@ def _tensor_to_cuda( return tensor @staticmethod - def _get_metric_name(metric: MetricFunctionType) -> str: + def _get_metric_name(metric: PyTorchTypes.MetricFunctionType) -> str: """ Get the given metric function name. @@ -985,7 +983,7 @@ def _create_progress_bar( dataset: DataLoader, iterations: int, description: str, - metrics: List[MetricFunctionType], + metrics: List[PyTorchTypes.MetricFunctionType], ) -> tqdm: """ Create a progress bar for training and validating / evaluating. @@ -1016,8 +1014,8 @@ def _create_progress_bar( @staticmethod def _update_progress_bar( progress_bar: tqdm, - metrics: List[MetricFunctionType], - values: List[MetricValueType], + metrics: List[PyTorchTypes.MetricFunctionType], + values: List[PyTorchTypes.MetricValueType], ): """ Update the progress bar metrics results. 
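With the typing moved to `PyTorchTypes`, the interface's `train` signature is otherwise unchanged. A minimal usage sketch under the assumption that the first parameter is named `training_set` (only `loss_function` onward appears in the hunks above), with toy data for illustration only:

.. code-block:: python

    import torch
    from torch import nn
    from torch.utils.data import DataLoader, TensorDataset

    import mlrun
    from mlrun.frameworks.pytorch import PyTorchMLRunInterface

    # Toy regression data and model, for illustration only:
    data = TensorDataset(torch.randn(64, 10), torch.randn(64, 1))
    model = nn.Linear(10, 1)

    interface = PyTorchMLRunInterface(
        model=model, context=mlrun.get_or_create_ctx("pytorch-training")
    )
    interface.train(
        training_set=DataLoader(data, batch_size=16),  # assumed parameter name
        loss_function=nn.MSELoss(),
        optimizer=torch.optim.SGD(model.parameters(), lr=0.01),
        epochs=1,
        use_cuda=False,
    )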
diff --git a/mlrun/frameworks/pytorch/model_handler.py b/mlrun/frameworks/pytorch/model_handler.py index af3ab962fc2..80242df0e4d 100644 --- a/mlrun/frameworks/pytorch/model_handler.py +++ b/mlrun/frameworks/pytorch/model_handler.py @@ -7,10 +7,10 @@ import mlrun from mlrun.artifacts import Artifact -from mlrun.data_types import ValueType from mlrun.features import Feature from .._dl_common import DLModelHandler +from .utils import PyTorchUtils class PyTorchModelHandler(DLModelHandler): @@ -21,7 +21,7 @@ class PyTorchModelHandler(DLModelHandler): # Framework name: FRAMEWORK_NAME = "torch" - # Declare a type of an input sample: + # Declare a type of input sample: IOSample = Union[torch.Tensor, np.ndarray] class _LabelKeys: @@ -121,7 +121,7 @@ class name will be read from the logged label of the model. model_class if isinstance(model_class, str) else model_class.__name__ ) - # Setup the base handler class: + # Set up the base handler class: super(PyTorchModelHandler, self).__init__( model=model, model_path=model_path, @@ -289,7 +289,7 @@ def to_onnx( [ torch.zeros( size=input_feature.dims, - dtype=self.convert_value_type_to_torch_dtype( + dtype=PyTorchUtils.convert_value_type_to_torch_dtype( value_type=input_feature.value_type ), ) @@ -381,86 +381,6 @@ def to_onnx( return onnx_handler.model - @staticmethod - def convert_value_type_to_torch_dtype( - value_type: str, - ) -> torch.dtype: # TODO: Move to utils - """ - Get the 'torch.dtype' equivalent to the given MLRun data type. - - :param value_type: The MLRun value type to convert to torch data type. - - :return: The 'torch.dtype' equivalent to the given MLRun data type. - - :raise MLRunInvalidArgumentError: If torch is not supporting the given value type. - """ - # Initialize the mlrun to torch data type conversion map: - conversion_map = { - ValueType.BOOL: torch.bool, - ValueType.INT8: torch.int8, - ValueType.INT16: torch.int16, - ValueType.INT32: torch.int32, - ValueType.INT64: torch.int64, - ValueType.UINT8: torch.uint8, - ValueType.BFLOAT16: torch.bfloat16, - ValueType.FLOAT16: torch.float16, - ValueType.FLOAT: torch.float32, - ValueType.DOUBLE: torch.float64, - } - - # Convert and return: - if value_type in conversion_map: - return conversion_map[value_type] - raise mlrun.errors.MLRunInvalidArgumentError( - f"The ValueType given is not supported in torch: '{value_type}'." - ) - - @staticmethod - def convert_torch_dtype_to_value_type( - torch_dtype: Union[torch.dtype, str] - ) -> str: # TODO: Move to utils - """ - Convert the given torch data type to MLRun value type. All of the CUDA supported data types are supported. For - more information regarding torch data types, visit: https://pytorch.org/docs/stable/tensors.html#data-types - - :param torch_dtype: The torch data type to convert to MLRun's value type. Expected to be a 'torch.dtype' or - 'str'. - - :return: The MLRun value type converted from the given data type. - - :raise MLRunInvalidArgumentError: If the torch data type is not supported by MLRun. 
- """ - # Initialize the torch to mlrun data type conversion map: - conversion_map = { - str(torch.bool): ValueType.BOOL, - str(torch.int8): ValueType.INT8, - str(torch.short): ValueType.INT16, - str(torch.int16): ValueType.INT16, - str(torch.int): ValueType.INT32, - str(torch.int32): ValueType.INT32, - str(torch.long): ValueType.INT64, - str(torch.int64): ValueType.INT64, - str(torch.uint8): ValueType.UINT8, - str(torch.bfloat16): ValueType.BFLOAT16, - str(torch.half): ValueType.FLOAT16, - str(torch.float16): ValueType.FLOAT16, - str(torch.float): ValueType.FLOAT, - str(torch.float32): ValueType.FLOAT, - str(torch.double): ValueType.DOUBLE, - str(torch.float64): ValueType.DOUBLE, - } - - # Parse the given torch data type to string: - if isinstance(torch_dtype, torch.dtype): - torch_dtype = str(torch_dtype) - - # Convert and return: - if torch_dtype in conversion_map: - return conversion_map[torch_dtype] - raise mlrun.errors.MLRunInvalidArgumentError( - f"MLRun value type is not supporting the given torch data type: '{torch_dtype}'." - ) - def _collect_files_from_store_object(self): """ If the model path given is of a store object, collect the needed model files into this handler for later loading @@ -512,7 +432,7 @@ def _read_sample(self, sample: IOSample) -> Feature: return super(PyTorchModelHandler, self)._read_sample(sample=sample) elif isinstance(sample, torch.Tensor): return Feature( - value_type=self.convert_torch_dtype_to_value_type( + value_type=PyTorchUtils.convert_torch_dtype_to_value_type( torch_dtype=sample.dtype ), dims=list(sample.shape), diff --git a/mlrun/frameworks/pytorch/utils.py b/mlrun/frameworks/pytorch/utils.py new file mode 100644 index 00000000000..57346122975 --- /dev/null +++ b/mlrun/frameworks/pytorch/utils.py @@ -0,0 +1,110 @@ +from typing import Callable, Union + +import numpy as np +import torch +from torch import Tensor +from torch.nn import Module + +import mlrun +from mlrun.data_types import ValueType + +from .._dl_common import DLTypes, DLUtils + + +class PyTorchTypes(DLTypes): + """ + Typing hints for the PyTorch framework. + """ + + # Every model in PyTorch must inherit from torch.nn.Module: + ModelType = Module + + # Supported types of loss and metrics values: + MetricValueType = Union[int, float, np.ndarray, Tensor] + + # Supported types of metrics: + MetricFunctionType = Union[Callable[[Tensor, Tensor], MetricValueType], Module] + + +class PyTorchUtils(DLUtils): + """ + Utilities functions for the PyTorch framework. + """ + + @staticmethod + def convert_value_type_to_torch_dtype( + value_type: str, + ) -> torch.dtype: + """ + Get the 'torch.dtype' equivalent to the given MLRun data type. + + :param value_type: The MLRun value type to convert to torch data type. + + :return: The 'torch.dtype' equivalent to the given MLRun data type. + + :raise MLRunInvalidArgumentError: If torch is not supporting the given value type. + """ + # Initialize the mlrun to torch data type conversion map: + conversion_map = { + ValueType.BOOL: torch.bool, + ValueType.INT8: torch.int8, + ValueType.INT16: torch.int16, + ValueType.INT32: torch.int32, + ValueType.INT64: torch.int64, + ValueType.UINT8: torch.uint8, + ValueType.BFLOAT16: torch.bfloat16, + ValueType.FLOAT16: torch.float16, + ValueType.FLOAT: torch.float32, + ValueType.DOUBLE: torch.float64, + } + + # Convert and return: + if value_type in conversion_map: + return conversion_map[value_type] + raise mlrun.errors.MLRunInvalidArgumentError( + f"The ValueType given is not supported in torch: '{value_type}'." 
+ ) + + @staticmethod + def convert_torch_dtype_to_value_type(torch_dtype: Union[torch.dtype, str]) -> str: + """ + Convert the given torch data type to MLRun value type. All the CUDA supported data types are supported. For + more information regarding torch data types, visit: https://pytorch.org/docs/stable/tensors.html#data-types + + :param torch_dtype: The torch data type to convert to MLRun's value type. Expected to be a 'torch.dtype' or + 'str'. + + :return: The MLRun value type converted from the given data type. + + :raise MLRunInvalidArgumentError: If the torch data type is not supported by MLRun. + """ + # Initialize the torch to mlrun data type conversion map: + conversion_map = { + str(torch.bool): ValueType.BOOL, + str(torch.int8): ValueType.INT8, + str(torch.short): ValueType.INT16, + str(torch.int16): ValueType.INT16, + str(torch.int): ValueType.INT32, + str(torch.int32): ValueType.INT32, + str(torch.long): ValueType.INT64, + str(torch.int64): ValueType.INT64, + str(torch.uint8): ValueType.UINT8, + str(torch.bfloat16): ValueType.BFLOAT16, + str(torch.half): ValueType.FLOAT16, + str(torch.float16): ValueType.FLOAT16, + str(torch.float): ValueType.FLOAT, + str(torch.float32): ValueType.FLOAT, + str(torch.double): ValueType.DOUBLE, + str(torch.float64): ValueType.DOUBLE, + } + + # Parse the given torch data type to string: + if isinstance(torch_dtype, torch.dtype): + torch_dtype = str(torch_dtype) + + # Convert and return: + if torch_dtype in conversion_map: + return conversion_map[torch_dtype] + raise mlrun.errors.MLRunInvalidArgumentError( + f"MLRun value type is not supporting the given torch data type: '{torch_dtype}'." + ) diff --git a/mlrun/frameworks/sklearn/__init__.py b/mlrun/frameworks/sklearn/__init__.py index 21e9c98f6b1..de689fd8cf4 100644 --- a/mlrun/frameworks/sklearn/__init__.py +++ b/mlrun/frameworks/sklearn/__init__.py @@ -1,31 +1,22 @@ -import warnings +# flake8: noqa - this is until we take care of the F401 violations with respect to __all__ & sphinx from typing import Dict, List, Union import mlrun +from mlrun.frameworks.sklearn.metric import Metric -from .._common import ExtraDataType, get_plans -from .._ml_common import ( - DatasetType, - Metric, - MetricEntry, - MetricsLibrary, - MLArtifactsLibrary, - MLPlan, - PickleModelServer, - get_metrics, -) +from .._ml_common import MLArtifactsLibrary, MLPlan, PickleModelServer +from .metrics_library import MetricsLibrary from .mlrun_interface import SKLearnMLRunInterface from .model_handler import SKLearnModelHandler -from .utils import SKLearnModelType +from .utils import SKLearnTypes, SKLearnUtils -# Placeholders as the SciKit-Learn API is commonly used among all of the ML frameworks: +# Placeholders as the SciKit-Learn API is commonly used among all ML frameworks: +SKLearnModelServer = PickleModelServer SKLearnArtifactsLibrary = MLArtifactsLibrary -SKLearnMetricsLibrary = MetricsLibrary -SklearnModelServer = PickleModelServer def apply_mlrun( - model: SKLearnModelType = None, + model: SKLearnTypes.ModelType = None, model_name: str = "model", tag: str = "", model_path: str = None, @@ -34,16 +25,20 @@ def apply_mlrun( custom_objects_directory: str = None, context: mlrun.MLClientCtx = None, artifacts: Union[List[MLPlan], List[str], Dict[str, dict]] = None, - metrics: Union[List[Metric], List[MetricEntry], Dict[str, MetricEntry]] = None, - x_test: DatasetType = None, - y_test: DatasetType = None, - sample_set: Union[DatasetType, mlrun.DataItem, str] = None, + metrics: Union[ + List[Metric], + 
List[SKLearnTypes.MetricEntryType],
+        Dict[str, SKLearnTypes.MetricEntryType],
+    ] = None,
+    x_test: SKLearnTypes.DatasetType = None,
+    y_test: SKLearnTypes.DatasetType = None,
+    sample_set: Union[SKLearnTypes.DatasetType, mlrun.DataItem, str] = None,
     y_columns: Union[List[str], List[int]] = None,
     feature_vector: str = None,
     feature_weights: List[float] = None,
     labels: Dict[str, Union[str, int, float]] = None,
     parameters: Dict[str, Union[str, int, float]] = None,
-    extra_data: Dict[str, ExtraDataType] = None,
+    extra_data: Dict[str, SKLearnTypes.ExtraDataType] = None,
     auto_log: bool = True,
     **kwargs
 ) -> SKLearnModelHandler:
@@ -112,26 +107,12 @@ def apply_mlrun(
     :param labels:          Labels to log with the model.
     :param parameters:      Parameters to log with the model.
     :param extra_data:      Extra data to log with the model.
-    :param auto_log:        Whether or not to apply MLRun's auto logging on the model. Auto logging will add
-                            the default artifacts and metrics to the lists of artifacts and metrics. Defaulted
-                            to True.
+    :param auto_log:        Whether to apply MLRun's auto logging on the model. Auto logging will add the
+                            default artifacts and metrics to the lists of artifacts and metrics. Defaulted to
+                            True.
 
     :return: The model handler initialized with the provided model.
     """
-    if "X_test" in kwargs:
-        warnings.warn(
-            "The attribute 'X_test' was changed to 'x_test' and will be removed next version.",
-            # TODO: Remove in mlrun 1.0.0
-            PendingDeprecationWarning,
-        )
-        x_test = kwargs["X_test"]
-    if "X_train" in kwargs or "y_train" in kwargs:
-        warnings.warn(
-            "The attributes 'X_train' and 'y_train' are no longer required and will be removed next version.",
-            # TODO: Remove in mlrun 1.0.0
-            PendingDeprecationWarning,
-        )
-
     # Get the default context:
     if context is None:
         context = mlrun.get_or_create_ctx(SKLearnMLRunInterface.DEFAULT_CONTEXT_NAME)
@@ -161,7 +142,7 @@ def apply_mlrun(
     if sample_set is not None:
         handler.set_sample_set(sample_set=sample_set)
     if y_columns is not None:
-        handler.set_y_columns(y_columns=y_columns)
+        handler.set_target_columns(target_columns=y_columns)
     if feature_vector is not None:
         handler.set_feature_vector(feature_vector=feature_vector)
     if feature_weights is not None:
@@ -176,22 +157,17 @@ def apply_mlrun(
     # Add MLRun's interface to the model:
     SKLearnMLRunInterface.add_interface(obj=model)
 
-    # Set the handler to the model:
-    model.set_model_handler(model_handler=handler)
-
     # Configure the logger:
-    model.configure_logger(
+    model.configure_logging(
         context=context,
-        plans=get_plans(
-            artifacts_library=SKLearnArtifactsLibrary,
+        plans=SKLearnArtifactsLibrary.get_plans(
             artifacts=artifacts,
             context=context,
             include_default=auto_log,
             model=model,
             y=y_test,
         ),
-        metrics=get_metrics(
-            metrics_library=SKLearnMetricsLibrary,
+        metrics=MetricsLibrary.get_metrics(
             metrics=metrics,
             context=context,
             include_default=auto_log,
@@ -200,6 +176,7 @@ def apply_mlrun(
         ),
         x_test=x_test,
         y_test=y_test,
+        model_handler=handler,
     )
 
     return handler
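Reviewer note: a minimal usage sketch of the refactored entry point. The dataset, model choice and names below are illustrative assumptions and not part of this diff:

```python
# A minimal sketch, assuming a standard SciKit-Learn workflow:
from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split

from mlrun.frameworks.sklearn import apply_mlrun

x, y = make_classification(n_samples=100, n_features=5)
x_train, x_test, y_train, y_test = train_test_split(x, y)

# Wrapping the model configures logging on it: calling 'fit' will log the
# default artifacts and metrics and run validation on the given test set.
model = RandomForestClassifier()
handler = apply_mlrun(
    model=model, model_name="my_model", x_test=x_test, y_test=y_test
)
model.fit(x_train, y_train)
```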
+ """ + + def __init__( + self, + context: mlrun.MLClientCtx = None, + metrics: List[Metric] = None, + ): + """ + Initialize an estimator with the given metrics. The estimator will log the calculated results using the given + context. + + :param context: The context to log with. + :param metrics: The metrics + """ + # Store the context and metrics: + self._context = context + self._metrics = metrics if metrics is not None else [] + + # Setup the logger's mode (defaulted to Training): + self._mode = LoggingMode.TRAINING + + # Prepare the dictionaries to hold the results. Once they are logged they will be moved from one to another: + self._logged_results = {} # type: Dict[str, float] + self._not_logged_results = {} # type: Dict[str, float] + + @property + def context(self) -> mlrun.MLClientCtx: + """ + Get the logger's MLRun context. + + :return: The logger's MLRun context. + """ + return self._context + + @property + def results(self) -> Dict[str, float]: + """ + Get the logged results. + + :return: The logged results. + """ + return self._logged_results + + def set_mode(self, mode: LoggingMode): + """ + Set the estimator's mode. + + :param mode: The mode to set. + """ + self._mode = mode + + def set_context(self, context: mlrun.MLClientCtx): + """ + Set the context this logger will log with. + + :param context: The to be set MLRun context. + """ + self._context = context + + def set_metrics(self, metrics: List[Metric]): + """ + Update the metrics of this logger to the given list of metrics here. + + :param metrics: The list of metrics to override the current one. + """ + self._metrics = metrics + + def is_probabilities_required(self) -> bool: + """ + Check if probabilities are required in order to calculate some of the metrics. + + :return: True if probabilities are required by at least one metric and False otherwise. + """ + return any(metric.need_probabilities for metric in self._metrics) + + def estimate( + self, + y_true: Union[np.ndarray, pd.DataFrame, pd.Series], + y_pred: Union[np.ndarray, pd.DataFrame, pd.Series], + is_probabilities: bool = False, + ): + """ + Calculate the results according to the 'is_probabilities' flag and log them. + + :param y_true: The ground truth values to send for the metrics functions. + :param y_pred: The predictions to send for the metrics functions. + :param is_probabilities: True if the 'y_pred' is a prediction of probabilities (from 'predict_proba') and False + if not. Defaulted to False. + """ + # Calculate the metrics results: + self._calculate_results( + y_true=y_true, y_pred=y_pred, is_probabilities=is_probabilities + ) + + # Log if a context is available: + if self._context is not None: + # Log the results in queue: + self._log_results() + # Commit: + self._context.commit(completed=False) + + def _calculate_results( + self, + y_true: Union[np.ndarray, pd.DataFrame, pd.Series], + y_pred: Union[np.ndarray, pd.DataFrame, pd.Series], + is_probabilities: bool, + ): + """ + Calculate the results from all the metrics in the estimator. + + :param y_true: The ground truth values to send for the metrics functions. + :param y_pred: The predictions to send for the metrics functions. + :param is_probabilities: True if the 'y_pred' is a prediction of probabilities (from 'predict_proba') and False + if not. 
+ """ + # Use squeeze to remove redundant dimensions: + y_true = np.squeeze(SKLearnUtils.to_array(dataset=y_true)) + y_pred = np.squeeze(SKLearnUtils.to_array(dataset=y_pred)) + + # Calculate the metrics: + for metric in self._metrics: + if metric.need_probabilities == is_probabilities: + self._not_logged_results[metric.name] = metric(y_true, y_pred) + + # Add evaluation prefix if in Evaluation mode: + if self._mode == LoggingMode.EVALUATION: + self._not_logged_results = { + f"evaluation_{key}": value + for key, value in self._not_logged_results.items() + } + + def _log_results(self): + """ + Log the calculated metrics results using the logger's context. + """ + # Use the context to log each metric result: + self._context.log_results(self._not_logged_results) + + # Collect the logged results: + self._logged_results = {**self._logged_results, **self._not_logged_results} + + # Clean the not logged results dictionary: + self._not_logged_results = {} diff --git a/mlrun/frameworks/_ml_common/metric.py b/mlrun/frameworks/sklearn/metric.py similarity index 96% rename from mlrun/frameworks/_ml_common/metric.py rename to mlrun/frameworks/sklearn/metric.py index d50201aef69..3811785b2b9 100644 --- a/mlrun/frameworks/_ml_common/metric.py +++ b/mlrun/frameworks/sklearn/metric.py @@ -6,8 +6,7 @@ import mlrun.errors -from .._common.utils import ModelType -from .utils import DatasetType +from .utils import SKLearnTypes class Metric: @@ -43,10 +42,10 @@ def __init__( def __call__( self, - y_true: DatasetType, - y_pred: DatasetType = None, - model: ModelType = None, - x: DatasetType = None, + y_true: SKLearnTypes.DatasetType, + y_pred: SKLearnTypes.DatasetType = None, + model: SKLearnTypes.ModelType = None, + x: SKLearnTypes.DatasetType = None, ) -> float: """ Call the metric function on the provided y_true and y_pred values using the stored additional arguments. diff --git a/mlrun/frameworks/_ml_common/metrics_library.py b/mlrun/frameworks/sklearn/metrics_library.py similarity index 60% rename from mlrun/frameworks/_ml_common/metrics_library.py rename to mlrun/frameworks/sklearn/metrics_library.py index 583f3a76fcf..e622e666494 100644 --- a/mlrun/frameworks/_ml_common/metrics_library.py +++ b/mlrun/frameworks/sklearn/metrics_library.py @@ -1,25 +1,109 @@ from abc import ABC -from typing import Dict, List, Type, Union +from typing import Dict, List, Union import sklearn from sklearn.preprocessing import LabelBinarizer import mlrun.errors -from .._common.utils import ModelType +from .._ml_common import AlgorithmFunctionality from .metric import Metric -from .utils import AlgorithmFunctionality, DatasetType, MetricEntry +from .utils import SKLearnTypes class MetricsLibrary(ABC): """ - Static class of a collection of metrics to use in training and evaluation of machine learning frameworks. + Static class for getting and parsing metrics to use in training and evaluation of SciKit-Learn. 
""" + # A constant name for the context parameter to use for passing a metrics configuration: + CONTEXT_PARAMETER = "_metrics" + + # A keyword to add in case the metric is based on predictions probabilities (not final predictions): _NEED_PROBABILITIES_KEYWORD = "need_probabilities" - @staticmethod - def from_list(metrics_list: List[Union[Metric, MetricEntry]]) -> List[Metric]: + @classmethod + def get_metrics( + cls, + metrics: Union[ + List[Metric], + List[SKLearnTypes.MetricEntryType], + Dict[str, SKLearnTypes.MetricEntryType], + ] = None, + context: mlrun.MLClientCtx = None, + include_default: bool = True, + **default_kwargs, + ) -> List[Metric]: + """ + Get metrics for a run. The metrics will be taken from the provided metrics / configuration via code, from + provided configuration via MLRun context and if the 'include_default' is True, from the metric library's + defaults as well. + + :param metrics: The metrics parameter passed to the function. Can be passed as a dictionary or a list of + metrics. + :param context: A context to look in if the configuration was passed as a parameter. + :param include_default: Whether to include the default in addition to the provided metrics. Defaulted to True. + :param default_kwargs: Additional key word arguments to pass to the 'default' method of the given metrics + library class. + + :return: The metrics list. + + :raise MLRunInvalidArgumentError: If the metrics were not passed in a list or a dictionary. + """ + # Set up the plans list: + parsed_metrics = [] # type: List[Metric] + + # Get the metrics passed via context: + if context is not None and cls.CONTEXT_PARAMETER in context.parameters: + parsed_metrics += cls._parse( + metrics=context.parameters.get(cls.CONTEXT_PARAMETER, None) + ) + + # Get the user's set metrics: + if metrics is not None: + parsed_metrics += cls._parse(metrics=metrics) + + # Get the library's default: + if include_default: + parsed_metrics += cls._default(**default_kwargs) + + return parsed_metrics + + @classmethod + def _parse( + cls, + metrics: Union[ + List[Metric], + List[SKLearnTypes.MetricEntryType], + Dict[str, SKLearnTypes.MetricEntryType], + ], + ) -> List[Metric]: + """ + Parse the given metrics by the possible rules of the framework implementing. + + :param metrics: A collection of metrics to parse. + + :return: The parsed metrics to use in training / evaluation. + """ + # Parse from dictionary: + if isinstance(metrics, dict): + return cls._from_dict(metrics_dictionary=metrics) + + # Parse from list: + if isinstance(metrics, list): + return cls._from_list(metrics_list=metrics) + + raise mlrun.errors.MLRunInvalidArgumentError( + f"The metrics are expected to be in a list or a dictionary. Received: {type(metrics)}. A metric can be a " + f"function, callable object, name of an imported function or a module path to import the function. " + f"Arguments can be passed as a tuple: in the following form: (metric, arguments). If used in a dictionary, " + f"each key will be the name to use for logging the metric." + ) + + @classmethod + def _from_list( + cls, metrics_list: List[Union[Metric, SKLearnTypes.MetricEntryType]] + ) -> List[Metric]: """ Collect the given metrics configurations from a list. 
The metrics names will be chosen by the following rules: @@ -39,12 +123,14 @@ def from_list(metrics_list: List[Union[Metric, MetricEntry]]) -> List[Metric]: return [ metric if isinstance(metric, Metric) - else MetricsLibrary._to_metric_class(metric_entry=metric) + else cls._to_metric_class(metric_entry=metric) for metric in metrics_list ] - @staticmethod - def from_dict(metrics_dictionary: Dict[str, MetricEntry]) -> List[Metric]: + @classmethod + def _from_dict( + cls, metrics_dictionary: Dict[str, SKLearnTypes.MetricEntryType] + ) -> List[Metric]: """ Collect the given metrics configurations from a dictionary. @@ -58,16 +144,16 @@ def from_dict(metrics_dictionary: Dict[str, MetricEntry]) -> List[Metric]: :return: A list of metrics objects. """ return [ - MetricsLibrary._to_metric_class( - metric_entry=metric, metric_name=metric_name - ) + cls._to_metric_class(metric_entry=metric, metric_name=metric_name) for metric_name, metric in metrics_dictionary.items() ] @classmethod - def default(cls, model: ModelType, y: DatasetType = None, **kwargs) -> List[Metric]: + def _default( + cls, model: SKLearnTypes.ModelType, y: SKLearnTypes.DatasetType = None + ) -> List[Metric]: """ - Get the default metrics list of this framework's library. + Get the default metrics list according to the algorithm functionality. :param model: The model to check if its a regression model or a classification model. :param y: The ground truth values to check if its multiclass and / or multi output. @@ -160,20 +246,21 @@ def default(cls, model: ModelType, y: DatasetType = None, **kwargs) -> List[Metr return metrics - @staticmethod + @classmethod def _to_metric_class( - metric_entry: MetricEntry, + cls, + metric_entry: SKLearnTypes.MetricEntryType, metric_name: str = None, ) -> Metric: """ Create a Metric instance from a user given metric entry. - :param metric_entry: Metric entry as passed inside of a list or a dictionary. + :param metric_entry: Metric entry as passed inside a list or a dictionary. :param metric_name: The metric name to use (if passed from a dictionary). :return: The metric class instance of this entry. """ - # If its a tuple, unpack it to get the additional arguments: + # If it's a tuple, unpack it to get the additional arguments: if isinstance(metric_entry, tuple): metric, arguments = metric_entry else: @@ -181,9 +268,9 @@ def _to_metric_class( arguments = {} # Check if the 'need_probabilities' attribute is given: - if MetricsLibrary._NEED_PROBABILITIES_KEYWORD in arguments: - need_probabilities = arguments[MetricsLibrary._NEED_PROBABILITIES_KEYWORD] - arguments.pop(MetricsLibrary._NEED_PROBABILITIES_KEYWORD) + if cls._NEED_PROBABILITIES_KEYWORD in arguments: + need_probabilities = arguments[cls._NEED_PROBABILITIES_KEYWORD] + arguments.pop(cls._NEED_PROBABILITIES_KEYWORD) else: need_probabilities = False @@ -194,60 +281,3 @@ def _to_metric_class( additional_arguments=arguments, need_probabilities=need_probabilities, ) - - -# A constant name for the context parameter to use for passing a plans configuration: -METRICS_CONTEXT_PARAMETER = "_metrics" - - -def get_metrics( - metrics_library: Type[MetricsLibrary], - metrics: Union[List[Metric], List[MetricEntry], Dict[str, MetricEntry]] = None, - context: mlrun.MLClientCtx = None, - include_default: bool = True, - **default_kwargs, -) -> List[Metric]: - """ - Get metrics for a run. 
The metrics will be taken from the provided metrics / configuration via code, from provided
-    configuration via MLRun context and if the 'include_default' is True, from the framework metric library's defaults.
-
-    :param metrics_library:  The framework's metrics library class to get its defaults.
-    :param metrics:          The metrics parameter passed to the function. Can be passed as a dictionary or a list of
-                             metrics.
-    :param context:          A context to look in if the configuration was passed as a parameter.
-    :param include_default:  Whether to include the default in addition to the provided metrics. Defaulted to True.
-    :param default_kwargs:   Additional key word arguments to pass to the 'default' method of the given metrics library
-                             class.
-
-    :return: The metrics list.
-
-    :raise MLRunInvalidArgumentError: If the metrics were not passed in a list or a dictionary.
-    """
-    # Setup the plans list:
-    parsed_metrics = []  # type: List[Metric]
-
-    # Get the user input metrics:
-    metrics_from_context = None
-    if context is not None:
-        metrics_from_context = context.parameters.get(METRICS_CONTEXT_PARAMETER, None)
-    for user_input in [metrics, metrics_from_context]:
-        if user_input is not None:
-            if isinstance(user_input, dict):
-                parsed_metrics += metrics_library.from_dict(
-                    metrics_dictionary=user_input
-                )
-            elif isinstance(user_input, list):
-                parsed_metrics += metrics_library.from_list(metrics_list=user_input)
-            else:
-                raise mlrun.errors.MLRunInvalidArgumentError(
-                    f"The metrics are expected to be in a list or a dictionary. Received: {type(user_input)}. A metric "
-                    f"can be a function, callable object, name of an imported function or a module path to import the "
-                    f"function. Arguments can be passed as a tuple: in the following form: (metric, arguments). If "
-                    f"used in a dictionary, each key will be the name to use for logging the metric."
-                )
-
-    # Get the library's default:
-    if include_default:
-        parsed_metrics += metrics_library.default(**default_kwargs)
-
-    return parsed_metrics
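Reviewer note: a sketch of the metric entry formats the consolidated `get_metrics` accepts, based on the parsing rules above. The metric choices are illustrative; `include_default=False` skips the library defaults so no model is needed:

```python
# A sketch of the accepted metric entries, assuming common sklearn metrics:
from sklearn.metrics import accuracy_score, roc_auc_score

from mlrun.frameworks.sklearn import MetricsLibrary

metrics = MetricsLibrary.get_metrics(
    metrics={
        # A callable - the dictionary key is used as the logged name:
        "accuracy": accuracy_score,
        # A (metric, arguments) tuple - the 'need_probabilities' keyword marks
        # metrics that should receive the 'predict_proba' output:
        "auc": (roc_auc_score, {"need_probabilities": True}),
    },
    include_default=False,  # Skip the library defaults (no model required).
)
```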
diff --git a/mlrun/frameworks/sklearn/mlrun_interface.py b/mlrun/frameworks/sklearn/mlrun_interface.py
index 39d665e5999..38d99a36fde 100644
--- a/mlrun/frameworks/sklearn/mlrun_interface.py
+++ b/mlrun/frameworks/sklearn/mlrun_interface.py
@@ -1,12 +1,325 @@
 from abc import ABC
+from typing import List
 
-from .._ml_common import MLMLRunInterface
+import mlrun
 
+from .._common import LoggingMode, MLRunInterface
+from .._ml_common import MLModelHandler, MLPlan, MLPlanStages, MLProducer
+from .estimator import Estimator
+from .metric import Metric
+from .utils import SKLearnTypes, SKLearnUtils
 
-class SKLearnMLRunInterface(MLMLRunInterface, ABC):
+
+class SKLearnMLRunInterface(MLRunInterface, ABC):
     """
-    Interface for adding MLRun features for SciKit-Learn API.
+    Interface for adding MLRun features to the machine learning SciKit-Learn API.
     """
 
     # MLRun's context default name:
     DEFAULT_CONTEXT_NAME = "mlrun-sklearn"
+
+    # Attributes to be inserted so the MLRun interface will be fully enabled.
+    _PROPERTIES = {
+        # A model handler instance with the model for logging / updating the model (if not provided the model won't be
+        # logged / updated at the end of training / testing):
+        "_model_handler": None,  # type: MLModelHandler
+        # A producer instance for logging this model's training / evaluation artifacts:
+        "_producer": None,  # type: MLProducer
+        # An estimator instance for logging this model's training / evaluation metrics results:
+        "_estimator": None,  # type: Estimator
+        # The test set (for validation post training or evaluation post prediction):
+        "_x_test": None,  # type: SKLearnTypes.DatasetType
+        "_y_test": None,  # type: SKLearnTypes.DatasetType
+    }
+    _METHODS = [
+        "configure_logging",
+        "_pre_fit",
+        "_post_fit",
+        "_pre_predict",
+        "_post_predict",
+    ]
+
+    # Attributes to replace so the MLRun interface will be fully enabled.
+    _REPLACED_METHODS = ["fit", "predict", "predict_proba"]
+
+    @classmethod
+    def add_interface(
+        cls,
+        obj: SKLearnTypes.ModelType,
+        restoration: SKLearnTypes.MLRunInterfaceRestorationType = None,
+    ):
+        """
+        Enrich the object with this interface's properties, methods and functions, so it will have SciKit-Learn
+        MLRun's features.
+
+        :param obj:         The model object whose interface will be enriched.
+        :param restoration: Restoration information tuple as returned from 'remove_interface' in order to
+                            add the interface in a certain state.
+        """
+        # Check if the given model has the 'predict_proba' method to replace:
+        if not hasattr(obj, "predict_proba"):
+            cls._REPLACED_METHODS.remove("predict_proba")
+
+        # Add the interface to the model:
+        super(SKLearnMLRunInterface, cls).add_interface(
+            obj=obj, restoration=restoration
+        )
+
+        # Restore the '_REPLACED_METHODS' list for next models:
+        if "predict_proba" not in cls._REPLACED_METHODS:
+            cls._REPLACED_METHODS.append("predict_proba")
+
+        # Set up a default producer and estimator:
+        if obj._producer is None:
+            obj._producer = MLProducer()
+            obj._estimator = Estimator()
+
+    @classmethod
+    def mlrun_fit(cls):
+        """
+        MLRun's common ML API fit wrapper. It will run a validation post training if the required attributes were
+        provided.
+
+        Unsupervised learning algorithms won't be using 'y' in their 'fit' method, but for consistency reasons (as
+        written in SciKit-Learn's API documentation) the signature of their 'fit' method will still have 'y'. Thanks
+        to that, a separate wrapper to support unsupervised learning models is not needed.
+        """
+
+        def wrapper(
+            self: SKLearnTypes.ModelType,
+            X: SKLearnTypes.DatasetType,
+            y: SKLearnTypes.DatasetType = None,
+            *args,
+            **kwargs,
+        ):
+            # Restore the prediction methods as fit will use them:
+            cls._restore_attribute(obj=self, attribute_name="predict")
+            if hasattr(self, "predict_proba"):
+                cls._restore_attribute(obj=self, attribute_name="predict_proba")
+
+            # Call the pre fit method:
+            self._pre_fit(x=X, y=y)
+
+            # Call the original fit method:
+            result = self.original_fit(X, y, *args, **kwargs)
+
+            # Call the post fit method:
+            self._post_fit(x=X, y=y)
+
+            # Replace the prediction methods again:
+            cls._replace_function(obj=self, function_name="predict")
+            if hasattr(self, "predict_proba"):
+                cls._replace_function(obj=self, function_name="predict_proba")
+            return result
+
+        return wrapper
+
+    def mlrun_predict(self, X: SKLearnTypes.DatasetType, *args, **kwargs):
+        """
+        MLRun's wrapper for the common ML API predict method.
+ """ + self._pre_predict(x=X, y=self._y_test) + + y_pred = self.original_predict(X, *args, **kwargs) + + self._post_predict(x=X, y=self._y_test, y_pred=y_pred, is_predict_proba=False) + + return y_pred + + def mlrun_predict_proba(self, X: SKLearnTypes.DatasetType, *args, **kwargs): + """ + MLRun's wrapper for the common ML API predict_proba method. + """ + self._pre_predict(x=X, y=self._y_test) + + y_pred = self.original_predict_proba(X, *args, **kwargs) + + self._post_predict(x=X, y=self._y_test, y_pred=y_pred, is_predict_proba=True) + + return y_pred + + def configure_logging( + self, + context: mlrun.MLClientCtx = None, + plans: List[MLPlan] = None, + metrics: List[Metric] = None, + x_test: SKLearnTypes.DatasetType = None, + y_test: SKLearnTypes.DatasetType = None, + model_handler: MLModelHandler = None, + ): + """ + Configure the MLRun logger for this model using the provided context and artifacts plans, metrics and model + logging attributes. + + :param context: A MLRun context to log to. By default, uses `mlrun.get_or_create_ctx` + :param plans: A list of plans to produce. + :param metrics: A list of metrics to calculate. + :param x_test: The testing data for producing and calculating artifacts and metrics post training or post + prediction. Without this, training validation will not be performed. + :param y_test: The test data for producing and calculating artifacts and metrics post training (calling + 'fit') or post testing (calling 'predict' / 'predict_proba'). + :param model_handler: A model handler for logging the model as a model artifact post training (post calling + 'fit') or update the existing model artifact post testing (calling 'predict' / + 'predict_proba'). + """ + # Store the given model handler: + self._model_handler = model_handler + + # Get the context: + if context is None: + context = ( + mlrun.get_or_create_ctx(name=SKLearnMLRunInterface.DEFAULT_CONTEXT_NAME) + if model_handler.context is None + else model_handler.context + ) + self._producer.set_context(context=context) + self._estimator.set_context(context=context) + self._model_handler.set_context(context=context) + + # Set the logging attributes: + self._producer.set_plans(plans=plans) + self._estimator.set_metrics(metrics=metrics) + + # Validate that if the prediction probabilities are required, this model has the 'predict_proba' method: + if ( + self._producer.is_probabilities_required() + or self._estimator.is_probabilities_required() + ) and not hasattr(self, "predict_proba"): + raise mlrun.errors.MLRunInvalidArgumentError( + f"Some of the metrics and or artifacts required to be calculated and produced require prediction " + f"probabilities yet this model: '{type(self)}' do not has the 'predict_proba' method." + ) + + # Store the given dataset: + self._x_test = x_test + self._y_test = y_test + + def _pre_fit(self, x: SKLearnTypes.DatasetType, y: SKLearnTypes.DatasetType = None): + """ + Method for creating the artifacts before the fit method. + + :param x: The input dataset to the fit method ('x_train'). + :param y: The input dataset to the fit method ('y_train'). + """ + self._producer.produce_stage(stage=MLPlanStages.PRE_FIT, model=self, x=x, y=y) + + def _post_fit( + self, x: SKLearnTypes.DatasetType, y: SKLearnTypes.DatasetType = None + ): + """ + Method for creating the artifacts after the fit method. If a validation set is available, the method will start + a validation process calling predict - creating and calculating validation artifacts and metrics. 
+ + :param x: The input dataset to the fit method ('x_train'). + :param y: The input dataset to the fit method ('y_train'). + """ + # The model is done training, log all artifacts post fit: + self._producer.produce_stage(stage=MLPlanStages.POST_FIT, model=self, x=x, y=y) + + # If there is a validation set, run validation: + if self._x_test is not None: + self._producer.produce_stage( + stage=MLPlanStages.PRE_PREDICT, + model=self, + x=self._x_test, + y=self._y_test, + ) + y_pred = self.predict(self._x_test) + self._post_predict( + x=self._x_test, + y=self._y_test, + y_pred=y_pred, + is_predict_proba=False, + ) + + # Log the model with the given attributes: + if self._model_handler is not None: + # Set the sample set to the training set if None: + if self._model_handler.sample_set is None: + sample_set, target_columns = SKLearnUtils.concatenate_x_y( + x=x, y=y, target_columns_names=self._model_handler.target_columns + ) + self._model_handler.set_target_columns(target_columns=target_columns) + self._model_handler.set_sample_set(sample_set=sample_set) + # Log the model: + self._model_handler.log( + metrics=self._estimator.results, + artifacts=self._producer.artifacts, + ) + self._model_handler.context.commit(completed=False) + + def _pre_predict(self, x: SKLearnTypes.DatasetType, y: SKLearnTypes.DatasetType): + """ + Method for creating the artifacts before the predict method. + + :param x: The input dataset to the predict / predict_proba method ('x_test'). + :param y: The input dataset to the predict / predict_proba method ('y_test'). + """ + # This function is only called for evaluation, then set the mode to the producer and estimator: + self._producer.set_mode(mode=LoggingMode.EVALUATION) + self._estimator.set_mode(mode=LoggingMode.EVALUATION) + + # Produce and log all the artifacts pre prediction: + self._producer.produce_stage( + stage=MLPlanStages.PRE_PREDICT, model=self, x=x, y=y + ) + + def _post_predict( + self, + x: SKLearnTypes.DatasetType, + y: SKLearnTypes.DatasetType, + y_pred: SKLearnTypes.DatasetType, + is_predict_proba: bool, + ): + """ + Method for creating and calculating the artifacts and metrics after the predict method. This method can be + called after a user call to predict as part of testing or as part of validation after training (calling fit). + + :param x: The input dataset to the predict / predict_proba method ('x_test' / 'x_validation'). + :param y: The input dataset to the predict / predict_proba method ('y_test' / 'y_validation'). + :param y_pred: The prediction outcome. + :param is_predict_proba: Whether the prediction returned from predict or predict_proba. 
+ """ + # Produce and log all the artifacts post prediction: + self._producer.produce_stage( + stage=MLPlanStages.POST_PREDICT, + model=self, + x=x, + y=y, + y_pred=y_pred, + is_probabilities=is_predict_proba, + ) + + # Calculate and log the metrics results: + self._estimator.estimate( + y_true=y, y_pred=y_pred, is_probabilities=is_predict_proba + ) + + # If some metrics and / or plans require probabilities, run 'predict_proba': + if not is_predict_proba and ( + self._producer.is_probabilities_required() + or self._estimator.is_probabilities_required() + ): + y_pred_proba = self.predict_proba(x) + self._producer.produce_stage( + stage=MLPlanStages.POST_PREDICT, + is_probabilities=True, + model=self, + x=x, + y=y, + y_pred=y_pred_proba, + ) + self._estimator.estimate( + y_true=y, y_pred=y_pred_proba, is_probabilities=True + ) + + # If its part of validation post training, return: + if self._producer.mode == LoggingMode.TRAINING: + return + + # Update the model with the testing artifacts and results: + if self._model_handler is not None: + self._model_handler.update( + metrics=self._estimator.results, + artifacts=self._producer.artifacts, + ) + self._model_handler.context.commit(completed=False) diff --git a/mlrun/frameworks/sklearn/model_handler.py b/mlrun/frameworks/sklearn/model_handler.py index 584fb5cd4f4..df0bd48b031 100644 --- a/mlrun/frameworks/sklearn/model_handler.py +++ b/mlrun/frameworks/sklearn/model_handler.py @@ -6,8 +6,9 @@ import mlrun from .._common import without_mlrun_interface -from .._ml_common import DatasetType, MLModelHandler +from .._ml_common import MLModelHandler from .mlrun_interface import SKLearnMLRunInterface +from .utils import SKLearnTypes class SKLearnModelHandler(MLModelHandler): @@ -68,7 +69,7 @@ def to_onnx( self, model_name: str = None, optimize: bool = True, - input_sample: DatasetType = None, + input_sample: SKLearnTypes.DatasetType = None, log: bool = None, ): """ diff --git a/mlrun/frameworks/sklearn/utils.py b/mlrun/frameworks/sklearn/utils.py index 9186287b328..0eb2bad5311 100644 --- a/mlrun/frameworks/sklearn/utils.py +++ b/mlrun/frameworks/sklearn/utils.py @@ -2,13 +2,29 @@ import sklearn.base -# A union of all SciKitLearn model base classes: -SKLearnModelType = Union[ - sklearn.base.BaseEstimator, - sklearn.base.BiclusterMixin, - sklearn.base.ClassifierMixin, - sklearn.base.ClusterMixin, - sklearn.base.DensityMixin, - sklearn.base.RegressorMixin, - sklearn.base.TransformerMixin, -] +from .._ml_common import MLTypes, MLUtils + + +class SKLearnTypes(MLTypes): + """ + Typing hints for the SciKit-Learn framework. + """ + + # A union of all SciKitLearn model base classes: + ModelType = Union[ + sklearn.base.BaseEstimator, + sklearn.base.BiclusterMixin, + sklearn.base.ClassifierMixin, + sklearn.base.ClusterMixin, + sklearn.base.DensityMixin, + sklearn.base.RegressorMixin, + sklearn.base.TransformerMixin, + ] + + +class SKLearnUtils(MLUtils): + """ + Utilities functions for the SciKit-Learn framework. 
+ """ + + pass diff --git a/mlrun/frameworks/tf_keras/__init__.py b/mlrun/frameworks/tf_keras/__init__.py index 189919b4244..67d9c3cf6df 100644 --- a/mlrun/frameworks/tf_keras/__init__.py +++ b/mlrun/frameworks/tf_keras/__init__.py @@ -9,6 +9,7 @@ from .mlrun_interface import TFKerasMLRunInterface from .model_handler import TFKerasModelHandler from .model_server import TFKerasModelServer +from .utils import TFKerasTypes, TFKerasUtils def apply_mlrun( diff --git a/mlrun/frameworks/tf_keras/callbacks/__init__.py b/mlrun/frameworks/tf_keras/callbacks/__init__.py index 3ae0969f7d8..57e703efdfb 100644 --- a/mlrun/frameworks/tf_keras/callbacks/__init__.py +++ b/mlrun/frameworks/tf_keras/callbacks/__init__.py @@ -1,4 +1,4 @@ # flake8: noqa - this is until we take care of the F401 violations with respect to __all__ & sphinx -from .logging_callback import LoggingCallback, TrackableType +from .logging_callback import LoggingCallback from .mlrun_logging_callback import MLRunLoggingCallback from .tensorboard_logging_callback import TensorboardLoggingCallback diff --git a/mlrun/frameworks/tf_keras/callbacks/logging_callback.py b/mlrun/frameworks/tf_keras/callbacks/logging_callback.py index 084bd8dbb8c..edfa4eb4a50 100644 --- a/mlrun/frameworks/tf_keras/callbacks/logging_callback.py +++ b/mlrun/frameworks/tf_keras/callbacks/logging_callback.py @@ -7,8 +7,9 @@ import mlrun -from ..._common import TrackableType -from ..._dl_common.loggers import Logger, LoggerMode +from ..._common import LoggingMode +from ..._dl_common.loggers import Logger +from ..utils import TFKerasTypes class LoggingCallback(Callback): @@ -22,10 +23,10 @@ def __init__( self, context: mlrun.MLClientCtx = None, dynamic_hyperparameters: Dict[ - str, Union[List[Union[str, int]], Callable[[], TrackableType]] + str, Union[List[Union[str, int]], Callable[[], TFKerasTypes.TrackableType]] ] = None, static_hyperparameters: Dict[ - str, Union[TrackableType, List[Union[str, int]]] + str, Union[TFKerasTypes.TrackableType, List[Union[str, int]]] ] = None, auto_log: bool = False, ): @@ -116,7 +117,7 @@ def get_validation_summaries(self) -> Dict[str, List[float]]: """ return self._logger.validation_summaries - def get_static_hyperparameters(self) -> Dict[str, TrackableType]: + def get_static_hyperparameters(self) -> Dict[str, TFKerasTypes.TrackableType]: """ Get the static hyperparameters logged. The hyperparameters will be stored in a dictionary where each key is the hyperparameter name and the value is his logged value. @@ -125,7 +126,9 @@ def get_static_hyperparameters(self) -> Dict[str, TrackableType]: """ return self._logger.static_hyperparameters - def get_dynamic_hyperparameters(self) -> Dict[str, List[TrackableType]]: + def get_dynamic_hyperparameters( + self, + ) -> Dict[str, List[TFKerasTypes.TrackableType]]: """ Get the dynamic hyperparameters logged. The hyperparameters will be stored in a dictionary where each key is the hyperparameter name and the value is a list of his logged values per epoch. 
@@ -179,7 +182,7 @@ def on_test_begin(self, logs: dict = None):
         # Check if needed to mark this run as evaluation:
         if self._is_training is None:
             self._is_training = False
-            self._logger.set_mode(mode=LoggerMode.EVALUATION)
+            self._logger.set_mode(mode=LoggingMode.EVALUATION)
 
         # If this callback is part of evaluation and not training, need to check if the run was setup:
         if not self._is_training:
@@ -379,8 +382,11 @@ def _add_auto_hyperparameters(self):
         pass
 
     def _get_hyperparameter(
-        self, key_chain: Union[Callable[[], TrackableType], List[Union[str, int]]]
-    ) -> TrackableType:
+        self,
+        key_chain: Union[
+            Callable[[], TFKerasTypes.TrackableType], List[Union[str, int]]
+        ],
+    ) -> TFKerasTypes.TrackableType:
         """
         Access the hyperparameter from the model stored in this callback using the given key chain.
 
diff --git a/mlrun/frameworks/tf_keras/callbacks/mlrun_logging_callback.py b/mlrun/frameworks/tf_keras/callbacks/mlrun_logging_callback.py
index 8e6b5f7d206..5f2dc211cf2 100644
--- a/mlrun/frameworks/tf_keras/callbacks/mlrun_logging_callback.py
+++ b/mlrun/frameworks/tf_keras/callbacks/mlrun_logging_callback.py
@@ -3,9 +3,10 @@
 import mlrun
 from mlrun.artifacts import Artifact
 
-from ..._common import TrackableType
-from ..._dl_common.loggers import LoggerMode, MLRunLogger
+from ..._common import LoggingMode
+from ..._dl_common.loggers import MLRunLogger
 from ..model_handler import TFKerasModelHandler
+from ..utils import TFKerasTypes
 from .logging_callback import LoggingCallback
 
@@ -35,14 +36,16 @@ def __init__(
         context: mlrun.MLClientCtx,
         model_handler: TFKerasModelHandler,
         log_model_tag: str = "",
-        log_model_labels: Dict[str, TrackableType] = None,
-        log_model_parameters: Dict[str, TrackableType] = None,
-        log_model_extra_data: Dict[str, Union[TrackableType, Artifact]] = None,
+        log_model_labels: Dict[str, TFKerasTypes.TrackableType] = None,
+        log_model_parameters: Dict[str, TFKerasTypes.TrackableType] = None,
+        log_model_extra_data: Dict[
+            str, Union[TFKerasTypes.TrackableType, Artifact]
+        ] = None,
         dynamic_hyperparameters: Dict[
-            str, Union[List[Union[str, int]], Callable[[], TrackableType]]
+            str, Union[List[Union[str, int]], Callable[[], TFKerasTypes.TrackableType]]
         ] = None,
         static_hyperparameters: Dict[
-            str, Union[TrackableType, List[Union[str, int]]]
+            str, Union[TFKerasTypes.TrackableType, List[Union[str, int]]]
         ] = None,
         auto_log: bool = False,
     ):
@@ -120,7 +123,7 @@ def on_test_end(self, logs: dict = None):
         super(MLRunLoggingCallback, self).on_test_end(logs=logs)
 
         # Check if its part of evaluation.
If so, end the run: - if self._logger.mode == LoggerMode.EVALUATION: + if self._logger.mode == LoggingMode.EVALUATION: self._logger.log_epoch_to_context(epoch=1) self._end_run() diff --git a/mlrun/frameworks/tf_keras/callbacks/tensorboard_logging_callback.py b/mlrun/frameworks/tf_keras/callbacks/tensorboard_logging_callback.py index 7bd70cf0ce6..7ba2d0ce3ac 100644 --- a/mlrun/frameworks/tf_keras/callbacks/tensorboard_logging_callback.py +++ b/mlrun/frameworks/tf_keras/callbacks/tensorboard_logging_callback.py @@ -12,8 +12,8 @@ import mlrun -from ..._common import TrackableType from ..._dl_common.loggers import TensorboardLogger +from ..utils import TFKerasTypes from .logging_callback import LoggingCallback @@ -246,10 +246,10 @@ def __init__( Callable[[Union[Variable, Tensor]], Union[float, Tensor]] ] = None, dynamic_hyperparameters: Dict[ - str, Union[List[Union[str, int]], Callable[[], TrackableType]] + str, Union[List[Union[str, int]], Callable[[], TFKerasTypes.TrackableType]] ] = None, static_hyperparameters: Dict[ - str, Union[TrackableType, List[Union[str, int]]] + str, Union[TFKerasTypes.TrackableType, List[Union[str, int]]] ] = None, update_frequency: Union[int, str] = "epoch", auto_log: bool = False, diff --git a/mlrun/frameworks/tf_keras/mlrun_interface.py b/mlrun/frameworks/tf_keras/mlrun_interface.py index 5f368c93a46..f48441a22f3 100644 --- a/mlrun/frameworks/tf_keras/mlrun_interface.py +++ b/mlrun/frameworks/tf_keras/mlrun_interface.py @@ -1,7 +1,8 @@ import importlib import os from abc import ABC -from typing import List, Tuple, Union +from types import ModuleType +from typing import List, Set, Tuple, Union import tensorflow as tf from tensorflow import keras @@ -17,8 +18,9 @@ import mlrun -from .._common import MLRunInterface, RestorationInformation +from .._common import MLRunInterface from .callbacks import LoggingCallback +from .utils import TFKerasTypes class TFKerasMLRunInterface(MLRunInterface, ABC): @@ -32,11 +34,11 @@ class TFKerasMLRunInterface(MLRunInterface, ABC): # Attributes to be inserted so the MLRun interface will be fully enabled. _PROPERTIES = { # Logging callbacks list: - "_logging_callbacks": set(), # > type: Set[Callback] + "_logging_callbacks": set(), # type: Set[Callback] # Variable to hold the horovod module: - "_hvd": None, # > type: ModuleType + "_hvd": None, # type: ModuleType # List of all the callbacks that should only be applied on rank 0 when using horovod: - "_RANK_0_ONLY_CALLBACKS": { # > type: Set[str] + "_RANK_0_ONLY_CALLBACKS": { # type: Set[str] "LoggingCallback", "MLRunLoggingCallback", "TensorboardLoggingCallback", @@ -63,18 +65,18 @@ class TFKerasMLRunInterface(MLRunInterface, ABC): def add_interface( cls, obj: keras.Model, - restoration_information: RestorationInformation = None, + restoration: TFKerasTypes.MLRunInterfaceRestorationType = None, ): """ - Enrich the object with this interface properties, methods and functions so it will have this framework MLRun's - features. + Enrich the object with this interface properties, methods and functions, so it will have this TensorFlow.Keras + MLRun's features. :param obj: The object to enrich his interface. - :param restoration_information: Restoration information tuple as returned from 'remove_interface' in order to + :param restoration: Restoration information tuple as returned from 'remove_interface' in order to add the interface in a certain state. 
""" super(TFKerasMLRunInterface, cls).add_interface( - obj=obj, restoration_information=restoration_information + obj=obj, restoration=restoration ) def mlrun_compile(self, *args, **kwargs): diff --git a/mlrun/frameworks/tf_keras/model_handler.py b/mlrun/frameworks/tf_keras/model_handler.py index aae8a92cdb4..5e752466318 100644 --- a/mlrun/frameworks/tf_keras/model_handler.py +++ b/mlrun/frameworks/tf_keras/model_handler.py @@ -10,12 +10,12 @@ import mlrun from mlrun.artifacts import Artifact -from mlrun.data_types import ValueType from mlrun.features import Feature from .._common import without_mlrun_interface from .._dl_common import DLModelHandler from .mlrun_interface import TFKerasMLRunInterface +from .utils import TFKerasUtils class TFKerasModelHandler(DLModelHandler): @@ -115,10 +115,10 @@ def __init__( before loading the model). If the model path given is of a store object, the custom objects files will be read from the logged custom object artifact of the model. - :param save_traces: Whether or not to use functions saving (only available for the 'SavedModel' - format) for loading the model later without the custom objects dictionary. Only - from tensorflow version >= 2.4.0. Using this setting will increase the model - saving size. + :param save_traces: Whether to use functions saving (only available for the 'SavedModel' format) + for loading the model later without the custom objects dictionary. Only from + tensorflow version >= 2.4.0. Using this setting will increase the model saving + size. :raise MLRunInvalidArgumentError: In case the input was incorrect: * Model format is unrecognized. @@ -352,7 +352,7 @@ def to_onnx( input_signature = [ tf.TensorSpec( shape=input_feature.dims, - dtype=self.convert_value_type_to_tf_dtype( + dtype=TFKerasUtils.convert_value_type_to_tf_dtype( value_type=input_feature.value_type ), ) @@ -441,89 +441,6 @@ def read_outputs_from_model(self): # Set the outputs: self.set_outputs(from_sample=output_signature) - @staticmethod - def convert_value_type_to_tf_dtype( - value_type: str, - ) -> tf.DType: # TODO: Move to utils - """ - Get the 'tensorflow.DType' equivalent to the given MLRun value type. - - :param value_type: The MLRun value type to convert to tensorflow data type. - - :return: The 'tensorflow.DType' equivalent to the given MLRun data type. - - :raise MLRunInvalidArgumentError: If tensorflow is not supporting the given data type. - """ - # Initialize the mlrun to tensorflow data type conversion map: - conversion_map = { - ValueType.BOOL: tf.bool, - ValueType.INT8: tf.int8, - ValueType.INT16: tf.int16, - ValueType.INT32: tf.int32, - ValueType.INT64: tf.int64, - ValueType.UINT8: tf.uint8, - ValueType.UINT16: tf.uint16, - ValueType.UINT32: tf.uint32, - ValueType.UINT64: tf.uint64, - ValueType.BFLOAT16: tf.bfloat16, - ValueType.FLOAT16: tf.float16, - ValueType.FLOAT: tf.float32, - ValueType.DOUBLE: tf.float64, - } - - # Convert and return: - if value_type in conversion_map: - return conversion_map[value_type] - raise mlrun.errors.MLRunInvalidArgumentError( - f"The ValueType given is not supported in tensorflow: '{value_type}'." - ) - - @staticmethod - def convert_tf_dtype_to_value_type( - tf_dtype: tf.DType, - ) -> str: # TODO: Move to utils - """ - Convert the given tensorflow data type to MLRun data type. All of the CUDA supported data types are supported. 
- For more information regarding tensorflow data types, - visit: https://www.tensorflow.org/api_docs/python/tf/dtypes - - :param tf_dtype: The tensorflow data type to convert to MLRun's data type. Expected to be a 'tensorflow.dtype' - or 'str'. - - :return: The MLRun value type converted from the given data type. - - :raise MLRunInvalidArgumentError: If the tensorflow data type is not supported by MLRun. - """ - # Initialize the tensorflow to mlrun data type conversion map: - conversion_map = { - tf.bool.name: ValueType.BOOL, - tf.int8.name: ValueType.INT8, - tf.int16.name: ValueType.INT16, - tf.int32.name: ValueType.INT32, - tf.int64.name: ValueType.INT64, - tf.uint8.name: ValueType.UINT8, - tf.uint16.name: ValueType.UINT16, - tf.uint32.name: ValueType.UINT32, - tf.uint64.name: ValueType.UINT64, - tf.bfloat16.name: ValueType.BFLOAT16, - tf.half.name: ValueType.FLOAT16, - tf.float16.name: ValueType.FLOAT16, - tf.float32.name: ValueType.FLOAT, - tf.double.name: ValueType.DOUBLE, - tf.float64.name: ValueType.DOUBLE, - } - - # Parse the given tensorflow data type to string: - if isinstance(tf_dtype, tf.DType): - tf_dtype = tf_dtype.name - - # Convert and return: - if tf_dtype in conversion_map: - return conversion_map[tf_dtype] - raise mlrun.errors.MLRunInvalidArgumentError( - f"MLRun value type is not supporting the given tensorflow data type: '{tf_dtype}'." - ) - def _collect_files_from_store_object(self): """ If the model path given is of a store object, collect the needed model files into this handler for later loading @@ -627,14 +544,14 @@ def _read_sample(self, sample: IOSample) -> Feature: elif isinstance(sample, tf.TensorSpec): return Feature( name=sample.name, - value_type=TFKerasModelHandler.convert_tf_dtype_to_value_type( + value_type=TFKerasUtils.convert_tf_dtype_to_value_type( tf_dtype=sample.dtype ), dims=list(sample.shape), ) elif isinstance(sample, tf.Tensor): return Feature( - value_type=TFKerasModelHandler.convert_tf_dtype_to_value_type( + value_type=TFKerasUtils.convert_tf_dtype_to_value_type( tf_dtype=sample.dtype ), dims=list(sample.shape), diff --git a/mlrun/frameworks/tf_keras/utils.py b/mlrun/frameworks/tf_keras/utils.py new file mode 100644 index 00000000000..716419c7aa2 --- /dev/null +++ b/mlrun/frameworks/tf_keras/utils.py @@ -0,0 +1,105 @@ +import tensorflow as tf +from tensorflow import keras + +import mlrun +from mlrun.data_types import ValueType + +from .._dl_common import DLTypes, DLUtils + + +class TFKerasTypes(DLTypes): + """ + Typing hints for the TensorFlow.Keras framework. + """ + + # Every model in tf.keras must inherit from tf.keras.Model: + ModelType = keras.Model + + +class TFKerasUtils(DLUtils): + """ + Utilities functions for the TensorFlow.Keras framework. + """ + + @staticmethod + def convert_value_type_to_tf_dtype( + value_type: str, + ) -> tf.DType: + """ + Get the 'tensorflow.DType' equivalent to the given MLRun value type. + + :param value_type: The MLRun value type to convert to tensorflow data type. + + :return: The 'tensorflow.DType' equivalent to the given MLRun data type. + + :raise MLRunInvalidArgumentError: If tensorflow is not supporting the given data type. 
+ """ + # Initialize the mlrun to tensorflow data type conversion map: + conversion_map = { + ValueType.BOOL: tf.bool, + ValueType.INT8: tf.int8, + ValueType.INT16: tf.int16, + ValueType.INT32: tf.int32, + ValueType.INT64: tf.int64, + ValueType.UINT8: tf.uint8, + ValueType.UINT16: tf.uint16, + ValueType.UINT32: tf.uint32, + ValueType.UINT64: tf.uint64, + ValueType.BFLOAT16: tf.bfloat16, + ValueType.FLOAT16: tf.float16, + ValueType.FLOAT: tf.float32, + ValueType.DOUBLE: tf.float64, + } + + # Convert and return: + if value_type in conversion_map: + return conversion_map[value_type] + raise mlrun.errors.MLRunInvalidArgumentError( + f"The ValueType given is not supported in tensorflow: '{value_type}'." + ) + + @staticmethod + def convert_tf_dtype_to_value_type( + tf_dtype: tf.DType, + ) -> str: + """ + Convert the given tensorflow data type to MLRun data type. All the CUDA supported data types are supported. + For more information regarding tensorflow data types, + visit: https://www.tensorflow.org/api_docs/python/tf/dtypes + + :param tf_dtype: The tensorflow data type to convert to MLRun's data type. Expected to be a 'tensorflow.dtype' + or 'str'. + + :return: The MLRun value type converted from the given data type. + + :raise MLRunInvalidArgumentError: If the tensorflow data type is not supported by MLRun. + """ + # Initialize the tensorflow to mlrun data type conversion map: + conversion_map = { + tf.bool.name: ValueType.BOOL, + tf.int8.name: ValueType.INT8, + tf.int16.name: ValueType.INT16, + tf.int32.name: ValueType.INT32, + tf.int64.name: ValueType.INT64, + tf.uint8.name: ValueType.UINT8, + tf.uint16.name: ValueType.UINT16, + tf.uint32.name: ValueType.UINT32, + tf.uint64.name: ValueType.UINT64, + tf.bfloat16.name: ValueType.BFLOAT16, + tf.half.name: ValueType.FLOAT16, + tf.float16.name: ValueType.FLOAT16, + tf.float32.name: ValueType.FLOAT, + tf.double.name: ValueType.DOUBLE, + tf.float64.name: ValueType.DOUBLE, + } + + # Parse the given tensorflow data type to string: + if isinstance(tf_dtype, tf.DType): + tf_dtype = tf_dtype.name + + # Convert and return: + if tf_dtype in conversion_map: + return conversion_map[tf_dtype] + raise mlrun.errors.MLRunInvalidArgumentError( + f"MLRun value type is not supporting the given tensorflow data type: '{tf_dtype}'." 
+ ) diff --git a/mlrun/frameworks/xgboost/__init__.py b/mlrun/frameworks/xgboost/__init__.py index b85370ab8e0..bad2dbc9dd6 100644 --- a/mlrun/frameworks/xgboost/__init__.py +++ b/mlrun/frameworks/xgboost/__init__.py @@ -1,27 +1,18 @@ -import warnings +# flake8: noqa - this is until we take care of the F401 violations with respect to __all__ & sphinx from typing import Dict, List, Union import xgboost as xgb import mlrun -from .._common import ExtraDataType, get_plans -from .._ml_common import ( - Metric, - MetricEntry, - MetricsLibrary, - MLArtifactsLibrary, - MLPlan, - PickleModelServer, - get_metrics, -) +from .._ml_common import MLArtifactsLibrary, MLPlan, PickleModelServer +from ..sklearn import Metric, MetricsLibrary from .mlrun_interface import XGBModelMLRunInterface from .model_handler import XGBoostModelHandler -from .utils import DatasetType +from .utils import XGBoostTypes, XGBoostUtils -# Placeholders as the SciKit-Learn API is commonly used among all of the ML frameworks: +# Placeholders as the SciKit-Learn API is commonly used among all ML frameworks: XGBoostArtifactsLibrary = MLArtifactsLibrary -XGBoostMetricsLibrary = MetricsLibrary XGBoostModelServer = PickleModelServer @@ -35,16 +26,20 @@ def apply_mlrun( custom_objects_directory: str = None, context: mlrun.MLClientCtx = None, artifacts: Union[List[MLPlan], List[str], Dict[str, dict]] = None, - metrics: Union[List[Metric], List[MetricEntry], Dict[str, MetricEntry]] = None, - x_test: DatasetType = None, - y_test: DatasetType = None, - sample_set: Union[DatasetType, mlrun.DataItem, str] = None, + metrics: Union[ + List[Metric], + List[XGBoostTypes.MetricEntryType], + Dict[str, XGBoostTypes.MetricEntryType], + ] = None, + x_test: XGBoostTypes.DatasetType = None, + y_test: XGBoostTypes.DatasetType = None, + sample_set: Union[XGBoostTypes.DatasetType, mlrun.DataItem, str] = None, y_columns: Union[List[str], List[int]] = None, feature_vector: str = None, feature_weights: List[float] = None, labels: Dict[str, Union[str, int, float]] = None, parameters: Dict[str, Union[str, int, float]] = None, - extra_data: Dict[str, ExtraDataType] = None, + extra_data: Dict[str, XGBoostTypes.ExtraDataType] = None, auto_log: bool = True, **kwargs ) -> XGBoostModelHandler: @@ -114,26 +109,12 @@ def apply_mlrun( :param labels: Labels to log with the model. :param parameters: Parameters to log with the model. :param extra_data: Extra data to log with the model. - :param auto_log: Whether or not to apply MLRun's auto logging on the model. Auto logging will add - the default artifacts and metrics to the lists of artifacts and metrics. Defaulted - to True. + :param auto_log: Whether to apply MLRun's auto logging on the model. Auto logging will add the + default artifacts and metrics to the lists of artifacts and metrics. Defaulted to + True. :return: The model handler initialized with the provided model. 
""" - if "X_test" in kwargs: - warnings.warn( - "The attribute 'X_test' was changed to 'x_test' and will be removed next version.", - # TODO: Remove in mlrun 1.0.0 - PendingDeprecationWarning, - ) - x_test = kwargs["X_test"] - if "X_train" in kwargs or "y_train" in kwargs: - warnings.warn( - "The attributes 'X_train' and 'y_train' are no longer required and will be removed next version.", - # TODO: Remove in mlrun 1.0.0 - PendingDeprecationWarning, - ) - # Get the default context: if context is None: context = mlrun.get_or_create_ctx(XGBModelMLRunInterface.DEFAULT_CONTEXT_NAME) @@ -163,7 +144,7 @@ def apply_mlrun( if sample_set is not None: handler.set_sample_set(sample_set=sample_set) if y_columns is not None: - handler.set_y_columns(y_columns=y_columns) + handler.set_target_columns(target_columns=y_columns) if feature_vector is not None: handler.set_feature_vector(feature_vector=feature_vector) if feature_weights is not None: @@ -178,22 +159,17 @@ def apply_mlrun( # Add MLRun's interface to the model: XGBModelMLRunInterface.add_interface(obj=model) - # Set the handler to the model: - model.set_model_handler(model_handler=handler) - # Configure the logger: - model.configure_logger( + model.configure_logging( context=context, - plans=get_plans( - artifacts_library=XGBoostArtifactsLibrary, + plans=XGBoostArtifactsLibrary.get_plans( artifacts=artifacts, context=context, include_default=auto_log, model=model, y=y_test, ), - metrics=get_metrics( - metrics_library=XGBoostMetricsLibrary, + metrics=MetricsLibrary.get_metrics( metrics=metrics, context=context, include_default=auto_log, @@ -202,6 +178,7 @@ def apply_mlrun( ), x_test=x_test, y_test=y_test, + model_handler=handler, ) return handler diff --git a/mlrun/frameworks/xgboost/mlrun_interface.py b/mlrun/frameworks/xgboost/mlrun_interface.py index bdc6549930e..4f5c4104c2b 100644 --- a/mlrun/frameworks/xgboost/mlrun_interface.py +++ b/mlrun/frameworks/xgboost/mlrun_interface.py @@ -1,9 +1,9 @@ from abc import ABC -from .._ml_common import MLMLRunInterface +from ..sklearn import SKLearnMLRunInterface -class XGBModelMLRunInterface(MLMLRunInterface, ABC): +class XGBModelMLRunInterface(SKLearnMLRunInterface, ABC): """ Interface for adding MLRun features for XGBoost models (SciKit-Learn API models). 
""" diff --git a/mlrun/frameworks/xgboost/model_handler.py b/mlrun/frameworks/xgboost/model_handler.py index d2f8b3d1132..b7662a4fd90 100644 --- a/mlrun/frameworks/xgboost/model_handler.py +++ b/mlrun/frameworks/xgboost/model_handler.py @@ -3,14 +3,13 @@ from typing import Dict, List, Union import cloudpickle -import xgboost as xgb import mlrun from .._common import without_mlrun_interface from .._ml_common import MLModelHandler from .mlrun_interface import XGBModelMLRunInterface -from .utils import DatasetType +from .utils import XGBoostTypes class XGBoostModelHandler(MLModelHandler): @@ -31,7 +30,7 @@ class ModelFormats: def __init__( self, - model: xgb.XGBModel = None, + model: XGBoostTypes.ModelType = None, model_path: str = None, model_name: str = None, modules_map: Union[Dict[str, Union[None, str, List[str]]], str] = None, @@ -175,7 +174,7 @@ def to_onnx( self, model_name: str = None, optimize: bool = True, - input_sample: DatasetType = None, + input_sample: XGBoostTypes = None, log: bool = None, ): """ diff --git a/mlrun/frameworks/xgboost/utils.py b/mlrun/frameworks/xgboost/utils.py index 58405231158..6b14297ed76 100644 --- a/mlrun/frameworks/xgboost/utils.py +++ b/mlrun/frameworks/xgboost/utils.py @@ -2,7 +2,24 @@ import xgboost as xgb -from .._ml_common import DatasetType as MLDatasetType +from .._ml_common import MLTypes, MLUtils -# A type for all the supported dataset types: -DatasetType = Union[MLDatasetType, xgb.DMatrix] + +class XGBoostTypes(MLTypes): + """ + Typing hints for the XGBoost framework. + """ + + # A union of all XGBoost model base classes: + ModelType = Union[xgb.XGBModel, xgb.Booster] + + # A type for all the supported dataset types: + DatasetType = Union[MLTypes.DatasetType, xgb.DMatrix] + + +class XGBoostUtils(MLUtils): + """ + Utilities functions for the XGBoost framework. 
+ """ + + pass diff --git a/tests/frameworks/lgbm/__init__.py b/tests/frameworks/lgbm/__init__.py index c22f5081964..842ceefe605 100644 --- a/tests/frameworks/lgbm/__init__.py +++ b/tests/frameworks/lgbm/__init__.py @@ -1,2 +1 @@ # flake8: noqa - this is until we take care of the F401 violations with respect to __all__ & sphinx -from .functions import LGBMFunctions diff --git a/tests/frameworks/lgbm/functions.py b/tests/frameworks/lgbm/functions.py index 02e4faf49ae..101c69f3eda 100644 --- a/tests/frameworks/lgbm/functions.py +++ b/tests/frameworks/lgbm/functions.py @@ -1,52 +1,96 @@ import lightgbm as lgb -from sklearn.multiclass import OneVsRestClassifier -from sklearn.multioutput import MultiOutputClassifier, MultiOutputRegressor import mlrun -import mlrun.frameworks.lgbm as mlrun_lgbm -from mlrun.frameworks._ml_common.utils import AlgorithmFunctionality, ModelType +from mlrun.frameworks._ml_common import AlgorithmFunctionality +from mlrun.frameworks.lgbm import LGBMMLRunInterface, apply_mlrun -from ..functions import MLFunctions +from ..ml_common import N_CLASSES, get_dataset -class LGBMFunctions(MLFunctions): +def get_model( + algorithm_functionality: AlgorithmFunctionality, n_classes: int = N_CLASSES +) -> lgb.LGBMModel: + if algorithm_functionality.is_classification(): + if algorithm_functionality.is_single_output(): + return lgb.LGBMClassifier() + if algorithm_functionality.is_binary_classification(): + return lgb.LGBMClassifier( + params={"objective": "multiclass", "num_class": 2} + ) + return lgb.LGBMClassifier( + params={"objective": "multiclass", "num_class": n_classes} + ) + return lgb.LGBMRegressor() + + +def get_params( + algorithm_functionality: AlgorithmFunctionality, n_classes: int = N_CLASSES +) -> dict: + if algorithm_functionality.is_classification(): + if algorithm_functionality.is_single_output(): + return {"objective": "binary"} + if algorithm_functionality.is_binary_classification(): + return {"objective": "multiclass", "num_class": 2} + return {"objective": "multiclass", "num_class": n_classes} + return {"objective": "regression"} + + +class LightGBMHandlers: @staticmethod - def train( + def training_api_train( context: mlrun.MLClientCtx, algorithm_functionality: str, model_name: str = None ): algorithm_functionality = AlgorithmFunctionality(algorithm_functionality) - model = LGBMFunctions._get_model( - algorithm_functionality=algorithm_functionality - ) - x_train, x_test, y_train, y_test = LGBMFunctions._get_dataset( + params = get_params(algorithm_functionality=algorithm_functionality) + x_train, x_test, y_train, y_test = get_dataset( algorithm_functionality=algorithm_functionality, for_training=True ) + train_set = lgb.Dataset(x_train, y_train) + validation_set_1 = lgb.Dataset( + x_test[: len(x_test) // 2], y_test[: len(x_test) // 2] + ) + validation_set_2 = lgb.Dataset( + x_test[len(x_test) // 2 :], y_test[len(x_test) // 2 :] + ) + apply_mlrun(model_name=model_name) + + lgb.train( + params=params, + train_set=train_set, + valid_sets=[validation_set_1, validation_set_2], + ) + + # Remove the interface for next test to start from scratch: + LGBMMLRunInterface.remove_interface(obj=lgb) - mlrun_lgbm.apply_mlrun( - model=model, model_name=model_name, x_test=x_test, y_test=y_test + @staticmethod + def training_api_evaluate( + context: mlrun.MLClientCtx, algorithm_functionality: str, model_path: str + ): + # TODO: Finish handler once the evaluation is implemented. 
+ + @staticmethod + def sklearn_api_train( + context: mlrun.MLClientCtx, algorithm_functionality: str, model_name: str = None + ): + algorithm_functionality = AlgorithmFunctionality(algorithm_functionality) + model = get_model(algorithm_functionality=algorithm_functionality) + x_train, x_test, y_train, y_test = get_dataset( + algorithm_functionality=algorithm_functionality, for_training=True ) + + apply_mlrun(model=model, model_name=model_name, x_test=x_test, y_test=y_test) model.fit(x_train, y_train) @staticmethod - def evaluate( + def sklearn_api_evaluate( context: mlrun.MLClientCtx, algorithm_functionality: str, model_path: str ): algorithm_functionality = AlgorithmFunctionality(algorithm_functionality) - x, y = LGBMFunctions._get_dataset( + x, y = get_dataset( algorithm_functionality=algorithm_functionality, for_training=False ) - model_handler = mlrun_lgbm.apply_mlrun(model_path=model_path, y_test=y) + model_handler = apply_mlrun(model_path=model_path, y_test=y) model = model_handler.model model.predict(x) - - @staticmethod - def _get_model(algorithm_functionality: AlgorithmFunctionality) -> ModelType: - if algorithm_functionality.is_classification(): - if algorithm_functionality.is_single_output(): - return lgb.LGBMClassifier() - if algorithm_functionality.is_binary_classification(): - return MultiOutputClassifier(lgb.LGBMClassifier()) - return MultiOutputClassifier(OneVsRestClassifier(lgb.LGBMClassifier())) - if algorithm_functionality.is_single_output(): - return lgb.LGBMRegressor() - return MultiOutputRegressor(lgb.LGBMRegressor()) diff --git a/tests/frameworks/lgbm/test_lgbm.py b/tests/frameworks/lgbm/test_lgbm.py new file mode 100644 index 00000000000..37b7b98bd7c --- /dev/null +++ b/tests/frameworks/lgbm/test_lgbm.py @@ -0,0 +1,113 @@ +import json +from typing import List + +import pytest + +import mlrun +from mlrun.frameworks._ml_common import AlgorithmFunctionality, MLPlanStages +from mlrun.frameworks.lgbm import LGBMArtifactsLibrary +from mlrun.frameworks.sklearn import MetricsLibrary + +from ..ml_common import get_dataset +from .functions import LightGBMHandlers, get_model + +ALGORITHM_FUNCTIONALITIES = [ # type: List[str] + AlgorithmFunctionality.REGRESSION.value, + AlgorithmFunctionality.BINARY_CLASSIFICATION.value, + AlgorithmFunctionality.MULTICLASS_CLASSIFICATION.value, + # Currently, LightGBM does not support multi-output functionalities.
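The "sklearn API" flow, by contrast, wraps an initialized model object. A minimal sketch mirroring `sklearn_api_train` above (the data is illustrative):

```python
import lightgbm as lgb
import numpy as np

from mlrun.frameworks.lgbm import apply_mlrun

x, y = np.random.rand(100, 4), np.random.randint(0, 2, 100)
model = lgb.LGBMClassifier()

# apply_mlrun wraps this specific model; logging then happens through
# the patched fit/predict calls:
apply_mlrun(model=model, model_name="my-model", x_test=x, y_test=y)
model.fit(x, y)
```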
+] + + +@pytest.mark.parametrize("algorithm_functionality", ALGORITHM_FUNCTIONALITIES) +def test_training_api_training(algorithm_functionality: str): + # Run training: + train_run = mlrun.new_function().run( + artifact_path="./temp", + handler=LightGBMHandlers.training_api_train, + params={"algorithm_functionality": algorithm_functionality}, + ) + + # Print the outputs for manual validation: + print(json.dumps(train_run.outputs, indent=4)) + + # Validate artifacts (model artifact shouldn't be counted, hence the '-1'): + assert len(train_run.status.artifacts) - 1 == 2 + + # Validate results (context parameters shouldn't be counted, hence the '-1'): + assert len(train_run.status.results) - 1 == 2 + + +@pytest.mark.parametrize("algorithm_functionality", ALGORITHM_FUNCTIONALITIES) +def test_sklearn_api_training(algorithm_functionality: str): + # Run training: + train_run = mlrun.new_function().run( + artifact_path="./temp", + handler=LightGBMHandlers.sklearn_api_train, + params={"algorithm_functionality": algorithm_functionality}, + ) + + # Print the outputs for manual validation: + print(json.dumps(train_run.outputs, indent=4)) + + # Get assertion parameters: + algorithm_functionality = AlgorithmFunctionality(algorithm_functionality) + dummy_model = get_model(algorithm_functionality=algorithm_functionality) + _, dummy_y = get_dataset( + algorithm_functionality=algorithm_functionality, for_training=False + ) + expected_artifacts = LGBMArtifactsLibrary.get_plans(model=dummy_model, y=dummy_y) + expected_results = MetricsLibrary.get_metrics(model=dummy_model, y=dummy_y) + + # Validate artifacts (model artifact shouldn't be counted, hence the '-1'): + assert len(train_run.status.artifacts) - 1 == len(expected_artifacts) + + # Validate results: + assert len(train_run.status.results) == len(expected_results) + + +@pytest.mark.parametrize("algorithm_functionality", ALGORITHM_FUNCTIONALITIES) +def test_sklearn_api_evaluation(algorithm_functionality: str): + # Run training: + train_run = mlrun.new_function().run( + artifact_path="./temp2", + handler=LightGBMHandlers.sklearn_api_train, + params={"algorithm_functionality": algorithm_functionality}, + ) + + # Run evaluation (on the model that was just trained): + evaluate_run = mlrun.new_function().run( + artifact_path="./temp2", + handler=LightGBMHandlers.sklearn_api_evaluate, + params={ + "algorithm_functionality": algorithm_functionality, + "model_path": train_run.outputs["model"], + }, + ) + + # Print the outputs for manual validation: + print(json.dumps(evaluate_run.outputs, indent=4)) + + # Get assertion parameters: + algorithm_functionality = AlgorithmFunctionality(algorithm_functionality) + dummy_model = get_model(algorithm_functionality=algorithm_functionality) + _, dummy_y = get_dataset( + algorithm_functionality=algorithm_functionality, for_training=False + ) + expected_artifacts = ( + [ # Count only pre and post prediction artifacts (evaluation artifacts). 
+ plan + for plan in LGBMArtifactsLibrary.get_plans(model=dummy_model, y=dummy_y) + if not ( + plan.is_ready(stage=MLPlanStages.POST_FIT, is_probabilities=False) + or plan.is_ready(stage=MLPlanStages.PRE_FIT, is_probabilities=False) + ) + ] + ) + expected_results = MetricsLibrary.get_metrics(model=dummy_model, y=dummy_y) + + # Validate artifacts: + assert len(evaluate_run.status.artifacts) == len(expected_artifacts) + + # Validate results: + assert len(evaluate_run.status.results) == len(expected_results) diff --git a/tests/frameworks/ml_common.py b/tests/frameworks/ml_common.py new file mode 100644 index 00000000000..85889ebf30f --- /dev/null +++ b/tests/frameworks/ml_common.py @@ -0,0 +1,56 @@ +from typing import Tuple, Union + +from sklearn.datasets import ( + make_classification, + make_multilabel_classification, + make_regression, +) +from sklearn.model_selection import train_test_split + +from mlrun.frameworks._ml_common import AlgorithmFunctionality, MLTypes + +N_TARGETS = 5 +N_CLASSES = 5 +N_SAMPLES = 200 + + +def get_dataset( + algorithm_functionality: AlgorithmFunctionality, + for_training: bool, + n_targets: int = N_TARGETS, + n_classes: int = N_CLASSES, + n_samples: int = N_SAMPLES, +) -> Union[ + Tuple[MLTypes.DatasetType, MLTypes.DatasetType], + Tuple[ + MLTypes.DatasetType, + MLTypes.DatasetType, + MLTypes.DatasetType, + MLTypes.DatasetType, + ], +]: + if algorithm_functionality.is_regression(): + if algorithm_functionality.is_single_output(): + n_targets = 1 + x, y = make_regression(n_samples=n_samples, n_targets=n_targets) + stratify = None + else: + if algorithm_functionality.is_binary_classification(): + n_classes = 2 + if algorithm_functionality.is_single_output(): + x, y = make_classification( + n_samples=n_samples, n_classes=n_classes, n_informative=n_classes + ) + stratify = y + else: + x, y = make_multilabel_classification( + n_samples=n_samples, n_classes=n_classes + ) + stratify = None + + if not for_training: + return x, y + x_train, x_test, y_train, y_test = train_test_split( + x, y, test_size=0.2, stratify=stratify + ) + return x_train, x_test, y_train, y_test diff --git a/tests/frameworks/functions.py b/tests/frameworks/ml_functions.py similarity index 79% rename from tests/frameworks/functions.py rename to tests/frameworks/ml_functions.py index 91da2193fcd..d7c5c87524e 100644 --- a/tests/frameworks/functions.py +++ b/tests/frameworks/ml_functions.py @@ -9,11 +9,7 @@ from sklearn.model_selection import train_test_split import mlrun -from mlrun.frameworks._ml_common.utils import ( - AlgorithmFunctionality, - DatasetType, - ModelType, -) +from mlrun.frameworks._ml_common.utils import AlgorithmFunctionality, MLTypes class MLFunctions(ABC): @@ -33,15 +29,20 @@ def evaluate( @staticmethod @abstractmethod - def _get_model(algorithm_functionality: AlgorithmFunctionality) -> ModelType: + def get_model(algorithm_functionality: AlgorithmFunctionality) -> MLTypes.ModelType: pass @staticmethod - def _get_dataset( + def get_dataset( algorithm_functionality: AlgorithmFunctionality, for_training: bool ) -> Union[ - Tuple[DatasetType, DatasetType], - Tuple[DatasetType, DatasetType, DatasetType, DatasetType], + Tuple[MLTypes.DatasetType, MLTypes.DatasetType], + Tuple[ + MLTypes.DatasetType, + MLTypes.DatasetType, + MLTypes.DatasetType, + MLTypes.DatasetType, + ], ]: if algorithm_functionality.is_regression(): n_targets = 1 if algorithm_functionality.is_single_output() else 5 diff --git a/tests/frameworks/sklearn/functions.py b/tests/frameworks/sklearn/functions.py index 
28e9df2756e..7a94907925c 100644 --- a/tests/frameworks/sklearn/functions.py +++ b/tests/frameworks/sklearn/functions.py @@ -5,10 +5,10 @@ from sklearn.svm import SVC, LinearSVC import mlrun -import mlrun.frameworks.sklearn as mlrun_sklearn -from mlrun.frameworks._ml_common.utils import AlgorithmFunctionality, ModelType +from mlrun.frameworks._ml_common import AlgorithmFunctionality +from mlrun.frameworks.sklearn import SKLearnTypes, apply_mlrun -from ..functions import MLFunctions +from ..ml_functions import MLFunctions class SKLearnFunctions(MLFunctions): @@ -17,16 +17,14 @@ def train( context: mlrun.MLClientCtx, algorithm_functionality: str, model_name: str = None ): algorithm_functionality = AlgorithmFunctionality(algorithm_functionality) - model = SKLearnFunctions._get_model( + model = SKLearnFunctions.get_model( algorithm_functionality=algorithm_functionality ) - x_train, x_test, y_train, y_test = SKLearnFunctions._get_dataset( + x_train, x_test, y_train, y_test = SKLearnFunctions.get_dataset( algorithm_functionality=algorithm_functionality, for_training=True ) - mlrun_sklearn.apply_mlrun( - model=model, model_name=model_name, x_test=x_test, y_test=y_test - ) + apply_mlrun(model=model, model_name=model_name, x_test=x_test, y_test=y_test) model.fit(x_train, y_train) @staticmethod @@ -34,15 +32,17 @@ def evaluate( context: mlrun.MLClientCtx, algorithm_functionality: str, model_path: str ): algorithm_functionality = AlgorithmFunctionality(algorithm_functionality) - x, y = SKLearnFunctions._get_dataset( + x, y = SKLearnFunctions.get_dataset( algorithm_functionality=algorithm_functionality, for_training=False ) - model_handler = mlrun_sklearn.apply_mlrun(model_path=model_path, y_test=y) + model_handler = apply_mlrun(model_path=model_path, y_test=y) model = model_handler.model model.predict(x) @staticmethod - def _get_model(algorithm_functionality: AlgorithmFunctionality) -> ModelType: + def get_model( + algorithm_functionality: AlgorithmFunctionality, + ) -> SKLearnTypes.ModelType: if algorithm_functionality == AlgorithmFunctionality.BINARY_CLASSIFICATION: return RandomForestClassifier() if algorithm_functionality == AlgorithmFunctionality.MULTICLASS_CLASSIFICATION: diff --git a/tests/frameworks/test_ml_frameworks.py b/tests/frameworks/test_ml_frameworks.py index 787d613b096..3f526f3ad1b 100644 --- a/tests/frameworks/test_ml_frameworks.py +++ b/tests/frameworks/test_ml_frameworks.py @@ -1,65 +1,117 @@ import json +from typing import Dict, List, Tuple import pytest import mlrun -from mlrun.frameworks._ml_common.utils import AlgorithmFunctionality +from mlrun.frameworks._common import ArtifactsLibrary +from mlrun.frameworks._ml_common import AlgorithmFunctionality, MLPlanStages +from mlrun.frameworks.sklearn import MetricsLibrary, SKLearnArtifactsLibrary +from mlrun.frameworks.xgboost import XGBoostArtifactsLibrary -from .functions import MLFunctions -from .lgbm import LGBMFunctions +from .ml_functions import MLFunctions from .sklearn import SKLearnFunctions from .xgboost import XGBoostFunctions -ML_FUNCTIONS = [XGBoostFunctions, LGBMFunctions, SKLearnFunctions] -ALGORITHM_FUNCTIONALITIES = [ + +class FrameworkKeys: + XGBOOST = "xgboost" + SKLEARN = "sklearn" + + +FRAMEWORKS = { # type: Dict[str, Tuple[MLFunctions, ArtifactsLibrary, MetricsLibrary]] + FrameworkKeys.XGBOOST: ( + XGBoostFunctions, + XGBoostArtifactsLibrary, + MetricsLibrary, + ), + FrameworkKeys.SKLEARN: ( + SKLearnFunctions, + SKLearnArtifactsLibrary, + MetricsLibrary, + ), +} +FRAMEWORKS_KEYS = [ # type: List[str] + 
FrameworkKeys.XGBOOST, + FrameworkKeys.SKLEARN, +] +ALGORITHM_FUNCTIONALITIES = [ # type: List[str] algorithm_functionality.value for algorithm_functionality in AlgorithmFunctionality if "Unknown" not in algorithm_functionality.value ] -@pytest.mark.parametrize("functions", ML_FUNCTIONS) +@pytest.mark.parametrize("framework", FRAMEWORKS_KEYS) @pytest.mark.parametrize("algorithm_functionality", ALGORITHM_FUNCTIONALITIES) -def test_training(functions: MLFunctions, algorithm_functionality: str): +def test_training(framework: str, algorithm_functionality: str): + # Unpack the framework classes: + (functions, artifacts_library, metrics_library) = FRAMEWORKS[ + framework + ] # type: MLFunctions, ArtifactsLibrary, MetricsLibrary + + # Skips: if ( - (functions is LGBMFunctions or functions is XGBoostFunctions) + functions is XGBoostFunctions and algorithm_functionality == AlgorithmFunctionality.MULTI_OUTPUT_MULTICLASS_CLASSIFICATION.value ): pytest.skip( - "May be bug in lightgbm and xgboost for multiclass multi output classification." + "Multiclass multi-output classification is not supported in 'xgboost'." ) + # Run training: train_run = mlrun.new_function().run( artifact_path="./temp", handler=functions.train, params={"algorithm_functionality": algorithm_functionality}, ) + # Print the outputs for manual validation: print(json.dumps(train_run.outputs, indent=4)) - assert len(train_run.status.artifacts) >= 2 - assert len(train_run.status.results) >= 1 + # Get assertion parameters: + algorithm_functionality = AlgorithmFunctionality(algorithm_functionality) + dummy_model = functions.get_model(algorithm_functionality=algorithm_functionality) + _, dummy_y = functions.get_dataset( + algorithm_functionality=algorithm_functionality, for_training=False + ) + expected_artifacts = artifacts_library.get_plans(model=dummy_model, y=dummy_y) + expected_results = metrics_library.get_metrics(model=dummy_model, y=dummy_y) + + # Validate artifacts (model artifact shouldn't be counted, hence the '-1'): + assert len(train_run.status.artifacts) - 1 == len(expected_artifacts) + # Validate results: + assert len(train_run.status.results) == len(expected_results) -@pytest.mark.parametrize("functions", ML_FUNCTIONS) + +@pytest.mark.parametrize("framework", FRAMEWORKS_KEYS) @pytest.mark.parametrize("algorithm_functionality", ALGORITHM_FUNCTIONALITIES) -def test_evaluation(functions: MLFunctions, algorithm_functionality: str): +def test_evaluation(framework: str, algorithm_functionality: str): + # Unpack the framework classes: + (functions, artifacts_library, metrics_library) = FRAMEWORKS[ + framework + ] # type: MLFunctions, ArtifactsLibrary, MetricsLibrary + + # Skips: if ( - (functions is LGBMFunctions or functions is XGBoostFunctions) + functions is XGBoostFunctions and algorithm_functionality == AlgorithmFunctionality.MULTI_OUTPUT_MULTICLASS_CLASSIFICATION.value ): pytest.skip( - "May be bug in lightgbm and xgboost for multiclass multi output classification." + "Multiclass multi-output classification is not supported in 'xgboost'."
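The assertion baselines in these tests come from the libraries themselves: given a dummy model and labels, `get_plans` and `get_metrics` report which default artifacts and metrics a run should produce. A standalone sketch of that derivation, using the sklearn flavor with an illustrative model and random labels:

```python
import numpy as np
from sklearn.ensemble import RandomForestClassifier

from mlrun.frameworks.sklearn import MetricsLibrary, SKLearnArtifactsLibrary

# A dummy model and labels are enough for the libraries to resolve their
# default plans and metrics (call signatures as used in this diff):
dummy_model = RandomForestClassifier()
dummy_y = np.random.randint(0, 2, 50)

expected_artifacts = SKLearnArtifactsLibrary.get_plans(model=dummy_model, y=dummy_y)
expected_results = MetricsLibrary.get_metrics(model=dummy_model, y=dummy_y)
print(len(expected_artifacts), len(expected_results))
```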
) + # Run training: train_run = mlrun.new_function().run( artifact_path="./temp2", handler=functions.train, params={"algorithm_functionality": algorithm_functionality}, ) + # Run evaluation (on the model that was just trained): evaluate_run = mlrun.new_function().run( artifact_path="./temp2", handler=functions.evaluate, @@ -69,7 +121,27 @@ def test_evaluation(functions: MLFunctions, algorithm_functionality: str): }, ) + # Print the outputs for manual validation: print(json.dumps(evaluate_run.outputs, indent=4)) - assert len(evaluate_run.status.artifacts) >= 1 - assert len(evaluate_run.status.results) >= 1 + # Get assertion parameters: + algorithm_functionality = AlgorithmFunctionality(algorithm_functionality) + dummy_model = functions.get_model(algorithm_functionality=algorithm_functionality) + _, dummy_y = functions.get_dataset( + algorithm_functionality=algorithm_functionality, for_training=False + ) + expected_artifacts = [ + plan + for plan in artifacts_library.get_plans(model=dummy_model, y=dummy_y) + if not ( # Count only pre and post prediction artifacts (evaluation artifacts). + plan.is_ready(stage=MLPlanStages.POST_FIT, is_probabilities=False) + or plan.is_ready(stage=MLPlanStages.PRE_FIT, is_probabilities=False) + ) + ] + expected_results = metrics_library.get_metrics(model=dummy_model, y=dummy_y) + + # Validate artifacts: + assert len(evaluate_run.status.artifacts) == len(expected_artifacts) + + # Validate results: + assert len(evaluate_run.status.results) == len(expected_results) diff --git a/tests/frameworks/xgboost/functions.py b/tests/frameworks/xgboost/functions.py index 16da6b741ec..a12a947c076 100644 --- a/tests/frameworks/xgboost/functions.py +++ b/tests/frameworks/xgboost/functions.py @@ -3,10 +3,10 @@ from sklearn.multioutput import MultiOutputClassifier, MultiOutputRegressor import mlrun -import mlrun.frameworks.xgboost as mlrun_xgboost -from mlrun.frameworks._ml_common.utils import AlgorithmFunctionality, ModelType +from mlrun.frameworks._ml_common import AlgorithmFunctionality +from mlrun.frameworks.xgboost import XGBoostTypes, apply_mlrun -from ..functions import MLFunctions +from ..ml_functions import MLFunctions class XGBoostFunctions(MLFunctions): @@ -15,16 +15,14 @@ def train( context: mlrun.MLClientCtx, algorithm_functionality: str, model_name: str = None ): algorithm_functionality = AlgorithmFunctionality(algorithm_functionality) - model = XGBoostFunctions._get_model( + model = XGBoostFunctions.get_model( algorithm_functionality=algorithm_functionality ) - x_train, x_test, y_train, y_test = XGBoostFunctions._get_dataset( + x_train, x_test, y_train, y_test = XGBoostFunctions.get_dataset( algorithm_functionality=algorithm_functionality, for_training=True ) - mlrun_xgboost.apply_mlrun( - model=model, model_name=model_name, x_test=x_test, y_test=y_test - ) + apply_mlrun(model=model, model_name=model_name, x_test=x_test, y_test=y_test) model.fit(x_train, y_train) @staticmethod @@ -32,15 +30,17 @@ def evaluate( context: mlrun.MLClientCtx, algorithm_functionality: str, model_path: str ): algorithm_functionality = AlgorithmFunctionality(algorithm_functionality) - x, y = XGBoostFunctions._get_dataset( + x, y = XGBoostFunctions.get_dataset( algorithm_functionality=algorithm_functionality, for_training=False ) - model_handler = mlrun_xgboost.apply_mlrun(model_path=model_path, y_test=y) + model_handler = apply_mlrun(model_path=model_path, y_test=y) model = model_handler.model model.predict(x) @staticmethod - def _get_model(algorithm_functionality: AlgorithmFunctionality) 
-> ModelType: + def get_model( + algorithm_functionality: AlgorithmFunctionality, + ) -> XGBoostTypes.ModelType: if algorithm_functionality.is_classification(): if algorithm_functionality.is_single_output(): return xgb.XGBClassifier()
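Finally, a minimal sketch of how any of the handlers above is exercised end to end through MLRun, as the parametrized tests do (the artifact path and chosen functionality are illustrative):

```python
import json

import mlrun
from mlrun.frameworks._ml_common import AlgorithmFunctionality

from tests.frameworks.xgboost import XGBoostFunctions

# Run the training handler as an MLRun function and inspect its outputs:
train_run = mlrun.new_function().run(
    artifact_path="./temp",
    handler=XGBoostFunctions.train,
    params={
        "algorithm_functionality": AlgorithmFunctionality.BINARY_CLASSIFICATION.value
    },
)
print(json.dumps(train_run.outputs, indent=4))
```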