From e822713ec2bbb8b20000162598a33b3af014fd33 Mon Sep 17 00:00:00 2001 From: Raymond Pan Date: Mon, 3 Feb 2025 23:48:01 -0500 Subject: [PATCH 01/28] combined core --- demo.py | 17 ++ zephyr_ml/core.py | 284 ++++---------------------------- zephyr_ml/core_prev.py | 270 +++++++++++++++++++++++++++++++ zephyr_ml/entityset.py | 174 ++++++++++++-------- zephyr_ml/metadata.py | 358 +++++++++++++++++++++-------------------- 5 files changed, 607 insertions(+), 496 deletions(-) create mode 100644 demo.py create mode 100644 zephyr_ml/core_prev.py diff --git a/demo.py b/demo.py new file mode 100644 index 0000000..1050458 --- /dev/null +++ b/demo.py @@ -0,0 +1,17 @@ +from os import path +import pandas as pd +from zephyr_ml import create_scada_entityset + +data_path = "notebooks/data" + +data = { + "turbines": pd.read_csv(path.join(data_path, "turbines.csv")), + "alarms": pd.read_csv(path.join(data_path, "alarms.csv")), + "work_orders": pd.read_csv(path.join(data_path, "work_orders.csv")), + "stoppages": pd.read_csv(path.join(data_path, "stoppages.csv")), + "notifications": pd.read_csv(path.join(data_path, "notifications.csv")), + "scada": pd.read_csv(path.join(data_path, "scada.csv")), +} +scada_es = create_scada_entityset(data) + +print(scada_es) diff --git a/zephyr_ml/core.py b/zephyr_ml/core.py index e89cb3b..9666fb1 100644 --- a/zephyr_ml/core.py +++ b/zephyr_ml/core.py @@ -1,270 +1,44 @@ -"""Zephyr Core module. - -This module defines the Zephyr Class, which is responsible for the -model training and inference with the underlying MLBlocks pipelines. 
-""" -import json -import logging -import os -import pickle -from functools import partial -from typing import List, Union - -import numpy as np -import pandas as pd -from mlblocks import MLPipeline -from sklearn import metrics - -LOGGER = logging.getLogger(__name__) - - -_REGRESSION_METRICS = { - 'mae': metrics.mean_absolute_error, - 'mse': metrics.mean_squared_error, - 'r2': metrics.r2_score, -} - -_CLASSIFICATION_METRICS = { - 'accuracy': metrics.accuracy_score, - 'f1': metrics.f1_score, - 'recall': metrics.recall_score, - 'precision': metrics.precision_score, -} - -METRICS = _CLASSIFICATION_METRICS +from zephyr_ml.metadata import get_default_es_type_kwargs +from zephyr_ml.entityset import get_create_entityset_functions class Zephyr: - """Zephyr Class. - - The Zephyr Class provides the main machine learning pipeline functionalities - of Zephyr and is responsible for the interaction with the underlying - MLBlocks pipelines. - - Args: - pipeline (str, dict or MLPipeline): - Pipeline to use. It can be passed as: - * An ``str`` with a path to a JSON file. - * An ``str`` with the name of a registered pipeline. - * An ``MLPipeline`` instance. - * A ``dict`` with an ``MLPipeline`` specification. - hyperparameters (dict): - Additional hyperparameters to set to the Pipeline. 
- """ - DEFAULT_PIPELINE = 'xgb_classifier' - - def _get_mlpipeline(self): - pipeline = self._pipeline - if isinstance(pipeline, str) and os.path.isfile(pipeline): - with open(pipeline) as json_file: - pipeline = json.load(json_file) - - mlpipeline = MLPipeline(pipeline) - if self._hyperparameters: - mlpipeline.set_hyperparameters(self._hyperparameters) - - return mlpipeline - - def __init__(self, pipeline: Union[str, dict, MLPipeline] = None, - hyperparameters: dict = None): - self._pipeline = pipeline or self.DEFAULT_PIPELINE - self._hyperparameters = hyperparameters - self._mlpipeline = self._get_mlpipeline() - self._fitted = False - - def __eq__(self, other): - return ( - isinstance(other, self.__class__) and - self._pipeline == other._pipeline and - self._hyperparameters == other._hyperparameters and - self._fitted == other._fitted - ) - - def _get_outputs_spec(self, default=True): - outputs_spec = ["default"] if default else [] - - try: - visual_names = self._mlpipeline.get_output_names('visual') - outputs_spec.append('visual') - except ValueError: - visual_names = [] - - return outputs_spec, visual_names - - def fit(self, X: pd.DataFrame, y: Union[pd.Series, np.ndarray], - visual: bool = False, **kwargs): - """Fit the pipeline to the given data. - - Args: - X (DataFrame): - Input data, passed as a ``pandas.DataFrame`` containing - the feature matrix. - y (Series or ndarray): - Target data, passed as a ``pandas.Series`` or ``numpy.ndarray`` - containing the target values. - visual (bool): - If ``True``, capture the ``visual`` named output from the - ``MLPipeline`` and return it as an output. 
- """ - if not self._fitted: - self._mlpipeline = self._get_mlpipeline() - - if visual: - outputs_spec, visual_names = self._get_outputs_spec(False) - else: - outputs_spec = None - - outputs = self._mlpipeline.fit(X, y, output_=outputs_spec, **kwargs) - self._fitted = True - - if visual and outputs is not None: - return dict(zip(visual_names, outputs)) - def predict(self, X: pd.DataFrame, visual: bool = False, **kwargs) -> pd.Series: - """Predict the pipeline to the given data. + def __init__(self, pipeline, hyperparameters): + self.pipeline = pipeline + self.hyperparameters = hyperparameters - Args: - X (DataFrame): - Input data, passed as a ``pandas.DataFrame`` containing - the feature matrix. - visual (bool): - If ``True``, capture the ``visual`` named output from the - ``MLPipeline`` and return it as an output. - - Returns: - Series or ndarray: - Predictions to the input data. + def get_entityset_types(self): """ - if visual: - outputs_spec, visual_names = self._get_outputs_spec() - else: - outputs_spec = 'default' - - outputs = self._mlpipeline.predict(X, output_=outputs_spec, **kwargs) - - if visual and visual_names: - prediction = outputs[0] - return prediction, dict(zip(visual_names, outputs[-len(visual_names):])) - - return outputs - - def fit_predict(self, X: pd.DataFrame, y: Union[pd.Series, np.ndarray], - **kwargs) -> pd.Series: - """Fit the pipeline to the data and then predict targets. - - This method is functionally equivalent to calling ``fit(X, y)`` - and later on ``predict(X)`` but with the difference that - here the ``MLPipeline`` is called only once, using its ``fit`` - method, and the output is directly captured without having - to execute the whole pipeline again during the ``predict`` phase. - - Args: - X (DataFrame): - Input data, passed as a ``pandas.DataFrame`` containing - the feature matrix. - y (Series or ndarray): - Target data, passed as a ``pandas.Series`` or ``numpy.ndarray`` - containing the target values. 
- - Returns: - Series or ndarray: - Predictions to the input data. + Returns the supported entityset types (PI/SCADA) and the required dataframes and their columns """ - if not self._fitted: - self._mlpipeline = self._get_mlpipeline() - - result = self._mlpipeline.fit(X, y, output_='default', **kwargs) - self._fitted = True - - return result + return get_default_es_type_kwargs() - def evaluate(self, X: pd.DataFrame, y: Union[pd.Series, np.ndarray], fit: bool = False, - train_X: pd.DataFrame = None, train_y: Union[pd.Series, np.ndarray] = None, - metrics: List[str] = METRICS) -> pd.Series: - """Evaluate the performance of the pipeline. - - Args: - X (DataFrame): - Input data, passed as a ``pandas.DataFrame`` containing - the feature matrix. - y (Series or ndarray): - Target data, passed as a ``pandas.Series`` or ``numpy.ndarray`` - containing the target values. - fit (bool): - Whether to fit the pipeline before evaluating it. - Defaults to ``False``. - train_X (DataFrame): - Training data, passed as a ``pandas.DataFrame`` containing - the feature matrix. - If not given, the pipeline is fitted on ``X``. - train_y (Series or ndarray): - Target data used for training, passed as a ``pandas.Series`` or - ``numpy.ndarray`` containing the target values. - metrics (list): - List of metrics to used passed as a list of strings. - If not given, it defaults to all the metrics. - - Returns: - Series: - ``pandas.Series`` containing one element for each - metric applied, with the metric name as index. 
+ def create_entityset(self, data_paths, es_type="scada", new_kwargs_mapping=None): """ - if not fit: - method = self._mlpipeline.predict - else: - if not self._fitted: - mlpipeline = self._get_mlpipeline() - else: - mlpipeline = self._mlpipeline - - if train_X is not None and train_y is not None: - # fit first and then predict - mlpipeline.fit(train_X, train_y) - method = mlpipeline.predict - else: - # fit and predict at once - method = partial(mlpipeline.fit, y=y, output_='default') - - result = method(X) - - scores = { - metric: METRICS[metric](y, result) - for metric in metrics - } - - return pd.Series(scores) - - def save(self, path: str): - """Save this object using pickle. + Generate an entityset Args: - path (str): - Path to the file where the serialization of - this object will be stored. + data_paths ( dict ): Dictionary mapping entity names to the pandas + dataframe for that that entity + es_type (str): type of signal data , either SCADA or PI + new_kwargs_mapping ( dict ): Updated keyword arguments to be used + during entityset creation + Returns : + featuretools . EntitySet that contains the data passed in and + their relationships """ - os.makedirs(os.path.dirname(path), exist_ok=True) - with open(path, 'wb') as pickle_file: - pickle.dump(self, pickle_file) - - @classmethod - def load(cls, path: str): - """Load an Zephyr instance from a pickle file. + create_entityset_functions = get_create_entityset_functions() + if es_type not in create_entityset_functions: + raise ValueError("Unrecognized es_type argument: {}".format(es_type)) - Args: - path (str): - Path to the file where the instance has been - previously serialized. - - Returns: - Orion + _create_entityset = create_entityset_functions[es_type] + entityset = _create_entityset(data_paths, new_kwargs_mapping) + self.entityset = entityset + return self.entityset - Raises: - ValueError: - If the serialized object is not a Zephyr instance. 
- """ - with open(path, 'rb') as pickle_file: - zephyr = pickle.load(pickle_file) - if not isinstance(zephyr, cls): - raise ValueError('Serialized object is not a Zephyr instance') + def get_entityset(self): + if self.entityset is None: + raise - return zephyr + return self.entityset diff --git a/zephyr_ml/core_prev.py b/zephyr_ml/core_prev.py new file mode 100644 index 0000000..e89cb3b --- /dev/null +++ b/zephyr_ml/core_prev.py @@ -0,0 +1,270 @@ +"""Zephyr Core module. + +This module defines the Zephyr Class, which is responsible for the +model training and inference with the underlying MLBlocks pipelines. +""" +import json +import logging +import os +import pickle +from functools import partial +from typing import List, Union + +import numpy as np +import pandas as pd +from mlblocks import MLPipeline +from sklearn import metrics + +LOGGER = logging.getLogger(__name__) + + +_REGRESSION_METRICS = { + 'mae': metrics.mean_absolute_error, + 'mse': metrics.mean_squared_error, + 'r2': metrics.r2_score, +} + +_CLASSIFICATION_METRICS = { + 'accuracy': metrics.accuracy_score, + 'f1': metrics.f1_score, + 'recall': metrics.recall_score, + 'precision': metrics.precision_score, +} + +METRICS = _CLASSIFICATION_METRICS + + +class Zephyr: + """Zephyr Class. + + The Zephyr Class provides the main machine learning pipeline functionalities + of Zephyr and is responsible for the interaction with the underlying + MLBlocks pipelines. + + Args: + pipeline (str, dict or MLPipeline): + Pipeline to use. It can be passed as: + * An ``str`` with a path to a JSON file. + * An ``str`` with the name of a registered pipeline. + * An ``MLPipeline`` instance. + * A ``dict`` with an ``MLPipeline`` specification. + hyperparameters (dict): + Additional hyperparameters to set to the Pipeline. 
+ """ + DEFAULT_PIPELINE = 'xgb_classifier' + + def _get_mlpipeline(self): + pipeline = self._pipeline + if isinstance(pipeline, str) and os.path.isfile(pipeline): + with open(pipeline) as json_file: + pipeline = json.load(json_file) + + mlpipeline = MLPipeline(pipeline) + if self._hyperparameters: + mlpipeline.set_hyperparameters(self._hyperparameters) + + return mlpipeline + + def __init__(self, pipeline: Union[str, dict, MLPipeline] = None, + hyperparameters: dict = None): + self._pipeline = pipeline or self.DEFAULT_PIPELINE + self._hyperparameters = hyperparameters + self._mlpipeline = self._get_mlpipeline() + self._fitted = False + + def __eq__(self, other): + return ( + isinstance(other, self.__class__) and + self._pipeline == other._pipeline and + self._hyperparameters == other._hyperparameters and + self._fitted == other._fitted + ) + + def _get_outputs_spec(self, default=True): + outputs_spec = ["default"] if default else [] + + try: + visual_names = self._mlpipeline.get_output_names('visual') + outputs_spec.append('visual') + except ValueError: + visual_names = [] + + return outputs_spec, visual_names + + def fit(self, X: pd.DataFrame, y: Union[pd.Series, np.ndarray], + visual: bool = False, **kwargs): + """Fit the pipeline to the given data. + + Args: + X (DataFrame): + Input data, passed as a ``pandas.DataFrame`` containing + the feature matrix. + y (Series or ndarray): + Target data, passed as a ``pandas.Series`` or ``numpy.ndarray`` + containing the target values. + visual (bool): + If ``True``, capture the ``visual`` named output from the + ``MLPipeline`` and return it as an output. 
+ """ + if not self._fitted: + self._mlpipeline = self._get_mlpipeline() + + if visual: + outputs_spec, visual_names = self._get_outputs_spec(False) + else: + outputs_spec = None + + outputs = self._mlpipeline.fit(X, y, output_=outputs_spec, **kwargs) + self._fitted = True + + if visual and outputs is not None: + return dict(zip(visual_names, outputs)) + + def predict(self, X: pd.DataFrame, visual: bool = False, **kwargs) -> pd.Series: + """Predict the pipeline to the given data. + + Args: + X (DataFrame): + Input data, passed as a ``pandas.DataFrame`` containing + the feature matrix. + visual (bool): + If ``True``, capture the ``visual`` named output from the + ``MLPipeline`` and return it as an output. + + Returns: + Series or ndarray: + Predictions to the input data. + """ + if visual: + outputs_spec, visual_names = self._get_outputs_spec() + else: + outputs_spec = 'default' + + outputs = self._mlpipeline.predict(X, output_=outputs_spec, **kwargs) + + if visual and visual_names: + prediction = outputs[0] + return prediction, dict(zip(visual_names, outputs[-len(visual_names):])) + + return outputs + + def fit_predict(self, X: pd.DataFrame, y: Union[pd.Series, np.ndarray], + **kwargs) -> pd.Series: + """Fit the pipeline to the data and then predict targets. + + This method is functionally equivalent to calling ``fit(X, y)`` + and later on ``predict(X)`` but with the difference that + here the ``MLPipeline`` is called only once, using its ``fit`` + method, and the output is directly captured without having + to execute the whole pipeline again during the ``predict`` phase. + + Args: + X (DataFrame): + Input data, passed as a ``pandas.DataFrame`` containing + the feature matrix. + y (Series or ndarray): + Target data, passed as a ``pandas.Series`` or ``numpy.ndarray`` + containing the target values. + + Returns: + Series or ndarray: + Predictions to the input data. 
+ """ + if not self._fitted: + self._mlpipeline = self._get_mlpipeline() + + result = self._mlpipeline.fit(X, y, output_='default', **kwargs) + self._fitted = True + + return result + + def evaluate(self, X: pd.DataFrame, y: Union[pd.Series, np.ndarray], fit: bool = False, + train_X: pd.DataFrame = None, train_y: Union[pd.Series, np.ndarray] = None, + metrics: List[str] = METRICS) -> pd.Series: + """Evaluate the performance of the pipeline. + + Args: + X (DataFrame): + Input data, passed as a ``pandas.DataFrame`` containing + the feature matrix. + y (Series or ndarray): + Target data, passed as a ``pandas.Series`` or ``numpy.ndarray`` + containing the target values. + fit (bool): + Whether to fit the pipeline before evaluating it. + Defaults to ``False``. + train_X (DataFrame): + Training data, passed as a ``pandas.DataFrame`` containing + the feature matrix. + If not given, the pipeline is fitted on ``X``. + train_y (Series or ndarray): + Target data used for training, passed as a ``pandas.Series`` or + ``numpy.ndarray`` containing the target values. + metrics (list): + List of metrics to used passed as a list of strings. + If not given, it defaults to all the metrics. + + Returns: + Series: + ``pandas.Series`` containing one element for each + metric applied, with the metric name as index. + """ + if not fit: + method = self._mlpipeline.predict + else: + if not self._fitted: + mlpipeline = self._get_mlpipeline() + else: + mlpipeline = self._mlpipeline + + if train_X is not None and train_y is not None: + # fit first and then predict + mlpipeline.fit(train_X, train_y) + method = mlpipeline.predict + else: + # fit and predict at once + method = partial(mlpipeline.fit, y=y, output_='default') + + result = method(X) + + scores = { + metric: METRICS[metric](y, result) + for metric in metrics + } + + return pd.Series(scores) + + def save(self, path: str): + """Save this object using pickle. 
+ + Args: + path (str): + Path to the file where the serialization of + this object will be stored. + """ + os.makedirs(os.path.dirname(path), exist_ok=True) + with open(path, 'wb') as pickle_file: + pickle.dump(self, pickle_file) + + @classmethod + def load(cls, path: str): + """Load an Zephyr instance from a pickle file. + + Args: + path (str): + Path to the file where the instance has been + previously serialized. + + Returns: + Orion + + Raises: + ValueError: + If the serialized object is not a Zephyr instance. + """ + with open(path, 'rb') as pickle_file: + zephyr = pickle.load(pickle_file) + if not isinstance(zephyr, cls): + raise ValueError('Serialized object is not a Zephyr instance') + + return zephyr diff --git a/zephyr_ml/entityset.py b/zephyr_ml/entityset.py index 3d14c66..f3a8ed4 100644 --- a/zephyr_ml/entityset.py +++ b/zephyr_ml/entityset.py @@ -8,30 +8,27 @@ def _create_entityset(entities, es_type, es_kwargs): # filter out stated logical types for missing columns for entity, df in entities.items(): - es_kwargs[entity]['logical_types'] = { - col: t for col, t in es_kwargs[entity]['logical_types'].items() + es_kwargs[entity]["logical_types"] = { + col: t + for col, t in es_kwargs[entity]["logical_types"].items() if col in df.columns } - turbines_index = es_kwargs['turbines']['index'] - work_orders_index = es_kwargs['work_orders']['index'] + turbines_index = es_kwargs["turbines"]["index"] + work_orders_index = es_kwargs["work_orders"]["index"] relationships = [ - ('turbines', turbines_index, 'alarms', turbines_index), - ('turbines', turbines_index, 'stoppages', turbines_index), - ('turbines', turbines_index, 'work_orders', turbines_index), - ('turbines', turbines_index, es_type, turbines_index), - ('work_orders', work_orders_index, 'notifications', work_orders_index) + ("turbines", turbines_index, "alarms", turbines_index), + ("turbines", turbines_index, "stoppages", turbines_index), + ("turbines", turbines_index, "work_orders", turbines_index), + 
("turbines", turbines_index, es_type, turbines_index), + ("work_orders", work_orders_index, "notifications", work_orders_index), ] es = ft.EntitySet() for name, df in entities.items(): - es.add_dataframe( - dataframe_name=name, - dataframe=df, - **es_kwargs[name] - ) + es.add_dataframe(dataframe_name=name, dataframe=df, **es_kwargs[name]) for relationship in relationships: parent_df, parent_column, child_df, child_column = relationship @@ -41,134 +38,179 @@ def _create_entityset(entities, es_type, es_kwargs): def create_pidata_entityset(dfs, new_kwargs_mapping=None): - '''Generate an entityset for PI data datasets + """Generate an entityset for PI data datasets Args: data_paths (dict): Dictionary mapping entity names ('alarms', 'notifications', 'stoppages', 'work_orders', 'pidata', 'turbines') to the pandas dataframe for that entity. **kwargs: Updated keyword arguments to be used during entityset creation - ''' - entity_kwargs = get_mapped_kwargs('pidata', new_kwargs_mapping) - _validate_data(dfs, 'pidata', entity_kwargs) + """ + entity_kwargs = get_mapped_kwargs("pidata", new_kwargs_mapping) + _validate_data(dfs, "pidata", entity_kwargs) - es = _create_entityset(dfs, 'pidata', entity_kwargs) - es.id = 'PI data' + es = _create_entityset(dfs, "pidata", entity_kwargs) + es.id = "PI data" return es def create_scada_entityset(dfs, new_kwargs_mapping=None): - '''Generate an entityset for SCADA data datasets + """Generate an entityset for SCADA data datasets Args: data_paths (dict): Dictionary mapping entity names ('alarms', 'notifications', 'stoppages', 'work_orders', 'scada', 'turbines') to the pandas dataframe for that entity. 
- ''' - entity_kwargs = get_mapped_kwargs('scada', new_kwargs_mapping) - _validate_data(dfs, 'scada', entity_kwargs) + """ + entity_kwargs = get_mapped_kwargs("scada", new_kwargs_mapping) + _validate_data(dfs, "scada", entity_kwargs) - es = _create_entityset(dfs, 'scada', entity_kwargs) - es.id = 'SCADA data' + es = _create_entityset(dfs, "scada", entity_kwargs) + es.id = "SCADA data" return es def create_vibrations_entityset(dfs, new_kwargs_mapping=None): - '''Generate an entityset for Vibrations data datasets + """Generate an entityset for Vibrations data datasets Args: data_paths (dict): Dictionary mapping entity names ('alarms', 'notifications', 'stoppages', 'work_orders', 'vibrations', 'turbines') to the pandas dataframe for that entity. Optionally 'pidata' and 'scada' can be included. - ''' - entities = ['vibrations'] + """ + entities = ["vibrations"] pidata_kwargs, scada_kwargs = {}, {} - if 'pidata' in dfs: - pidata_kwargs = get_mapped_kwargs('pidata', new_kwargs_mapping) - entities.append('pidata') - if 'scada' in dfs: - pidata_kwargs = get_mapped_kwargs('scada', new_kwargs_mapping) - entities.append('scada') + if "pidata" in dfs: + pidata_kwargs = get_mapped_kwargs("pidata", new_kwargs_mapping) + entities.append("pidata") + if "scada" in dfs: + scada_kwargs = get_mapped_kwargs("scada", new_kwargs_mapping) + entities.append("scada") entity_kwargs = { **pidata_kwargs, **scada_kwargs, - **get_mapped_kwargs('vibrations', new_kwargs_mapping), + **get_mapped_kwargs("vibrations", new_kwargs_mapping), } _validate_data(dfs, entities, entity_kwargs) - es = _create_entityset(dfs, 'vibrations', entity_kwargs) - es.id = 'Vibrations data' + es = _create_entityset(dfs, "vibrations", entity_kwargs) + es.id = "Vibrations data" return es def _validate_data(dfs, es_type, es_kwargs): - '''Validate data by checking for required columns in each entity - ''' + """Validate data by checking for required columns in each entity""" if not isinstance(es_type, list): es_type = 
[es_type] - entities = set(chain( - ['alarms', 'stoppages', 'work_orders', 'notifications', 'turbines', *es_type] - )) + entities = set( + chain( + [ + "alarms", + "stoppages", + "work_orders", + "notifications", + "turbines", + *es_type, + ] + ) + ) if set(dfs.keys()) != entities: missing = entities.difference(set(dfs.keys())) extra = set(dfs.keys()).difference(entities) msg = [] if missing: - msg.append('Missing dataframes for entities {}.'.format(', '.join(missing))) + msg.append("Missing dataframes for entities {}.".format(", ".join(missing))) if extra: - msg.append('Unrecognized entities {} included in dfs.'.format(', '.join(extra))) + msg.append( + "Unrecognized entities {} included in dfs.".format(", ".join(extra)) + ) - raise ValueError(' '.join(msg)) + raise ValueError(" ".join(msg)) - turbines_index = es_kwargs['turbines']['index'] - work_orders_index = es_kwargs['work_orders']['index'] + turbines_index = es_kwargs["turbines"]["index"] + work_orders_index = es_kwargs["work_orders"]["index"] - if work_orders_index not in dfs['work_orders'].columns: + if work_orders_index not in dfs["work_orders"].columns: raise ValueError( - 'Expected index column "{}" missing from work_orders entity'.format(work_orders_index)) + 'Expected index column "{}" missing from work_orders entity'.format( + work_orders_index + ) + ) - if work_orders_index not in dfs['notifications'].columns: + if work_orders_index not in dfs["notifications"].columns: raise ValueError( - 'Expected column "{}" missing from notifications entity'.format(work_orders_index)) + 'Expected column "{}" missing from notifications entity'.format( + work_orders_index + ) + ) - if not dfs['work_orders'][work_orders_index].is_unique: - raise ValueError('Expected index column "{}" of work_orders entity is not ' - 'unique'.format(work_orders_index)) + if not dfs["work_orders"][work_orders_index].is_unique: + raise ValueError( + 'Expected index column "{}" of work_orders entity is not ' + 
"unique".format(work_orders_index) + ) - if turbines_index not in dfs['turbines'].columns: + if turbines_index not in dfs["turbines"].columns: raise ValueError( - 'Expected index column "{}" missing from turbines entity'.format(turbines_index)) + 'Expected index column "{}" missing from turbines entity'.format( + turbines_index + ) + ) - if not dfs['turbines'][turbines_index].is_unique: + if not dfs["turbines"][turbines_index].is_unique: raise ValueError( - 'Expected index column "{}" of turbines entity is not unique.'.format(turbines_index)) + 'Expected index column "{}" of turbines entity is not unique.'.format( + turbines_index + ) + ) for entity, df in dfs.items(): if turbines_index not in df.columns: raise ValueError( 'Turbines index column "{}" missing from data for {} entity'.format( - turbines_index, entity)) + turbines_index, entity + ) + ) - time_index = es_kwargs[entity].get('time_index', False) + time_index = es_kwargs[entity].get("time_index", False) if time_index and time_index not in df.columns: raise ValueError( 'Missing time index column "{}" from {} entity'.format( - time_index, entity)) + time_index, entity + ) + ) - secondary_time_indices = es_kwargs[entity].get('secondary_time_index', {}) + secondary_time_indices = es_kwargs[entity].get("secondary_time_index", {}) for time_index, cols in secondary_time_indices.items(): if time_index not in df.columns: raise ValueError( 'Secondary time index "{}" missing from {} entity'.format( - time_index, entity)) + time_index, entity + ) + ) for col in cols: if col not in df.columns: - raise ValueError(('Column "{}" associated with secondary time index "{}" ' - 'missing from {} entity').format(col, time_index, entity)) + raise ValueError( + ( + 'Column "{}" associated with secondary time index "{}" ' + "missing from {} entity" + ).format(col, time_index, entity) + ) + + +CREATE_ENTITYSET_FUNCTIONS = { + "scada": create_scada_entityset, + "pidata": create_pidata_entityset, + "vibrations": 
create_vibrations_entityset, +} + + +def get_create_entityset_functions(): + return CREATE_ENTITYSET_FUNCTIONS diff --git a/zephyr_ml/metadata.py b/zephyr_ml/metadata.py index 30aa97e..56ba702 100644 --- a/zephyr_ml/metadata.py +++ b/zephyr_ml/metadata.py @@ -1,202 +1,210 @@ +import copy + # Default EntitySet keyword arguments for entities DEFAULT_ES_KWARGS = { - 'alarms': { - 'index': '_index', - 'make_index': True, - 'time_index': 'DAT_START', - 'secondary_time_index': {'DAT_END': ['IND_DURATION']}, - 'logical_types': { - 'COD_ELEMENT': 'categorical', # turbine id - 'DAT_START': 'datetime', # start - 'DAT_END': 'datetime', # end - 'IND_DURATION': 'double', # duration - 'COD_ALARM': 'categorical', # alarm code - 'COD_ALARM_INT': 'categorical', # international alarm code - 'DES_NAME': 'categorical', # alarm name - 'DES_TITLE': 'categorical', # alarm description - 'COD_STATUS': 'categorical' # status code - } + "alarms": { + "index": "_index", + "make_index": True, + "time_index": "DAT_START", + "secondary_time_index": {"DAT_END": ["IND_DURATION"]}, + "logical_types": { + "COD_ELEMENT": "categorical", # turbine id + "DAT_START": "datetime", # start + "DAT_END": "datetime", # end + "IND_DURATION": "double", # duration + "COD_ALARM": "categorical", # alarm code + "COD_ALARM_INT": "categorical", # international alarm code + "DES_NAME": "categorical", # alarm name + "DES_TITLE": "categorical", # alarm description + "COD_STATUS": "categorical", # status code + }, + }, + "stoppages": { + "index": "_index", + "make_index": True, + "time_index": "DAT_START", + "secondary_time_index": {"DAT_END": ["IND_DURATION", "IND_LOST_GEN"]}, + "logical_types": { + "COD_ELEMENT": "categorical", # turbine id + "DAT_START": "datetime", # start + "DAT_END": "datetime", # end + "DES_WO_NAME": "natural_language", # work order name + "DES_COMMENTS": "natural_language", # work order comments + "COD_WO": "integer_nullable", # stoppage code + "IND_DURATION": "double", # duration + 
"IND_LOST_GEN": "double", # generation loss + "COD_ALARM": "categorical", # alarm code + "COD_CAUSE": "categorical", # stoppage cause + "COD_INCIDENCE": "categorical", # incidence code + "COD_ORIGIN": "categorical", # origin code + "DESC_CLASS": "categorical", # ???? + "COD_STATUS": "categorical", # status code + "COD_CODE": "categorical", # stoppage code + "DES_DESCRIPTION": "natural_language", # stoppage description + "DES_TECH_NAME": "categorical", # turbine technology + }, }, - 'stoppages': { - 'index': '_index', - 'make_index': True, - 'time_index': 'DAT_START', - 'secondary_time_index': {'DAT_END': ['IND_DURATION', 'IND_LOST_GEN']}, - 'logical_types': { - 'COD_ELEMENT': 'categorical', # turbine id - 'DAT_START': 'datetime', # start - 'DAT_END': 'datetime', # end - 'DES_WO_NAME': 'natural_language', # work order name - 'DES_COMMENTS': 'natural_language', # work order comments - 'COD_WO': 'integer_nullable', # stoppage code - 'IND_DURATION': 'double', # duration - 'IND_LOST_GEN': 'double', # generation loss - 'COD_ALARM': 'categorical', # alarm code - 'COD_CAUSE': 'categorical', # stoppage cause - 'COD_INCIDENCE': 'categorical', # incidence code - 'COD_ORIGIN': 'categorical', # origin code - 'DESC_CLASS': 'categorical', # ???? 
- 'COD_STATUS': 'categorical', # status code - 'COD_CODE': 'categorical', # stoppage code - 'DES_DESCRIPTION': 'natural_language', # stoppage description - 'DES_TECH_NAME': 'categorical' # turbine technology - } + "notifications": { + "index": "_index", + "make_index": True, + "time_index": "DAT_POSTING", + "secondary_time_index": {"DAT_MALF_END": ["IND_BREAKDOWN_DUR"]}, + "logical_types": { + "COD_ELEMENT": "categorical", # turbine id + "COD_ORDER": "categorical", + "IND_QUANTITY": "double", + "COD_MATERIAL_SAP": "categorical", + "DAT_POSTING": "datetime", + "COD_MAT_DOC": "categorical", + "DES_MEDIUM": "categorical", + "COD_NOTIF": "categorical", + "DAT_MALF_START": "datetime", + "DAT_MALF_END": "datetime", + "IND_BREAKDOWN_DUR": "double", + "FUNCT_LOC_DES": "categorical", + "COD_ALARM": "categorical", + "DES_ALARM": "categorical", + }, }, - 'notifications': { - 'index': '_index', - 'make_index': True, - 'time_index': 'DAT_POSTING', - 'secondary_time_index': {'DAT_MALF_END': ['IND_BREAKDOWN_DUR']}, - 'logical_types': { - 'COD_ELEMENT': 'categorical', # turbine id - 'COD_ORDER': 'categorical', - 'IND_QUANTITY': 'double', - 'COD_MATERIAL_SAP': 'categorical', - 'DAT_POSTING': 'datetime', - 'COD_MAT_DOC': 'categorical', - 'DES_MEDIUM': 'categorical', - 'COD_NOTIF': 'categorical', - 'DAT_MALF_START': 'datetime', - 'DAT_MALF_END': 'datetime', - 'IND_BREAKDOWN_DUR': 'double', - 'FUNCT_LOC_DES': 'categorical', - 'COD_ALARM': 'categorical', - 'DES_ALARM': 'categorical' - } + "work_orders": { + "index": "COD_ORDER", + "time_index": "DAT_BASIC_START", + "secondary_time_index": {"DAT_VALID_END": []}, + "logical_types": { + "COD_ELEMENT": "categorical", + "COD_ORDER": "categorical", + "DAT_BASIC_START": "datetime", + "DAT_BASIC_END": "datetime", + "COD_EQUIPMENT": "categorical", + "COD_MAINT_PLANT": "categorical", + "COD_MAINT_ACT_TYPE": "categorical", + "COD_CREATED_BY": "categorical", + "COD_ORDER_TYPE": "categorical", + "DAT_REFERENCE": "datetime", + "DAT_CREATED_ON": 
"datetime", + "DAT_VALID_END": "datetime", + "DAT_VALID_START": "datetime", + "COD_SYSTEM_STAT": "categorical", + "DES_LONG": "natural_language", + "COD_FUNCT_LOC": "categorical", + "COD_NOTIF_OBJ": "categorical", + "COD_MAINT_ITEM": "categorical", + "DES_MEDIUM": "natural_language", + "DES_FUNCT_LOC": "categorical", + }, }, - 'work_orders': { - 'index': 'COD_ORDER', - 'time_index': 'DAT_BASIC_START', - 'secondary_time_index': {'DAT_VALID_END': []}, - 'logical_types': { - 'COD_ELEMENT': 'categorical', - 'COD_ORDER': 'categorical', - 'DAT_BASIC_START': 'datetime', - 'DAT_BASIC_END': 'datetime', - 'COD_EQUIPMENT': 'categorical', - 'COD_MAINT_PLANT': 'categorical', - 'COD_MAINT_ACT_TYPE': 'categorical', - 'COD_CREATED_BY': 'categorical', - 'COD_ORDER_TYPE': 'categorical', - 'DAT_REFERENCE': 'datetime', - 'DAT_CREATED_ON': 'datetime', - 'DAT_VALID_END': 'datetime', - 'DAT_VALID_START': 'datetime', - 'COD_SYSTEM_STAT': 'categorical', - 'DES_LONG': 'natural_language', - 'COD_FUNCT_LOC': 'categorical', - 'COD_NOTIF_OBJ': 'categorical', - 'COD_MAINT_ITEM': 'categorical', - 'DES_MEDIUM': 'natural_language', - 'DES_FUNCT_LOC': 'categorical' - } + "turbines": { + "index": "COD_ELEMENT", + "logical_types": { + "COD_ELEMENT": "categorical", + "TURBINE_PI_ID": "categorical", + "TURBINE_LOCAL_ID": "categorical", + "TURBINE_SAP_COD": "categorical", + "DES_CORE_ELEMENT": "categorical", + "SITE": "categorical", + "DES_CORE_PLANT": "categorical", + "COD_PLANT_SAP": "categorical", + "PI_COLLECTOR_SITE_NAME": "categorical", + "PI_LOCAL_SITE_NAME": "categorical", + }, }, - 'turbines': { - 'index': 'COD_ELEMENT', - 'logical_types': { - 'COD_ELEMENT': 'categorical', - 'TURBINE_PI_ID': 'categorical', - 'TURBINE_LOCAL_ID': 'categorical', - 'TURBINE_SAP_COD': 'categorical', - 'DES_CORE_ELEMENT': 'categorical', - 'SITE': 'categorical', - 'DES_CORE_PLANT': 'categorical', - 'COD_PLANT_SAP': 'categorical', - 'PI_COLLECTOR_SITE_NAME': 'categorical', - 'PI_LOCAL_SITE_NAME': 'categorical' - } - } } 
DEFAULT_ES_TYPE_KWARGS = { - 'pidata': { - 'index': '_index', - 'make_index': True, - 'time_index': 'time', - 'logical_types': { - 'time': 'datetime', - 'COD_ELEMENT': 'categorical' - } + "pidata": { + "index": "_index", + "make_index": True, + "time_index": "time", + "logical_types": {"time": "datetime", "COD_ELEMENT": "categorical"}, }, - 'scada': { - 'index': '_index', - 'make_index': True, - 'time_index': 'TIMESTAMP', - 'logical_types': { - 'TIMESTAMP': 'datetime', - 'COD_ELEMENT': 'categorical' - } + "scada": { + "index": "_index", + "make_index": True, + "time_index": "TIMESTAMP", + "logical_types": {"TIMESTAMP": "datetime", "COD_ELEMENT": "categorical"}, + }, + "vibrations": { + "index": "_index", + "make_index": True, + "time_index": "timestamp", + "logical_types": { + "COD_ELEMENT": "categorical", + "turbine_id": "categorical", + "signal_id": "categorical", + "timestamp": "datetime", + "sensorName": "categorical", + "sensorType": "categorical", + "sensorSerial": "integer_nullable", + "siteName": "categorical", + "turbineName": "categorical", + "turbineSerial": "integer_nullable", + "configurationName": "natural_language", + "softwareVersion": "categorical", + "rpm": "double", + "rpmStatus": "natural_language", + "duration": "natural_language", + "condition": "categorical", + "maskTime": "datetime", + "Mask Status": "natural_language", + "System Serial": "categorical", + "WPS-ActivePower-Average": "double", + "WPS-ActivePower-Minimum": "double", + "WPS-ActivePower-Maximum": "double", + "WPS-ActivePower-Deviation": "double", + "WPS-ActivePower-StartTime": "datetime", + "WPS-ActivePower-StopTime": "datetime", + "WPS-ActivePower-Counts": "natural_language", + "Measured RPM": "double", + "WPS-ActivePower": "double", + "WPS-Gearoiltemperature": "double", + "WPS-GeneratorRPM": "double", + "WPS-PitchReference": "double", + "WPS-RotorRPM": "double", + "WPS-Windspeed": "double", + "WPS-YawAngle": "double", + "overload warning": "categorical", + "bias warning": 
"categorical", + "bias voltage": "double", + "xValueOffset": "double", + "xValueDelta": "double", + "xValueUnit": "categorical", + "yValueUnit": "categorical", + "TotalCount-RPM0": "double", + "TotalCount-RPM1": "double", + "TotalCount-RPM2": "double", + "TotalCount-RPM3": "double", + }, }, - 'vibrations': { - 'index': '_index', - 'make_index': True, - 'time_index': 'timestamp', - 'logical_types': { - 'COD_ELEMENT': 'categorical', - 'turbine_id': 'categorical', - 'signal_id': 'categorical', - 'timestamp': 'datetime', - 'sensorName': 'categorical', - 'sensorType': 'categorical', - 'sensorSerial': 'integer_nullable', - 'siteName': 'categorical', - 'turbineName': 'categorical', - 'turbineSerial': 'integer_nullable', - 'configurationName': 'natural_language', - 'softwareVersion': 'categorical', - 'rpm': 'double', - 'rpmStatus': 'natural_language', - 'duration': 'natural_language', - 'condition': 'categorical', - 'maskTime': 'datetime', - 'Mask Status': 'natural_language', - 'System Serial': 'categorical', - 'WPS-ActivePower-Average': 'double', - 'WPS-ActivePower-Minimum': 'double', - 'WPS-ActivePower-Maximum': 'double', - 'WPS-ActivePower-Deviation': 'double', - 'WPS-ActivePower-StartTime': 'datetime', - 'WPS-ActivePower-StopTime': 'datetime', - 'WPS-ActivePower-Counts': 'natural_language', - 'Measured RPM': 'double', - 'WPS-ActivePower': 'double', - 'WPS-Gearoiltemperature': 'double', - 'WPS-GeneratorRPM': 'double', - 'WPS-PitchReference': 'double', - 'WPS-RotorRPM': 'double', - 'WPS-Windspeed': 'double', - 'WPS-YawAngle': 'double', - 'overload warning': 'categorical', - 'bias warning': 'categorical', - 'bias voltage': 'double', - 'xValueOffset': 'double', - 'xValueDelta': 'double', - 'xValueUnit': 'categorical', - 'yValueUnit': 'categorical', - 'TotalCount-RPM0': 'double', - 'TotalCount-RPM1': 'double', - 'TotalCount-RPM2': 'double', - 'TotalCount-RPM3': 'double' - } - } } def get_mapped_kwargs(es_type, new_kwargs=None): if es_type not in 
DEFAULT_ES_TYPE_KWARGS.keys(): - raise ValueError('Unrecognized es_type argument: {}'.format(es_type)) + raise ValueError("Unrecognized es_type argument: {}".format(es_type)) mapped_kwargs = DEFAULT_ES_KWARGS.copy() mapped_kwargs.update({es_type: DEFAULT_ES_TYPE_KWARGS[es_type]}) if new_kwargs is not None: if not isinstance(new_kwargs, dict): - raise ValueError('new_kwargs must be dictionary mapping entity name to dictionary ' - 'with updated keyword arguments for EntitySet creation.') + raise ValueError( + "new_kwargs must be dictionary mapping entity name to dictionary " + "with updated keyword arguments for EntitySet creation." + ) for entity in new_kwargs: if entity not in mapped_kwargs: - raise ValueError('Unrecognized entity "{}" found in new keyword argument ' - 'mapping.'.format(entity)) + raise ValueError( + 'Unrecognized entity "{}" found in new keyword argument ' + "mapping.".format(entity) + ) mapped_kwargs[entity].update(new_kwargs[entity]) return mapped_kwargs + + +def get_default_es_type_kwargs(): + return copy.deepcopy(DEFAULT_ES_TYPE_KWARGS) + + +def get_es_types(): + return DEFAULT_ES_TYPE_KWARGS.keys() From 0026b0aac52f9b594127cab2f39c825871f4b369 Mon Sep 17 00:00:00 2001 From: Raymond Pan Date: Mon, 17 Feb 2025 12:57:09 -0500 Subject: [PATCH 02/28] implement train_test_split custom primitve --- ...earn.model_selection.train_test_split.json | 69 +++++++++++++++++++ zephyr_ml/primitives/preprocessing.py | 25 +++++++ 2 files changed, 94 insertions(+) create mode 100644 zephyr_ml/primitives/jsons/sklearn.model_selection.train_test_split.json create mode 100644 zephyr_ml/primitives/preprocessing.py diff --git a/zephyr_ml/primitives/jsons/sklearn.model_selection.train_test_split.json b/zephyr_ml/primitives/jsons/sklearn.model_selection.train_test_split.json new file mode 100644 index 0000000..482cf6e --- /dev/null +++ b/zephyr_ml/primitives/jsons/sklearn.model_selection.train_test_split.json @@ -0,0 +1,69 @@ +{ + "name": 
"zephyr_ml.primitives.preprocessing.train_test_split", + "contributors": [ + "Raymond Pan rpan@mit.edu" + ], + "documentation": "https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.train_test_split.html", + "description": "Split feature matrix and labels into random train and test subsets.", + "classifiers": { + "type": "preprocessor" + }, + "modaliaties": [], + "primitive": "sklearn.model_selection.train_test_split", + "produce": { + "args": [ + { + "name": "X", + "type": "pandas.DataFrame" + }, + { + "name": "y", + "type": "pandas.DataFrame" + } + + ], + "output": [ + { + "name": "X_train", + "type": "pandas.DataFrame" + }, + { + "name": "X_test", + "type": "pandas.DataFrame" + }, + { + "name": "y_train", + "type": "pandas.DataFrame" + }, + { + "name": "y_test", + "type": "pandas.DataFrame" + } + + ] + }, + "hyperparameters": { + "fixed": { + "test_size": { + "type": "float or int", + "default": null + }, + "train_size": { + "type": "float or int", + "default": null + }, + "random_state": { + "type": "int", + "default": null + }, + "shuffle": { + "type": "bool", + "default": true + }, + "stratify": { + "type": "list", + "default": null + } + } + } +} \ No newline at end of file diff --git a/zephyr_ml/primitives/preprocessing.py b/zephyr_ml/primitives/preprocessing.py new file mode 100644 index 0000000..0add5f3 --- /dev/null +++ b/zephyr_ml/primitives/preprocessing.py @@ -0,0 +1,25 @@ +""" +Preprocessing functions +""" + +import sklearn.model_selection + + +def train_test_split( + X, + y, + test_size=None, + train_size=None, + random_state=None, + shuffle=True, + stratify=None, +): + """ + Wrapper over sklearn.model_selection.train_test_split() + Used to split only 2 arrays at once: X (features) and y (labels) + + Split arrays or matrices into random train and test subsets. 
+ """ + return sklearn.model_selection.train_test_split( + X, y, test_size, train_size, random_state, shuffle, stratify + ) From fb74d7429454da8d74bb5c05f4dcbd55b411c5c2 Mon Sep 17 00:00:00 2001 From: Raymond Pan Date: Mon, 24 Feb 2025 22:23:05 -0500 Subject: [PATCH 03/28] added train test split --- zephyr_ml/__init__.py | 17 ++-- zephyr_ml/core.py | 198 +++++++++++++++++++++++++++++++++++++---- zephyr_ml/entityset.py | 79 +++++++++++++++- 3 files changed, 271 insertions(+), 23 deletions(-) diff --git a/zephyr_ml/__init__.py b/zephyr_ml/__init__.py index 7bd1502..589f2ef 100644 --- a/zephyr_ml/__init__.py +++ b/zephyr_ml/__init__.py @@ -2,15 +2,20 @@ """Top-level package for Zephyr.""" -__author__ = 'MIT Data To AI Lab' -__email__ = 'dai-lab@mit.edu' -__version__ = '0.0.4.dev0' +__author__ = "MIT Data To AI Lab" +__email__ = "dai-lab@mit.edu" +__version__ = "0.0.4.dev0" import os from zephyr_ml.core import Zephyr -from zephyr_ml.entityset import create_pidata_entityset, create_scada_entityset +from zephyr_ml.entityset import ( + create_pidata_entityset, + create_scada_entityset, + _create_entityset, + VALIDATE_DATA_FUNCTIONS, +) from zephyr_ml.labeling import DataLabeler -MLBLOCKS_PRIMITIVES = os.path.join(os.path.dirname(__file__), 'primitives', 'jsons') -MLBLOCKS_PIPELINES = os.path.join(os.path.dirname(__file__), 'pipelines') +MLBLOCKS_PRIMITIVES = os.path.join(os.path.dirname(__file__), "primitives", "jsons") +MLBLOCKS_PIPELINES = os.path.join(os.path.dirname(__file__), "pipelines") diff --git a/zephyr_ml/core.py b/zephyr_ml/core.py index 9666fb1..2c39441 100644 --- a/zephyr_ml/core.py +++ b/zephyr_ml/core.py @@ -1,20 +1,36 @@ from zephyr_ml.metadata import get_default_es_type_kwargs -from zephyr_ml.entityset import get_create_entityset_functions +from zephyr_ml.entityset import _create_entityset, VALIDATE_DATA_FUNCTIONS +from zephyr_ml.labeling import get_labeling_functions, LABELING_FUNCTIONS + +import composeml as cp +from inspect import getfullargspec 
+import featuretools as ft +import numpy as np +import pandas as pd +from sklearn.model_selection import train_test_split class Zephyr: - def __init__(self, pipeline, hyperparameters): - self.pipeline = pipeline - self.hyperparameters = hyperparameters + def __init__(self): + self.entityset = None + self.labeling_function = None + self.label_times = None + self.pipeline = None + self.pipeline_hyperparameters = None + self.feature_matrix_and_labels = None + self.X_train = None + self.X_test = None + self.y_train = None + self.y_test = None def get_entityset_types(self): """ Returns the supported entityset types (PI/SCADA) and the required dataframes and their columns """ - return get_default_es_type_kwargs() + return VALIDATE_DATA_FUNCTIONS.keys() - def create_entityset(self, data_paths, es_type="scada", new_kwargs_mapping=None): + def create_entityset(self, data_paths, es_type, new_kwargs_mapping=None): """ Generate an entityset @@ -24,21 +40,173 @@ def create_entityset(self, data_paths, es_type="scada", new_kwargs_mapping=None) es_type (str): type of signal data , either SCADA or PI new_kwargs_mapping ( dict ): Updated keyword arguments to be used during entityset creation - Returns : - featuretools . 
EntitySet that contains the data passed in and + Returns: + featuretools.EntitySet that contains the data passed in and their relationships """ - create_entityset_functions = get_create_entityset_functions() - if es_type not in create_entityset_functions: - raise ValueError("Unrecognized es_type argument: {}".format(es_type)) - - _create_entityset = create_entityset_functions[es_type] - entityset = _create_entityset(data_paths, new_kwargs_mapping) + entityset = _create_entityset(data_paths, es_type, new_kwargs_mapping) self.entityset = entityset return self.entityset def get_entityset(self): if self.entityset is None: - raise + raise ValueError("No entityset has been created or set in this instance.") return self.entityset + + def set_entityset(self, entityset, es_type, new_kwargs_mapping=None): + dfs = entityset.to_dictionary() + + validate_func = VALIDATE_DATA_FUNCTIONS[es_type] + validate_func(dfs, new_kwargs_mapping) + + self.entityset = entityset + + def get_predefined_labeling_functions(self): + return get_labeling_functions() + + def set_labeling_functions(self, name=None, func=None): + if name is not None: + if name in LABELING_FUNCTIONS: + self.labeling_function = LABELING_FUNCTIONS[name] + else: + raise ValueError( + f"Unrecognized name argument:{name}. 
Call get_predefined_labeling_functions to view predefined labeling functions" + ) + elif func is not None: + if callable(func): + self.labeling_function = func + else: + raise ValueError(f"Custom function is not callable") + raise ValueError("No labeling function given.") + + def generate_labeling_times( + self, num_samples=-1, subset=None, column_map={}, verbose=False, **kwargs + ): + assert self.entityset is not None + assert self.labeling_function is not None + + labeling_function, df, meta = self.labeling_function(self.entityset, column_map) + + data = df + if isinstance(subset, float) or isinstance(subset, int): + data = data.sample(subset) + + target_entity_index = meta.get("target_entity_index") + time_index = meta.get("time_index") + thresh = kwargs.get("thresh") or meta.get("thresh") + window_size = kwargs.get("window_size") or meta.get("window_size") + label_maker = cp.LabelMaker( + labeling_function=labeling_function, + target_dataframe_name=target_entity_index, + time_index=time_index, + window_size=window_size, + ) + + kwargs = {**meta, **kwargs} + kwargs = { + k: kwargs.get(k) + for k in set(getfullargspec(label_maker.search)[0]) + if kwargs.get(k) is not None + } + label_times = label_maker.search( + data.sort_values(time_index), num_samples, verbose=verbose, **kwargs + ) + if thresh is not None: + label_times = label_times.threshold(thresh) + + self.label_times = label_times + + return label_times, meta + + def plot_label_times(self): + assert self.label_times is not None + cp.label_times.plots.LabelPlots(self.label_times).distribution() + + def generate_features(self, **kwargs): + + feature_matrix, features = ft.dfs( + entityset=self.entityset, cutoff_time=self.label_times, **kwargs + ) + self.feature_matrix_and_labels = self._clean_feature_matrix(feature_matrix) + self.features = features + return feature_matrix, features + + def get_feature_matrix_and_labels(self): + return self.feature_matrix_and_labels + + def 
set_feature_matrix_and_labels(self, feature_matrix, label_col_name="label"): + assert label_col_name in feature_matrix.columns + self.feature_matrix_and_labels = self._clean_feature_matrix( + feature_matrix, label_col_name=label_col_name + ) + + def generate_train_test_split( + self, + test_size=None, + train_size=None, + random_state=None, + shuffle=True, + stratify=None, + ): + feature_matrix, labels = self.feature_matrix_and_labels + X_train, X_test, y_train, y_test = train_test_split( + feature_matrix, + labels, + test_size=test_size, + train_size=train_size, + random_state=random_state, + shuffle=shuffle, + stratify=stratify, + ) + self.X_train = X_train + self.X_test = X_test + self.y_train = y_train + self.y_test = y_train + + return + + def set_train_test_split(self, X_train, X_test, y_train, y_test): + self.X_train = X_train + self.X_test = X_test + self.y_train = y_train + self.y_test = y_test + + def get_train_test_split(self): + return self.X_train, self.X_test, self.y_train, self.y_test + + def get_predefined_pipelines(self): + pass + + def set_pipeline(self, pipeline, pipeline_hyperparameters): + self.pipeline = pipeline + self.pipeline_hyperparameters = pipeline_hyperparameters + + def fit(self, **kwargs): # kwargs indicate the parameters of the current pipeline + pass + + def predict(self, **kwargs): + pass + + def fit_predict(self): + pass + + def evaluate(self): + pass + + def _validate_step(self, **kwargs): + for key, value in kwargs: + assert (value is not None, f"{key} has not been set or created") + + def _clean_feature_matrix(self, feature_matrix, label_col_name="label"): + labels = feature_matrix.pop(label_col_name) + + count_cols = feature_matrix.filter(like="COUNT").columns + feature_matrix[count_cols] = feature_matrix[count_cols].apply( + lambda x: x.astype(np.int64) + ) + + string_cols = feature_matrix.select_dtypes(include="category").columns + feature_matrix = pd.get_dummies(feature_matrix, columns=string_cols) + + return 
feature_matrix, labels diff --git a/zephyr_ml/entityset.py b/zephyr_ml/entityset.py index f3a8ed4..fa995c7 100644 --- a/zephyr_ml/entityset.py +++ b/zephyr_ml/entityset.py @@ -2,10 +2,11 @@ import featuretools as ft -from zephyr_ml.metadata import get_mapped_kwargs +from zephyr_ml.metadata import get_mapped_kwargs, get_es_types def _create_entityset(entities, es_type, es_kwargs): + # filter out stated logical types for missing columns for entity, df in entities.items(): es_kwargs[entity]["logical_types"] = { @@ -205,6 +206,80 @@ def _validate_data(dfs, es_type, es_kwargs): ) +def validate_scada_data(dfs, new_kwargs_mapping=None): + entity_kwargs = get_mapped_kwargs("scada", new_kwargs_mapping) + _validate_data(dfs, "scada", entity_kwargs) + return entity_kwargs + + +def validate_pidata_data(dfs, new_kwargs_mapping=None): + entity_kwargs = get_mapped_kwargs("pidata", new_kwargs_mapping) + _validate_data(dfs, "pidata", entity_kwargs) + + +def validate_vibrations_data(dfs, new_kwargs_mapping=None): + entities = ["vibrations"] + + pidata_kwargs, scada_kwargs = {}, {} + if "pidata" in dfs: + pidata_kwargs = get_mapped_kwargs("pidata", new_kwargs_mapping) + entities.append("pidata") + if "scada" in dfs: + scada_kwargs = get_mapped_kwargs("scada", new_kwargs_mapping) + entities.append("scada") + + entity_kwargs = { + **pidata_kwargs, + **scada_kwargs, + **get_mapped_kwargs("vibrations", new_kwargs_mapping), + } + _validate_data(dfs, entities, entity_kwargs) + return entity_kwargs + + +VALIDATE_DATA_FUNCTIONS = { + "scada": validate_scada_data, + "pidata": validate_pidata_data, + "vibrations": validate_vibrations_data, +} + + +def _create_entityset(entities, es_type, new_kwargs_mapping=None): + validate_func = VALIDATE_DATA_FUNCTIONS[es_type] + es_kwargs = validate_func(entities, new_kwargs_mapping) + + # filter out stated logical types for missing columns + for entity, df in entities.items(): + es_kwargs[entity]["logical_types"] = { + col: t + for col, t in 
es_kwargs[entity]["logical_types"].items() + if col in df.columns + } + + turbines_index = es_kwargs["turbines"]["index"] + work_orders_index = es_kwargs["work_orders"]["index"] + + relationships = [ + ("turbines", turbines_index, "alarms", turbines_index), + ("turbines", turbines_index, "stoppages", turbines_index), + ("turbines", turbines_index, "work_orders", turbines_index), + ("turbines", turbines_index, es_type, turbines_index), + ("work_orders", work_orders_index, "notifications", work_orders_index), + ] + + es = ft.EntitySet() + es.id = es_type + + for name, df in entities.items(): + es.add_dataframe(dataframe_name=name, dataframe=df, **es_kwargs[name]) + + for relationship in relationships: + parent_df, parent_column, child_df, child_column = relationship + es.add_relationship(parent_df, parent_column, child_df, child_column) + + return es + + CREATE_ENTITYSET_FUNCTIONS = { "scada": create_scada_entityset, "pidata": create_pidata_entityset, @@ -213,4 +288,4 @@ def _validate_data(dfs, es_type, es_kwargs): def get_create_entityset_functions(): - return CREATE_ENTITYSET_FUNCTIONS + return CREATE_ENTITYSET_FUNCTIONS.copy() From 21cea03f77f6bf7ac19446eb74141e1b8ad3290f Mon Sep 17 00:00:00 2001 From: Raymond Pan Date: Sun, 9 Mar 2025 21:25:48 +0000 Subject: [PATCH 04/28] initial evagit status :wq --- zephyr_ml/core.py | 73 ++++++- ...n.ensemble.GradientBoostingClassifier.json | 187 ++++++++++++++++++ .../sklearn.meetrics.precision_score.json | 56 ++++++ .../jsons/sklearn.metrics.accuracy_score.json | 44 +++++ .../jsons/sklearn.metrics.f1_score.json | 56 ++++++ .../jsons/sklearn.metrics.recall_score.json | 56 ++++++ 6 files changed, 463 insertions(+), 9 deletions(-) create mode 100644 zephyr_ml/primitives/jsons/sklearn.ensemble.GradientBoostingClassifier.json create mode 100644 zephyr_ml/primitives/jsons/sklearn.meetrics.precision_score.json create mode 100644 zephyr_ml/primitives/jsons/sklearn.metrics.accuracy_score.json create mode 100644 
zephyr_ml/primitives/jsons/sklearn.metrics.f1_score.json create mode 100644 zephyr_ml/primitives/jsons/sklearn.metrics.recall_score.json diff --git a/zephyr_ml/core.py b/zephyr_ml/core.py index 2c39441..d96a0a2 100644 --- a/zephyr_ml/core.py +++ b/zephyr_ml/core.py @@ -8,6 +8,9 @@ import numpy as np import pandas as pd from sklearn.model_selection import train_test_split +import os +import json +from mlblocks import MLPipeline class Zephyr: @@ -162,7 +165,7 @@ def generate_train_test_split( self.X_train = X_train self.X_test = X_test self.y_train = y_train - self.y_test = y_train + self.y_test = y_test return @@ -179,19 +182,49 @@ def get_predefined_pipelines(self): pass def set_pipeline(self, pipeline, pipeline_hyperparameters): - self.pipeline = pipeline + self.pipeline = self._get_mlpipeline(pipeline, pipeline_hyperparameters) self.pipeline_hyperparameters = pipeline_hyperparameters - def fit(self, **kwargs): # kwargs indicate the parameters of the current pipeline - pass + def get_pipeline(self): + return self.pipeline - def predict(self, **kwargs): - pass + def fit( + self, X=None, y=None, visual=False, **kwargs + ): # kwargs indicate the parameters of the current pipeline + if X is None: + X = self.X_train + if y is None: + y = self.y_train - def fit_predict(self): - pass + if visual: + outputs_spec, visual_names = self._get_outputs_spec(False) + else: + outputs_spec = None + + outputs = self.pipeline.fit(X, y, output_=outputs_spec, **kwargs) + + if visual and outputs is not None: + return dict(zip(visual_names, outputs)) + + def predict(self, X=None, visual=False, **kwargs): + if X is None: + X = self.X_test + if visual: + outputs_spec, visual_names = self._get_outputs_spec() + else: + outputs_spec = "default" + + outputs = self.pipeline.predict(X, output_=outputs_spec, **kwargs) + + if visual and visual_names: + prediction = outputs[0] + return prediction, dict(zip(visual_names, outputs[-len(visual_names) :])) + + return outputs + + def evaluate(self, 
X=None, y=None, metrics=None): + result = self.pipeline.predict(X) - def evaluate(self): pass def _validate_step(self, **kwargs): @@ -210,3 +243,25 @@ def _clean_feature_matrix(self, feature_matrix, label_col_name="label"): feature_matrix = pd.get_dummies(feature_matrix, columns=string_cols) return feature_matrix, labels + + def _get_mlpipeline(self, pipeline, hyperparameters): + if isinstance(pipeline, str) and os.path.isfile(pipeline): + with open(pipeline) as json_file: + pipeline = json.load(json_file) + + mlpipeline = MLPipeline(pipeline) + if hyperparameters: + mlpipeline.set_hyperparameters(hyperparameters) + + return mlpipeline + + def _get_outputs_spec(self, default=True): + outputs_spec = ["default"] if default else [] + + try: + visual_names = self.pipeline.get_output_names("visual") + outputs_spec.append("visual") + except ValueError: + visual_names = [] + + return outputs_spec, visual_names diff --git a/zephyr_ml/primitives/jsons/sklearn.ensemble.GradientBoostingClassifier.json b/zephyr_ml/primitives/jsons/sklearn.ensemble.GradientBoostingClassifier.json new file mode 100644 index 0000000..02a647f --- /dev/null +++ b/zephyr_ml/primitives/jsons/sklearn.ensemble.GradientBoostingClassifier.json @@ -0,0 +1,187 @@ +{ + "name": "sklearn.ensemble.GradientBoostingClassifier", + "contributors": [ + "Carles Sala ", + "Plamen Valentinov " + ], + "documentation": "http://scikit-learn.org/stable/modules/generated/sklearn.ensemble.GradientBoostingClassifier.html", + "description": "Scikit-learn GradientBoostingClassifier.", + "classifiers": { + "type": "estimator", + "subtype": "classifier" + }, + "modalities": [], + "primitive": "sklearn.ensemble.GradientBoostingClassifier", + "fit": { + "method": "fit", + "args": [ + { + "name": "X", + "type": "ndarray" + }, + { + "name": "y", + "type": "ndarray" + } + ] + }, + "produce": { + "method": "predict_proba", + "args": [ + { + "name": "X", + "type": "ndarray" + } + ], + "output": [ + { + "name": "y", + "type": "ndarray" 
+ } + ] + }, + "hyperparameters": { + "fixed": { + "warm_start": { + "type": "bool", + "default": false + }, + "init": { + "type": "object", + "default": null + }, + "verbose": { + "type": "int", + "default": 0 + }, + "presort": { + "type": "bool", + "default": false + } + }, + "tunable": { + "loss": { + "type": "str", + "default": "deviance", + "values": [ + "deviance", + "exponential" + ] + }, + "learning_rate": { + "type": "float", + "default": 0.1, + "range": [ + 0.01, + 10.0 + ] + }, + "n_estimators": { + "type": "int", + "default": 10, + "range": [ + 1, + 500 + ] + }, + "max_depth": { + "type": "int", + "default": 3, + "range": [ + 1, + 30 + ] + }, + "criterion": { + "type": "str", + "default": "friedman_mse", + "values": [ + "friedman_mse", + "friedman_mae" + ] + }, + "min_samples_split": { + "type": "int", + "default": 2, + "range": [ + 2, + 100 + ] + }, + "min_samples_leaf": { + "type": "int", + "default": 1, + "range": [ + 1, + 100 + ] + }, + "min_weight_fraction_leaf": { + "type": "float", + "default": 0.0, + "range": [ + 0.0, + 10.0 + ] + }, + "subsample": { + "type": "float", + "default": 1.0, + "range": [ + 0.001, + 100.0 + ] + }, + "max_features": { + "type": "str", + "default": null, + "values": [ + null, + "auto", + "log2", + "sqrt" + ] + }, + "max_leaf_nodes": { + "type": "int", + "default": null, + "range": [ + 2, + 1000 + ] + }, + "min_impurity_decrease": { + "type": "float", + "default": 0.0, + "range": [ + 0.0, + 1000.0 + ] + }, + "validation_fraction": { + "type": "float", + "default": 0.1, + "range": [ + 0.0, + 1.0 + ] + }, + "n_iter_no_change": { + "type": "int", + "default": null, + "range": [ + 1, + 1000 + ] + }, + "tol": { + "type": "float", + "default": 0.0001, + "range": [ + 0.0, + 2.0 + ] + } + } + } +} \ No newline at end of file diff --git a/zephyr_ml/primitives/jsons/sklearn.meetrics.precision_score.json b/zephyr_ml/primitives/jsons/sklearn.meetrics.precision_score.json new file mode 100644 index 0000000..f5dc1e5 --- /dev/null +++ 
b/zephyr_ml/primitives/jsons/sklearn.meetrics.precision_score.json @@ -0,0 +1,56 @@ +{ + "name": "sklearn.metrics.precision_score", + "contributors": [ + "Raymond Pan " + ], + "documentation": "https://scikit-learn.org/stable/modules/generated/sklearn.metrics.f1_score.html", + "description": "Compute the precision.", + "classifiers": { + "type": "helper" + }, + "modalities": [], + "primitive": "sklearn.metrics.precision_score", + "produce": { + "args": [ + { + "name": "y_true", + "type": "ndarray" + }, + { + "name": "y_pred", + "type": "ndarray" + } + ] + }, + "output": [ + { + "name": "score", + "type": "float or int" + } + ], + "hyperparameters": { + "fixed": { + "labels": { + "type": "ndarray", + "default": null + }, + "pos_label": { + "type": "int, float, bool or str", + "default": 1 + }, + "average": { + "type": "str", + "default": "binary" + }, + "sample_weight": { + "type": "ndarray", + "default": null + }, + "zero_division": { + "type": "int, float or str", + "default": "warn" + } + } + } + +} \ No newline at end of file diff --git a/zephyr_ml/primitives/jsons/sklearn.metrics.accuracy_score.json b/zephyr_ml/primitives/jsons/sklearn.metrics.accuracy_score.json new file mode 100644 index 0000000..c5483b0 --- /dev/null +++ b/zephyr_ml/primitives/jsons/sklearn.metrics.accuracy_score.json @@ -0,0 +1,44 @@ +{ + "name": "sklearn.metrics.accuracy_score", + "contributors": [ + "Raymond Pan " + ], + "documentation": "https://scikit-learn.org/stable/modules/generated/sklearn.metrics.accuracy_score.html", + "description": "Accuracy classification score.", + "classifiers": { + "type": "helper" + }, + "modalities": [], + "primitive": "sklearn.metrics.accuracy_score", + "produce": { + "args": [ + { + "name": "y_true", + "type": "ndarray" + }, + { + "name": "y_pred", + "type": "ndarray" + } + ] + }, + "output": [ + { + "name": "score", + "type": "float or int" + } + ], + "hyperparameters": { + "fixed": { + "normalize": { + "type": "bool", + "default": true + }, + 
"sample_weight": { + "type": "ndarray", + "default": null + } + } + } + +} \ No newline at end of file diff --git a/zephyr_ml/primitives/jsons/sklearn.metrics.f1_score.json b/zephyr_ml/primitives/jsons/sklearn.metrics.f1_score.json new file mode 100644 index 0000000..190b3f7 --- /dev/null +++ b/zephyr_ml/primitives/jsons/sklearn.metrics.f1_score.json @@ -0,0 +1,56 @@ +{ + "name": "sklearn.metrics.f1_score", + "contributors": [ + "Raymond Pan " + ], + "documentation": "https://scikit-learn.org/stable/modules/generated/sklearn.metrics.f1_score.html", + "description": "Compute the F1 score, also known as balanced F-score or F-measure.", + "classifiers": { + "type": "helper" + }, + "modalities": [], + "primitive": "sklearn.metrics.f1_score", + "produce": { + "args": [ + { + "name": "y_true", + "type": "ndarray" + }, + { + "name": "y_pred", + "type": "ndarray" + } + ] + }, + "output": [ + { + "name": "score", + "type": "float or int" + } + ], + "hyperparameters": { + "fixed": { + "labels": { + "type": "ndarray", + "default": null + }, + "pos_label": { + "type": "int, float, bool or str", + "default": 1 + }, + "average": { + "type": "str", + "default": "binary" + }, + "sample_weight": { + "type": "ndarray", + "default": null + }, + "zero_division": { + "type": "int, float or str", + "default": "warn" + } + } + } + +} \ No newline at end of file diff --git a/zephyr_ml/primitives/jsons/sklearn.metrics.recall_score.json b/zephyr_ml/primitives/jsons/sklearn.metrics.recall_score.json new file mode 100644 index 0000000..c49453e --- /dev/null +++ b/zephyr_ml/primitives/jsons/sklearn.metrics.recall_score.json @@ -0,0 +1,56 @@ +{ + "name": "sklearn.metrics.recall_score", + "contributors": [ + "Raymond Pan " + ], + "documentation": "https://scikit-learn.org/stable/modules/generated/sklearn.metrics.recall_score.html", + "description": "Compute the recall.", + "classifiers": { + "type": "helper" + }, + "modalities": [], + "primitive": "sklearn.metrics.recall_score", + "produce": { + 
"args": [ + { + "name": "y_true", + "type": "ndarray" + }, + { + "name": "y_pred", + "type": "ndarray" + } + ] + }, + "output": [ + { + "name": "score", + "type": "float or int" + } + ], + "hyperparameters": { + "fixed": { + "labels": { + "type": "ndarray", + "default": null + }, + "pos_label": { + "type": "int, float, bool or str", + "default": 1 + }, + "average": { + "type": "str", + "default": "binary" + }, + "sample_weight": { + "type": "ndarray", + "default": null + }, + "zero_division": { + "type": "int, float or str", + "default": "warn" + } + } + } + +} \ No newline at end of file From 634f050f065223a1fbe8d094cb7b0d70e81df239 Mon Sep 17 00:00:00 2001 From: Raymond Pan Date: Tue, 11 Mar 2025 12:36:18 +0000 Subject: [PATCH 05/28] end to end --- zephyr_ml/__init__.py | 3 + zephyr_ml/core.py | 704 +++++++++++++++++- zephyr_ml/core_prev.py | 83 ++- zephyr_ml/labeling/__init__.py | 21 +- .../sklearn.meetrics.precision_score.json | 14 +- .../jsons/sklearn.metrics.accuracy_score.json | 9 +- .../jsons/sklearn.metrics.f1_score.json | 12 +- .../jsons/sklearn.metrics.recall_score.json | 9 +- 8 files changed, 784 insertions(+), 71 deletions(-) diff --git a/zephyr_ml/__init__.py b/zephyr_ml/__init__.py index 589f2ef..7da0003 100644 --- a/zephyr_ml/__init__.py +++ b/zephyr_ml/__init__.py @@ -19,3 +19,6 @@ MLBLOCKS_PRIMITIVES = os.path.join(os.path.dirname(__file__), "primitives", "jsons") MLBLOCKS_PIPELINES = os.path.join(os.path.dirname(__file__), "pipelines") +# import os, sys + +# sys.path.append(os.path.dirname(os.path.realpath(__file__))) diff --git a/zephyr_ml/core.py b/zephyr_ml/core.py index d96a0a2..1eb4ffe 100644 --- a/zephyr_ml/core.py +++ b/zephyr_ml/core.py @@ -1,7 +1,10 @@ -from zephyr_ml.metadata import get_default_es_type_kwargs -from zephyr_ml.entityset import _create_entityset, VALIDATE_DATA_FUNCTIONS -from zephyr_ml.labeling import get_labeling_functions, LABELING_FUNCTIONS - +# from zephyr_ml.entityset import _create_entityset, VALIDATE_DATA_FUNCTIONS 
+# from zephyr_ml.labeling import get_labeling_functions, LABELING_FUNCTIONS +# from zephyr_ml.entityset import _create_entityset, VALIDATE_DATA_FUNCTIONS +from zephyr_ml.labeling import ( + get_labeling_functions, + LABELING_FUNCTIONS, +) import composeml as cp from inspect import getfullargspec import featuretools as ft @@ -10,7 +13,10 @@ from sklearn.model_selection import train_test_split import os import json -from mlblocks import MLPipeline +from mlblocks import MLPipeline, MLBlock, get_primitives_paths, add_primitives_path +from itertools import chain + +DEFAULT_METRICS = ["sklearn.metrics.accuracy_score"] class Zephyr: @@ -68,10 +74,13 @@ def set_entityset(self, entityset, es_type, new_kwargs_mapping=None): def get_predefined_labeling_functions(self): return get_labeling_functions() - def set_labeling_functions(self, name=None, func=None): + def set_labeling_function(self, name=None, func=None): + print(f"labeling fucntion name {name}") if name is not None: - if name in LABELING_FUNCTIONS: - self.labeling_function = LABELING_FUNCTIONS[name] + labeling_fn_map = get_labeling_functions_map() + if name in labeling_fn_map: + self.labeling_function = labeling_fn_map[name] + return else: raise ValueError( f"Unrecognized name argument:{name}. 
Call get_predefined_labeling_functions to view predefined labeling functions" @@ -79,11 +88,12 @@ def set_labeling_functions(self, name=None, func=None): elif func is not None: if callable(func): self.labeling_function = func + return else: raise ValueError(f"Custom function is not callable") raise ValueError("No labeling function given.") - def generate_labeling_times( + def generate_label_times( self, num_samples=-1, subset=None, column_map={}, verbose=False, **kwargs ): assert self.entityset is not None @@ -133,6 +143,7 @@ def generate_features(self, **kwargs): ) self.feature_matrix_and_labels = self._clean_feature_matrix(feature_matrix) self.features = features + print(feature_matrix) return feature_matrix, features def get_feature_matrix_and_labels(self): @@ -181,7 +192,7 @@ def get_train_test_split(self): def get_predefined_pipelines(self): pass - def set_pipeline(self, pipeline, pipeline_hyperparameters): + def set_pipeline(self, pipeline, pipeline_hyperparameters=None): self.pipeline = self._get_mlpipeline(pipeline, pipeline_hyperparameters) self.pipeline_hyperparameters = pipeline_hyperparameters @@ -223,9 +234,22 @@ def predict(self, X=None, visual=False, **kwargs): return outputs def evaluate(self, X=None, y=None, metrics=None): - result = self.pipeline.predict(X) + if X is None: + X = self.X_test + if y is None: + y = self.y_test - pass + context_0 = self.pipeline.predict(X, output_=0) + y_proba = context_0["y_pred"] + y_pred = self.pipeline.predict(start_=1, **context_0) + if metrics is None: + metrics = DEFAULT_METRICS + + for metric in metrics: + metric_primitive = self._get_ml_primitive(metric) + print(metric_primitive) + res = metric_primitive.produce(y_pred=y_pred, y_proba=y_proba, y_true=y) + print(metric_primitive.name, res) def _validate_step(self, **kwargs): for key, value in kwargs: @@ -244,7 +268,7 @@ def _clean_feature_matrix(self, feature_matrix, label_col_name="label"): return feature_matrix, labels - def _get_mlpipeline(self, pipeline, 
hyperparameters): + def _get_mlpipeline(self, pipeline, hyperparameters=None): if isinstance(pipeline, str) and os.path.isfile(pipeline): with open(pipeline) as json_file: pipeline = json.load(json_file) @@ -255,6 +279,16 @@ def _get_mlpipeline(self, pipeline, hyperparameters): return mlpipeline + def _get_ml_primitive(self, primitive, hyperparameters=None): + if isinstance(primitive, str) and os.path.isfile(primitive): + with open(primitive) as json_file: + primitive = json.load(json_file) + mlprimitive = MLBlock(primitive) + + if hyperparameters: + mlprimitive.set_hyperparameters(hyperparameters) + return mlprimitive + def _get_outputs_spec(self, default=True): outputs_spec = ["default"] if default else [] @@ -265,3 +299,647 @@ def _get_outputs_spec(self, default=True): visual_names = [] return outputs_spec, visual_names + + +def get_labeling_functions_map(): + functions = {} + for function in LABELING_FUNCTIONS: + name = function.__name__ + functions[name] = function + return functions + + +import copy + + +# Default EntitySet keyword arguments for entities +DEFAULT_ES_KWARGS = { + "alarms": { + "index": "_index", + "make_index": True, + "time_index": "DAT_START", + "secondary_time_index": {"DAT_END": ["IND_DURATION"]}, + "logical_types": { + "COD_ELEMENT": "categorical", # turbine id + "DAT_START": "datetime", # start + "DAT_END": "datetime", # end + "IND_DURATION": "double", # duration + "COD_ALARM": "categorical", # alarm code + "COD_ALARM_INT": "categorical", # international alarm code + "DES_NAME": "categorical", # alarm name + "DES_TITLE": "categorical", # alarm description + "COD_STATUS": "categorical", # status code + }, + }, + "stoppages": { + "index": "_index", + "make_index": True, + "time_index": "DAT_START", + "secondary_time_index": {"DAT_END": ["IND_DURATION", "IND_LOST_GEN"]}, + "logical_types": { + "COD_ELEMENT": "categorical", # turbine id + "DAT_START": "datetime", # start + "DAT_END": "datetime", # end + "DES_WO_NAME": "natural_language", # 
work order name + "DES_COMMENTS": "natural_language", # work order comments + "COD_WO": "integer_nullable", # stoppage code + "IND_DURATION": "double", # duration + "IND_LOST_GEN": "double", # generation loss + "COD_ALARM": "categorical", # alarm code + "COD_CAUSE": "categorical", # stoppage cause + "COD_INCIDENCE": "categorical", # incidence code + "COD_ORIGIN": "categorical", # origin code + "DESC_CLASS": "categorical", # ???? + "COD_STATUS": "categorical", # status code + "COD_CODE": "categorical", # stoppage code + "DES_DESCRIPTION": "natural_language", # stoppage description + "DES_TECH_NAME": "categorical", # turbine technology + }, + }, + "notifications": { + "index": "_index", + "make_index": True, + "time_index": "DAT_POSTING", + "secondary_time_index": {"DAT_MALF_END": ["IND_BREAKDOWN_DUR"]}, + "logical_types": { + "COD_ELEMENT": "categorical", # turbine id + "COD_ORDER": "categorical", + "IND_QUANTITY": "double", + "COD_MATERIAL_SAP": "categorical", + "DAT_POSTING": "datetime", + "COD_MAT_DOC": "categorical", + "DES_MEDIUM": "categorical", + "COD_NOTIF": "categorical", + "DAT_MALF_START": "datetime", + "DAT_MALF_END": "datetime", + "IND_BREAKDOWN_DUR": "double", + "FUNCT_LOC_DES": "categorical", + "COD_ALARM": "categorical", + "DES_ALARM": "categorical", + }, + }, + "work_orders": { + "index": "COD_ORDER", + "time_index": "DAT_BASIC_START", + "secondary_time_index": {"DAT_VALID_END": []}, + "logical_types": { + "COD_ELEMENT": "categorical", + "COD_ORDER": "categorical", + "DAT_BASIC_START": "datetime", + "DAT_BASIC_END": "datetime", + "COD_EQUIPMENT": "categorical", + "COD_MAINT_PLANT": "categorical", + "COD_MAINT_ACT_TYPE": "categorical", + "COD_CREATED_BY": "categorical", + "COD_ORDER_TYPE": "categorical", + "DAT_REFERENCE": "datetime", + "DAT_CREATED_ON": "datetime", + "DAT_VALID_END": "datetime", + "DAT_VALID_START": "datetime", + "COD_SYSTEM_STAT": "categorical", + "DES_LONG": "natural_language", + "COD_FUNCT_LOC": "categorical", + "COD_NOTIF_OBJ": 
"categorical", + "COD_MAINT_ITEM": "categorical", + "DES_MEDIUM": "natural_language", + "DES_FUNCT_LOC": "categorical", + }, + }, + "turbines": { + "index": "COD_ELEMENT", + "logical_types": { + "COD_ELEMENT": "categorical", + "TURBINE_PI_ID": "categorical", + "TURBINE_LOCAL_ID": "categorical", + "TURBINE_SAP_COD": "categorical", + "DES_CORE_ELEMENT": "categorical", + "SITE": "categorical", + "DES_CORE_PLANT": "categorical", + "COD_PLANT_SAP": "categorical", + "PI_COLLECTOR_SITE_NAME": "categorical", + "PI_LOCAL_SITE_NAME": "categorical", + }, + }, +} + +DEFAULT_ES_TYPE_KWARGS = { + "pidata": { + "index": "_index", + "make_index": True, + "time_index": "time", + "logical_types": {"time": "datetime", "COD_ELEMENT": "categorical"}, + }, + "scada": { + "index": "_index", + "make_index": True, + "time_index": "TIMESTAMP", + "logical_types": {"TIMESTAMP": "datetime", "COD_ELEMENT": "categorical"}, + }, + "vibrations": { + "index": "_index", + "make_index": True, + "time_index": "timestamp", + "logical_types": { + "COD_ELEMENT": "categorical", + "turbine_id": "categorical", + "signal_id": "categorical", + "timestamp": "datetime", + "sensorName": "categorical", + "sensorType": "categorical", + "sensorSerial": "integer_nullable", + "siteName": "categorical", + "turbineName": "categorical", + "turbineSerial": "integer_nullable", + "configurationName": "natural_language", + "softwareVersion": "categorical", + "rpm": "double", + "rpmStatus": "natural_language", + "duration": "natural_language", + "condition": "categorical", + "maskTime": "datetime", + "Mask Status": "natural_language", + "System Serial": "categorical", + "WPS-ActivePower-Average": "double", + "WPS-ActivePower-Minimum": "double", + "WPS-ActivePower-Maximum": "double", + "WPS-ActivePower-Deviation": "double", + "WPS-ActivePower-StartTime": "datetime", + "WPS-ActivePower-StopTime": "datetime", + "WPS-ActivePower-Counts": "natural_language", + "Measured RPM": "double", + "WPS-ActivePower": "double", + 
"WPS-Gearoiltemperature": "double", + "WPS-GeneratorRPM": "double", + "WPS-PitchReference": "double", + "WPS-RotorRPM": "double", + "WPS-Windspeed": "double", + "WPS-YawAngle": "double", + "overload warning": "categorical", + "bias warning": "categorical", + "bias voltage": "double", + "xValueOffset": "double", + "xValueDelta": "double", + "xValueUnit": "categorical", + "yValueUnit": "categorical", + "TotalCount-RPM0": "double", + "TotalCount-RPM1": "double", + "TotalCount-RPM2": "double", + "TotalCount-RPM3": "double", + }, + }, +} + + +def get_mapped_kwargs(es_type, new_kwargs=None): + if es_type not in DEFAULT_ES_TYPE_KWARGS.keys(): + raise ValueError("Unrecognized es_type argument: {}".format(es_type)) + mapped_kwargs = DEFAULT_ES_KWARGS.copy() + mapped_kwargs.update({es_type: DEFAULT_ES_TYPE_KWARGS[es_type]}) + + if new_kwargs is not None: + if not isinstance(new_kwargs, dict): + raise ValueError( + "new_kwargs must be dictionary mapping entity name to dictionary " + "with updated keyword arguments for EntitySet creation." + ) + for entity in new_kwargs: + if entity not in mapped_kwargs: + raise ValueError( + 'Unrecognized entity "{}" found in new keyword argument ' + "mapping.".format(entity) + ) + + mapped_kwargs[entity].update(new_kwargs[entity]) + + return mapped_kwargs + + +def get_default_es_type_kwargs(): + return copy.deepcopy(DEFAULT_ES_TYPE_KWARGS) + + +def get_es_types(): + return DEFAULT_ES_TYPE_KWARGS.keys() + + +def create_pidata_entityset(dfs, new_kwargs_mapping=None): + """Generate an entityset for PI data datasets + + Args: + data_paths (dict): Dictionary mapping entity names ('alarms', 'notifications', + 'stoppages', 'work_orders', 'pidata', 'turbines') to the pandas dataframe for + that entity. 
+ **kwargs: Updated keyword arguments to be used during entityset creation + """ + entity_kwargs = get_mapped_kwargs("pidata", new_kwargs_mapping) + _validate_data(dfs, "pidata", entity_kwargs) + + es = _create_entityset(dfs, "pidata", entity_kwargs) + es.id = "PI data" + + return es + + +def create_scada_entityset(dfs, new_kwargs_mapping=None): + """Generate an entityset for SCADA data datasets + + Args: + data_paths (dict): Dictionary mapping entity names ('alarms', 'notifications', + 'stoppages', 'work_orders', 'scada', 'turbines') to the pandas dataframe for + that entity. + """ + entity_kwargs = get_mapped_kwargs("scada", new_kwargs_mapping) + _validate_data(dfs, "scada", entity_kwargs) + + es = _create_entityset(dfs, "scada", entity_kwargs) + es.id = "SCADA data" + + return es + + +def create_vibrations_entityset(dfs, new_kwargs_mapping=None): + """Generate an entityset for Vibrations data datasets + + Args: + data_paths (dict): Dictionary mapping entity names ('alarms', 'notifications', + 'stoppages', 'work_orders', 'vibrations', 'turbines') to the pandas + dataframe for that entity. Optionally 'pidata' and 'scada' can be included. 
+ """ + entities = ["vibrations"] + + pidata_kwargs, scada_kwargs = {}, {} + if "pidata" in dfs: + pidata_kwargs = get_mapped_kwargs("pidata", new_kwargs_mapping) + entities.append("pidata") + if "scada" in dfs: + scada_kwargs = get_mapped_kwargs("scada", new_kwargs_mapping) + entities.append("scada") + + entity_kwargs = { + **pidata_kwargs, + **scada_kwargs, + **get_mapped_kwargs("vibrations", new_kwargs_mapping), + } + _validate_data(dfs, entities, entity_kwargs) + + es = _create_entityset(dfs, "vibrations", entity_kwargs) + es.id = "Vibrations data" + + return es + + +def _validate_data(dfs, es_type, es_kwargs): + """Validate data by checking for required columns in each entity""" + if not isinstance(es_type, list): + es_type = [es_type] + + entities = set( + chain( + [ + "alarms", + "stoppages", + "work_orders", + "notifications", + "turbines", + *es_type, + ] + ) + ) + + if set(dfs.keys()) != entities: + missing = entities.difference(set(dfs.keys())) + extra = set(dfs.keys()).difference(entities) + msg = [] + if missing: + msg.append("Missing dataframes for entities {}.".format(", ".join(missing))) + if extra: + msg.append( + "Unrecognized entities {} included in dfs.".format(", ".join(extra)) + ) + + raise ValueError(" ".join(msg)) + + turbines_index = es_kwargs["turbines"]["index"] + work_orders_index = es_kwargs["work_orders"]["index"] + + if work_orders_index not in dfs["work_orders"].columns: + raise ValueError( + 'Expected index column "{}" missing from work_orders entity'.format( + work_orders_index + ) + ) + + if work_orders_index not in dfs["notifications"].columns: + raise ValueError( + 'Expected column "{}" missing from notifications entity'.format( + work_orders_index + ) + ) + + if not dfs["work_orders"][work_orders_index].is_unique: + raise ValueError( + 'Expected index column "{}" of work_orders entity is not ' + "unique".format(work_orders_index) + ) + + if turbines_index not in dfs["turbines"].columns: + raise ValueError( + 'Expected index 
column "{}" missing from turbines entity'.format( + turbines_index + ) + ) + + if not dfs["turbines"][turbines_index].is_unique: + raise ValueError( + 'Expected index column "{}" of turbines entity is not unique.'.format( + turbines_index + ) + ) + + for entity, df in dfs.items(): + if turbines_index not in df.columns: + raise ValueError( + 'Turbines index column "{}" missing from data for {} entity'.format( + turbines_index, entity + ) + ) + + time_index = es_kwargs[entity].get("time_index", False) + if time_index and time_index not in df.columns: + raise ValueError( + 'Missing time index column "{}" from {} entity'.format( + time_index, entity + ) + ) + + secondary_time_indices = es_kwargs[entity].get("secondary_time_index", {}) + for time_index, cols in secondary_time_indices.items(): + if time_index not in df.columns: + raise ValueError( + 'Secondary time index "{}" missing from {} entity'.format( + time_index, entity + ) + ) + for col in cols: + if col not in df.columns: + raise ValueError( + ( + 'Column "{}" associated with secondary time index "{}" ' + "missing from {} entity" + ).format(col, time_index, entity) + ) + + +def validate_scada_data(dfs, new_kwargs_mapping=None): + entity_kwargs = get_mapped_kwargs("scada", new_kwargs_mapping) + _validate_data(dfs, "scada", entity_kwargs) + return entity_kwargs + + +def validate_pidata_data(dfs, new_kwargs_mapping=None): + entity_kwargs = get_mapped_kwargs("pidata", new_kwargs_mapping) + _validate_data(dfs, "pidata", entity_kwargs) + return entity_kwargs + + +def validate_vibrations_data(dfs, new_kwargs_mapping=None): + entities = ["vibrations"] + + pidata_kwargs, scada_kwargs = {}, {} + if "pidata" in dfs: + pidata_kwargs = get_mapped_kwargs("pidata", new_kwargs_mapping) + entities.append("pidata") + if "scada" in dfs: + scada_kwargs = get_mapped_kwargs("scada", new_kwargs_mapping) + entities.append("scada") + + entity_kwargs = { + **pidata_kwargs, + **scada_kwargs, + **get_mapped_kwargs("vibrations", 
new_kwargs_mapping), + } + _validate_data(dfs, entities, entity_kwargs) + return entity_kwargs + + +VALIDATE_DATA_FUNCTIONS = { + "scada": validate_scada_data, + "pidata": validate_pidata_data, + "vibrations": validate_vibrations_data, +} + + +def _create_entityset(entities, es_type, new_kwargs_mapping=None): + validate_func = VALIDATE_DATA_FUNCTIONS[es_type] + es_kwargs = validate_func(entities, new_kwargs_mapping) + + # filter out stated logical types for missing columns + for entity, df in entities.items(): + es_kwargs[entity]["logical_types"] = { + col: t + for col, t in es_kwargs[entity]["logical_types"].items() + if col in df.columns + } + + turbines_index = es_kwargs["turbines"]["index"] + work_orders_index = es_kwargs["work_orders"]["index"] + + relationships = [ + ("turbines", turbines_index, "alarms", turbines_index), + ("turbines", turbines_index, "stoppages", turbines_index), + ("turbines", turbines_index, "work_orders", turbines_index), + ("turbines", turbines_index, es_type, turbines_index), + ("work_orders", work_orders_index, "notifications", work_orders_index), + ] + + es = ft.EntitySet() + es.id = es_type + + for name, df in entities.items(): + es.add_dataframe(dataframe_name=name, dataframe=df, **es_kwargs[name]) + + for relationship in relationships: + parent_df, parent_column, child_df, child_column = relationship + es.add_relationship(parent_df, parent_column, child_df, child_column) + + return es + + +CREATE_ENTITYSET_FUNCTIONS = { + "scada": create_scada_entityset, + "pidata": create_pidata_entityset, + "vibrations": create_vibrations_entityset, +} + + +def get_create_entityset_functions(): + return CREATE_ENTITYSET_FUNCTIONS.copy() + + +if __name__ == "__main__": + obj = Zephyr() + alarms_df = pd.DataFrame( + { + "COD_ELEMENT": [0, 0], + "DAT_START": [ + pd.Timestamp("2022-01-01 00:00:00"), + pd.Timestamp("2022-03-01 11:12:13"), + ], + "DAT_END": [ + pd.Timestamp("2022-01-01 13:00:00"), + pd.Timestamp("2022-03-02 11:12:13"), + ], + 
"IND_DURATION": [0.5417, 1.0], + "COD_ALARM": [12345, 98754], + "COD_ALARM_INT": [12345, 98754], + "DES_NAME": ["Alarm1", "Alarm2"], + "DES_TITLE": ["Description of alarm 1", "Description of alarm 2"], + } + ) + stoppages_df = pd.DataFrame( + { + "COD_ELEMENT": [0, 0], + "DAT_START": [ + pd.Timestamp("2022-01-01 00:00:00"), + pd.Timestamp("2022-03-01 11:12:13"), + ], + "DAT_END": [ + pd.Timestamp("2022-01-08 11:07:17"), + pd.Timestamp("2022-03-01 17:00:13"), + ], + "DES_WO_NAME": ["stoppage name 1", "stoppage name 2"], + "DES_COMMENTS": ["description of stoppage 1", "description of stoppage 2"], + "COD_WO": [12345, 67890], + "IND_DURATION": [7.4642, 0.2417], + "IND_LOST_GEN": [45678.0, 123.0], + "COD_ALARM": [12345, 12345], + "COD_CAUSE": [32, 48], + "COD_INCIDENCE": [987654, 123450], + "COD_ORIGIN": [6, 23], + "COD_STATUS": ["STOP", "PAUSE"], + "COD_CODE": ["ABC", "XYZ"], + "DES_DESCRIPTION": ["Description 1", "Description 2"], + } + ) + notifications_df = pd.DataFrame( + { + "COD_ELEMENT": [0, 0], + "COD_ORDER": [12345, 67890], + "IND_QUANTITY": [1, -20], + "COD_MATERIAL_SAP": [36052411, 67890], + "DAT_POSTING": [ + pd.Timestamp("2022-01-01 00:00:00"), + pd.Timestamp("2022-03-01 00:00:00"), + ], + "COD_MAT_DOC": [77889900, 12345690], + "DES_MEDIUM": [ + "Description of notification 1", + "Description of notification 2", + ], + "COD_NOTIF": [567890123, 32109877], + "DAT_MALF_START": [ + pd.Timestamp("2021-12-25 18:07:10"), + pd.Timestamp("2022-02-28 06:04:00"), + ], + "DAT_MALF_END": [ + pd.Timestamp("2022-01-08 11:07:17"), + pd.Timestamp("2022-03-01 17:00:13"), + ], + "IND_BREAKDOWN_DUR": [14.1378, 2.4792], + "FUNCT_LOC_DES": ["location description 1", "location description 2"], + "COD_ALARM": [12345, 12345], + "DES_ALARM": ["Alarm description", "Alarm description"], + } + ) + work_orders_df = pd.DataFrame( + { + "COD_ELEMENT": [0, 0], + "COD_ORDER": [12345, 67890], + "DAT_BASIC_START": [ + pd.Timestamp("2022-01-01 00:00:00"), + pd.Timestamp("2022-03-01 
00:00:00"), + ], + "DAT_BASIC_END": [ + pd.Timestamp("2022-01-09 00:00:00"), + pd.Timestamp("2022-03-02 00:00:00"), + ], + "COD_EQUIPMENT": [98765, 98765], + "COD_MAINT_PLANT": ["ABC", "ABC"], + "COD_MAINT_ACT_TYPE": ["XYZ", "XYZ"], + "COD_CREATED_BY": ["A1234", "B6789"], + "COD_ORDER_TYPE": ["A", "B"], + "DAT_REFERENCE": [ + pd.Timestamp("2022-01-01 00:00:00"), + pd.Timestamp("2022-03-01 00:00:00"), + ], + "DAT_CREATED_ON": [ + pd.Timestamp("2022-03-01 00:00:00"), + pd.Timestamp("2022-04-18 00:00:00"), + ], + "DAT_VALID_END": [pd.NaT, pd.NaT], + "DAT_VALID_START": [pd.NaT, pd.NaT], + "COD_SYSTEM_STAT": ["ABC XYZ", "LMN OPQ"], + "DES_LONG": ["description of work order", "description of work order"], + "COD_FUNCT_LOC": ["!12345", "?09876"], + "COD_NOTIF_OBJ": ["00112233", "00998877"], + "COD_MAINT_ITEM": ["", "019283"], + "DES_MEDIUM": ["short description", "short description"], + "DES_FUNCT_LOC": ["XYZ1234", "ABC9876"], + } + ) + turbines_df = pd.DataFrame( + { + "COD_ELEMENT": [0], + "TURBINE_PI_ID": ["TA00"], + "TURBINE_LOCAL_ID": ["A0"], + "TURBINE_SAP_COD": ["LOC000"], + "DES_CORE_ELEMENT": ["T00"], + "SITE": ["LOCATION"], + "DES_CORE_PLANT": ["LOC"], + "COD_PLANT_SAP": ["ABC"], + "PI_COLLECTOR_SITE_NAME": ["LOC0"], + "PI_LOCAL_SITE_NAME": ["LOC0"], + } + ) + pidata_df = pd.DataFrame( + { + "time": [ + pd.Timestamp("2022-01-02 13:21:01"), + pd.Timestamp("2022-03-08 13:21:01"), + ], + "COD_ELEMENT": [0, 0], + "val1": [9872.0, 559.0], + "val2": [10.0, -7.0], + } + ) + obj.create_entityset( + { + "alarms": alarms_df, + "stoppages": stoppages_df, + "notifications": notifications_df, + "work_orders": work_orders_df, + "turbines": turbines_df, + "pidata": pidata_df, + }, + "pidata", + ) + obj.set_labeling_function(name="brake_pad_presence") + + obj.generate_label_times(num_samples=35, gap="20d") + obj.plot_label_times() + + obj.generate_features( + target_dataframe_name="turbines", + cutoff_time_in_index=True, + agg_primitives=["count", "sum", "max"], + ) + + 
obj.generate_train_test_split() + add_primitives_path( + path="/Users/raymondpan/zephyr/Zephyr-repo/zephyr_ml/primitives/jsons" + ) + obj.set_pipeline("xgb_classifier") + + obj.fit() + + obj.evaluate() diff --git a/zephyr_ml/core_prev.py b/zephyr_ml/core_prev.py index e89cb3b..124b70c 100644 --- a/zephyr_ml/core_prev.py +++ b/zephyr_ml/core_prev.py @@ -3,6 +3,7 @@ This module defines the Zephyr Class, which is responsible for the model training and inference with the underlying MLBlocks pipelines. """ + import json import logging import os @@ -19,16 +20,16 @@ _REGRESSION_METRICS = { - 'mae': metrics.mean_absolute_error, - 'mse': metrics.mean_squared_error, - 'r2': metrics.r2_score, + "mae": metrics.mean_absolute_error, + "mse": metrics.mean_squared_error, + "r2": metrics.r2_score, } _CLASSIFICATION_METRICS = { - 'accuracy': metrics.accuracy_score, - 'f1': metrics.f1_score, - 'recall': metrics.recall_score, - 'precision': metrics.precision_score, + "accuracy": metrics.accuracy_score, + "f1": metrics.f1_score, + "recall": metrics.recall_score, + "precision": metrics.precision_score, } METRICS = _CLASSIFICATION_METRICS @@ -51,7 +52,8 @@ class Zephyr: hyperparameters (dict): Additional hyperparameters to set to the Pipeline. 
""" - DEFAULT_PIPELINE = 'xgb_classifier' + + DEFAULT_PIPELINE = "xgb_classifier" def _get_mlpipeline(self): pipeline = self._pipeline @@ -65,8 +67,11 @@ def _get_mlpipeline(self): return mlpipeline - def __init__(self, pipeline: Union[str, dict, MLPipeline] = None, - hyperparameters: dict = None): + def __init__( + self, + pipeline: Union[str, dict, MLPipeline] = None, + hyperparameters: dict = None, + ): self._pipeline = pipeline or self.DEFAULT_PIPELINE self._hyperparameters = hyperparameters self._mlpipeline = self._get_mlpipeline() @@ -74,25 +79,30 @@ def __init__(self, pipeline: Union[str, dict, MLPipeline] = None, def __eq__(self, other): return ( - isinstance(other, self.__class__) and - self._pipeline == other._pipeline and - self._hyperparameters == other._hyperparameters and - self._fitted == other._fitted + isinstance(other, self.__class__) + and self._pipeline == other._pipeline + and self._hyperparameters == other._hyperparameters + and self._fitted == other._fitted ) def _get_outputs_spec(self, default=True): outputs_spec = ["default"] if default else [] try: - visual_names = self._mlpipeline.get_output_names('visual') - outputs_spec.append('visual') + visual_names = self._mlpipeline.get_output_names("visual") + outputs_spec.append("visual") except ValueError: visual_names = [] return outputs_spec, visual_names - def fit(self, X: pd.DataFrame, y: Union[pd.Series, np.ndarray], - visual: bool = False, **kwargs): + def fit( + self, + X: pd.DataFrame, + y: Union[pd.Series, np.ndarray], + visual: bool = False, + **kwargs + ): """Fit the pipeline to the given data. Args: @@ -125,6 +135,7 @@ def predict(self, X: pd.DataFrame, visual: bool = False, **kwargs) -> pd.Series: Args: X (DataFrame): + Input data, passed as a ``pandas.DataFrame`` containing the feature matrix. 
visual (bool): @@ -138,18 +149,19 @@ def predict(self, X: pd.DataFrame, visual: bool = False, **kwargs) -> pd.Series: if visual: outputs_spec, visual_names = self._get_outputs_spec() else: - outputs_spec = 'default' + outputs_spec = "default" outputs = self._mlpipeline.predict(X, output_=outputs_spec, **kwargs) if visual and visual_names: prediction = outputs[0] - return prediction, dict(zip(visual_names, outputs[-len(visual_names):])) + return prediction, dict(zip(visual_names, outputs[-len(visual_names) :])) return outputs - def fit_predict(self, X: pd.DataFrame, y: Union[pd.Series, np.ndarray], - **kwargs) -> pd.Series: + def fit_predict( + self, X: pd.DataFrame, y: Union[pd.Series, np.ndarray], **kwargs + ) -> pd.Series: """Fit the pipeline to the data and then predict targets. This method is functionally equivalent to calling ``fit(X, y)`` @@ -173,14 +185,20 @@ def fit_predict(self, X: pd.DataFrame, y: Union[pd.Series, np.ndarray], if not self._fitted: self._mlpipeline = self._get_mlpipeline() - result = self._mlpipeline.fit(X, y, output_='default', **kwargs) + result = self._mlpipeline.fit(X, y, output_="default", **kwargs) self._fitted = True return result - def evaluate(self, X: pd.DataFrame, y: Union[pd.Series, np.ndarray], fit: bool = False, - train_X: pd.DataFrame = None, train_y: Union[pd.Series, np.ndarray] = None, - metrics: List[str] = METRICS) -> pd.Series: + def evaluate( + self, + X: pd.DataFrame, + y: Union[pd.Series, np.ndarray], + fit: bool = False, + train_X: pd.DataFrame = None, + train_y: Union[pd.Series, np.ndarray] = None, + metrics: List[str] = METRICS, + ) -> pd.Series: """Evaluate the performance of the pipeline. 
Args: @@ -223,14 +241,11 @@ def evaluate(self, X: pd.DataFrame, y: Union[pd.Series, np.ndarray], fit: bool = method = mlpipeline.predict else: # fit and predict at once - method = partial(mlpipeline.fit, y=y, output_='default') + method = partial(mlpipeline.fit, y=y, output_="default") result = method(X) - scores = { - metric: METRICS[metric](y, result) - for metric in metrics - } + scores = {metric: METRICS[metric](y, result) for metric in metrics} return pd.Series(scores) @@ -243,7 +258,7 @@ def save(self, path: str): this object will be stored. """ os.makedirs(os.path.dirname(path), exist_ok=True) - with open(path, 'wb') as pickle_file: + with open(path, "wb") as pickle_file: pickle.dump(self, pickle_file) @classmethod @@ -262,9 +277,9 @@ def load(cls, path: str): ValueError: If the serialized object is not a Zephyr instance. """ - with open(path, 'rb') as pickle_file: + with open(path, "rb") as pickle_file: zephyr = pickle.load(pickle_file) if not isinstance(zephyr, cls): - raise ValueError('Serialized object is not a Zephyr instance') + raise ValueError("Serialized object is not a Zephyr instance") return zephyr diff --git a/zephyr_ml/labeling/__init__.py b/zephyr_ml/labeling/__init__.py index 018f408..b58b5b3 100644 --- a/zephyr_ml/labeling/__init__.py +++ b/zephyr_ml/labeling/__init__.py @@ -1,14 +1,19 @@ from zephyr_ml.labeling import utils from zephyr_ml.labeling.data_labeler import DataLabeler from zephyr_ml.labeling.labeling_functions import ( - brake_pad_presence, converter_replacement_presence, gearbox_replace_presence, total_power_loss) + brake_pad_presence, + converter_replacement_presence, + gearbox_replace_presence, + total_power_loss, +) LABELING_FUNCTIONS = [ brake_pad_presence, converter_replacement_presence, gearbox_replace_presence, - total_power_loss + total_power_loss, ] + UTIL_FUNCTIONS = [ utils.aggregate_by_column, utils.categorical_presence, @@ -23,8 +28,16 @@ def get_labeling_functions(): functions = {} for function in 
LABELING_FUNCTIONS: name = function.__name__ - functions[name] = function.__doc__.split('\n')[0] + functions[name] = function.__doc__.split("\n")[0] + + return functions + +def get_labeling_functions_map(): + functions = {} + for function in LABELING_FUNCTIONS: + name = function.__name__ + functions[name] = function return functions @@ -32,7 +45,7 @@ def get_helper_functions(): functions = {} for function in UTIL_FUNCTIONS: name = function.__name__ - functions[name] = function.__doc__.split('\n')[0] + functions[name] = function.__doc__.split("\n")[0] return functions diff --git a/zephyr_ml/primitives/jsons/sklearn.meetrics.precision_score.json b/zephyr_ml/primitives/jsons/sklearn.meetrics.precision_score.json index f5dc1e5..9c2bc90 100644 --- a/zephyr_ml/primitives/jsons/sklearn.meetrics.precision_score.json +++ b/zephyr_ml/primitives/jsons/sklearn.meetrics.precision_score.json @@ -20,14 +20,16 @@ "name": "y_pred", "type": "ndarray" } + ], + "output": [ + { + "name": "score", + "type": "float or int" + } ] + }, - "output": [ - { - "name": "score", - "type": "float or int" - } - ], + "hyperparameters": { "fixed": { "labels": { diff --git a/zephyr_ml/primitives/jsons/sklearn.metrics.accuracy_score.json b/zephyr_ml/primitives/jsons/sklearn.metrics.accuracy_score.json index c5483b0..ab0a1da 100644 --- a/zephyr_ml/primitives/jsons/sklearn.metrics.accuracy_score.json +++ b/zephyr_ml/primitives/jsons/sklearn.metrics.accuracy_score.json @@ -20,14 +20,15 @@ "name": "y_pred", "type": "ndarray" } - ] - }, - "output": [ + ], + "output": [ { "name": "score", "type": "float or int" } - ], + ] + }, + "hyperparameters": { "fixed": { "normalize": { diff --git a/zephyr_ml/primitives/jsons/sklearn.metrics.f1_score.json b/zephyr_ml/primitives/jsons/sklearn.metrics.f1_score.json index 190b3f7..4097d21 100644 --- a/zephyr_ml/primitives/jsons/sklearn.metrics.f1_score.json +++ b/zephyr_ml/primitives/jsons/sklearn.metrics.f1_score.json @@ -20,14 +20,14 @@ "name": "y_pred", "type": 
"ndarray" } + ], + "output": [ + { + "name": "score", + "type": "float or int" + } ] }, - "output": [ - { - "name": "score", - "type": "float or int" - } - ], "hyperparameters": { "fixed": { "labels": { diff --git a/zephyr_ml/primitives/jsons/sklearn.metrics.recall_score.json b/zephyr_ml/primitives/jsons/sklearn.metrics.recall_score.json index c49453e..37d5cce 100644 --- a/zephyr_ml/primitives/jsons/sklearn.metrics.recall_score.json +++ b/zephyr_ml/primitives/jsons/sklearn.metrics.recall_score.json @@ -20,14 +20,15 @@ "name": "y_pred", "type": "ndarray" } - ] - }, - "output": [ + ], + "output": [ { "name": "score", "type": "float or int" } - ], + ] + }, + "hyperparameters": { "fixed": { "labels": { From 2fcecb21ff00843457335c9c06645d3a493909ff Mon Sep 17 00:00:00 2001 From: Raymond Pan Date: Sun, 16 Mar 2025 22:04:53 -0400 Subject: [PATCH 06/28] extra eval --- zephyr_ml/core.py | 7 +++- zephyr_ml/primitives/postprocessing.py | 46 ++++++++++++++++++++++++-- 2 files changed, 49 insertions(+), 4 deletions(-) diff --git a/zephyr_ml/core.py b/zephyr_ml/core.py index 1eb4ffe..a056bec 100644 --- a/zephyr_ml/core.py +++ b/zephyr_ml/core.py @@ -16,7 +16,12 @@ from mlblocks import MLPipeline, MLBlock, get_primitives_paths, add_primitives_path from itertools import chain -DEFAULT_METRICS = ["sklearn.metrics.accuracy_score"] +DEFAULT_METRICS = [ + "sklearn.metrics.accuracy_score", + "sklearn.metrics.precision_score", + "sklearn.metrics.f1_score", + "sklearn.metrics.recall_score", +] class Zephyr: diff --git a/zephyr_ml/primitives/postprocessing.py b/zephyr_ml/primitives/postprocessing.py index 3b7aa60..8c288d1 100644 --- a/zephyr_ml/primitives/postprocessing.py +++ b/zephyr_ml/primitives/postprocessing.py @@ -1,10 +1,14 @@ """ Postprocessing functions. 
""" + import logging import numpy as np import sklearn +from sklearn import metrics +import seaborn as sns +import matplotlib.pyplot as plt LOGGER = logging.getLogger(__name__) @@ -32,8 +36,8 @@ class FindThreshold: String representing which metric to use. """ - def __init__(self, metric='f1'): - self._metric = 'f1' + def __init__(self, metric="f1"): + self._metric = "f1" self._threshold = None def fit(self, y_true, y_pred): @@ -57,7 +61,7 @@ def fit(self, y_true, y_pred): scores.append(scorer(y_true, y)) threshold = RANGE[np.argmax(scores)] - LOGGER.info(f'best threshold found at {threshold}') + LOGGER.info(f"best threshold found at {threshold}") self._threshold = threshold self._scores = scores @@ -80,3 +84,39 @@ def apply_threshold(self, y_pred): binary = [1 if x else 0 for x in y_pred > self._threshold] return binary, self._threshold, self._scores + + +def confusion_matrix(y_true, y_pred): + conf_matrix = metrics.confusion_matrix(y_true, y_pred) + ax = sns.heatmap(conf_matrix, annot=True, cmap="Blues") + ax.set_title("Confusion Matrix\n") + ax.set_xlabel("\nPredicted Values") + ax.set_ylabel("Actual Values") + + ax.xaxis.set_ticklabels(["False", "True"]) + ax.yaxis.set_ticklabels(["False", "True"]) + + +def roc_auc_score(y_true, y_prob): + auc = metrics.roc_auc_score(y_true, y_prob) + return auc + + +def roc_auc_score_and_curve(y_true, y_prob): + fpr, tpr, _ = metrics.roc_curve(y_true, y_prob) + ns_probs = [0 for _ in range(len(y_true))] + ns_fpr, ns_tpr, _ = metrics.roc_curve(y_true, ns_probs) + + _, _ = plt.subplots(1, 1) + + auc = roc_auc_score(y_true, y_prob) + + plt.plot(fpr, tpr, "ro") + plt.plot(fpr, tpr) + plt.plot(ns_fpr, ns_tpr, linestyle="--", color="green") + + plt.ylabel("True Positive Rate") + plt.xlabel("False Positive Rate") + plt.title("AUC: %.3f" % auc) + + return auc From 4acf849cda257d06411151e8ff21f863ba042183 Mon Sep 17 00:00:00 2001 From: Raymond Pan Date: Sun, 16 Mar 2025 23:16:05 -0400 Subject: [PATCH 07/28] rem roc_auc_score --- 
zephyr_ml/primitives/postprocessing.py | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/zephyr_ml/primitives/postprocessing.py b/zephyr_ml/primitives/postprocessing.py index 8c288d1..cc6ce49 100644 --- a/zephyr_ml/primitives/postprocessing.py +++ b/zephyr_ml/primitives/postprocessing.py @@ -97,11 +97,6 @@ def confusion_matrix(y_true, y_pred): ax.yaxis.set_ticklabels(["False", "True"]) -def roc_auc_score(y_true, y_prob): - auc = metrics.roc_auc_score(y_true, y_prob) - return auc - - def roc_auc_score_and_curve(y_true, y_prob): fpr, tpr, _ = metrics.roc_curve(y_true, y_prob) ns_probs = [0 for _ in range(len(y_true))] @@ -109,7 +104,7 @@ def roc_auc_score_and_curve(y_true, y_prob): _, _ = plt.subplots(1, 1) - auc = roc_auc_score(y_true, y_prob) + auc = metrics.roc_auc_score(y_true, y_prob) plt.plot(fpr, tpr, "ro") plt.plot(fpr, tpr) From 6fbf177a81cb3fd6c42637d73b8bd4dade73f015 Mon Sep 17 00:00:00 2001 From: Raymond Pan Date: Mon, 17 Mar 2025 12:43:52 -0400 Subject: [PATCH 08/28] Add json annot for new eval --- ...tives.postprocessing.confusion_matrix.json | 49 +++++++++++++++++++ ...ostprocessing.roc_auc_score_and_curve.json | 48 ++++++++++++++++++ zephyr_ml/primitives/postprocessing.py | 7 ++- 3 files changed, 102 insertions(+), 2 deletions(-) create mode 100644 zephyr_ml/primitives/jsons/zephyr_ml.primitives.postprocessing.confusion_matrix.json create mode 100644 zephyr_ml/primitives/jsons/zephyr_ml.primitives.postprocessing.roc_auc_score_and_curve.json diff --git a/zephyr_ml/primitives/jsons/zephyr_ml.primitives.postprocessing.confusion_matrix.json b/zephyr_ml/primitives/jsons/zephyr_ml.primitives.postprocessing.confusion_matrix.json new file mode 100644 index 0000000..f0b3132 --- /dev/null +++ b/zephyr_ml/primitives/jsons/zephyr_ml.primitives.postprocessing.confusion_matrix.json @@ -0,0 +1,49 @@ +{ + "name": "zephyr_ml.primitives.postprocessing.confusion_matrix", + "contributors": [ + "Raymond Pan " + ], + "description": "Plot confusion 
matrix.", + "classifiers": { + "type": "helper" + }, + "modalities": [], + "primitive": "zephyr_ml.primitives.postprocessing.confusion_matrix", + "produce": { + "args": [ + { + "name": "y_true", + "type": "ndarray" + }, + { + "name": "y_pred", + "type": "ndarray" + } + ], + "output": [ + { + "name": "confusion_matrix", + "type": "ndarray" + } + ] + + }, + + "hyperparameters": { + "fixed": { + "labels": { + "type": "ndarray", + "default": null + }, + "sample_weight": { + "type": "ndarray", + "default": null + }, + "normalize": { + "type": "str", + "default": null + } + } + } + +} \ No newline at end of file diff --git a/zephyr_ml/primitives/jsons/zephyr_ml.primitives.postprocessing.roc_auc_score_and_curve.json b/zephyr_ml/primitives/jsons/zephyr_ml.primitives.postprocessing.roc_auc_score_and_curve.json new file mode 100644 index 0000000..4a5028a --- /dev/null +++ b/zephyr_ml/primitives/jsons/zephyr_ml.primitives.postprocessing.roc_auc_score_and_curve.json @@ -0,0 +1,48 @@ +{ + "name": "zephyr_ml.primitives.postprocessing.roc_auc_score_and_curve", + "contributors": [ + "Raymond Pan " + ], + "description": "Calculate ROC AUC score and plot curve.", + "classifiers": { + "type": "helper" + }, + "modalities": [], + "primitive": "zephyr_ml.primitives.postprocessing.roc_auc_score_and_curve", + "produce": { + "args": [ + { + "name": "y_true", + "type": "ndarray" + }, + { + "name": "y_pred", + "type": "ndarray" + } + ], + "output": [ + { + "name": "score", + "type": "float" + } + ] + }, + + "hyperparameters": { + "fixed": { + "pos_label": { + "type": "int, float, bool or str", + "default": null + }, + "sample_weight": { + "type": "ndarray", + "default": null + }, + "drop_intermediate": { + "type": "bool", + "default": true + } + } + } + +} \ No newline at end of file diff --git a/zephyr_ml/primitives/postprocessing.py b/zephyr_ml/primitives/postprocessing.py index cc6ce49..cc8b4e0 100644 --- a/zephyr_ml/primitives/postprocessing.py +++ 
b/zephyr_ml/primitives/postprocessing.py @@ -86,8 +86,10 @@ def apply_threshold(self, y_pred): return binary, self._threshold, self._scores -def confusion_matrix(y_true, y_pred): - conf_matrix = metrics.confusion_matrix(y_true, y_pred) +def confusion_matrix(y_true, y_pred, labels=None, sample_weight=None, normalize=None): + conf_matrix = metrics.confusion_matrix( + y_true, y_pred, labels=labels, sample_weight=sample_weight, normalize=normalize + ) ax = sns.heatmap(conf_matrix, annot=True, cmap="Blues") ax.set_title("Confusion Matrix\n") ax.set_xlabel("\nPredicted Values") @@ -95,6 +97,7 @@ def confusion_matrix(y_true, y_pred): ax.xaxis.set_ticklabels(["False", "True"]) ax.yaxis.set_ticklabels(["False", "True"]) + return conf_matrix def roc_auc_score_and_curve(y_true, y_prob): From 6ad3e5ec0538bceecdf5e4b2866d0378b57a6939 Mon Sep 17 00:00:00 2001 From: Raymond Pan Date: Mon, 17 Mar 2025 14:00:57 -0400 Subject: [PATCH 09/28] bug fix e2e w/ new eval --- zephyr_ml/core.py | 8 ++++++- ...n => sklearn.metrics.precision_score.json} | 0 ...ostprocessing.roc_auc_score_and_curve.json | 2 +- zephyr_ml/primitives/postprocessing.py | 22 +++++++++++++++---- 4 files changed, 26 insertions(+), 6 deletions(-) rename zephyr_ml/primitives/jsons/{sklearn.meetrics.precision_score.json => sklearn.metrics.precision_score.json} (100%) diff --git a/zephyr_ml/core.py b/zephyr_ml/core.py index a056bec..e6d9d8a 100644 --- a/zephyr_ml/core.py +++ b/zephyr_ml/core.py @@ -21,6 +21,8 @@ "sklearn.metrics.precision_score", "sklearn.metrics.f1_score", "sklearn.metrics.recall_score", + "zephyr_ml.primitives.postprocessing.confusion_matrix", + "zephyr_ml.primitives.postprocessing.roc_auc_score_and_curve", ] @@ -245,8 +247,12 @@ def evaluate(self, X=None, y=None, metrics=None): y = self.y_test context_0 = self.pipeline.predict(X, output_=0) - y_proba = context_0["y_pred"] + y_proba = context_0["y_pred"][::, 1] + print("y_proba", y_proba.shape) y_pred = self.pipeline.predict(start_=1, **context_0) + 
print("y_pred", y_pred) + print("X", X) + print("y_true", y.shape) if metrics is None: metrics = DEFAULT_METRICS diff --git a/zephyr_ml/primitives/jsons/sklearn.meetrics.precision_score.json b/zephyr_ml/primitives/jsons/sklearn.metrics.precision_score.json similarity index 100% rename from zephyr_ml/primitives/jsons/sklearn.meetrics.precision_score.json rename to zephyr_ml/primitives/jsons/sklearn.metrics.precision_score.json diff --git a/zephyr_ml/primitives/jsons/zephyr_ml.primitives.postprocessing.roc_auc_score_and_curve.json b/zephyr_ml/primitives/jsons/zephyr_ml.primitives.postprocessing.roc_auc_score_and_curve.json index 4a5028a..bcc32c7 100644 --- a/zephyr_ml/primitives/jsons/zephyr_ml.primitives.postprocessing.roc_auc_score_and_curve.json +++ b/zephyr_ml/primitives/jsons/zephyr_ml.primitives.postprocessing.roc_auc_score_and_curve.json @@ -16,7 +16,7 @@ "type": "ndarray" }, { - "name": "y_pred", + "name": "y_proba", "type": "ndarray" } ], diff --git a/zephyr_ml/primitives/postprocessing.py b/zephyr_ml/primitives/postprocessing.py index cc8b4e0..2f5ff8d 100644 --- a/zephyr_ml/primitives/postprocessing.py +++ b/zephyr_ml/primitives/postprocessing.py @@ -100,14 +100,28 @@ def confusion_matrix(y_true, y_pred, labels=None, sample_weight=None, normalize= return conf_matrix -def roc_auc_score_and_curve(y_true, y_prob): - fpr, tpr, _ = metrics.roc_curve(y_true, y_prob) +def roc_auc_score_and_curve( + y_true, y_proba, pos_label=None, sample_weight=None, drop_intermediate=True +): + fpr, tpr, _ = metrics.roc_curve( + y_true, + y_proba, + pos_label=pos_label, + sample_weight=sample_weight, + drop_intermediate=drop_intermediate, + ) ns_probs = [0 for _ in range(len(y_true))] - ns_fpr, ns_tpr, _ = metrics.roc_curve(y_true, ns_probs) + ns_fpr, ns_tpr, _ = metrics.roc_curve( + y_true, + ns_probs, + pos_label=pos_label, + sample_weight=sample_weight, + drop_intermediate=drop_intermediate, + ) _, _ = plt.subplots(1, 1) - auc = metrics.roc_auc_score(y_true, y_prob) + auc = 
metrics.roc_auc_score(y_true, y_proba) plt.plot(fpr, tpr, "ro") plt.plot(fpr, tpr) From f7468fde1963f4f4cd0ca1aacf503a43c66ff38a Mon Sep 17 00:00:00 2001 From: Raymond Pan Date: Mon, 17 Mar 2025 14:01:15 -0400 Subject: [PATCH 10/28] clenaup --- zephyr_ml/core.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/zephyr_ml/core.py b/zephyr_ml/core.py index e6d9d8a..9c9ea6d 100644 --- a/zephyr_ml/core.py +++ b/zephyr_ml/core.py @@ -248,11 +248,8 @@ def evaluate(self, X=None, y=None, metrics=None): context_0 = self.pipeline.predict(X, output_=0) y_proba = context_0["y_pred"][::, 1] - print("y_proba", y_proba.shape) y_pred = self.pipeline.predict(start_=1, **context_0) - print("y_pred", y_pred) - print("X", X) - print("y_true", y.shape) + if metrics is None: metrics = DEFAULT_METRICS From 431d33512e68eef1da1dd4b804c30510e4da2358 Mon Sep 17 00:00:00 2001 From: Raymond Pan Date: Mon, 17 Mar 2025 22:56:50 -0400 Subject: [PATCH 11/28] store res in instance --- zephyr_ml/core.py | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/zephyr_ml/core.py b/zephyr_ml/core.py index 9c9ea6d..33b0244 100644 --- a/zephyr_ml/core.py +++ b/zephyr_ml/core.py @@ -15,6 +15,7 @@ import json from mlblocks import MLPipeline, MLBlock, get_primitives_paths, add_primitives_path from itertools import chain +import logging DEFAULT_METRICS = [ "sklearn.metrics.accuracy_score", @@ -25,7 +26,7 @@ "zephyr_ml.primitives.postprocessing.roc_auc_score_and_curve", ] - +LOGGER = logging.getLogger(__name__) class Zephyr: def __init__(self): @@ -253,11 +254,17 @@ def evaluate(self, X=None, y=None, metrics=None): if metrics is None: metrics = DEFAULT_METRICS + results = {} for metric in metrics: - metric_primitive = self._get_ml_primitive(metric) - print(metric_primitive) - res = metric_primitive.produce(y_pred=y_pred, y_proba=y_proba, y_true=y) - print(metric_primitive.name, res) + try: + + metric_primitive = self._get_ml_primitive(metric) + res = 
metric_primitive.produce(y_pred=y_pred, y_proba=y_proba, y_true=y) + results[metric_primitive.name] = res + except Exception as e: + LOGGER.error(f"Unable to run evaluation metric: {metric_primitive.name}", exc_info = e) + return results + def _validate_step(self, **kwargs): for key, value in kwargs: From ed4a0c9ccf0f00c92720e49adb43d7e1a0134f4b Mon Sep 17 00:00:00 2001 From: Raymond Pan Date: Mon, 17 Mar 2025 22:59:29 -0400 Subject: [PATCH 12/28] redo --- zephyr_ml/core.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/zephyr_ml/core.py b/zephyr_ml/core.py index 33b0244..60f3957 100644 --- a/zephyr_ml/core.py +++ b/zephyr_ml/core.py @@ -40,6 +40,7 @@ def __init__(self): self.X_test = None self.y_train = None self.y_test = None + self.results = None def get_entityset_types(self): """ @@ -263,6 +264,7 @@ def evaluate(self, X=None, y=None, metrics=None): results[metric_primitive.name] = res except Exception as e: LOGGER.error(f"Unable to run evaluation metric: {metric_primitive.name}", exc_info = e) + self.results = results return results From a6283a0225db7808b8cc5d10c1103b43a6900772 Mon Sep 17 00:00:00 2001 From: Raymond Pan Date: Wed, 19 Mar 2025 12:10:22 -0400 Subject: [PATCH 13/28] clean improts --- tests/test_core.py | 8 + zephyr_ml/__init__.py | 4 +- zephyr_ml/core.py | 484 +------------------------ zephyr_ml/entityset.py | 165 +++++---- zephyr_ml/metadata.py | 3 +- zephyr_ml/primitives/postprocessing.py | 2 + 6 files changed, 106 insertions(+), 560 deletions(-) diff --git a/tests/test_core.py b/tests/test_core.py index cc747c7..7df538d 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -6,6 +6,7 @@ import pytest from zephyr_ml.core import Zephyr +import logging class TestZephyr: @@ -31,6 +32,13 @@ def setup_class(cls): }) cls.random_y = [1 if x > 0.5 else 0 for x in np.random.random(100)] + + + + + + + def setup_method(self): self.zephyr = Zephyr('xgb_classifier') diff --git a/zephyr_ml/__init__.py b/zephyr_ml/__init__.py index 7da0003..ffb9310 
100644 --- a/zephyr_ml/__init__.py +++ b/zephyr_ml/__init__.py @@ -10,8 +10,8 @@ from zephyr_ml.core import Zephyr from zephyr_ml.entityset import ( - create_pidata_entityset, - create_scada_entityset, + # create_pidata_entityset, + # create_scada_entityset, _create_entityset, VALIDATE_DATA_FUNCTIONS, ) diff --git a/zephyr_ml/core.py b/zephyr_ml/core.py index 60f3957..fc76e9d 100644 --- a/zephyr_ml/core.py +++ b/zephyr_ml/core.py @@ -1,8 +1,9 @@ -# from zephyr_ml.entityset import _create_entityset, VALIDATE_DATA_FUNCTIONS -# from zephyr_ml.labeling import get_labeling_functions, LABELING_FUNCTIONS +from zephyr_ml.entityset import _create_entityset, VALIDATE_DATA_FUNCTIONS +from zephyr_ml.labeling import get_labeling_functions, get_labeling_functions_map, LABELING_FUNCTIONS # from zephyr_ml.entityset import _create_entityset, VALIDATE_DATA_FUNCTIONS from zephyr_ml.labeling import ( get_labeling_functions, + get_labeling_functions_map, LABELING_FUNCTIONS, ) import composeml as cp @@ -16,6 +17,7 @@ from mlblocks import MLPipeline, MLBlock, get_primitives_paths, add_primitives_path from itertools import chain import logging +import matplotlib.pyplot as plt DEFAULT_METRICS = [ "sklearn.metrics.accuracy_score", @@ -242,7 +244,7 @@ def predict(self, X=None, visual=False, **kwargs): return outputs - def evaluate(self, X=None, y=None, metrics=None): + def evaluate(self, X=None, y=None, metrics=None, show_plots = True): if X is None: X = self.X_test if y is None: @@ -258,9 +260,10 @@ def evaluate(self, X=None, y=None, metrics=None): results = {} for metric in metrics: try: - metric_primitive = self._get_ml_primitive(metric) res = metric_primitive.produce(y_pred=y_pred, y_proba=y_proba, y_true=y) + if show_plots: + plt.show() results[metric_primitive.name] = res except Exception as e: LOGGER.error(f"Unable to run evaluation metric: {metric_primitive.name}", exc_info = e) @@ -318,479 +321,6 @@ def _get_outputs_spec(self, default=True): return outputs_spec, visual_names -def 
get_labeling_functions_map(): - functions = {} - for function in LABELING_FUNCTIONS: - name = function.__name__ - functions[name] = function - return functions - - -import copy - - -# Default EntitySet keyword arguments for entities -DEFAULT_ES_KWARGS = { - "alarms": { - "index": "_index", - "make_index": True, - "time_index": "DAT_START", - "secondary_time_index": {"DAT_END": ["IND_DURATION"]}, - "logical_types": { - "COD_ELEMENT": "categorical", # turbine id - "DAT_START": "datetime", # start - "DAT_END": "datetime", # end - "IND_DURATION": "double", # duration - "COD_ALARM": "categorical", # alarm code - "COD_ALARM_INT": "categorical", # international alarm code - "DES_NAME": "categorical", # alarm name - "DES_TITLE": "categorical", # alarm description - "COD_STATUS": "categorical", # status code - }, - }, - "stoppages": { - "index": "_index", - "make_index": True, - "time_index": "DAT_START", - "secondary_time_index": {"DAT_END": ["IND_DURATION", "IND_LOST_GEN"]}, - "logical_types": { - "COD_ELEMENT": "categorical", # turbine id - "DAT_START": "datetime", # start - "DAT_END": "datetime", # end - "DES_WO_NAME": "natural_language", # work order name - "DES_COMMENTS": "natural_language", # work order comments - "COD_WO": "integer_nullable", # stoppage code - "IND_DURATION": "double", # duration - "IND_LOST_GEN": "double", # generation loss - "COD_ALARM": "categorical", # alarm code - "COD_CAUSE": "categorical", # stoppage cause - "COD_INCIDENCE": "categorical", # incidence code - "COD_ORIGIN": "categorical", # origin code - "DESC_CLASS": "categorical", # ???? 
- "COD_STATUS": "categorical", # status code - "COD_CODE": "categorical", # stoppage code - "DES_DESCRIPTION": "natural_language", # stoppage description - "DES_TECH_NAME": "categorical", # turbine technology - }, - }, - "notifications": { - "index": "_index", - "make_index": True, - "time_index": "DAT_POSTING", - "secondary_time_index": {"DAT_MALF_END": ["IND_BREAKDOWN_DUR"]}, - "logical_types": { - "COD_ELEMENT": "categorical", # turbine id - "COD_ORDER": "categorical", - "IND_QUANTITY": "double", - "COD_MATERIAL_SAP": "categorical", - "DAT_POSTING": "datetime", - "COD_MAT_DOC": "categorical", - "DES_MEDIUM": "categorical", - "COD_NOTIF": "categorical", - "DAT_MALF_START": "datetime", - "DAT_MALF_END": "datetime", - "IND_BREAKDOWN_DUR": "double", - "FUNCT_LOC_DES": "categorical", - "COD_ALARM": "categorical", - "DES_ALARM": "categorical", - }, - }, - "work_orders": { - "index": "COD_ORDER", - "time_index": "DAT_BASIC_START", - "secondary_time_index": {"DAT_VALID_END": []}, - "logical_types": { - "COD_ELEMENT": "categorical", - "COD_ORDER": "categorical", - "DAT_BASIC_START": "datetime", - "DAT_BASIC_END": "datetime", - "COD_EQUIPMENT": "categorical", - "COD_MAINT_PLANT": "categorical", - "COD_MAINT_ACT_TYPE": "categorical", - "COD_CREATED_BY": "categorical", - "COD_ORDER_TYPE": "categorical", - "DAT_REFERENCE": "datetime", - "DAT_CREATED_ON": "datetime", - "DAT_VALID_END": "datetime", - "DAT_VALID_START": "datetime", - "COD_SYSTEM_STAT": "categorical", - "DES_LONG": "natural_language", - "COD_FUNCT_LOC": "categorical", - "COD_NOTIF_OBJ": "categorical", - "COD_MAINT_ITEM": "categorical", - "DES_MEDIUM": "natural_language", - "DES_FUNCT_LOC": "categorical", - }, - }, - "turbines": { - "index": "COD_ELEMENT", - "logical_types": { - "COD_ELEMENT": "categorical", - "TURBINE_PI_ID": "categorical", - "TURBINE_LOCAL_ID": "categorical", - "TURBINE_SAP_COD": "categorical", - "DES_CORE_ELEMENT": "categorical", - "SITE": "categorical", - "DES_CORE_PLANT": "categorical", - 
"COD_PLANT_SAP": "categorical", - "PI_COLLECTOR_SITE_NAME": "categorical", - "PI_LOCAL_SITE_NAME": "categorical", - }, - }, -} - -DEFAULT_ES_TYPE_KWARGS = { - "pidata": { - "index": "_index", - "make_index": True, - "time_index": "time", - "logical_types": {"time": "datetime", "COD_ELEMENT": "categorical"}, - }, - "scada": { - "index": "_index", - "make_index": True, - "time_index": "TIMESTAMP", - "logical_types": {"TIMESTAMP": "datetime", "COD_ELEMENT": "categorical"}, - }, - "vibrations": { - "index": "_index", - "make_index": True, - "time_index": "timestamp", - "logical_types": { - "COD_ELEMENT": "categorical", - "turbine_id": "categorical", - "signal_id": "categorical", - "timestamp": "datetime", - "sensorName": "categorical", - "sensorType": "categorical", - "sensorSerial": "integer_nullable", - "siteName": "categorical", - "turbineName": "categorical", - "turbineSerial": "integer_nullable", - "configurationName": "natural_language", - "softwareVersion": "categorical", - "rpm": "double", - "rpmStatus": "natural_language", - "duration": "natural_language", - "condition": "categorical", - "maskTime": "datetime", - "Mask Status": "natural_language", - "System Serial": "categorical", - "WPS-ActivePower-Average": "double", - "WPS-ActivePower-Minimum": "double", - "WPS-ActivePower-Maximum": "double", - "WPS-ActivePower-Deviation": "double", - "WPS-ActivePower-StartTime": "datetime", - "WPS-ActivePower-StopTime": "datetime", - "WPS-ActivePower-Counts": "natural_language", - "Measured RPM": "double", - "WPS-ActivePower": "double", - "WPS-Gearoiltemperature": "double", - "WPS-GeneratorRPM": "double", - "WPS-PitchReference": "double", - "WPS-RotorRPM": "double", - "WPS-Windspeed": "double", - "WPS-YawAngle": "double", - "overload warning": "categorical", - "bias warning": "categorical", - "bias voltage": "double", - "xValueOffset": "double", - "xValueDelta": "double", - "xValueUnit": "categorical", - "yValueUnit": "categorical", - "TotalCount-RPM0": "double", - 
"TotalCount-RPM1": "double", - "TotalCount-RPM2": "double", - "TotalCount-RPM3": "double", - }, - }, -} - - -def get_mapped_kwargs(es_type, new_kwargs=None): - if es_type not in DEFAULT_ES_TYPE_KWARGS.keys(): - raise ValueError("Unrecognized es_type argument: {}".format(es_type)) - mapped_kwargs = DEFAULT_ES_KWARGS.copy() - mapped_kwargs.update({es_type: DEFAULT_ES_TYPE_KWARGS[es_type]}) - - if new_kwargs is not None: - if not isinstance(new_kwargs, dict): - raise ValueError( - "new_kwargs must be dictionary mapping entity name to dictionary " - "with updated keyword arguments for EntitySet creation." - ) - for entity in new_kwargs: - if entity not in mapped_kwargs: - raise ValueError( - 'Unrecognized entity "{}" found in new keyword argument ' - "mapping.".format(entity) - ) - - mapped_kwargs[entity].update(new_kwargs[entity]) - - return mapped_kwargs - - -def get_default_es_type_kwargs(): - return copy.deepcopy(DEFAULT_ES_TYPE_KWARGS) - - -def get_es_types(): - return DEFAULT_ES_TYPE_KWARGS.keys() - - -def create_pidata_entityset(dfs, new_kwargs_mapping=None): - """Generate an entityset for PI data datasets - - Args: - data_paths (dict): Dictionary mapping entity names ('alarms', 'notifications', - 'stoppages', 'work_orders', 'pidata', 'turbines') to the pandas dataframe for - that entity. - **kwargs: Updated keyword arguments to be used during entityset creation - """ - entity_kwargs = get_mapped_kwargs("pidata", new_kwargs_mapping) - _validate_data(dfs, "pidata", entity_kwargs) - - es = _create_entityset(dfs, "pidata", entity_kwargs) - es.id = "PI data" - - return es - - -def create_scada_entityset(dfs, new_kwargs_mapping=None): - """Generate an entityset for SCADA data datasets - - Args: - data_paths (dict): Dictionary mapping entity names ('alarms', 'notifications', - 'stoppages', 'work_orders', 'scada', 'turbines') to the pandas dataframe for - that entity. 
- """ - entity_kwargs = get_mapped_kwargs("scada", new_kwargs_mapping) - _validate_data(dfs, "scada", entity_kwargs) - - es = _create_entityset(dfs, "scada", entity_kwargs) - es.id = "SCADA data" - - return es - - -def create_vibrations_entityset(dfs, new_kwargs_mapping=None): - """Generate an entityset for Vibrations data datasets - - Args: - data_paths (dict): Dictionary mapping entity names ('alarms', 'notifications', - 'stoppages', 'work_orders', 'vibrations', 'turbines') to the pandas - dataframe for that entity. Optionally 'pidata' and 'scada' can be included. - """ - entities = ["vibrations"] - - pidata_kwargs, scada_kwargs = {}, {} - if "pidata" in dfs: - pidata_kwargs = get_mapped_kwargs("pidata", new_kwargs_mapping) - entities.append("pidata") - if "scada" in dfs: - scada_kwargs = get_mapped_kwargs("scada", new_kwargs_mapping) - entities.append("scada") - - entity_kwargs = { - **pidata_kwargs, - **scada_kwargs, - **get_mapped_kwargs("vibrations", new_kwargs_mapping), - } - _validate_data(dfs, entities, entity_kwargs) - - es = _create_entityset(dfs, "vibrations", entity_kwargs) - es.id = "Vibrations data" - - return es - - -def _validate_data(dfs, es_type, es_kwargs): - """Validate data by checking for required columns in each entity""" - if not isinstance(es_type, list): - es_type = [es_type] - - entities = set( - chain( - [ - "alarms", - "stoppages", - "work_orders", - "notifications", - "turbines", - *es_type, - ] - ) - ) - - if set(dfs.keys()) != entities: - missing = entities.difference(set(dfs.keys())) - extra = set(dfs.keys()).difference(entities) - msg = [] - if missing: - msg.append("Missing dataframes for entities {}.".format(", ".join(missing))) - if extra: - msg.append( - "Unrecognized entities {} included in dfs.".format(", ".join(extra)) - ) - - raise ValueError(" ".join(msg)) - - turbines_index = es_kwargs["turbines"]["index"] - work_orders_index = es_kwargs["work_orders"]["index"] - - if work_orders_index not in dfs["work_orders"].columns: 
- raise ValueError( - 'Expected index column "{}" missing from work_orders entity'.format( - work_orders_index - ) - ) - - if work_orders_index not in dfs["notifications"].columns: - raise ValueError( - 'Expected column "{}" missing from notifications entity'.format( - work_orders_index - ) - ) - - if not dfs["work_orders"][work_orders_index].is_unique: - raise ValueError( - 'Expected index column "{}" of work_orders entity is not ' - "unique".format(work_orders_index) - ) - - if turbines_index not in dfs["turbines"].columns: - raise ValueError( - 'Expected index column "{}" missing from turbines entity'.format( - turbines_index - ) - ) - - if not dfs["turbines"][turbines_index].is_unique: - raise ValueError( - 'Expected index column "{}" of turbines entity is not unique.'.format( - turbines_index - ) - ) - - for entity, df in dfs.items(): - if turbines_index not in df.columns: - raise ValueError( - 'Turbines index column "{}" missing from data for {} entity'.format( - turbines_index, entity - ) - ) - - time_index = es_kwargs[entity].get("time_index", False) - if time_index and time_index not in df.columns: - raise ValueError( - 'Missing time index column "{}" from {} entity'.format( - time_index, entity - ) - ) - - secondary_time_indices = es_kwargs[entity].get("secondary_time_index", {}) - for time_index, cols in secondary_time_indices.items(): - if time_index not in df.columns: - raise ValueError( - 'Secondary time index "{}" missing from {} entity'.format( - time_index, entity - ) - ) - for col in cols: - if col not in df.columns: - raise ValueError( - ( - 'Column "{}" associated with secondary time index "{}" ' - "missing from {} entity" - ).format(col, time_index, entity) - ) - - -def validate_scada_data(dfs, new_kwargs_mapping=None): - entity_kwargs = get_mapped_kwargs("scada", new_kwargs_mapping) - _validate_data(dfs, "scada", entity_kwargs) - return entity_kwargs - - -def validate_pidata_data(dfs, new_kwargs_mapping=None): - entity_kwargs = 
get_mapped_kwargs("pidata", new_kwargs_mapping) - _validate_data(dfs, "pidata", entity_kwargs) - return entity_kwargs - - -def validate_vibrations_data(dfs, new_kwargs_mapping=None): - entities = ["vibrations"] - - pidata_kwargs, scada_kwargs = {}, {} - if "pidata" in dfs: - pidata_kwargs = get_mapped_kwargs("pidata", new_kwargs_mapping) - entities.append("pidata") - if "scada" in dfs: - scada_kwargs = get_mapped_kwargs("scada", new_kwargs_mapping) - entities.append("scada") - - entity_kwargs = { - **pidata_kwargs, - **scada_kwargs, - **get_mapped_kwargs("vibrations", new_kwargs_mapping), - } - _validate_data(dfs, entities, entity_kwargs) - return entity_kwargs - - -VALIDATE_DATA_FUNCTIONS = { - "scada": validate_scada_data, - "pidata": validate_pidata_data, - "vibrations": validate_vibrations_data, -} - - -def _create_entityset(entities, es_type, new_kwargs_mapping=None): - validate_func = VALIDATE_DATA_FUNCTIONS[es_type] - es_kwargs = validate_func(entities, new_kwargs_mapping) - - # filter out stated logical types for missing columns - for entity, df in entities.items(): - es_kwargs[entity]["logical_types"] = { - col: t - for col, t in es_kwargs[entity]["logical_types"].items() - if col in df.columns - } - - turbines_index = es_kwargs["turbines"]["index"] - work_orders_index = es_kwargs["work_orders"]["index"] - - relationships = [ - ("turbines", turbines_index, "alarms", turbines_index), - ("turbines", turbines_index, "stoppages", turbines_index), - ("turbines", turbines_index, "work_orders", turbines_index), - ("turbines", turbines_index, es_type, turbines_index), - ("work_orders", work_orders_index, "notifications", work_orders_index), - ] - - es = ft.EntitySet() - es.id = es_type - - for name, df in entities.items(): - es.add_dataframe(dataframe_name=name, dataframe=df, **es_kwargs[name]) - - for relationship in relationships: - parent_df, parent_column, child_df, child_column = relationship - es.add_relationship(parent_df, parent_column, child_df, 
child_column) - - return es - - -CREATE_ENTITYSET_FUNCTIONS = { - "scada": create_scada_entityset, - "pidata": create_pidata_entityset, - "vibrations": create_vibrations_entityset, -} - - -def get_create_entityset_functions(): - return CREATE_ENTITYSET_FUNCTIONS.copy() if __name__ == "__main__": diff --git a/zephyr_ml/entityset.py b/zephyr_ml/entityset.py index fa995c7..6e7d0a2 100644 --- a/zephyr_ml/entityset.py +++ b/zephyr_ml/entityset.py @@ -5,103 +5,103 @@ from zephyr_ml.metadata import get_mapped_kwargs, get_es_types -def _create_entityset(entities, es_type, es_kwargs): +# def _create_entityset(entities, es_type, es_kwargs): - # filter out stated logical types for missing columns - for entity, df in entities.items(): - es_kwargs[entity]["logical_types"] = { - col: t - for col, t in es_kwargs[entity]["logical_types"].items() - if col in df.columns - } +# # filter out stated logical types for missing columns +# for entity, df in entities.items(): +# es_kwargs[entity]["logical_types"] = { +# col: t +# for col, t in es_kwargs[entity]["logical_types"].items() +# if col in df.columns +# } - turbines_index = es_kwargs["turbines"]["index"] - work_orders_index = es_kwargs["work_orders"]["index"] +# turbines_index = es_kwargs["turbines"]["index"] +# work_orders_index = es_kwargs["work_orders"]["index"] - relationships = [ - ("turbines", turbines_index, "alarms", turbines_index), - ("turbines", turbines_index, "stoppages", turbines_index), - ("turbines", turbines_index, "work_orders", turbines_index), - ("turbines", turbines_index, es_type, turbines_index), - ("work_orders", work_orders_index, "notifications", work_orders_index), - ] +# relationships = [ +# ("turbines", turbines_index, "alarms", turbines_index), +# ("turbines", turbines_index, "stoppages", turbines_index), +# ("turbines", turbines_index, "work_orders", turbines_index), +# ("turbines", turbines_index, es_type, turbines_index), +# ("work_orders", work_orders_index, "notifications", work_orders_index), +# 
] - es = ft.EntitySet() +# es = ft.EntitySet() - for name, df in entities.items(): - es.add_dataframe(dataframe_name=name, dataframe=df, **es_kwargs[name]) +# for name, df in entities.items(): +# es.add_dataframe(dataframe_name=name, dataframe=df, **es_kwargs[name]) - for relationship in relationships: - parent_df, parent_column, child_df, child_column = relationship - es.add_relationship(parent_df, parent_column, child_df, child_column) +# for relationship in relationships: +# parent_df, parent_column, child_df, child_column = relationship +# es.add_relationship(parent_df, parent_column, child_df, child_column) - return es +# return es -def create_pidata_entityset(dfs, new_kwargs_mapping=None): - """Generate an entityset for PI data datasets +# def create_pidata_entityset(dfs, new_kwargs_mapping=None): +# """Generate an entityset for PI data datasets - Args: - data_paths (dict): Dictionary mapping entity names ('alarms', 'notifications', - 'stoppages', 'work_orders', 'pidata', 'turbines') to the pandas dataframe for - that entity. - **kwargs: Updated keyword arguments to be used during entityset creation - """ - entity_kwargs = get_mapped_kwargs("pidata", new_kwargs_mapping) - _validate_data(dfs, "pidata", entity_kwargs) +# Args: +# data_paths (dict): Dictionary mapping entity names ('alarms', 'notifications', +# 'stoppages', 'work_orders', 'pidata', 'turbines') to the pandas dataframe for +# that entity. 
+# **kwargs: Updated keyword arguments to be used during entityset creation +# """ +# entity_kwargs = get_mapped_kwargs("pidata", new_kwargs_mapping) +# _validate_data(dfs, "pidata", entity_kwargs) - es = _create_entityset(dfs, "pidata", entity_kwargs) - es.id = "PI data" +# es = _create_entityset(dfs, "pidata", entity_kwargs) +# es.id = "PI data" - return es +# return es -def create_scada_entityset(dfs, new_kwargs_mapping=None): - """Generate an entityset for SCADA data datasets +# def create_scada_entityset(dfs, new_kwargs_mapping=None): +# """Generate an entityset for SCADA data datasets - Args: - data_paths (dict): Dictionary mapping entity names ('alarms', 'notifications', - 'stoppages', 'work_orders', 'scada', 'turbines') to the pandas dataframe for - that entity. - """ - entity_kwargs = get_mapped_kwargs("scada", new_kwargs_mapping) - _validate_data(dfs, "scada", entity_kwargs) +# Args: +# data_paths (dict): Dictionary mapping entity names ('alarms', 'notifications', +# 'stoppages', 'work_orders', 'scada', 'turbines') to the pandas dataframe for +# that entity. +# """ +# entity_kwargs = get_mapped_kwargs("scada", new_kwargs_mapping) +# _validate_data(dfs, "scada", entity_kwargs) - es = _create_entityset(dfs, "scada", entity_kwargs) - es.id = "SCADA data" +# es = _create_entityset(dfs, "scada", entity_kwargs) +# es.id = "SCADA data" - return es +# return es -def create_vibrations_entityset(dfs, new_kwargs_mapping=None): - """Generate an entityset for Vibrations data datasets +# def create_vibrations_entityset(dfs, new_kwargs_mapping=None): +# """Generate an entityset for Vibrations data datasets - Args: - data_paths (dict): Dictionary mapping entity names ('alarms', 'notifications', - 'stoppages', 'work_orders', 'vibrations', 'turbines') to the pandas - dataframe for that entity. Optionally 'pidata' and 'scada' can be included. 
- """ - entities = ["vibrations"] +# Args: +# data_paths (dict): Dictionary mapping entity names ('alarms', 'notifications', +# 'stoppages', 'work_orders', 'vibrations', 'turbines') to the pandas +# dataframe for that entity. Optionally 'pidata' and 'scada' can be included. +# """ +# entities = ["vibrations"] - pidata_kwargs, scada_kwargs = {}, {} - if "pidata" in dfs: - pidata_kwargs = get_mapped_kwargs("pidata", new_kwargs_mapping) - entities.append("pidata") - if "scada" in dfs: - scada_kwargs = get_mapped_kwargs("scada", new_kwargs_mapping) - entities.append("scada") +# pidata_kwargs, scada_kwargs = {}, {} +# if "pidata" in dfs: +# pidata_kwargs = get_mapped_kwargs("pidata", new_kwargs_mapping) +# entities.append("pidata") +# if "scada" in dfs: +# scada_kwargs = get_mapped_kwargs("scada", new_kwargs_mapping) +# entities.append("scada") - entity_kwargs = { - **pidata_kwargs, - **scada_kwargs, - **get_mapped_kwargs("vibrations", new_kwargs_mapping), - } - _validate_data(dfs, entities, entity_kwargs) +# entity_kwargs = { +# **pidata_kwargs, +# **scada_kwargs, +# **get_mapped_kwargs("vibrations", new_kwargs_mapping), +# } +# _validate_data(dfs, entities, entity_kwargs) - es = _create_entityset(dfs, "vibrations", entity_kwargs) - es.id = "Vibrations data" +# es = _create_entityset(dfs, "vibrations", entity_kwargs) +# es.id = "Vibrations data" - return es +# return es def _validate_data(dfs, es_type, es_kwargs): @@ -215,6 +215,7 @@ def validate_scada_data(dfs, new_kwargs_mapping=None): def validate_pidata_data(dfs, new_kwargs_mapping=None): entity_kwargs = get_mapped_kwargs("pidata", new_kwargs_mapping) _validate_data(dfs, "pidata", entity_kwargs) + return entity_kwargs def validate_vibrations_data(dfs, new_kwargs_mapping=None): @@ -245,8 +246,12 @@ def validate_vibrations_data(dfs, new_kwargs_mapping=None): def _create_entityset(entities, es_type, new_kwargs_mapping=None): + validate_func = VALIDATE_DATA_FUNCTIONS[es_type] es_kwargs = validate_func(entities, 
new_kwargs_mapping) + print(entities) + print(es_type) + print(es_kwargs) # filter out stated logical types for missing columns for entity, df in entities.items(): @@ -280,12 +285,12 @@ def _create_entityset(entities, es_type, new_kwargs_mapping=None): return es -CREATE_ENTITYSET_FUNCTIONS = { - "scada": create_scada_entityset, - "pidata": create_pidata_entityset, - "vibrations": create_vibrations_entityset, -} +# CREATE_ENTITYSET_FUNCTIONS = { +# "scada": create_scada_entityset, +# "pidata": create_pidata_entityset, +# "vibrations": create_vibrations_entityset, +# } -def get_create_entityset_functions(): - return CREATE_ENTITYSET_FUNCTIONS.copy() +# def get_create_entityset_functions(): +# return CREATE_ENTITYSET_FUNCTIONS.copy() diff --git a/zephyr_ml/metadata.py b/zephyr_ml/metadata.py index 56ba702..9079957 100644 --- a/zephyr_ml/metadata.py +++ b/zephyr_ml/metadata.py @@ -179,6 +179,7 @@ def get_mapped_kwargs(es_type, new_kwargs=None): + print("getting mapped kwargs") if es_type not in DEFAULT_ES_TYPE_KWARGS.keys(): raise ValueError("Unrecognized es_type argument: {}".format(es_type)) mapped_kwargs = DEFAULT_ES_KWARGS.copy() @@ -198,7 +199,7 @@ def get_mapped_kwargs(es_type, new_kwargs=None): ) mapped_kwargs[entity].update(new_kwargs[entity]) - + print(mapped_kwargs) return mapped_kwargs diff --git a/zephyr_ml/primitives/postprocessing.py b/zephyr_ml/primitives/postprocessing.py index 2f5ff8d..bf7470b 100644 --- a/zephyr_ml/primitives/postprocessing.py +++ b/zephyr_ml/primitives/postprocessing.py @@ -97,6 +97,7 @@ def confusion_matrix(y_true, y_pred, labels=None, sample_weight=None, normalize= ax.xaxis.set_ticklabels(["False", "True"]) ax.yaxis.set_ticklabels(["False", "True"]) + plt.show() return conf_matrix @@ -130,5 +131,6 @@ def roc_auc_score_and_curve( plt.ylabel("True Positive Rate") plt.xlabel("False Positive Rate") plt.title("AUC: %.3f" % auc) + plt.show() return auc From dd7fc95307d1fd640daaf5628656c22e1b170d17 Mon Sep 17 00:00:00 2001 From: Raymond 
Pan Date: Wed, 19 Mar 2025 12:17:20 -0400 Subject: [PATCH 14/28] make show_plot a hyperparameter for visual eval prims --- ...tives.postprocessing.confusion_matrix.json | 4 ++ ...ostprocessing.roc_auc_score_and_curve.json | 4 ++ zephyr_ml/primitives/postprocessing.py | 42 ++++++++++--------- 3 files changed, 30 insertions(+), 20 deletions(-) diff --git a/zephyr_ml/primitives/jsons/zephyr_ml.primitives.postprocessing.confusion_matrix.json b/zephyr_ml/primitives/jsons/zephyr_ml.primitives.postprocessing.confusion_matrix.json index f0b3132..efce4e3 100644 --- a/zephyr_ml/primitives/jsons/zephyr_ml.primitives.postprocessing.confusion_matrix.json +++ b/zephyr_ml/primitives/jsons/zephyr_ml.primitives.postprocessing.confusion_matrix.json @@ -42,6 +42,10 @@ "normalize": { "type": "str", "default": null + }, + "show_plot": { + "type": "bool", + "default": true } } } diff --git a/zephyr_ml/primitives/jsons/zephyr_ml.primitives.postprocessing.roc_auc_score_and_curve.json b/zephyr_ml/primitives/jsons/zephyr_ml.primitives.postprocessing.roc_auc_score_and_curve.json index bcc32c7..343c1d3 100644 --- a/zephyr_ml/primitives/jsons/zephyr_ml.primitives.postprocessing.roc_auc_score_and_curve.json +++ b/zephyr_ml/primitives/jsons/zephyr_ml.primitives.postprocessing.roc_auc_score_and_curve.json @@ -41,6 +41,10 @@ "drop_intermediate": { "type": "bool", "default": true + }, + "show_plot": { + "type": "bool", + "default": true } } } diff --git a/zephyr_ml/primitives/postprocessing.py b/zephyr_ml/primitives/postprocessing.py index bf7470b..1972625 100644 --- a/zephyr_ml/primitives/postprocessing.py +++ b/zephyr_ml/primitives/postprocessing.py @@ -86,23 +86,25 @@ def apply_threshold(self, y_pred): return binary, self._threshold, self._scores -def confusion_matrix(y_true, y_pred, labels=None, sample_weight=None, normalize=None): +def confusion_matrix(y_true, y_pred, labels=None, sample_weight=None, normalize=None, show_plot = True): conf_matrix = metrics.confusion_matrix( y_true, y_pred, 
labels=labels, sample_weight=sample_weight, normalize=normalize ) - ax = sns.heatmap(conf_matrix, annot=True, cmap="Blues") - ax.set_title("Confusion Matrix\n") - ax.set_xlabel("\nPredicted Values") - ax.set_ylabel("Actual Values") - - ax.xaxis.set_ticklabels(["False", "True"]) - ax.yaxis.set_ticklabels(["False", "True"]) - plt.show() + if show_plot: + ax = sns.heatmap(conf_matrix, annot=True, cmap="Blues") + ax.set_title("Confusion Matrix\n") + ax.set_xlabel("\nPredicted Values") + ax.set_ylabel("Actual Values") + + ax.xaxis.set_ticklabels(["False", "True"]) + ax.yaxis.set_ticklabels(["False", "True"]) + + plt.show() return conf_matrix def roc_auc_score_and_curve( - y_true, y_proba, pos_label=None, sample_weight=None, drop_intermediate=True + y_true, y_proba, pos_label=None, sample_weight=None, drop_intermediate=True, show_plot = True ): fpr, tpr, _ = metrics.roc_curve( y_true, @@ -120,17 +122,17 @@ def roc_auc_score_and_curve( drop_intermediate=drop_intermediate, ) - _, _ = plt.subplots(1, 1) auc = metrics.roc_auc_score(y_true, y_proba) - - plt.plot(fpr, tpr, "ro") - plt.plot(fpr, tpr) - plt.plot(ns_fpr, ns_tpr, linestyle="--", color="green") - - plt.ylabel("True Positive Rate") - plt.xlabel("False Positive Rate") - plt.title("AUC: %.3f" % auc) - plt.show() + if show_plot: + _, _ = plt.subplots(1, 1) + plt.plot(fpr, tpr, "ro") + plt.plot(fpr, tpr) + plt.plot(ns_fpr, ns_tpr, linestyle="--", color="green") + + plt.ylabel("True Positive Rate") + plt.xlabel("False Positive Rate") + plt.title("AUC: %.3f" % auc) + plt.show() return auc From 8b274d573b326ca5915df51fc7793c8d562a31f3 Mon Sep 17 00:00:00 2001 From: Raymond Pan Date: Wed, 19 Mar 2025 22:07:17 -0400 Subject: [PATCH 15/28] logging & update entityset tests --- tests/test_entityset.py | 6 +- zephyr_ml/core.py | 145 ++++++++++++++++++++++++++++++++++------ zephyr_ml/entityset.py | 3 - 3 files changed, 130 insertions(+), 24 deletions(-) diff --git a/tests/test_entityset.py b/tests/test_entityset.py index 
ae2dd45..6c7901f 100644 --- a/tests/test_entityset.py +++ b/tests/test_entityset.py @@ -1,7 +1,7 @@ import pandas as pd import pytest -from zephyr_ml import create_pidata_entityset, create_scada_entityset +from zephyr_ml import _create_entityset @pytest.fixture @@ -118,7 +118,11 @@ def scada_dfs(base_dfs): }) return {**base_dfs, 'scada': scada_df} +def create_pidata_entityset(pidata_dfs): + return _create_entityset(pidata_dfs, es_type = "pidata") +def create_scada_entityset(scada_dfs): + return _create_entityset(scada_dfs, es_type = "scada") def test_create_pidata_missing_entities(pidata_dfs): error_msg = 'Missing dataframes for entities notifications.' diff --git a/zephyr_ml/core.py b/zephyr_ml/core.py index fc76e9d..35eed3a 100644 --- a/zephyr_ml/core.py +++ b/zephyr_ml/core.py @@ -18,6 +18,7 @@ from itertools import chain import logging import matplotlib.pyplot as plt +from functools import wraps DEFAULT_METRICS = [ "sklearn.metrics.accuracy_score", @@ -29,6 +30,63 @@ ] LOGGER = logging.getLogger(__name__) + +def guide(method): + + @wraps(method) + def guided_step(self, *method_args, **method_kwargs): + expected_next_step = self.current_step + 1 + method_name = method.__name__ + if method_name in self.producer_to_step_map: + actual_next_step = self.producer_to_step_map[method_name] + if actual_next_step > expected_next_step: + necessary_steps_str = self._get_necessary_steps(actual_next_step) + LOGGER.error(f"Required steps have been skipped! Unable to run {method_name}. Please perform the following steps first {necessary_steps_str}") + return + elif actual_next_step < self.current_step: #regressing, make stale data, warn + try: + res = method(self, *method_args, **method_kwargs) + LOGGER.warning(f"The last run step was {self.current_step}. The following methods will return stale data. 
Please perform the following steps in order to get up to date.") + self.current_step = actual_next_step + return res + except Exception as e: + LOGGER.error(f"{method_name} threw an exception", exc_info = e) + return + else: + try: + res = method(self, *method_args, **method_kwargs) + self.current_step = actual_next_step + + # do logging here + return res + except Exception as e: + LOGGER.error(f"{method_name} threw an exception", exc_info = e) + + elif method_name in self.getter_to_step_map: + actual_next_step = self.getter_to_step_map[method_name] + if actual_next_step > expected_next_step: + try: + res = method(self, *method_args, **method_kwargs) + if res is None: + LOGGER.error(f"Required steps have been skipped!. {method_name} does not have a value to return. Please perform the following steps in order before running this method.") + else: + LOGGER.warning(f"This data may be stale. Please perform the following steps in order to ensure the response is up to date.") + return res + except Exception as e: + LOGGER.error(f"{method_name} threw an exception", exc_info = e) + return + else: + try: + res = method(self, *method_args, **method_kwargs) + return res + except Exception as e: + LOGGER.error(f"{method_name} threw an exception", exc_info = e) + else: + print(f"Method {method_name} does not need to be wrapped") + + + return guided_step + class Zephyr: def __init__(self): @@ -42,14 +100,40 @@ def __init__(self): self.X_test = None self.y_train = None self.y_test = None + self.is_fitted = None self.results = None + self.current_step = -1 + # tuple of 2 arrays: producers and attributes + self.step_order = [ + ([self.create_entityset, self.set_entityset], [self.get_entityset]), + ([self.set_labeling_function], [self.get_labeling_function]), + ([self.generate_label_times], [self.get_label_times]), + ([self.generate_feature_matrix_and_labels, self.set_feature_matrix_and_labels], [self.get_feature_matrix_and_labels]), + ([self.generate_train_test_split, 
self.set_train_test_split], [self.get_train_test_split]), + ([self.set_pipeline], [self.get_pipeline]), + ([self.fit], []), + ([self.predict, self.evaluate], []) + ] + + self.producer_to_step_map = {} + self.getter_to_step_map = {} + for idx, (producers, getters) in enumerate(self.step_order): + for prod in producers: + self.producer_to_step_map[prod.__name__] = idx + for get in getters: + self.getter_to_step_map[get.__name__] = idx + + def _get_necessary_steps(self, actual_step): + pass + def get_entityset_types(self): """ - Returns the supported entityset types (PI/SCADA) and the required dataframes and their columns + Returns the supported entityset types (PI/SCADA/Vibrations) and the required dataframes and their columns """ return VALIDATE_DATA_FUNCTIONS.keys() + @guide def create_entityset(self, data_paths, es_type, new_kwargs_mapping=None): """ Generate an entityset @@ -68,12 +152,7 @@ def create_entityset(self, data_paths, es_type, new_kwargs_mapping=None): self.entityset = entityset return self.entityset - def get_entityset(self): - if self.entityset is None: - raise ValueError("No entityset has been created or set in this instance.") - - return self.entityset - + @guide def set_entityset(self, entityset, es_type, new_kwargs_mapping=None): dfs = entityset.to_dictionary() @@ -82,9 +161,18 @@ def set_entityset(self, entityset, es_type, new_kwargs_mapping=None): self.entityset = entityset + @guide + def get_entityset(self): + if self.entityset is None: + raise ValueError("No entityset has been created or set in this instance.") + + return self.entityset + + def get_predefined_labeling_functions(self): return get_labeling_functions() + @guide def set_labeling_function(self, name=None, func=None): print(f"labeling fucntion name {name}") if name is not None: @@ -103,7 +191,12 @@ def set_labeling_function(self, name=None, func=None): else: raise ValueError(f"Custom function is not callable") raise ValueError("No labeling function given.") - + + @guide + def 
get_labeling_function(self): + return self.labeling_function + + @guide def generate_label_times( self, num_samples=-1, subset=None, column_map={}, verbose=False, **kwargs ): @@ -143,12 +236,14 @@ def generate_label_times( return label_times, meta - def plot_label_times(self): - assert self.label_times is not None - cp.label_times.plots.LabelPlots(self.label_times).distribution() - - def generate_features(self, **kwargs): + @guide + def get_label_times(self, visualize = True): + if visualize: + cp.label_times.plots.LabelPlots(self.label_times).distribution() + return self.label_times + @guide + def generate_feature_matrix_and_labels(self, **kwargs): feature_matrix, features = ft.dfs( entityset=self.entityset, cutoff_time=self.label_times, **kwargs ) @@ -157,15 +252,18 @@ def generate_features(self, **kwargs): print(feature_matrix) return feature_matrix, features + @guide def get_feature_matrix_and_labels(self): return self.feature_matrix_and_labels + @guide def set_feature_matrix_and_labels(self, feature_matrix, label_col_name="label"): assert label_col_name in feature_matrix.columns self.feature_matrix_and_labels = self._clean_feature_matrix( feature_matrix, label_col_name=label_col_name ) + @guide def generate_train_test_split( self, test_size=None, @@ -191,25 +289,32 @@ def generate_train_test_split( return + @guide def set_train_test_split(self, X_train, X_test, y_train, y_test): self.X_train = X_train self.X_test = X_test self.y_train = y_train self.y_test = y_test + @guide def get_train_test_split(self): + if self.X_train is None or self.X_test is None or self.y_train is None or self.y_test is None: + return None return self.X_train, self.X_test, self.y_train, self.y_test def get_predefined_pipelines(self): pass + @guide def set_pipeline(self, pipeline, pipeline_hyperparameters=None): self.pipeline = self._get_mlpipeline(pipeline, pipeline_hyperparameters) self.pipeline_hyperparameters = pipeline_hyperparameters + @guide def get_pipeline(self): return 
self.pipeline - + + @guide def fit( self, X=None, y=None, visual=False, **kwargs ): # kwargs indicate the parameters of the current pipeline @@ -228,6 +333,7 @@ def fit( if visual and outputs is not None: return dict(zip(visual_names, outputs)) + @guide def predict(self, X=None, visual=False, **kwargs): if X is None: X = self.X_test @@ -244,6 +350,7 @@ def predict(self, X=None, visual=False, **kwargs): return outputs + @guide def evaluate(self, X=None, y=None, metrics=None, show_plots = True): if X is None: X = self.X_test @@ -271,10 +378,6 @@ def evaluate(self, X=None, y=None, metrics=None, show_plots = True): return results - def _validate_step(self, **kwargs): - for key, value in kwargs: - assert (value is not None, f"{key} has not been set or created") - def _clean_feature_matrix(self, feature_matrix, label_col_name="label"): labels = feature_matrix.pop(label_col_name) @@ -473,9 +576,11 @@ def _get_outputs_spec(self, default=True): obj.set_labeling_function(name="brake_pad_presence") obj.generate_label_times(num_samples=35, gap="20d") - obj.plot_label_times() + obj.get_label_times() + + obj.generate_train_test_split() - obj.generate_features( + obj.generate_feature_matrix_and_labels( target_dataframe_name="turbines", cutoff_time_in_index=True, agg_primitives=["count", "sum", "max"], diff --git a/zephyr_ml/entityset.py b/zephyr_ml/entityset.py index 6e7d0a2..9b0a4f3 100644 --- a/zephyr_ml/entityset.py +++ b/zephyr_ml/entityset.py @@ -249,9 +249,6 @@ def _create_entityset(entities, es_type, new_kwargs_mapping=None): validate_func = VALIDATE_DATA_FUNCTIONS[es_type] es_kwargs = validate_func(entities, new_kwargs_mapping) - print(entities) - print(es_type) - print(es_kwargs) # filter out stated logical types for missing columns for entity, df in entities.items(): From d8b2ecae9319fd88ac77f43d9894c3e826adc1f1 Mon Sep 17 00:00:00 2001 From: Raymond Pan Date: Mon, 31 Mar 2025 23:52:59 -0400 Subject: [PATCH 16/28] add tests, merge fit and set pipeline, fix set es --- 
tests/test_core.py | 477 ++++++++++++++++++++++++++++++----------- zephyr_ml/core.py | 108 ++++++---- zephyr_ml/entityset.py | 1 + zephyr_ml/metadata.py | 2 - 4 files changed, 422 insertions(+), 166 deletions(-) diff --git a/tests/test_core.py b/tests/test_core.py index 7df538d..2e26a83 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -11,6 +11,121 @@ class TestZephyr: + def base_dfs(): + alarms_df = pd.DataFrame({ + 'COD_ELEMENT': [0, 0], + 'DAT_START': [pd.Timestamp('2022-01-01 00:00:00'), pd.Timestamp('2022-03-01 11:12:13')], + 'DAT_END': [pd.Timestamp('2022-01-01 13:00:00'), pd.Timestamp('2022-03-02 11:12:13')], + 'IND_DURATION': [0.5417, 1.0], + 'COD_ALARM': [12345, 98754], + 'COD_ALARM_INT': [12345, 98754], + 'DES_NAME': ['Alarm1', 'Alarm2'], + 'DES_TITLE': ['Description of alarm 1', 'Description of alarm 2'], + }) + stoppages_df = pd.DataFrame({ + 'COD_ELEMENT': [0, 0], + 'DAT_START': [pd.Timestamp('2022-01-01 00:00:00'), pd.Timestamp('2022-03-01 11:12:13')], + 'DAT_END': [pd.Timestamp('2022-01-08 11:07:17'), pd.Timestamp('2022-03-01 17:00:13')], + 'DES_WO_NAME': ['stoppage name 1', 'stoppage name 2'], + 'DES_COMMENTS': ['description of stoppage 1', 'description of stoppage 2'], + 'COD_WO': [12345, 67890], + 'IND_DURATION': [7.4642, 0.2417], + 'IND_LOST_GEN': [45678.0, 123.0], + 'COD_ALARM': [12345, 12345], + 'COD_CAUSE': [32, 48], + 'COD_INCIDENCE': [987654, 123450], + 'COD_ORIGIN': [6, 23], + 'COD_STATUS': ['STOP', 'PAUSE'], + 'COD_CODE': ['ABC', 'XYZ'], + 'DES_DESCRIPTION': ['Description 1', 'Description 2'] + }) + notifications_df = pd.DataFrame({ + 'COD_ELEMENT': [0, 0], + 'COD_ORDER': [12345, 67890], + 'IND_QUANTITY': [1, -20], + 'COD_MATERIAL_SAP': [36052411, 67890], + 'DAT_POSTING': [pd.Timestamp('2022-01-01 00:00:00'), pd.Timestamp('2022-03-01 00:00:00')], + 'COD_MAT_DOC': [77889900, 12345690], + 'DES_MEDIUM': ['Description of notification 1', 'Description of notification 2'], + 'COD_NOTIF': [567890123, 32109877], + 'DAT_MALF_START': 
[pd.Timestamp('2021-12-25 18:07:10'), + pd.Timestamp('2022-02-28 06:04:00')], + 'DAT_MALF_END': [pd.Timestamp('2022-01-08 11:07:17'), pd.Timestamp('2022-03-01 17:00:13')], + 'IND_BREAKDOWN_DUR': [14.1378, 2.4792], + 'FUNCT_LOC_DES': ['location description 1', 'location description 2'], + 'COD_ALARM': [12345, 12345], + 'DES_ALARM': ['Alarm description', 'Alarm description'], + }) + work_orders_df = pd.DataFrame({ + 'COD_ELEMENT': [0, 0], + 'COD_ORDER': [12345, 67890], + 'DAT_BASIC_START': [pd.Timestamp('2022-01-01 00:00:00'), + pd.Timestamp('2022-03-01 00:00:00')], + 'DAT_BASIC_END': [pd.Timestamp('2022-01-09 00:00:00'), + pd.Timestamp('2022-03-02 00:00:00')], + 'COD_EQUIPMENT': [98765, 98765], + 'COD_MAINT_PLANT': ['ABC', 'ABC'], + 'COD_MAINT_ACT_TYPE': ['XYZ', 'XYZ'], + 'COD_CREATED_BY': ['A1234', 'B6789'], + 'COD_ORDER_TYPE': ['A', 'B'], + 'DAT_REFERENCE': [pd.Timestamp('2022-01-01 00:00:00'), + pd.Timestamp('2022-03-01 00:00:00')], + 'DAT_CREATED_ON': [pd.Timestamp('2022-03-01 00:00:00'), + pd.Timestamp('2022-04-18 00:00:00')], + 'DAT_VALID_END': [pd.NaT, pd.NaT], + 'DAT_VALID_START': [pd.NaT, pd.NaT], + 'COD_SYSTEM_STAT': ['ABC XYZ', 'LMN OPQ'], + 'DES_LONG': ['description of work order', 'description of work order'], + 'COD_FUNCT_LOC': ['!12345', '?09876'], + 'COD_NOTIF_OBJ': ['00112233', '00998877'], + 'COD_MAINT_ITEM': ['', '019283'], + 'DES_MEDIUM': ['short description', 'short description'], + 'DES_FUNCT_LOC': ['XYZ1234', 'ABC9876'], + }) + turbines_df = pd.DataFrame({ + 'COD_ELEMENT': [0], + 'TURBINE_PI_ID': ['TA00'], + 'TURBINE_LOCAL_ID': ['A0'], + 'TURBINE_SAP_COD': ['LOC000'], + 'DES_CORE_ELEMENT': ['T00'], + 'SITE': ['LOCATION'], + 'DES_CORE_PLANT': ['LOC'], + 'COD_PLANT_SAP': ['ABC'], + 'PI_COLLECTOR_SITE_NAME': ['LOC0'], + 'PI_LOCAL_SITE_NAME': ['LOC0'] + }) + pidata_df = pd.DataFrame({ + 'time': [pd.Timestamp('2022-01-02 13:21:01'), pd.Timestamp('2022-03-08 13:21:01')], + 'COD_ELEMENT': [0, 0], + 'val1': [9872.0, 559.0], + 'val2': [10.0, -7.0] + }) 
+ return { + 'alarms': alarms_df, + 'stoppages': stoppages_df, + 'notifications': notifications_df, + 'work_orders': work_orders_df, + 'turbines': turbines_df, + "pidata": pidata_df + } + + + def base_train_test_split(self): + X_train = pd.DataFrame({ + 'feature 1': np.random.random(300), + 'feature 2': [0] * 150 + [1] * 150, + }) + y_train =X_train['feature 2'].to_list() + + X_test = pd.DataFrame({ + 'feature 1': np.random.random((100)), + 'feature 2': [0] * 25 + [1] * 50 + [0] * 25, + }) + y_test = X_test['feature 2'].to_list() + return X_train, X_test, y_train, y_test + + + @classmethod def setup_class(cls): cls.train = pd.DataFrame({ @@ -32,158 +147,278 @@ def setup_class(cls): }) cls.random_y = [1 if x > 0.5 else 0 for x in np.random.random(100)] - - - - - - - - def setup_method(self): - self.zephyr = Zephyr('xgb_classifier') - - def test_hyperparameters(self): - hyperparameters = { - "xgboost.XGBClassifier#1": { - "max_depth": 2 - }, - "zephyr_ml.primitives.postprocessing.FindThreshold#1": { - "metric": "precision" - } + cls.kwargs = { + "create_entityset": {"data_paths": cls.base_dfs(), "es_type": "pidata"}, + "set_labeling_function": {"name": "brake_pad_presence"}, + "generate_label_times": {"num_samples": 10, "gap": "20d"}, + "generate_feature_matrix_and_labels": {"target_dataframe_name": "turbines", "cutoff_time_in_index": True, "agg_primitives": ["count", "sum", "max"], "verbose": True}, + "generate_train_test_split": {}, + "set_and_fit_pipeline": {}, + "evaluate": {} } - zephyr = Zephyr('xgb_classifier', hyperparameters) - - assert zephyr._hyperparameters == hyperparameters - - def test_json(self): - file = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) - json_zephyr = Zephyr(os.path.join(file, 'zephyr_ml', 'pipelines', 'xgb_classifier.json')) - json_zephyr_hyperparameters = json_zephyr._mlpipeline.get_hyperparameters() - zephyr_hyperparameters = self.zephyr._mlpipeline.get_hyperparameters() - assert json_zephyr_hyperparameters == 
zephyr_hyperparameters - def test_fit(self): - self.zephyr.fit(self.train, self.train_y) - - def test_fit_visual(self): - output = self.zephyr.fit(self.train, self.train_y, visual=True) + def setup_zephyr(self, producer_step_name): + zephyr = Zephyr() + step_num = zephyr.producer_to_step_map[producer_step_name] + for i, (setters, getters) in enumerate(zephyr.step_order): + if i < step_num: + setter = setters[0] + kwargs = self.kwargs[setter.__name__] + getattr(zephyr, setter.__name__)(**kwargs) + else: + break + return zephyr + + def test_initialize_class(self): + zephyr = self.setup_zephyr(0) + + def test_create_entityset(self): + zephyr = self.setup_zephyr(1) + es = zephyr.get_entityset() + assert es is not None + + def test_set_labeling_function(self): + zephyr = self.setup_zephyr(2) + labeling_fn = es = zephyr.get_labeling_function() + assert labeling_fn is not None + + def test_generate_label_times(self): + zephyr = self.setup_zephyr(3) + label_times = zephyr.get_label_times(visualize = False) + assert label_times is not None + + def test_generate_feature_matrix_and_labels(self): + zephyr = self.setup_zephyr(4) + feature_matrix_and_labels = zephyr.get_feature_matrix_and_labels() + assert feature_matrix_and_labels is not None + + def test_generate_train_test_split(self): + zephyr = self.setup_zephyr(5) + train_test_split = zephyr.get_train_test_split() + assert train_test_split is not None + + def setup_zephyr_with_base_split(self, producer_step_name): + zephyr = self.setup_zephyr(4) + zephyr.set_train_test_split(**self.base_train_test_split()) + final_step_num = zephyr.producer_to_step_map[producer_step_name] + for i in range(4, final_step_num): + setters, getters = zephyr.step_order[i] + setter = setters[0] + kwargs = self.kwargs[setter.__name__] + getattr(zephyr, setter.__name__)(**kwargs) + return zephyr + + def test_set_train_test_split(self): + zephyr = self.setup_zephyr_with_base_split(5) + assert zephyr.get_train_test_split is not None + + def 
test_set_and_fit_pipeline_no_visual(self): + zephyr = self.setup_zephyr_with_base_split(5) + output = zephyr.set_and_fit_pipeline() + assert output is None + pipeline = zephyr.get_pipeline() + assert pipeline is not None + pipeline_hyperparameters = zephyr.get_pipeline_hyperparameters() + assert pipeline_hyperparameters is not None + + def test_set_and_fit_pipeline_visual(self): + zephyr = self.setup_zephyr_with_base_split(5) + output = zephyr.set_and_fit_pipeline(visual = True) assert isinstance(output, dict) assert list(output.keys()) == ['threshold', 'scores'] + + pipeline = zephyr.get_pipeline() + assert pipeline is not None + pipeline_hyperparameters = zephyr.get_pipeline_hyperparameters() + assert pipeline_hyperparameters is not None + - def test_fit_no_visual(self): - zephyr = Zephyr(['xgboost.XGBClassifier']) - - output = zephyr.fit(self.train, self.train_y, visual=True) - assert output is None - - def test_predict(self): - self.zephyr.fit(self.train, self.train_y) - - predicted = self.zephyr.predict(self.test) - - assert self.test_y == predicted + def test_predict_no_visual(self): + zephyr = self.setup_zephyr_with_base_split(6) + predicted = zephyr.predict() + _, _, _, test_y = self.base_train_test_split() + assert test_y == predicted def test_predict_visual(self): - self.zephyr.fit(self.train, self.train_y) - - predicted, output = self.zephyr.predict(self.test, visual=True) + zephyr = self.setup_zephyr_with_base_split(6) + predicted, output = zephyr.predict(visual = True) - # predictions assert self.test_y == predicted # visualization assert isinstance(output, dict) assert list(output.keys()) == ['threshold', 'scores'] + - def test_predict_no_visual(self): - zephyr = Zephyr(['xgboost.XGBClassifier']) + def test_evaluate(self): + zephyr = self.setup_zephyr_with_base_split(6) + scores = pd.Series(zephyr.evaluate(metrics = ["sklearn.metrics.accuracy_score", + "sklearn.metrics.precision_score", + "sklearn.metrics.f1_score", + "sklearn.metrics.recall_score"])) 
+ + expected = pd.Series({ + "sklearn.metrics.accuracy_score": 1.0, + "sklearn.metrics.precision_score": 1.0, + "sklearn.metrics.f1_score": 1.0, + "sklearn.metrics.recall_score": 1.0 + }) + pd.testing.assert_series_equal(expected, scores) + + - zephyr.fit(self.train, self.train_y) - predicted = zephyr.predict(self.test, visual=True) - assert len(self.test_y) == len(predicted) + + - def test_fit_predict(self): - predicted = self.zephyr.fit_predict(self.random, self.random_y) + # def setup_method(self): + # self.zephyr = Zephyr('xgb_classifier') - assert isinstance(predicted, list) + # def test_hyperparameters(self): + # hyperparameters = { + # "xgboost.XGBClassifier#1": { + # "max_depth": 2 + # }, + # "zephyr_ml.primitives.postprocessing.FindThreshold#1": { + # "metric": "precision" + # } + # } - def test_save_load(self, tmpdir): - path = os.path.join(tmpdir, 'some_path.pkl') - self.zephyr.save(path) + # zephyr = Zephyr('xgb_classifier', hyperparameters) - new_zephyr = Zephyr.load(path) - assert new_zephyr == self.zephyr + # assert zephyr._hyperparameters == hyperparameters - def test_load_failed(self, tmpdir): - path = os.path.join(tmpdir, 'some_path.pkl') - os.makedirs(os.path.dirname(path), exist_ok=True) - with open(path, 'wb') as pickle_file: - pickle.dump("something", pickle_file) + # def test_json(self): + # file = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) + # json_zephyr = Zephyr(os.path.join(file, 'zephyr_ml', 'pipelines', 'xgb_classifier.json')) - with pytest.raises(ValueError): - Zephyr.load(path) + # json_zephyr_hyperparameters = json_zephyr._mlpipeline.get_hyperparameters() + # zephyr_hyperparameters = self.zephyr._mlpipeline.get_hyperparameters() + # assert json_zephyr_hyperparameters == zephyr_hyperparameters - def test_evaluate(self): - self.zephyr.fit(self.test, self.test_y) - scores = self.zephyr.evaluate(X=self.test, y=self.test_y) + # def test_fit(self): + # self.zephyr.fit(self.train, self.train_y) - expected = pd.Series({ - 
'accuracy': 1.0, - 'f1': 1.0, - 'recall': 1.0, - 'precision': 1.0, - }) - pd.testing.assert_series_equal(expected, scores) + # def test_fit_visual(self): + # output = self.zephyr.fit(self.train, self.train_y, visual=True) - def test_evaluate_fit(self): - scores = self.zephyr.evaluate( - X=self.test, - y=self.test_y, - fit=True, - ) + # assert isinstance(output, dict) + # assert list(output.keys()) == ['threshold', 'scores'] - expected = pd.Series({ - 'accuracy': 1.0, - 'f1': 1.0, - 'recall': 1.0, - 'precision': 1.0, - }) - pd.testing.assert_series_equal(expected, scores) + # def test_fit_no_visual(self): + # zephyr = Zephyr(['xgboost.XGBClassifier']) - def test_evaluate_previously_fitted_with_fit_true(self): - self.zephyr.fit(self.train, self.train_y) + # output = zephyr.fit(self.train, self.train_y, visual=True) + # assert output is None - scores = self.zephyr.evaluate( - X=self.test, - y=self.test_y, - fit=True - ) + # def test_predict(self): + # self.zephyr.fit(self.train, self.train_y) - expected = pd.Series({ - 'accuracy': 1.0, - 'f1': 1.0, - 'recall': 1.0, - 'precision': 1.0, - }) - pd.testing.assert_series_equal(expected, scores) + # predicted = self.zephyr.predict(self.test) - def test_evaluate_train_data(self): - scores = self.zephyr.evaluate( - X=self.test, - y=self.test_y, - fit=True, - train_X=self.train, - train_y=self.train_y - ) + # assert self.test_y == predicted + + # def test_predict_visual(self): + # self.zephyr.fit(self.train, self.train_y) + + # predicted, output = self.zephyr.predict(self.test, visual=True) + + # # predictions + # assert self.test_y == predicted + + # # visualization + # assert isinstance(output, dict) + # assert list(output.keys()) == ['threshold', 'scores'] + + # def test_predict_no_visual(self): + # zephyr = Zephyr(['xgboost.XGBClassifier']) + + # zephyr.fit(self.train, self.train_y) + + # predicted = zephyr.predict(self.test, visual=True) + # assert len(self.test_y) == len(predicted) + + # def test_fit_predict(self): + # 
predicted = self.zephyr.fit_predict(self.random, self.random_y) + + # assert isinstance(predicted, list) + + # def test_save_load(self, tmpdir): + # path = os.path.join(tmpdir, 'some_path.pkl') + # self.zephyr.save(path) + + # new_zephyr = Zephyr.load(path) + # assert new_zephyr == self.zephyr + + # def test_load_failed(self, tmpdir): + # path = os.path.join(tmpdir, 'some_path.pkl') + # os.makedirs(os.path.dirname(path), exist_ok=True) + # with open(path, 'wb') as pickle_file: + # pickle.dump("something", pickle_file) + + # with pytest.raises(ValueError): + # Zephyr.load(path) + + # def test_evaluate(self): + # self.zephyr.fit(self.test, self.test_y) + # scores = self.zephyr.evaluate(X=self.test, y=self.test_y) + + # expected = pd.Series({ + # 'accuracy': 1.0, + # 'f1': 1.0, + # 'recall': 1.0, + # 'precision': 1.0, + # }) + # pd.testing.assert_series_equal(expected, scores) + + # def test_evaluate_fit(self): + # scores = self.zephyr.evaluate( + # X=self.test, + # y=self.test_y, + # fit=True, + # ) + + # expected = pd.Series({ + # 'accuracy': 1.0, + # 'f1': 1.0, + # 'recall': 1.0, + # 'precision': 1.0, + # }) + # pd.testing.assert_series_equal(expected, scores) - expected = pd.Series({ - 'accuracy': 1.0, - 'f1': 1.0, - 'recall': 1.0, - 'precision': 1.0, - }) - pd.testing.assert_series_equal(expected, scores) + # def test_evaluate_previously_fitted_with_fit_true(self): + # self.zephyr.fit(self.train, self.train_y) + + # scores = self.zephyr.evaluate( + # X=self.test, + # y=self.test_y, + # fit=True + # ) + + # expected = pd.Series({ + # 'accuracy': 1.0, + # 'f1': 1.0, + # 'recall': 1.0, + # 'precision': 1.0, + # }) + # pd.testing.assert_series_equal(expected, scores) + + # def test_evaluate_train_data(self): + # scores = self.zephyr.evaluate( + # X=self.test, + # y=self.test_y, + # fit=True, + # train_X=self.train, + # train_y=self.train_y + # ) + + # expected = pd.Series({ + # 'accuracy': 1.0, + # 'f1': 1.0, + # 'recall': 1.0, + # 'precision': 1.0, + # }) + # 
pd.testing.assert_series_equal(expected, scores) diff --git a/zephyr_ml/core.py b/zephyr_ml/core.py index 35eed3a..f6dafc3 100644 --- a/zephyr_ml/core.py +++ b/zephyr_ml/core.py @@ -41,12 +41,13 @@ def guided_step(self, *method_args, **method_kwargs): actual_next_step = self.producer_to_step_map[method_name] if actual_next_step > expected_next_step: necessary_steps_str = self._get_necessary_steps(actual_next_step) - LOGGER.error(f"Required steps have been skipped! Unable to run {method_name}. Please perform the following steps first {necessary_steps_str}") + LOGGER.error(f"Required steps have been SKIPPED! Unable to run {method_name}. Please perform the following steps first:\n{necessary_steps_str}") return elif actual_next_step < self.current_step: #regressing, make stale data, warn try: + necessary_steps_str = self._get_necessary_steps(actual_next_step) res = method(self, *method_args, **method_kwargs) - LOGGER.warning(f"The last run step was {self.current_step}. The following methods will return stale data. Please perform the following steps in order to get up to date.") + LOGGER.warning(f"The last run step was {self.current_step}. The following methods will return STALE data. Please perform the following steps in order to get up to date:\n{necessary_steps_str}") self.current_step = actual_next_step return res except Exception as e: @@ -57,7 +58,6 @@ def guided_step(self, *method_args, **method_kwargs): res = method(self, *method_args, **method_kwargs) self.current_step = actual_next_step - # do logging here return res except Exception as e: LOGGER.error(f"{method_name} threw an exception", exc_info = e) @@ -67,10 +67,11 @@ def guided_step(self, *method_args, **method_kwargs): if actual_next_step > expected_next_step: try: res = method(self, *method_args, **method_kwargs) + necessary_steps_str = self._get_necessary_steps(actual_next_step) if res is None: - LOGGER.error(f"Required steps have been skipped!. {method_name} does not have a value to return. 
Please perform the following steps in order before running this method.") + LOGGER.error(f"Required steps have been SKIPPED!. {method_name} does not have a value to return. Please perform the following steps in order before running this method:\n{necessary_steps_str}") else: - LOGGER.warning(f"This data may be stale. Please perform the following steps in order to ensure the response is up to date.") + LOGGER.warning(f"This data may be STALE. Please perform the following steps in order to ensure the response is up to date:\n{necessary_steps_str}") return res except Exception as e: LOGGER.error(f"{method_name} threw an exception", exc_info = e) @@ -111,8 +112,7 @@ def __init__(self): ([self.generate_label_times], [self.get_label_times]), ([self.generate_feature_matrix_and_labels, self.set_feature_matrix_and_labels], [self.get_feature_matrix_and_labels]), ([self.generate_train_test_split, self.set_train_test_split], [self.get_train_test_split]), - ([self.set_pipeline], [self.get_pipeline]), - ([self.fit], []), + ([self.set_and_fit_pipeline], [self.get_pipeline, self.get_pipeline_hyperparameters]), ([self.predict, self.evaluate], []) ] @@ -125,7 +125,15 @@ def __init__(self): self.getter_to_step_map[get.__name__] = idx def _get_necessary_steps(self, actual_step): - pass + step_strs = [] + for step in range(self.current_step, actual_step): + option_strs = [] + for opt in self.step_order[step][0]: + option_strs.append(opt.__name__) + step_strs.append(f"{step}. 
{' or '.join(option_strs)}") + return "\n".join(step_strs) + + def get_entityset_types(self): """ @@ -134,7 +142,7 @@ def get_entityset_types(self): return VALIDATE_DATA_FUNCTIONS.keys() @guide - def create_entityset(self, data_paths, es_type, new_kwargs_mapping=None): + def create_entityset(self, data_paths, es_type, custom_kwargs_mapping=None): """ Generate an entityset @@ -142,22 +150,28 @@ def create_entityset(self, data_paths, es_type, new_kwargs_mapping=None): data_paths ( dict ): Dictionary mapping entity names to the pandas dataframe for that that entity es_type (str): type of signal data , either SCADA or PI - new_kwargs_mapping ( dict ): Updated keyword arguments to be used + custom_kwargs_mapping ( dict ): Updated keyword arguments to be used during entityset creation Returns: featuretools.EntitySet that contains the data passed in and their relationships """ - entityset = _create_entityset(data_paths, es_type, new_kwargs_mapping) + entityset = _create_entityset(data_paths, es_type, custom_kwargs_mapping) self.entityset = entityset return self.entityset @guide - def set_entityset(self, entityset, es_type, new_kwargs_mapping=None): - dfs = entityset.to_dictionary() + def set_entityset(self, entityset=None, es_type=None, entityset_path = None, custom_kwargs_mapping=None): + if entityset_path is not None: + entityset = ft.read_entityset(entityset_path) + + if entityset is None: + raise ValueError("No entityset passed in. 
Please pass in an entityset object via the entityest parameter or an entityset path via the entityset_path parameter.") + + dfs = entityset.dataframe_dict validate_func = VALIDATE_DATA_FUNCTIONS[es_type] - validate_func(dfs, new_kwargs_mapping) + validate_func(dfs, custom_kwargs_mapping) self.entityset = entityset @@ -174,7 +188,6 @@ def get_predefined_labeling_functions(self): @guide def set_labeling_function(self, name=None, func=None): - print(f"labeling fucntion name {name}") if name is not None: labeling_fn_map = get_labeling_functions_map() if name in labeling_fn_map: @@ -249,13 +262,13 @@ def generate_feature_matrix_and_labels(self, **kwargs): ) self.feature_matrix_and_labels = self._clean_feature_matrix(feature_matrix) self.features = features - print(feature_matrix) - return feature_matrix, features + return self.feature_matrix_and_labels, features @guide def get_feature_matrix_and_labels(self): return self.feature_matrix_and_labels + @guide def set_feature_matrix_and_labels(self, feature_matrix, label_col_name="label"): assert label_col_name in feature_matrix.columns @@ -305,19 +318,15 @@ def get_train_test_split(self): def get_predefined_pipelines(self): pass - @guide - def set_pipeline(self, pipeline, pipeline_hyperparameters=None): - self.pipeline = self._get_mlpipeline(pipeline, pipeline_hyperparameters) - self.pipeline_hyperparameters = pipeline_hyperparameters - - @guide - def get_pipeline(self): - return self.pipeline + @guide - def fit( - self, X=None, y=None, visual=False, **kwargs + def set_and_fit_pipeline( + self, pipeline = "xgb_classifier", pipeline_hyperparameters=None, X=None, y=None, visual=False, **kwargs ): # kwargs indicate the parameters of the current pipeline + self.pipeline = self._get_mlpipeline(pipeline, pipeline_hyperparameters) + self.pipeline_hyperparameters = self.pipeline.get_hyperparameters() + if X is None: X = self.X_train if y is None: @@ -332,6 +341,14 @@ def fit( if visual and outputs is not None: return 
dict(zip(visual_names, outputs)) + + @guide + def get_pipeline(self): + return self.pipeline + + @guide + def get_pipeline_hyperparameters(self): + return self.pipeline_hyperparameters @guide def predict(self, X=None, visual=False, **kwargs): @@ -562,36 +579,41 @@ def _get_outputs_spec(self, default=True): "val2": [10.0, -7.0], } ) - obj.create_entityset( - { - "alarms": alarms_df, - "stoppages": stoppages_df, - "notifications": notifications_df, - "work_orders": work_orders_df, - "turbines": turbines_df, - "pidata": pidata_df, - }, - "pidata", - ) + + # obj.create_entityset( + # { + # "alarms": alarms_df, + # "stoppages": stoppages_df, + # "notifications": notifications_df, + # "work_orders": work_orders_df, + # "turbines": turbines_df, + # "pidata": pidata_df, + # }, + # "pidata", + # ) + + obj.set_entityset(entityset_path = "/Users/raymondpan/zephyr/Zephyr-repo/brake_pad_es", es_type = 'scada') + obj.set_labeling_function(name="brake_pad_presence") - obj.generate_label_times(num_samples=35, gap="20d") - obj.get_label_times() + obj.generate_label_times(num_samples=10, gap="20d") + print(obj.get_label_times()) - obj.generate_train_test_split() obj.generate_feature_matrix_and_labels( target_dataframe_name="turbines", cutoff_time_in_index=True, agg_primitives=["count", "sum", "max"], + verbose = True ) + print(obj.get_feature_matrix_and_labels) + obj.generate_train_test_split() add_primitives_path( path="/Users/raymondpan/zephyr/Zephyr-repo/zephyr_ml/primitives/jsons" ) - obj.set_pipeline("xgb_classifier") + obj.set_and_fit_pipeline() - obj.fit() obj.evaluate() diff --git a/zephyr_ml/entityset.py b/zephyr_ml/entityset.py index 9b0a4f3..d71eb72 100644 --- a/zephyr_ml/entityset.py +++ b/zephyr_ml/entityset.py @@ -109,6 +109,7 @@ def _validate_data(dfs, es_type, es_kwargs): if not isinstance(es_type, list): es_type = [es_type] + entities = set( chain( [ diff --git a/zephyr_ml/metadata.py b/zephyr_ml/metadata.py index 9079957..9482d8e 100644 --- a/zephyr_ml/metadata.py 
+++ b/zephyr_ml/metadata.py @@ -179,7 +179,6 @@ def get_mapped_kwargs(es_type, new_kwargs=None): - print("getting mapped kwargs") if es_type not in DEFAULT_ES_TYPE_KWARGS.keys(): raise ValueError("Unrecognized es_type argument: {}".format(es_type)) mapped_kwargs = DEFAULT_ES_KWARGS.copy() @@ -199,7 +198,6 @@ def get_mapped_kwargs(es_type, new_kwargs=None): ) mapped_kwargs[entity].update(new_kwargs[entity]) - print(mapped_kwargs) return mapped_kwargs From 2a065dc6c4eed7a6315f429a73a4abb6e6951bed Mon Sep 17 00:00:00 2001 From: Raymond Pan Date: Tue, 1 Apr 2025 01:14:42 -0400 Subject: [PATCH 17/28] pass tests --- tests/test_core.py | 12 +++++------- tests/test_entityset.py | 4 ++-- tests/test_feature_engineering.py | 6 +++--- zephyr_ml/core.py | 6 ++++++ 4 files changed, 16 insertions(+), 12 deletions(-) diff --git a/tests/test_core.py b/tests/test_core.py index 2e26a83..22d3651 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -159,9 +159,8 @@ def setup_class(cls): - def setup_zephyr(self, producer_step_name): + def setup_zephyr(self, step_num): zephyr = Zephyr() - step_num = zephyr.producer_to_step_map[producer_step_name] for i, (setters, getters) in enumerate(zephyr.step_order): if i < step_num: @@ -173,7 +172,7 @@ def setup_zephyr(self, producer_step_name): return zephyr def test_initialize_class(self): - zephyr = self.setup_zephyr(0) + zephyr = self.setup_zephyr(1) def test_create_entityset(self): zephyr = self.setup_zephyr(1) @@ -200,11 +199,10 @@ def test_generate_train_test_split(self): train_test_split = zephyr.get_train_test_split() assert train_test_split is not None - def setup_zephyr_with_base_split(self, producer_step_name): + def setup_zephyr_with_base_split(self, step_num): zephyr = self.setup_zephyr(4) - zephyr.set_train_test_split(**self.base_train_test_split()) - final_step_num = zephyr.producer_to_step_map[producer_step_name] - for i in range(4, final_step_num): + zephyr.set_train_test_split(*self.base_train_test_split()) + for i in 
range(5, step_num): setters, getters = zephyr.step_order[i] setter = setters[0] kwargs = self.kwargs[setter.__name__] diff --git a/tests/test_entityset.py b/tests/test_entityset.py index 6c7901f..6d7901f 100644 --- a/tests/test_entityset.py +++ b/tests/test_entityset.py @@ -210,7 +210,7 @@ def test_missing_time_indices(pidata_dfs): def test_default_create_pidata_entityset(pidata_dfs): es = create_pidata_entityset(pidata_dfs) - assert es.id == 'PI data' + assert es.id == 'pidata' assert set(es.dataframe_dict.keys()) == set( ['alarms', 'turbines', 'stoppages', 'work_orders', 'notifications', 'pidata']) @@ -218,6 +218,6 @@ def test_default_create_pidata_entityset(pidata_dfs): def test_default_create_scada_entityset(scada_dfs): es = create_scada_entityset(scada_dfs) - assert es.id == 'SCADA data' + assert es.id == 'scada' assert set(es.dataframe_dict.keys()) == set( ['alarms', 'turbines', 'stoppages', 'work_orders', 'notifications', 'scada']) diff --git a/tests/test_feature_engineering.py b/tests/test_feature_engineering.py index 324e82f..17d72e6 100644 --- a/tests/test_feature_engineering.py +++ b/tests/test_feature_engineering.py @@ -1,7 +1,7 @@ import pandas as pd import pytest -from zephyr_ml import create_pidata_entityset, create_scada_entityset +from zephyr_ml import _create_entityset from zephyr_ml.feature_engineering import process_signals @@ -122,12 +122,12 @@ def scada_dfs(base_dfs): @pytest.fixture def pidata_es(pidata_dfs): - return create_pidata_entityset(pidata_dfs) + return _create_entityset(pidata_dfs, "pidata") @pytest.fixture def scada_es(scada_dfs): - return create_scada_entityset(scada_dfs) + return _create_entityset(scada_dfs, "scada") @pytest.fixture diff --git a/zephyr_ml/core.py b/zephyr_ml/core.py index f6dafc3..c432e51 100644 --- a/zephyr_ml/core.py +++ b/zephyr_ml/core.py @@ -332,6 +332,9 @@ def set_and_fit_pipeline( if y is None: y = self.y_train + print(X) + print(y) + if visual: outputs_spec, visual_names = self._get_outputs_spec(False) 
else: @@ -359,7 +362,10 @@ def predict(self, X=None, visual=False, **kwargs): else: outputs_spec = "default" + print(X) + outputs = self.pipeline.predict(X, output_=outputs_spec, **kwargs) + print(outputs) if visual and visual_names: prediction = outputs[0] From 609bbcc3d6b9143bf519f58fae8a4f10ebdf8e77 Mon Sep 17 00:00:00 2001 From: Raymond Pan Date: Mon, 21 Apr 2025 16:45:28 -0400 Subject: [PATCH 18/28] combined set_labeling_function and generate_labeling_times --- zephyr_ml/core.py | 66 +++++++++++++++++++++++++++-------------------- 1 file changed, 38 insertions(+), 28 deletions(-) diff --git a/zephyr_ml/core.py b/zephyr_ml/core.py index c432e51..58891c0 100644 --- a/zephyr_ml/core.py +++ b/zephyr_ml/core.py @@ -108,7 +108,7 @@ def __init__(self): # tuple of 2 arrays: producers and attributes self.step_order = [ ([self.create_entityset, self.set_entityset], [self.get_entityset]), - ([self.set_labeling_function], [self.get_labeling_function]), + # ([self.set_labeling_function], [self.get_labeling_function]), ([self.generate_label_times], [self.get_label_times]), ([self.generate_feature_matrix_and_labels, self.set_feature_matrix_and_labels], [self.get_feature_matrix_and_labels]), ([self.generate_train_test_split, self.set_train_test_split], [self.get_train_test_split]), @@ -186,37 +186,49 @@ def get_entityset(self): def get_predefined_labeling_functions(self): return get_labeling_functions() - @guide - def set_labeling_function(self, name=None, func=None): - if name is not None: - labeling_fn_map = get_labeling_functions_map() - if name in labeling_fn_map: - self.labeling_function = labeling_fn_map[name] - return - else: - raise ValueError( - f"Unrecognized name argument:{name}. 
Call get_predefined_labeling_functions to view predefined labeling functions" - ) - elif func is not None: - if callable(func): - self.labeling_function = func - return - else: - raise ValueError(f"Custom function is not callable") - raise ValueError("No labeling function given.") + # @guide + # def set_labeling_function(self, name=None, func=None): + # if name is not None: + # labeling_fn_map = get_labeling_functions_map() + # if name in labeling_fn_map: + # self.labeling_function = labeling_fn_map[name] + # return + # else: + # raise ValueError( + # f"Unrecognized name argument:{name}. Call get_predefined_labeling_functions to view predefined labeling functions" + # ) + # elif func is not None: + # if callable(func): + # self.labeling_function = func + # return + # else: + # raise ValueError(f"Custom function is not callable") + # raise ValueError("No labeling function given.") - @guide - def get_labeling_function(self): - return self.labeling_function + # @guide + # def get_labeling_function(self): + # return self.labeling_function @guide def generate_label_times( - self, num_samples=-1, subset=None, column_map={}, verbose=False, **kwargs + self, labeling_fn, num_samples=-1, subset=None, column_map={}, verbose=False, **kwargs ): - assert self.entityset is not None - assert self.labeling_function is not None + assert self.entityset is not None, "entityset has not been set" + + if isinstance(labeling_fn, str): # get predefined labeling function + labeling_fn_map = get_labeling_functions_map() + if labeling_fn in labeling_fn_map: + labeling_fn = labeling_fn_map[labeling_fn] + else: + raise ValueError( + f"Unrecognized name argument:{labeling_fn}. 
Call get_predefined_labeling_functions to view predefined labeling functions" + ) - labeling_function, df, meta = self.labeling_function(self.entityset, column_map) + + assert callable(labeling_fn), "Labeling function is not callable" + + + labeling_function, df, meta = labeling_fn(self.entityset, column_map) data = df if isinstance(subset, float) or isinstance(subset, int): @@ -332,8 +344,6 @@ def set_and_fit_pipeline( if y is None: y = self.y_train - print(X) - print(y) if visual: outputs_spec, visual_names = self._get_outputs_spec(False) From b42b048bd0c5d6a0e42e55d60dd34d6c43b53abc Mon Sep 17 00:00:00 2001 From: Raymond Pan Date: Fri, 9 May 2025 22:09:43 -0400 Subject: [PATCH 19/28] added guide handler and updated mlblocks --- zephyr_ml/core.py | 373 +++++++++++++----- zephyr_ml/pipelines/xgb_classifier.json | 5 - .../jsons/xgboost.XGBClassifier.json | 2 +- ...imitives.postprocessing.FindThreshold.json | 6 +- ...tives.postprocessing.confusion_matrix.json | 2 +- zephyr_ml/primitives/postprocessing.py | 24 +- 6 files changed, 289 insertions(+), 123 deletions(-) diff --git a/zephyr_ml/core.py b/zephyr_ml/core.py index 58891c0..f00338e 100644 --- a/zephyr_ml/core.py +++ b/zephyr_ml/core.py @@ -31,61 +31,225 @@ LOGGER = logging.getLogger(__name__) -def guide(method): - @wraps(method) - def guided_step(self, *method_args, **method_kwargs): - expected_next_step = self.current_step + 1 +class GuideHandler: + + def __init__(self, producers_and_getters, set_methods): + self.cur_term = 0 + self.producers_and_getters = producers_and_getters + self.set_methods = set_methods + + self.producer_to_step_map = {} + self.getter_to_step_map = {} + self.terms = [] + + for idx, (producers, getters) in enumerate(self.producers_and_getters): + self.terms.append(-1) + + for prod in producers: + self.producer_to_step_map[prod.__name__] = idx + + for get in getters: + self.getter_to_step_map[get.__name__] = idx + + def get_necessary_steps(self, actual_next_step): + step_strs = [] + for 
step in range(self.current_step, actual_next_step): + option_strs = [] + for opt in self.producers_and_getters[step][0]: + option_strs.append(opt.__name__) + step_strs.append(f"{step}. {' or '.join(option_strs)}") + return "\n".join(step_strs) + + def get_get_steps_in_between(self, cur_step, next_step): + step_strs = [] + for step in range(cur_step + 1, next_step): + step_strs.append(f"{step} {self.producers_and_getters[step][1][0]}") + return step_strs + + def get_last_up_to_date(self, next_step): + latest_up_to_date = 0 + for step in range(next_step): + if self.terms[step] == self.cur_term: + latest_up_to_date = step + return latest_up_to_date + + + def join_steps(self, step_strs): + return "\n".join(step_strs) + + def get_steps_in_between(self, cur_step, next_step): + step_strs = [] + for step in range(cur_step+1, next_step): + option_strs = [] + for opt in self.producers_and_getters[step][0]: + option_strs.append(opt.__name__) + step_strs.append(f"{step}. {' or '.join(option_strs)}") + return step_strs + + def perform_producer_step(self, method, *method_args, **method_kwargs): + step_num = self.producer_to_step_map[method.__name__] + res = method(*method_args, **method_kwargs) + self.current_step = step_num + self.terms[step_num] = self.cur_term + return res + + + def try_log_skipping_steps_warning(self, name, next_step): + steps_skipped = self.get_steps_in_between(self.current_step, next_step) + if len(steps_skipped) > 0: + necc_steps = self.join_steps(steps_skipped) + LOGGER.warning(f"Performing {name}. You are skipping the following steps:\n{necc_steps}") + + + def try_log_using_stale_warning(self, name, next_step): + latest_up_to_date = self.get_last_up_to_date(next_step) + steps_needed = self.get_steps_in_between(latest_up_to_date-1, next_step) + if len(steps_needed) >0: + necc_steps = self.join_steps(steps_needed) + LOGGER.warning(f"Performing {name}. You are in a stale state and \ + using potentially stale data to perform this step. 
\ + Re-run the following steps to return to a present state:\n: \ + {steps_needed}") + + + def try_log_making_stale_warning(self, name, next_step): + next_next_step = next_step + 1 + prod_steps = f"{next_next_step}. {" or ".join(self.producers_and_getters[next_next_step][0])}" + # add later set methods + get_steps = self.join_steps(self.get_get_steps_in_between(next_step, self.current_step + 1)) + + + LOGGER.warning(f"Performing {name}. You are beginning a new iteration. Any data returned \ + by the following get methods will be considered stale:\n{get_steps}. To continue with this iteration, please perform:\n{prod_steps}") + + # stale must be before b/c user must have regressed with progress that contains skips + # return set method, and next possible up to date key method + def try_log_inconsistent_warning(self, name, next_step): + set_method_str= f"{self.producers_and_getters[next_step][0][1].__name__}" + latest_up_to_date = self.get_last_up_to_date(next_step) + LOGGER.warning(f"Unable to perform {name} because some steps have been skipped. \ + You can call the corresponding set method: {set_method_str} or re run steps \ + starting at or before {latest_up_to_date}") + + def log_get_inconsistent_warning(self, name, next_step): + prod_steps = f"{next_step}. {" or ".join(self.producers_and_getters[next_step][0])}" + latest_up_to_date = self.get_last_up_to_date(next_step) + LOGGER.warning(f"Unable to perform {name} because {prod_steps} has not been run yet. Run steps starting at or before {latest_up_to_date} ") + + + def log_get_stale_warning(self, name, next_step): + latest_up_to_date = self.get_last_up_to_date(next_step) + LOGGER.warning(f"Performing {name}. This data is potentially stale. 
\ + Re-run steps starting at or before {latest_up_to_date} to ensure data is up to date.") + + + # tries to perform step if possible -> warns that data might be stale + def try_perform_forward_producer_step(self, method, *method_args, **method_kwargs): + name = method.__name__ + next_step = self.producer_to_step_map[name] + if name in self.set_methods: + self.try_log_skipping_steps_warning(name, next_step) + # next_step == 0, set method (already warned), or previous step is up to term + res = self.perform_producer_step(method, *method_args, **method_kwargs) + return res + + + # next_step == 0, set method, or previous step is up to term + def try_perform_backward_producer_step(self, method, *method_args, **method_kwargs): + name = method.__name__ + next_step = self.producer_to_step_map[name] + self.try_log_making_stale_warning(next_step) + self.cur_term +=1 + for i in range(0, next_step): + if self.terms[i] != -1: + self.terms[i] = self.cur_term + res = self.perform_producer_step(method, *method_args, **method_kwargs) + return res + + + def try_perform_producer_step(self, method, *method_args, **method_kwargs): + name = method.__name__ + next_step = self.producer_to_step_map[name] + if next_step >= self.current_step: + res = self.try_perform_forward_producer_step(method, *method_args, **method_kwargs) + return res + else: + res = self.try_perform_backward_producer_step(method, *method_args, **method_kwargs) + return res + + + # dont update current step or terms + def try_perform_stale_or_inconsistent_producer_step(self, method, *method_args, **method_kwargs): + name = method.__name__ + next_step = self.producer_to_step_map[name] + if self.terms[next_step-1] == -1: #inconsistent + self.try_log_inconsistent_warning(name, next_step) + else: + self.try_log_using_stale_warning(name, next_step) + res = self.perform_producer_step(method, *method_args, **method_kwargs) + return res + + + + + + + def try_perform_getter_step(self, method, *method_args, **method_kwargs): + 
name = method.__name__ + # either inconsistent, stale, or up to date + step_num = self.getter_to_step_map[name] + step_term = self.terms[step_num] + if step_term == -1: + self.log_get_inconsistent_warning(step_num) + elif step_term == self.cur_term: + res = method(*method_args, **method_kwargs) + return res + else: + self.log_get_stale_warning(step_num) + res = method(*method_args, **method_kwargs) + return res + + + + + + + def guide_step(self, method, *method_args, **method_kwargs): method_name = method.__name__ if method_name in self.producer_to_step_map: - actual_next_step = self.producer_to_step_map[method_name] - if actual_next_step > expected_next_step: - necessary_steps_str = self._get_necessary_steps(actual_next_step) - LOGGER.error(f"Required steps have been SKIPPED! Unable to run {method_name}. Please perform the following steps first:\n{necessary_steps_str}") - return - elif actual_next_step < self.current_step: #regressing, make stale data, warn - try: - necessary_steps_str = self._get_necessary_steps(actual_next_step) - res = method(self, *method_args, **method_kwargs) - LOGGER.warning(f"The last run step was {self.current_step}. The following methods will return STALE data. 
Please perform the following steps in order to get up to date:\n{necessary_steps_str}") - self.current_step = actual_next_step - return res - except Exception as e: - LOGGER.error(f"{method_name} threw an exception", exc_info = e) - return - else: - try: - res = method(self, *method_args, **method_kwargs) - self.current_step = actual_next_step - - return res - except Exception as e: - LOGGER.error(f"{method_name} threw an exception", exc_info = e) - + #up-todate + next_step = self.producer_to_step_map[method_name] + if method_name in self.set_methods or next_step == 0 or self.terms[next_step-1] == self.cur_term: + res = self.try_perform_producer_step(method, *method_args, **method_kwargs) + return res + else: #stale or inconsistent + res = self.try_perform_stale_or_inconsistent_producer_step(method, *method_args, **method_kwargs) + return res elif method_name in self.getter_to_step_map: - actual_next_step = self.getter_to_step_map[method_name] - if actual_next_step > expected_next_step: - try: - res = method(self, *method_args, **method_kwargs) - necessary_steps_str = self._get_necessary_steps(actual_next_step) - if res is None: - LOGGER.error(f"Required steps have been SKIPPED!. {method_name} does not have a value to return. Please perform the following steps in order before running this method:\n{necessary_steps_str}") - else: - LOGGER.warning(f"This data may be STALE. 
Please perform the following steps in order to ensure the response is up to date:\n{necessary_steps_str}") - return res - except Exception as e: - LOGGER.error(f"{method_name} threw an exception", exc_info = e) - return - else: - try: - res = method(self, *method_args, **method_kwargs) - return res - except Exception as e: - LOGGER.error(f"{method_name} threw an exception", exc_info = e) + res = self.try_perform_getter_step(method, *method_args, **method_kwargs) + return res else: print(f"Method {method_name} does not need to be wrapped") + + + + + + + + + + + +def guide(method): + + @wraps(method) + def guided_step(self, *method_args, **method_kwargs): + return self.guide_handler.guide_step(method, *method_args, **method_kwargs) + return guided_step class Zephyr: @@ -104,50 +268,38 @@ def __init__(self): self.is_fitted = None self.results = None + + self.current_step = -1 # tuple of 2 arrays: producers and attributes self.step_order = [ - ([self.create_entityset, self.set_entityset], [self.get_entityset]), + ([self.generate_entityset, self.set_entityset], [self.get_entityset]), # ([self.set_labeling_function], [self.get_labeling_function]), - ([self.generate_label_times], [self.get_label_times]), + ([self.generate_label_times, self.set_label_times], [self.get_label_times]), ([self.generate_feature_matrix_and_labels, self.set_feature_matrix_and_labels], [self.get_feature_matrix_and_labels]), ([self.generate_train_test_split, self.set_train_test_split], [self.get_train_test_split]), - ([self.set_and_fit_pipeline], [self.get_pipeline, self.get_pipeline_hyperparameters]), + ([self.fit_pipeline, self.set_fitted_pipeline], [self.get_fitted_pipeline]), ([self.predict, self.evaluate], []) ] - - self.producer_to_step_map = {} - self.getter_to_step_map = {} - for idx, (producers, getters) in enumerate(self.step_order): - for prod in producers: - self.producer_to_step_map[prod.__name__] = idx - for get in getters: - self.getter_to_step_map[get.__name__] = idx + 
self.set_methods = set([self.set_entityset.__name__, self.set_label_times.__name__, self.set_feature_matrix_and_labels.__name__, self.set_train_test_split.__name__, self.set_fitted_pipeline.__name__]) + self.guide_handler = GuideHandler(self.step_order, self.set_methods) - def _get_necessary_steps(self, actual_step): - step_strs = [] - for step in range(self.current_step, actual_step): - option_strs = [] - for opt in self.step_order[step][0]: - option_strs.append(opt.__name__) - step_strs.append(f"{step}. {' or '.join(option_strs)}") - return "\n".join(step_strs) - def get_entityset_types(self): + def GET_ENTITYSET_TYPES(self): """ Returns the supported entityset types (PI/SCADA/Vibrations) and the required dataframes and their columns """ return VALIDATE_DATA_FUNCTIONS.keys() @guide - def create_entityset(self, data_paths, es_type, custom_kwargs_mapping=None): + def generate_entityset(self, dfs, es_type, custom_kwargs_mapping=None): """ Generate an entityset Args: - data_paths ( dict ): Dictionary mapping entity names to the pandas + dfs ( dict ): Dictionary mapping entity names to the pandas dataframe for that that entity es_type (str): type of signal data , either SCADA or PI custom_kwargs_mapping ( dict ): Updated keyword arguments to be used @@ -156,7 +308,7 @@ def create_entityset(self, data_paths, es_type, custom_kwargs_mapping=None): featuretools.EntitySet that contains the data passed in and their relationships """ - entityset = _create_entityset(data_paths, es_type, custom_kwargs_mapping) + entityset = _create_entityset(dfs, es_type, custom_kwargs_mapping) self.entityset = entityset return self.entityset @@ -183,7 +335,7 @@ def get_entityset(self): return self.entityset - def get_predefined_labeling_functions(self): + def GET_LABELING_FUNCTIONS(self): return get_labeling_functions() # @guide @@ -260,6 +412,11 @@ def generate_label_times( self.label_times = label_times return label_times, meta + + @guide + def set_label_times(self, label_times): + 
assert(isinstance(label_times, cp.LabelTimes)) + self.label_times = label_times @guide def get_label_times(self, visualize = True): @@ -295,9 +452,16 @@ def generate_train_test_split( train_size=None, random_state=None, shuffle=True, - stratify=None, + stratify=False, ): feature_matrix, labels = self.feature_matrix_and_labels + + if not isinstance(stratify, list): + if stratify: + stratify = labels + else: + stratify = None + X_train, X_test, y_train, y_test = train_test_split( feature_matrix, labels, @@ -327,17 +491,17 @@ def get_train_test_split(self): return None return self.X_train, self.X_test, self.y_train, self.y_test - def get_predefined_pipelines(self): - pass - @guide - def set_and_fit_pipeline( + def set_fitted_pipeline(self, pipeline): + self.pipeline = pipeline + + @guide + def fit_pipeline( self, pipeline = "xgb_classifier", pipeline_hyperparameters=None, X=None, y=None, visual=False, **kwargs ): # kwargs indicate the parameters of the current pipeline self.pipeline = self._get_mlpipeline(pipeline, pipeline_hyperparameters) - self.pipeline_hyperparameters = self.pipeline.get_hyperparameters() if X is None: X = self.X_train @@ -356,7 +520,7 @@ def set_and_fit_pipeline( return dict(zip(visual_names, outputs)) @guide - def get_pipeline(self): + def get_fitted_pipeline(self): return self.pipeline @guide @@ -372,7 +536,6 @@ def predict(self, X=None, visual=False, **kwargs): else: outputs_spec = "default" - print(X) outputs = self.pipeline.predict(X, output_=outputs_spec, **kwargs) print(outputs) @@ -384,26 +547,32 @@ def predict(self, X=None, visual=False, **kwargs): return outputs @guide - def evaluate(self, X=None, y=None, metrics=None, show_plots = True): + def evaluate(self, X=None, y=None,metrics=None, additional_args = None, context_mapping = None, metric_args_mapping = None): if X is None: X = self.X_test if y is None: y = self.y_test - context_0 = self.pipeline.predict(X, output_=0) - y_proba = context_0["y_pred"][::, 1] - y_pred = 
self.pipeline.predict(start_=1, **context_0) + # may have multiple proba_steps and multiple produce args + + # context_0 = self.pipeline.predict(X, output_=0) + # y_proba = context_0["y_pred"][::, 1] + final_context = self.pipeline.predict(X, output_=-1) if metrics is None: metrics = DEFAULT_METRICS + if metric_args is None: + metric_args = {} results = {} for metric in metrics: try: metric_primitive = self._get_ml_primitive(metric) - res = metric_primitive.produce(y_pred=y_pred, y_proba=y_proba, y_true=y) - if show_plots: - plt.show() + additional_kwargs = {} + if metric_primitive.name in metric_args: + additional_kwargs = metric_args[metric_primitive.name] + + res = metric_primitive.produce(y_true = self.y_test, **final_context, **additional_kwargs) results[metric_primitive.name] = res except Exception as e: LOGGER.error(f"Unable to run evaluation metric: {metric_primitive.name}", exc_info = e) @@ -596,24 +765,24 @@ def _get_outputs_spec(self, default=True): } ) - # obj.create_entityset( - # { - # "alarms": alarms_df, - # "stoppages": stoppages_df, - # "notifications": notifications_df, - # "work_orders": work_orders_df, - # "turbines": turbines_df, - # "pidata": pidata_df, - # }, - # "pidata", - # ) - - obj.set_entityset(entityset_path = "/Users/raymondpan/zephyr/Zephyr-repo/brake_pad_es", es_type = 'scada') + obj.create_entityset( + { + "alarms": alarms_df, + "stoppages": stoppages_df, + "notifications": notifications_df, + "work_orders": work_orders_df, + "turbines": turbines_df, + "pidata": pidata_df, + }, + "pidata", + ) + + # obj.set_entityset(entityset_path = "/Users/raymondpan/zephyr/Zephyr-repo/brake_pad_es", es_type = 'scada') - obj.set_labeling_function(name="brake_pad_presence") + # obj.set_labeling_function(name="brake_pad_presence") - obj.generate_label_times(num_samples=10, gap="20d") - print(obj.get_label_times()) + obj.generate_label_times(labeling_fn="brake_pad_presence", num_samples=10, gap="20d") + # print(obj.get_label_times()) 
obj.generate_feature_matrix_and_labels( diff --git a/zephyr_ml/pipelines/xgb_classifier.json b/zephyr_ml/pipelines/xgb_classifier.json index 26059dc..47a0bb1 100644 --- a/zephyr_ml/pipelines/xgb_classifier.json +++ b/zephyr_ml/pipelines/xgb_classifier.json @@ -13,11 +13,6 @@ "y_true": "y" } }, - "output_names": { - "xgboost.XGBClassifier#1": { - "y": "y_pred" - } - }, "outputs": { "default": [ { diff --git a/zephyr_ml/primitives/jsons/xgboost.XGBClassifier.json b/zephyr_ml/primitives/jsons/xgboost.XGBClassifier.json index a831109..0440aac 100644 --- a/zephyr_ml/primitives/jsons/xgboost.XGBClassifier.json +++ b/zephyr_ml/primitives/jsons/xgboost.XGBClassifier.json @@ -36,7 +36,7 @@ ], "output": [ { - "name": "y", + "name": "y_proba", "type": "array" } ] diff --git a/zephyr_ml/primitives/jsons/zephyr_ml.primitives.postprocessing.FindThreshold.json b/zephyr_ml/primitives/jsons/zephyr_ml.primitives.postprocessing.FindThreshold.json index e522be5..7420061 100644 --- a/zephyr_ml/primitives/jsons/zephyr_ml.primitives.postprocessing.FindThreshold.json +++ b/zephyr_ml/primitives/jsons/zephyr_ml.primitives.postprocessing.FindThreshold.json @@ -18,7 +18,7 @@ "type": "ndarray" }, { - "name": "y_pred", + "name": "y_proba", "type": "ndarray" } ] @@ -27,13 +27,13 @@ "method": "apply_threshold", "args": [ { - "name": "y_pred", + "name": "y_proba", "type": "ndarray" } ], "output": [ { - "name": "y", + "name": "y_pred", "type": "ndarray" }, { diff --git a/zephyr_ml/primitives/jsons/zephyr_ml.primitives.postprocessing.confusion_matrix.json b/zephyr_ml/primitives/jsons/zephyr_ml.primitives.postprocessing.confusion_matrix.json index efce4e3..5770c5b 100644 --- a/zephyr_ml/primitives/jsons/zephyr_ml.primitives.postprocessing.confusion_matrix.json +++ b/zephyr_ml/primitives/jsons/zephyr_ml.primitives.postprocessing.confusion_matrix.json @@ -3,7 +3,7 @@ "contributors": [ "Raymond Pan " ], - "description": "Plot confusion matrix.", + "description": "Create and plot confusion matrix.", 
"classifiers": { "type": "helper" }, diff --git a/zephyr_ml/primitives/postprocessing.py b/zephyr_ml/primitives/postprocessing.py index 1972625..7498509 100644 --- a/zephyr_ml/primitives/postprocessing.py +++ b/zephyr_ml/primitives/postprocessing.py @@ -40,24 +40,24 @@ def __init__(self, metric="f1"): self._metric = "f1" self._threshold = None - def fit(self, y_true, y_pred): + def fit(self, y_true, y_proba): """Find the threshold that obtains the best metric value. Args: y_true (Series or ndarray): ``pandas.Series`` or ``numpy.ndarray`` ground truth target values. - y_pred (Series or ndarray): - ``pandas.Series`` or ``numpy.ndarray`` predicted target valeus. + y_proba (Series or ndarray): + ``pandas.Series`` or ``numpy.ndarray`` predicted target values' probabilities. """ - if y_pred.ndim > 1: - y_pred = y_pred[:, 1] + if y_proba.ndim > 1: + y_proba = y_proba[:, 1] RANGE = np.arange(0, 1, 0.01) scores = list() scorer = METRICS[self._metric] for thresh in RANGE: - y = [1 if x else 0 for x in y_pred > thresh] + y = [1 if x else 0 for x in y_proba > thresh] scores.append(scorer(y_true, y)) threshold = RANGE[np.argmax(scores)] @@ -66,12 +66,12 @@ def fit(self, y_true, y_pred): self._threshold = threshold self._scores = scores - def apply_threshold(self, y_pred): + def apply_threshold(self, y_proba): """Apply threshold on predicted values. Args: y_pred (Series): - ``pandas.Series`` predicted target valeus. + ``pandas.Series`` predicted target values' probabilities. Return: tuple: @@ -79,10 +79,10 @@ def apply_threshold(self, y_pred): * detected float value for threshold. * list of scores obtained at each threshold. 
""" - if y_pred.ndim > 1: - y_pred = y_pred[:, 1] + if y_proba.ndim > 1: + y_proba = y_proba[:, 1] - binary = [1 if x else 0 for x in y_pred > self._threshold] + binary = [1 if x else 0 for x in y_proba > self._threshold] return binary, self._threshold, self._scores @@ -106,6 +106,8 @@ def confusion_matrix(y_true, y_pred, labels=None, sample_weight=None, normalize= def roc_auc_score_and_curve( y_true, y_proba, pos_label=None, sample_weight=None, drop_intermediate=True, show_plot = True ): + if y_proba.ndim > 1: + y_proba = y_proba[:,1] fpr, tpr, _ = metrics.roc_curve( y_true, y_proba, From 102229efbe3ec4ba59726926f823f7aea79907fb Mon Sep 17 00:00:00 2001 From: Raymond Pan Date: Sat, 10 May 2025 12:26:50 -0400 Subject: [PATCH 20/28] Update evaluate w/ global and local args and mapping --- zephyr_ml/core.py | 48 ++++++++++++++++++++++++++++++++++++----------- 1 file changed, 37 insertions(+), 11 deletions(-) diff --git a/zephyr_ml/core.py b/zephyr_ml/core.py index f00338e..eb40c7c 100644 --- a/zephyr_ml/core.py +++ b/zephyr_ml/core.py @@ -545,34 +545,60 @@ def predict(self, X=None, visual=False, **kwargs): return prediction, dict(zip(visual_names, outputs[-len(visual_names) :])) return outputs + + @guide - def evaluate(self, X=None, y=None,metrics=None, additional_args = None, context_mapping = None, metric_args_mapping = None): + def evaluate(self, X=None, y=None,metrics=None, global_args = None, local_args = None, global_mapping = None, local_mapping = None): if X is None: X = self.X_test if y is None: y = self.y_test - # may have multiple proba_steps and multiple produce args - - # context_0 = self.pipeline.predict(X, output_=0) - # y_proba = context_0["y_pred"][::, 1] final_context = self.pipeline.predict(X, output_=-1) + # remap items, if any + if global_mapping is not None: + for cur, new in global_mapping.items(): + if cur in final_context: + cur_item = final_context.pop(cur) + final_context[new] = cur_item + + if metrics is None: metrics = DEFAULT_METRICS - 
if metric_args is None: - metric_args = {} + + if global_args is None: + global_args = {} + + if local_args is None: + local_args = {} + + if local_mapping is None: + local_mapping = {} + results = {} for metric in metrics: try: metric_primitive = self._get_ml_primitive(metric) - additional_kwargs = {} - if metric_primitive.name in metric_args: - additional_kwargs = metric_args[metric_primitive.name] + + if metric in local_mapping: + metric_context = {} + metric_mapping = local_mapping[metric] + for cur, item in final_context.items(): + new = metric_mapping.get(cur, cur) + metric_context[new] = item + else: + metric_context = final_context + + + if metric in local_args: + metric_args = local_args[metric] + else: + metric_args = {} - res = metric_primitive.produce(y_true = self.y_test, **final_context, **additional_kwargs) + res = metric_primitive.produce(y_true = self.y_test, **metric_context, **metric_args) results[metric_primitive.name] = res except Exception as e: LOGGER.error(f"Unable to run evaluation metric: {metric_primitive.name}", exc_info = e) From 2f5e925c485e4ceb6772791e9e6922d375598588 Mon Sep 17 00:00:00 2001 From: Raymond Pan Date: Sat, 10 May 2025 18:35:07 -0400 Subject: [PATCH 21/28] Add static help and signal processing --- zephyr_ml/core.py | 140 ++++++++++++++++++++++--------- zephyr_ml/entityset.py | 10 +++ zephyr_ml/feature_engineering.py | 2 +- zephyr_ml/labeling/__init__.py | 4 +- 4 files changed, 116 insertions(+), 40 deletions(-) diff --git a/zephyr_ml/core.py b/zephyr_ml/core.py index eb40c7c..0a0e169 100644 --- a/zephyr_ml/core.py +++ b/zephyr_ml/core.py @@ -6,6 +6,7 @@ get_labeling_functions_map, LABELING_FUNCTIONS, ) +from zephyr_ml.feature_engineering import process_signals import composeml as cp from inspect import getfullargspec import featuretools as ft @@ -19,7 +20,7 @@ import logging import matplotlib.pyplot as plt from functools import wraps - +import inspect DEFAULT_METRICS = [ "sklearn.metrics.accuracy_score", 
"sklearn.metrics.precision_score", @@ -36,15 +37,18 @@ class GuideHandler: def __init__(self, producers_and_getters, set_methods): self.cur_term = 0 + self.current_step = -1 self.producers_and_getters = producers_and_getters self.set_methods = set_methods self.producer_to_step_map = {} self.getter_to_step_map = {} + self.terms = [] - + self.skipped = [] for idx, (producers, getters) in enumerate(self.producers_and_getters): self.terms.append(-1) + self.skipped.append(False) for prod in producers: self.producer_to_step_map[prod.__name__] = idx @@ -98,6 +102,8 @@ def perform_producer_step(self, method, *method_args, **method_kwargs): def try_log_skipping_steps_warning(self, name, next_step): steps_skipped = self.get_steps_in_between(self.current_step, next_step) if len(steps_skipped) > 0: + for step in range(self.current_step + 1, next_step): + self.skipped[step] = True necc_steps = self.join_steps(steps_skipped) LOGGER.warning(f"Performing {name}. You are skipping the following steps:\n{necc_steps}") @@ -115,7 +121,7 @@ def try_log_using_stale_warning(self, name, next_step): def try_log_making_stale_warning(self, name, next_step): next_next_step = next_step + 1 - prod_steps = f"{next_next_step}. {" or ".join(self.producers_and_getters[next_next_step][0])}" + prod_steps = f"{next_next_step}. {' or '.join(self.producers_and_getters[next_next_step][0])}" # add later set methods get_steps = self.join_steps(self.get_get_steps_in_between(next_step, self.current_step + 1)) @@ -133,7 +139,7 @@ def try_log_inconsistent_warning(self, name, next_step): starting at or before {latest_up_to_date}") def log_get_inconsistent_warning(self, name, next_step): - prod_steps = f"{next_step}. {" or ".join(self.producers_and_getters[next_step][0])}" + prod_steps = f"{next_step}. {' or '.join(self.producers_and_getters[next_step][0])}" latest_up_to_date = self.get_last_up_to_date(next_step) LOGGER.warning(f"Unable to perform {name} because {prod_steps} has not been run yet. 
Run steps starting at or before {latest_up_to_date} ") @@ -186,6 +192,10 @@ def try_perform_stale_or_inconsistent_producer_step(self, method, *method_args, if self.terms[next_step-1] == -1: #inconsistent self.try_log_inconsistent_warning(name, next_step) else: + # need to include a case where performing using stale data that was skipped in current iteration + # overwrite current iteration's ? + # no not possible b/c if there is a current iteration after this step, it must have updated this step's iteration + # self.try_log_using_stale_warning(name, next_step) res = self.perform_producer_step(method, *method_args, **method_kwargs) return res @@ -274,7 +284,6 @@ def __init__(self): # tuple of 2 arrays: producers and attributes self.step_order = [ ([self.generate_entityset, self.set_entityset], [self.get_entityset]), - # ([self.set_labeling_function], [self.get_labeling_function]), ([self.generate_label_times, self.set_label_times], [self.get_label_times]), ([self.generate_feature_matrix_and_labels, self.set_feature_matrix_and_labels], [self.get_feature_matrix_and_labels]), ([self.generate_train_test_split, self.set_train_test_split], [self.get_train_test_split]), @@ -291,10 +300,26 @@ def GET_ENTITYSET_TYPES(self): """ Returns the supported entityset types (PI/SCADA/Vibrations) and the required dataframes and their columns """ - return VALIDATE_DATA_FUNCTIONS.keys() + info_map = {} + for es_type, val_fn in VALIDATE_DATA_FUNCTIONS.items(): + info_map[es_type] = {"obj": es_type, "desc": " ".join((val_fn.__doc__.split()))} + + return info_map + + def GET_LABELING_FUNCTIONS(self): + return get_labeling_functions() + + def GET_EVALUATION_METRICS(self): + info_map = {} + for metric in DEFAULT_METRICS: + primitive = self._get_ml_primitive(metric) + info_map[metric] = {"obj": primitive, "desc": primitive.metadata["description"] } + return info_map @guide - def generate_entityset(self, dfs, es_type, custom_kwargs_mapping=None): + def generate_entityset(self, dfs, es_type, 
custom_kwargs_mapping=None, + signal_dataframe_name = None, signal_column = None, signal_transformations = None, + signal_aggregations = None, signal_window_size = None, signal_replace_dataframe = False, **sigpro_kwargs): """ Generate an entityset @@ -309,6 +334,16 @@ def generate_entityset(self, dfs, es_type, custom_kwargs_mapping=None): their relationships """ entityset = _create_entityset(dfs, es_type, custom_kwargs_mapping) + + #perform signal processing + if signal_dataframe_name is not None and signal_column is not None: + if signal_transformations is None: + signal_transformations = [] + if signal_aggregations is None: + signal_aggregations = [] + process_signals(entityset, signal_dataframe_name, signal_column, signal_transformations, + signal_aggregations, signal_window_size, signal_replace_dataframe, **sigpro_kwargs) + self.entityset = entityset return self.entityset @@ -335,8 +370,7 @@ def get_entityset(self): return self.entityset - def GET_LABELING_FUNCTIONS(self): - return get_labeling_functions() + # @guide # def set_labeling_function(self, name=None, func=None): @@ -425,9 +459,37 @@ def get_label_times(self, visualize = True): return self.label_times @guide - def generate_feature_matrix_and_labels(self, **kwargs): + def generate_feature_matrix_and_labels(self, target_dataframe_name = None, instance_ids = None, + agg_primitives = None, trans_primitives = None, groupby_trans_primitives = None, + allowed_paths = None, max_depth = 2, ignore_dataframes = None, ignore_columns=None, + primitive_options=None, seed_features=None, + drop_contains=None, drop_exact=None, where_primitives=None, max_features=-1, + cutoff_time_in_index=False, save_progress=None, features_only=False, training_window=None, + approximate=None, chunk_size=None, n_jobs=1, dask_kwargs=None, verbose=False, return_types=None, + progress_callback=None, include_cutoff_time=True, + + signal_dataframe_name = None, signal_column = None, signal_transformations = None, + signal_aggregations = 
None, signal_window_size = None, signal_replace_dataframe = False, **sigpro_kwargs): + + # perform signal processing + if signal_dataframe_name is not None and signal_column is not None: + if signal_transformations is None: + signal_transformations = [] + if signal_aggregations is None: + signal_aggregations = [] + process_signals(self.entityset, signal_dataframe_name, signal_column, signal_transformations, + signal_aggregations, signal_window_size, signal_replace_dataframe, **sigpro_kwargs) + feature_matrix, features = ft.dfs( - entityset=self.entityset, cutoff_time=self.label_times, **kwargs + entityset=self.entityset, cutoff_time=self.label_times, + target_dataframe_name = target_dataframe_name, instance_ids =instance_ids, + agg_primitives = agg_primitives, trans_primitives = trans_primitives, groupby_trans_primitives = groupby_trans_primitives, + allowed_paths = allowed_paths, max_depth = max_depth, ignore_dataframes = ignore_dataframes, ignore_columns=ignore_columns, + primitive_options=primitive_options, seed_features=seed_features, + drop_contains=drop_contains, drop_exact=drop_exact, where_primitives=where_primitives, max_features=max_features, + cutoff_time_in_index=cutoff_time_in_index, save_progress=save_progress, features_only=features_only, training_window=training_window, + approximate=approximate, chunk_size=chunk_size, n_jobs=n_jobs, dask_kwargs=dask_kwargs, verbose=verbose, return_types=return_types, + progress_callback=progress_callback, include_cutoff_time=include_cutoff_time, ) self.feature_matrix_and_labels = self._clean_feature_matrix(feature_matrix) self.features = features @@ -546,7 +608,8 @@ def predict(self, X=None, visual=False, **kwargs): return outputs - + + @guide def evaluate(self, X=None, y=None,metrics=None, global_args = None, local_args = None, global_mapping = None, local_mapping = None): @@ -656,6 +719,7 @@ def _get_outputs_spec(self, default=True): if __name__ == "__main__": obj = Zephyr() + print(obj.GET_EVALUATION_METRICS()) 
alarms_df = pd.DataFrame( { "COD_ELEMENT": [0, 0], @@ -791,40 +855,40 @@ def _get_outputs_spec(self, default=True): } ) - obj.create_entityset( - { - "alarms": alarms_df, - "stoppages": stoppages_df, - "notifications": notifications_df, - "work_orders": work_orders_df, - "turbines": turbines_df, - "pidata": pidata_df, - }, - "pidata", - ) + # obj.create_entityset( + # { + # "alarms": alarms_df, + # "stoppages": stoppages_df, + # "notifications": notifications_df, + # "work_orders": work_orders_df, + # "turbines": turbines_df, + # "pidata": pidata_df, + # }, + # "pidata", + # ) # obj.set_entityset(entityset_path = "/Users/raymondpan/zephyr/Zephyr-repo/brake_pad_es", es_type = 'scada') # obj.set_labeling_function(name="brake_pad_presence") - obj.generate_label_times(labeling_fn="brake_pad_presence", num_samples=10, gap="20d") - # print(obj.get_label_times()) + # obj.generate_label_times(labeling_fn="brake_pad_presence", num_samples=10, gap="20d") + # # print(obj.get_label_times()) - obj.generate_feature_matrix_and_labels( - target_dataframe_name="turbines", - cutoff_time_in_index=True, - agg_primitives=["count", "sum", "max"], - verbose = True - ) + # obj.generate_feature_matrix_and_labels( + # target_dataframe_name="turbines", + # cutoff_time_in_index=True, + # agg_primitives=["count", "sum", "max"], + # verbose = True + # ) - print(obj.get_feature_matrix_and_labels) + # print(obj.get_feature_matrix_and_labels) - obj.generate_train_test_split() - add_primitives_path( - path="/Users/raymondpan/zephyr/Zephyr-repo/zephyr_ml/primitives/jsons" - ) - obj.set_and_fit_pipeline() + # obj.generate_train_test_split() + # add_primitives_path( + # path="/Users/raymondpan/zephyr/Zephyr-repo/zephyr_ml/primitives/jsons" + # ) + # obj.set_and_fit_pipeline() - obj.evaluate() + # obj.evaluate() diff --git a/zephyr_ml/entityset.py b/zephyr_ml/entityset.py index d71eb72..620abb5 100644 --- a/zephyr_ml/entityset.py +++ b/zephyr_ml/entityset.py @@ -208,18 +208,28 @@ def 
_validate_data(dfs, es_type, es_kwargs): def validate_scada_data(dfs, new_kwargs_mapping=None): + """ + SCADA data is signal data from the Original Equipment Manufacturer Supervisory Control + And Data Acquisition (OEM-SCADA) system, a signal data source. + """ entity_kwargs = get_mapped_kwargs("scada", new_kwargs_mapping) _validate_data(dfs, "scada", entity_kwargs) return entity_kwargs def validate_pidata_data(dfs, new_kwargs_mapping=None): + """ + PI data is signal data from the operator's historical Plant Information (PI) system. + """ entity_kwargs = get_mapped_kwargs("pidata", new_kwargs_mapping) _validate_data(dfs, "pidata", entity_kwargs) return entity_kwargs def validate_vibrations_data(dfs, new_kwargs_mapping=None): + """ + Vibrations data is vibrations data collected on Planetary gearboxes in turbines. + """ entities = ["vibrations"] pidata_kwargs, scada_kwargs = {}, {} diff --git a/zephyr_ml/feature_engineering.py b/zephyr_ml/feature_engineering.py index a9bc3fd..7683200 100644 --- a/zephyr_ml/feature_engineering.py +++ b/zephyr_ml/feature_engineering.py @@ -2,7 +2,7 @@ def process_signals(es, signal_dataframe_name, signal_column, transformations, aggregations, - window_size, replace_dataframe=False, **kwargs): + window_size = None, replace_dataframe=False, **kwargs): ''' Process signals using SigPro. 
diff --git a/zephyr_ml/labeling/__init__.py b/zephyr_ml/labeling/__init__.py index b58b5b3..ec58696 100644 --- a/zephyr_ml/labeling/__init__.py +++ b/zephyr_ml/labeling/__init__.py @@ -28,7 +28,7 @@ def get_labeling_functions(): functions = {} for function in LABELING_FUNCTIONS: name = function.__name__ - functions[name] = function.__doc__.split("\n")[0] + functions[name] = {"obj": function, "desc": function.__doc__.split("\n")[0]} return functions @@ -41,6 +41,8 @@ def get_labeling_functions_map(): return functions + + def get_helper_functions(): functions = {} for function in UTIL_FUNCTIONS: From 9031c6f7fdc498fcd44c3790e735b9685d2e8ef5 Mon Sep 17 00:00:00 2001 From: Raymond Pan Date: Sun, 11 May 2025 16:02:05 -0400 Subject: [PATCH 22/28] fix tests --- tests/test_core.py | 420 ++++++++++-------------- zephyr_ml/core.py | 46 ++- zephyr_ml/pipelines/xgb_classifier.json | 62 ++-- 3 files changed, 234 insertions(+), 294 deletions(-) diff --git a/tests/test_core.py b/tests/test_core.py index 22d3651..f8240cb 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -1,16 +1,18 @@ import os import pickle +from mlblocks import MLBlock import numpy as np import pandas as pd import pytest -from zephyr_ml.core import Zephyr +from zephyr_ml.core import DEFAULT_METRICS, Zephyr import logging class TestZephyr: + @staticmethod def base_dfs(): alarms_df = pd.DataFrame({ 'COD_ELEMENT': [0, 0], @@ -109,13 +111,12 @@ def base_dfs(): "pidata": pidata_df } - def base_train_test_split(self): X_train = pd.DataFrame({ 'feature 1': np.random.random(300), 'feature 2': [0] * 150 + [1] * 150, }) - y_train =X_train['feature 2'].to_list() + y_train = X_train['feature 2'].to_list() X_test = pd.DataFrame({ 'feature 1': np.random.random((100)), @@ -124,8 +125,6 @@ def base_train_test_split(self): y_test = X_test['feature 2'].to_list() return X_train, X_test, y_train, y_test - - @classmethod def setup_class(cls): cls.train = pd.DataFrame({ @@ -148,275 +147,218 @@ def setup_class(cls): 
cls.random_y = [1 if x > 0.5 else 0 for x in np.random.random(100)] cls.kwargs = { - "create_entityset": {"data_paths": cls.base_dfs(), "es_type": "pidata"}, - "set_labeling_function": {"name": "brake_pad_presence"}, - "generate_label_times": {"num_samples": 10, "gap": "20d"}, + "generate_entityset": {"dfs": TestZephyr.base_dfs(), "es_type": "pidata"}, + "generate_label_times": {"labeling_fn": "brake_pad_presence", "num_samples": 10, "gap": "20d"}, "generate_feature_matrix_and_labels": {"target_dataframe_name": "turbines", "cutoff_time_in_index": True, "agg_primitives": ["count", "sum", "max"], "verbose": True}, "generate_train_test_split": {}, - "set_and_fit_pipeline": {}, + "fit_pipeline": {}, "evaluate": {} } - - - def setup_zephyr(self, step_num): - zephyr = Zephyr() - - for i, (setters, getters) in enumerate(zephyr.step_order): - if i < step_num: - setter = setters[0] - kwargs = self.kwargs[setter.__name__] - getattr(zephyr, setter.__name__)(**kwargs) - else: - break - return zephyr - def test_initialize_class(self): - zephyr = self.setup_zephyr(1) - - def test_create_entityset(self): - zephyr = self.setup_zephyr(1) + zephyr = Zephyr() + assert zephyr.entityset is None + assert zephyr.labeling_function is None + assert zephyr.label_times is None + assert zephyr.pipeline is None + assert zephyr.pipeline_hyperparameters is None + assert zephyr.feature_matrix_and_labels is None + assert zephyr.X_train is None + assert zephyr.X_test is None + assert zephyr.y_train is None + assert zephyr.y_test is None + assert zephyr.is_fitted is None + assert zephyr.results is None + + def test_generate_entityset(self): + zephyr = Zephyr() + zephyr.generate_entityset(**self.__class__.kwargs["generate_entityset"]) es = zephyr.get_entityset() assert es is not None - - def test_set_labeling_function(self): - zephyr = self.setup_zephyr(2) - labeling_fn = es = zephyr.get_labeling_function() - assert labeling_fn is not None - + assert es.id == 'pidata' + def 
test_generate_label_times(self): - zephyr = self.setup_zephyr(3) - label_times = zephyr.get_label_times(visualize = False) + zephyr = Zephyr() + zephyr.generate_entityset(**self.__class__.kwargs["generate_entityset"]) + zephyr.generate_label_times(**self.__class__.kwargs["generate_label_times"]) + label_times = zephyr.get_label_times(visualize=False) assert label_times is not None - + def test_generate_feature_matrix_and_labels(self): - zephyr = self.setup_zephyr(4) + zephyr = Zephyr() + zephyr.generate_entityset(**self.__class__.kwargs["generate_entityset"]) + zephyr.generate_label_times(**self.__class__.kwargs["generate_label_times"]) + zephyr.generate_feature_matrix_and_labels(**self.__class__.kwargs["generate_feature_matrix_and_labels"]) feature_matrix_and_labels = zephyr.get_feature_matrix_and_labels() assert feature_matrix_and_labels is not None - + def test_generate_train_test_split(self): - zephyr = self.setup_zephyr(5) + zephyr = Zephyr() + zephyr.generate_entityset(**self.__class__.kwargs["generate_entityset"]) + zephyr.generate_label_times(**self.__class__.kwargs["generate_label_times"]) + zephyr.generate_feature_matrix_and_labels(**self.__class__.kwargs["generate_feature_matrix_and_labels"]) + zephyr.generate_train_test_split(**self.__class__.kwargs["generate_train_test_split"]) train_test_split = zephyr.get_train_test_split() assert train_test_split is not None - - def setup_zephyr_with_base_split(self, step_num): - zephyr = self.setup_zephyr(4) - zephyr.set_train_test_split(*self.base_train_test_split()) - for i in range(5, step_num): - setters, getters = zephyr.step_order[i] - setter = setters[0] - kwargs = self.kwargs[setter.__name__] - getattr(zephyr, setter.__name__)(**kwargs) - return zephyr - + X_train, X_test, y_train, y_test = train_test_split + assert isinstance(X_train, pd.DataFrame) + assert isinstance(X_test, pd.DataFrame) + assert isinstance(y_train, pd.Series) + assert isinstance(y_test, pd.Series) + def test_set_train_test_split(self): 
- zephyr = self.setup_zephyr_with_base_split(5) - assert zephyr.get_train_test_split is not None - - def test_set_and_fit_pipeline_no_visual(self): - zephyr = self.setup_zephyr_with_base_split(5) - output = zephyr.set_and_fit_pipeline() + zephyr = Zephyr() + zephyr.generate_entityset(**self.__class__.kwargs["generate_entityset"]) + zephyr.generate_label_times(**self.__class__.kwargs["generate_label_times"]) + zephyr.generate_feature_matrix_and_labels(**self.__class__.kwargs["generate_feature_matrix_and_labels"]) + zephyr.set_train_test_split(*self.base_train_test_split()) + train_test_split = zephyr.get_train_test_split() + assert train_test_split is not None + X_train, X_test, y_train, y_test = train_test_split + assert isinstance(X_train, pd.DataFrame) + assert isinstance(X_test, pd.DataFrame) + assert isinstance(y_train, list) + assert isinstance(y_test, list) + + def test_fit_pipeline_no_visual(self): + zephyr = Zephyr() + zephyr.generate_entityset(**self.__class__.kwargs["generate_entityset"]) + zephyr.generate_label_times(**self.__class__.kwargs["generate_label_times"]) + zephyr.generate_feature_matrix_and_labels(**self.__class__.kwargs["generate_feature_matrix_and_labels"]) + zephyr.set_train_test_split(*self.base_train_test_split()) + output = zephyr.fit_pipeline(**self.__class__.kwargs["fit_pipeline"]) assert output is None - pipeline = zephyr.get_pipeline() + pipeline = zephyr.get_fitted_pipeline() assert pipeline is not None - pipeline_hyperparameters = zephyr.get_pipeline_hyperparameters() - assert pipeline_hyperparameters is not None - - def test_set_and_fit_pipeline_visual(self): - zephyr = self.setup_zephyr_with_base_split(5) - output = zephyr.set_and_fit_pipeline(visual = True) + + def test_fit_pipeline_visual(self): + zephyr = Zephyr() + zephyr.generate_entityset(**self.__class__.kwargs["generate_entityset"]) + zephyr.generate_label_times(**self.__class__.kwargs["generate_label_times"]) + 
zephyr.generate_feature_matrix_and_labels(**self.__class__.kwargs["generate_feature_matrix_and_labels"]) + zephyr.set_train_test_split(*self.base_train_test_split()) + output = zephyr.fit_pipeline(visual=True, **self.__class__.kwargs["fit_pipeline"]) assert isinstance(output, dict) assert list(output.keys()) == ['threshold', 'scores'] - pipeline = zephyr.get_pipeline() + pipeline = zephyr.get_fitted_pipeline() assert pipeline is not None - pipeline_hyperparameters = zephyr.get_pipeline_hyperparameters() - assert pipeline_hyperparameters is not None - def test_predict_no_visual(self): - zephyr = self.setup_zephyr_with_base_split(6) + zephyr = Zephyr() + zephyr.set_train_test_split(*self.base_train_test_split()) + zephyr.fit_pipeline(**self.__class__.kwargs["fit_pipeline"]) predicted = zephyr.predict() _, _, _, test_y = self.base_train_test_split() - assert test_y == predicted + print(predicted) + assert predicted == test_y def test_predict_visual(self): - zephyr = self.setup_zephyr_with_base_split(6) - predicted, output = zephyr.predict(visual = True) - - assert self.test_y == predicted - - # visualization + zephyr = Zephyr() + zephyr.generate_entityset(**self.__class__.kwargs["generate_entityset"]) + zephyr.generate_label_times(**self.__class__.kwargs["generate_label_times"]) + zephyr.generate_feature_matrix_and_labels(**self.__class__.kwargs["generate_feature_matrix_and_labels"]) + zephyr.set_train_test_split(*self.base_train_test_split()) + zephyr.fit_pipeline(**self.__class__.kwargs["fit_pipeline"]) + predicted, output = zephyr.predict(visual=True) + assert isinstance(predicted, list) + assert len(predicted) == len(self.test_y) assert isinstance(output, dict) assert list(output.keys()) == ['threshold', 'scores'] - def test_evaluate(self): - zephyr = self.setup_zephyr_with_base_split(6) - scores = pd.Series(zephyr.evaluate(metrics = ["sklearn.metrics.accuracy_score", + zephyr = Zephyr() + zephyr.generate_entityset(**self.__class__.kwargs["generate_entityset"]) + 
zephyr.generate_label_times(**self.__class__.kwargs["generate_label_times"]) + zephyr.generate_feature_matrix_and_labels(**self.__class__.kwargs["generate_feature_matrix_and_labels"]) + zephyr.set_train_test_split(*self.base_train_test_split()) + zephyr.fit_pipeline(**self.__class__.kwargs["fit_pipeline"]) + scores = zephyr.evaluate(metrics=[ + "sklearn.metrics.accuracy_score", "sklearn.metrics.precision_score", "sklearn.metrics.f1_score", - "sklearn.metrics.recall_score"])) + "sklearn.metrics.recall_score" + ]) - expected = pd.Series({ - "sklearn.metrics.accuracy_score": 1.0, - "sklearn.metrics.precision_score": 1.0, - "sklearn.metrics.f1_score": 1.0, - "sklearn.metrics.recall_score": 1.0 - }) - pd.testing.assert_series_equal(expected, scores) + assert isinstance(scores, dict) + assert all(metric in scores for metric in [ + "sklearn.metrics.accuracy_score", + "sklearn.metrics.precision_score", + "sklearn.metrics.f1_score", + "sklearn.metrics.recall_score" + ]) + + def test_get_entityset_types(self): + zephyr = Zephyr() + entityset_types = zephyr.GET_ENTITYSET_TYPES() + # Check that it returns a dictionary + assert isinstance(entityset_types, dict) - - - - - - # def setup_method(self): - # self.zephyr = Zephyr('xgb_classifier') - - # def test_hyperparameters(self): - # hyperparameters = { - # "xgboost.XGBClassifier#1": { - # "max_depth": 2 - # }, - # "zephyr_ml.primitives.postprocessing.FindThreshold#1": { - # "metric": "precision" - # } - # } - - # zephyr = Zephyr('xgb_classifier', hyperparameters) - - # assert zephyr._hyperparameters == hyperparameters - - # def test_json(self): - # file = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) - # json_zephyr = Zephyr(os.path.join(file, 'zephyr_ml', 'pipelines', 'xgb_classifier.json')) - - # json_zephyr_hyperparameters = json_zephyr._mlpipeline.get_hyperparameters() - # zephyr_hyperparameters = self.zephyr._mlpipeline.get_hyperparameters() - # assert json_zephyr_hyperparameters == zephyr_hyperparameters - - 
# def test_fit(self): - # self.zephyr.fit(self.train, self.train_y) - - # def test_fit_visual(self): - # output = self.zephyr.fit(self.train, self.train_y, visual=True) - - # assert isinstance(output, dict) - # assert list(output.keys()) == ['threshold', 'scores'] - - # def test_fit_no_visual(self): - # zephyr = Zephyr(['xgboost.XGBClassifier']) - - # output = zephyr.fit(self.train, self.train_y, visual=True) - # assert output is None - - # def test_predict(self): - # self.zephyr.fit(self.train, self.train_y) - - # predicted = self.zephyr.predict(self.test) - - # assert self.test_y == predicted - - # def test_predict_visual(self): - # self.zephyr.fit(self.train, self.train_y) - - # predicted, output = self.zephyr.predict(self.test, visual=True) - - # # predictions - # assert self.test_y == predicted - - # # visualization - # assert isinstance(output, dict) - # assert list(output.keys()) == ['threshold', 'scores'] - - # def test_predict_no_visual(self): - # zephyr = Zephyr(['xgboost.XGBClassifier']) - - # zephyr.fit(self.train, self.train_y) - - # predicted = zephyr.predict(self.test, visual=True) - # assert len(self.test_y) == len(predicted) - - # def test_fit_predict(self): - # predicted = self.zephyr.fit_predict(self.random, self.random_y) - - # assert isinstance(predicted, list) - - # def test_save_load(self, tmpdir): - # path = os.path.join(tmpdir, 'some_path.pkl') - # self.zephyr.save(path) - - # new_zephyr = Zephyr.load(path) - # assert new_zephyr == self.zephyr - - # def test_load_failed(self, tmpdir): - # path = os.path.join(tmpdir, 'some_path.pkl') - # os.makedirs(os.path.dirname(path), exist_ok=True) - # with open(path, 'wb') as pickle_file: - # pickle.dump("something", pickle_file) - - # with pytest.raises(ValueError): - # Zephyr.load(path) - - # def test_evaluate(self): - # self.zephyr.fit(self.test, self.test_y) - # scores = self.zephyr.evaluate(X=self.test, y=self.test_y) - - # expected = pd.Series({ - # 'accuracy': 1.0, - # 'f1': 1.0, - # 'recall': 
1.0, - # 'precision': 1.0, - # }) - # pd.testing.assert_series_equal(expected, scores) - - # def test_evaluate_fit(self): - # scores = self.zephyr.evaluate( - # X=self.test, - # y=self.test_y, - # fit=True, - # ) - - # expected = pd.Series({ - # 'accuracy': 1.0, - # 'f1': 1.0, - # 'recall': 1.0, - # 'precision': 1.0, - # }) - # pd.testing.assert_series_equal(expected, scores) - - # def test_evaluate_previously_fitted_with_fit_true(self): - # self.zephyr.fit(self.train, self.train_y) - - # scores = self.zephyr.evaluate( - # X=self.test, - # y=self.test_y, - # fit=True - # ) - - # expected = pd.Series({ - # 'accuracy': 1.0, - # 'f1': 1.0, - # 'recall': 1.0, - # 'precision': 1.0, - # }) - # pd.testing.assert_series_equal(expected, scores) - - # def test_evaluate_train_data(self): - # scores = self.zephyr.evaluate( - # X=self.test, - # y=self.test_y, - # fit=True, - # train_X=self.train, - # train_y=self.train_y - # ) - - # expected = pd.Series({ - # 'accuracy': 1.0, - # 'f1': 1.0, - # 'recall': 1.0, - # 'precision': 1.0, - # }) - # pd.testing.assert_series_equal(expected, scores) + # Check that it contains expected keys + assert "pidata" in entityset_types + assert "scada" in entityset_types + assert "vibrations" in entityset_types + + # Check structure of returned data + for es_type, info in entityset_types.items(): + assert isinstance(info, dict) + assert "obj" in info + assert "desc" in info + assert isinstance(info["obj"], str) + assert isinstance(info["desc"], str) + + def test_get_labeling_functions(self): + zephyr = Zephyr() + labeling_functions = zephyr.GET_LABELING_FUNCTIONS() + + # Check that it returns a dictionary + assert isinstance(labeling_functions, dict) + + # Check that it contains expected labeling functions + assert "brake_pad_presence" in labeling_functions + + # Check structure of returned data + for func_name, info in labeling_functions.items(): + assert isinstance(info, dict) + assert "obj" in info + assert "desc" in info + assert 
callable(info["obj"]) + assert isinstance(info["desc"], str) + + def test_get_evaluation_metrics(self): + zephyr = Zephyr() + evaluation_metrics = zephyr.GET_EVALUATION_METRICS() + + # Check that it returns a dictionary + assert isinstance(evaluation_metrics, dict) + + # Check that it contains expected metrics + expected_metrics = DEFAULT_METRICS + + for metric in expected_metrics: + assert metric in evaluation_metrics + + # Check structure of returned data + for metric_name, info in evaluation_metrics.items(): + assert isinstance(info, dict) + assert "obj" in info + assert "desc" in info + assert isinstance(info["obj"], MLBlock) + assert hasattr(info["obj"], "metadata") + assert isinstance(info["desc"], str) + + # def test_guide_handler_warnings(self): + # zephyr = Zephyr() + + # # Test skipping steps warning + # with pytest.warns(UserWarning, match="You are skipping the following steps"): + # zephyr.generate_feature_matrix_and_labels(**self.kwargs["generate_feature_matrix_and_labels"]) + + # # Test stale data warning + # with pytest.warns(UserWarning, match="This data is potentially stale"): + # zephyr.get_entityset() + + # # Test inconsistent state warning + # with pytest.warns(UserWarning, match="Unable to perform"): + # zephyr.get_label_times() diff --git a/zephyr_ml/core.py b/zephyr_ml/core.py index 0a0e169..87179c6 100644 --- a/zephyr_ml/core.py +++ b/zephyr_ml/core.py @@ -91,9 +91,9 @@ def get_steps_in_between(self, cur_step, next_step): step_strs.append(f"{step}. 
{' or '.join(option_strs)}") return step_strs - def perform_producer_step(self, method, *method_args, **method_kwargs): + def perform_producer_step(self, zephyr, method, *method_args, **method_kwargs): step_num = self.producer_to_step_map[method.__name__] - res = method(*method_args, **method_kwargs) + res = method(zephyr, *method_args, **method_kwargs) self.current_step = step_num self.terms[step_num] = self.cur_term return res @@ -151,18 +151,18 @@ def log_get_stale_warning(self, name, next_step): # tries to perform step if possible -> warns that data might be stale - def try_perform_forward_producer_step(self, method, *method_args, **method_kwargs): + def try_perform_forward_producer_step(self, zephyr, method, *method_args, **method_kwargs): name = method.__name__ next_step = self.producer_to_step_map[name] if name in self.set_methods: self.try_log_skipping_steps_warning(name, next_step) # next_step == 0, set method (already warned), or previous step is up to term - res = self.perform_producer_step(method, *method_args, **method_kwargs) + res = self.perform_producer_step(zephyr, method, *method_args, **method_kwargs) return res # next_step == 0, set method, or previous step is up to term - def try_perform_backward_producer_step(self, method, *method_args, **method_kwargs): + def try_perform_backward_producer_step(self, zephyr, method, *method_args, **method_kwargs): name = method.__name__ next_step = self.producer_to_step_map[name] self.try_log_making_stale_warning(next_step) @@ -170,23 +170,23 @@ def try_perform_backward_producer_step(self, method, *method_args, **method_kwar for i in range(0, next_step): if self.terms[i] != -1: self.terms[i] = self.cur_term - res = self.perform_producer_step(method, *method_args, **method_kwargs) + res = self.perform_producer_step(zephyr, method, *method_args, **method_kwargs) return res - def try_perform_producer_step(self, method, *method_args, **method_kwargs): + def try_perform_producer_step(self, zephyr, method, 
*method_args, **method_kwargs): name = method.__name__ next_step = self.producer_to_step_map[name] if next_step >= self.current_step: - res = self.try_perform_forward_producer_step(method, *method_args, **method_kwargs) + res = self.try_perform_forward_producer_step(zephyr, method, *method_args, **method_kwargs) return res else: - res = self.try_perform_backward_producer_step(method, *method_args, **method_kwargs) + res = self.try_perform_backward_producer_step(zephyr, method, *method_args, **method_kwargs) return res # dont update current step or terms - def try_perform_stale_or_inconsistent_producer_step(self, method, *method_args, **method_kwargs): + def try_perform_stale_or_inconsistent_producer_step(self, zephyr, method, *method_args, **method_kwargs): name = method.__name__ next_step = self.producer_to_step_map[name] if self.terms[next_step-1] == -1: #inconsistent @@ -197,7 +197,7 @@ def try_perform_stale_or_inconsistent_producer_step(self, method, *method_args, # no not possible b/c if there is a current iteration after this step, it must have updated this step's iteration # self.try_log_using_stale_warning(name, next_step) - res = self.perform_producer_step(method, *method_args, **method_kwargs) + res = self.perform_producer_step(zephyr, method, *method_args, **method_kwargs) return res @@ -205,7 +205,7 @@ def try_perform_stale_or_inconsistent_producer_step(self, method, *method_args, - def try_perform_getter_step(self, method, *method_args, **method_kwargs): + def try_perform_getter_step(self, zephyr, method, *method_args, **method_kwargs): name = method.__name__ # either inconsistent, stale, or up to date step_num = self.getter_to_step_map[name] @@ -213,11 +213,11 @@ def try_perform_getter_step(self, method, *method_args, **method_kwargs): if step_term == -1: self.log_get_inconsistent_warning(step_num) elif step_term == self.cur_term: - res = method(*method_args, **method_kwargs) + res = method(zephyr, *method_args, **method_kwargs) return res else: 
self.log_get_stale_warning(step_num) - res = method(*method_args, **method_kwargs) + res = method(zephyr, *method_args, **method_kwargs) return res @@ -225,19 +225,19 @@ def try_perform_getter_step(self, method, *method_args, **method_kwargs): - def guide_step(self, method, *method_args, **method_kwargs): + def guide_step(self, zephyr, method, *method_args, **method_kwargs): method_name = method.__name__ if method_name in self.producer_to_step_map: #up-todate next_step = self.producer_to_step_map[method_name] if method_name in self.set_methods or next_step == 0 or self.terms[next_step-1] == self.cur_term: - res = self.try_perform_producer_step(method, *method_args, **method_kwargs) + res = self.try_perform_producer_step(zephyr, method, *method_args, **method_kwargs) return res else: #stale or inconsistent - res = self.try_perform_stale_or_inconsistent_producer_step(method, *method_args, **method_kwargs) + res = self.try_perform_stale_or_inconsistent_producer_step(zephyr, method, *method_args, **method_kwargs) return res elif method_name in self.getter_to_step_map: - res = self.try_perform_getter_step(method, *method_args, **method_kwargs) + res = self.try_perform_getter_step(zephyr, method, *method_args, **method_kwargs) return res else: print(f"Method {method_name} does not need to be wrapped") @@ -258,7 +258,7 @@ def guide(method): @wraps(method) def guided_step(self, *method_args, **method_kwargs): - return self.guide_handler.guide_step(method, *method_args, **method_kwargs) + return self.guide_handler.guide_step(self, method, *method_args, **method_kwargs) return guided_step @@ -280,9 +280,8 @@ def __init__(self): - self.current_step = -1 # tuple of 2 arrays: producers and attributes - self.step_order = [ + step_order = [ ([self.generate_entityset, self.set_entityset], [self.get_entityset]), ([self.generate_label_times, self.set_label_times], [self.get_label_times]), ([self.generate_feature_matrix_and_labels, self.set_feature_matrix_and_labels], 
[self.get_feature_matrix_and_labels]), @@ -290,8 +289,8 @@ def __init__(self): ([self.fit_pipeline, self.set_fitted_pipeline], [self.get_fitted_pipeline]), ([self.predict, self.evaluate], []) ] - self.set_methods = set([self.set_entityset.__name__, self.set_label_times.__name__, self.set_feature_matrix_and_labels.__name__, self.set_train_test_split.__name__, self.set_fitted_pipeline.__name__]) - self.guide_handler = GuideHandler(self.step_order, self.set_methods) + set_methods = set([self.set_entityset.__name__, self.set_label_times.__name__, self.set_feature_matrix_and_labels.__name__, self.set_train_test_split.__name__, self.set_fitted_pipeline.__name__]) + self.guide_handler = GuideHandler(step_order, set_methods) @@ -601,7 +600,6 @@ def predict(self, X=None, visual=False, **kwargs): outputs = self.pipeline.predict(X, output_=outputs_spec, **kwargs) print(outputs) - if visual and visual_names: prediction = outputs[0] return prediction, dict(zip(visual_names, outputs[-len(visual_names) :])) diff --git a/zephyr_ml/pipelines/xgb_classifier.json b/zephyr_ml/pipelines/xgb_classifier.json index 47a0bb1..28fd0d5 100644 --- a/zephyr_ml/pipelines/xgb_classifier.json +++ b/zephyr_ml/pipelines/xgb_classifier.json @@ -1,34 +1,34 @@ { - "metadata": { - "name": "xgb", - "data_type": "single_table", - "task_type": "classification" - }, - "primitives": [ - "xgboost.XGBClassifier", - "zephyr_ml.primitives.postprocessing.FindThreshold" - ], - "input_names": { - "zephyr_ml.primitives.postprocessing.FindThreshold#1": { - "y_true": "y" - } - }, - "outputs": { - "default": [ - { - "name": "y", - "variable": "zephyr_ml.primitives.postprocessing.FindThreshold#1.y" - } - ], - "visual": [ - { - "name": "threshold", - "variable": "zephyr_ml.primitives.postprocessing.FindThreshold#1.threshold" - }, - { - "name": "scores", - "variable": "zephyr_ml.primitives.postprocessing.FindThreshold#1.scores" - } - ] + "metadata": { + "name": "xgb", + "data_type": "single_table", + "task_type": 
"classification" + }, + "primitives": [ + "xgboost.XGBClassifier", + "zephyr_ml.primitives.postprocessing.FindThreshold" + ], + "input_names": { + "zephyr_ml.primitives.postprocessing.FindThreshold#1": { + "y_true": "y" } + }, + "outputs": { + "default": [ + { + "name": "y_pred", + "variable": "zephyr_ml.primitives.postprocessing.FindThreshold#1.y_pred" + } + ], + "visual": [ + { + "name": "threshold", + "variable": "zephyr_ml.primitives.postprocessing.FindThreshold#1.threshold" + }, + { + "name": "scores", + "variable": "zephyr_ml.primitives.postprocessing.FindThreshold#1.scores" + } + ] + } } From 651619ba501b847f62bc9ed700fd67c7a4f7a1f8 Mon Sep 17 00:00:00 2001 From: Raymond Pan Date: Tue, 13 May 2025 09:16:40 -0400 Subject: [PATCH 23/28] fix tests --- README.md | 182 +++-- notebooks/feature_engineering.ipynb | 379 +++++----- notebooks/modeling.ipynb | 654 ++++++++++++++++-- notebooks/visualization.ipynb | 134 +++- setup.py | 6 +- tests/test_core.py | 127 +--- tests/test_entityset.py | 8 +- zephyr_ml/core.py | 505 +++++++------- zephyr_ml/core_prev.py | 2 +- zephyr_ml/entityset.py | 7 +- zephyr_ml/feature_engineering.py | 3 +- zephyr_ml/labeling/__init__.py | 2 - ...tives.postprocessing.confusion_matrix.json | 94 ++- ...ostprocessing.roc_auc_score_and_curve.json | 95 ++- zephyr_ml/primitives/postprocessing.py | 58 +- 15 files changed, 1439 insertions(+), 817 deletions(-) diff --git a/README.md b/README.md index 8ecbdf6..f4dc861 100644 --- a/README.md +++ b/README.md @@ -13,26 +13,26 @@ A machine learning library for assisting in the generation of machine learning problems for wind farms operations data by analyzing past occurrences of events. - | Important Links | | - | ----------------------------------- | -------------------------------------------------------------------- | - | :computer: **[Website]** | Check out the Sintel Website for more information about the project. 
| - | :book: **[Documentation]** | Quickstarts, User and Development Guides, and API Reference. | - | :star: **[Tutorials]** | Checkout our notebooks | - | :octocat: **[Repository]** | The link to the Github Repository of this library. | - | :scroll: **[License]** | The repository is published under the MIT License. | - | :keyboard: **[Development Status]** | This software is in its Pre-Alpha stage. | - | ![][Slack Logo] **[Community]** | Join our Slack Workspace for announcements and discussions. | - - [Website]: https://sintel.dev/ - [Documentation]: https://dtail.gitbook.io/zephyr/ - [Repository]: https://github.com/sintel-dev/Zephyr - [Tutorials]: https://github.com/sintel-dev/Zephyr/blob/master/notebooks - [License]: https://github.com/sintel-dev/Zephyr/blob/master/LICENSE - [Development Status]: https://pypi.org/search/?c=Development+Status+%3A%3A+2+-+Pre-Alpha - [Community]: https://join.slack.com/t/sintel-space/shared_invite/zt-q147oimb-4HcphcxPfDAM0O9_4PaUtw - [Slack Logo]: https://github.com/sintel-dev/Orion/blob/master/docs/images/slack.png - - - Homepage: https://github.com/signals-dev/zephyr +| Important Links | | +| ----------------------------------- | -------------------------------------------------------------------- | +| :computer: **[Website]** | Check out the Sintel Website for more information about the project. | +| :book: **[Documentation]** | Quickstarts, User and Development Guides, and API Reference. | +| :star: **[Tutorials]** | Checkout our notebooks | +| :octocat: **[Repository]** | The link to the Github Repository of this library. | +| :scroll: **[License]** | The repository is published under the MIT License. | +| :keyboard: **[Development Status]** | This software is in its Pre-Alpha stage. | +| ![][Slack Logo] **[Community]** | Join our Slack Workspace for announcements and discussions. 
| + +[Website]: https://sintel.dev/ +[Documentation]: https://dtail.gitbook.io/zephyr/ +[Repository]: https://github.com/sintel-dev/Zephyr +[Tutorials]: https://github.com/sintel-dev/Zephyr/blob/master/notebooks +[License]: https://github.com/sintel-dev/Zephyr/blob/master/LICENSE +[Development Status]: https://pypi.org/search/?c=Development+Status+%3A%3A+2+-+Pre-Alpha +[Community]: https://join.slack.com/t/sintel-space/shared_invite/zt-q147oimb-4HcphcxPfDAM0O9_4PaUtw +[Slack Logo]: https://github.com/sintel-dev/Orion/blob/master/docs/images/slack.png + +- Homepage: https://github.com/signals-dev/zephyr # Overview @@ -42,17 +42,17 @@ occurrences of events. The main features of **Zephyr** are: -* **EntitySet creation**: tools designed to represent wind farm data and the relationship -between different tables. We have functions to create EntitySets for datasets with PI data -and datasets using SCADA data. -* **Labeling Functions**: a collection of functions, as well as tools to create custom versions -of them, ready to be used to analyze past operations data in the search for occurrences of -specific types of events in the past. -* **Prediction Engineering**: a flexible framework designed to apply labeling functions on -wind turbine operations data in a number of different ways to create labels for custom -Machine Learning problems. -* **Feature Engineering**: a guide to using Featuretools to apply automated feature engineerinig -to wind farm data. +- **EntitySet creation**: tools designed to represent wind farm data and the relationship + between different tables. We have functions to create EntitySets for datasets with PI data + and datasets using SCADA data. +- **Labeling Functions**: a collection of functions, as well as tools to create custom versions + of them, ready to be used to analyze past operations data in the search for occurrences of + specific types of events in the past. 
+- **Prediction Engineering**: a flexible framework designed to apply labeling functions on + wind turbine operations data in a number of different ways to create labels for custom + Machine Learning problems. +- **Feature Engineering**: a guide to using Featuretools to apply automated feature engineerinig + to wind farm data. # Install @@ -60,8 +60,7 @@ to wind farm data. **Zephyr** has been developed and runs on Python 3.8, 3.9, 3.10, 3.11 and 3.12. -Also, although it is not strictly required, the usage of a [virtualenv]( -https://virtualenv.pypa.io/en/latest/) is highly recommended in order to avoid interfering +Also, although it is not strictly required, the usage of a [virtualenv](https://virtualenv.pypa.io/en/latest/) is highly recommended in order to avoid interfering with other software installed in the system where you are trying to run **Zephyr**. ## Download and Install @@ -79,35 +78,38 @@ If you want to install from source or contribute to the project please read the # Quickstart In this short tutorial we will guide you through a series of steps that will help you -getting started with **Zephyr**. +getting started with **Zephyr**. For more detailed examples, please refer to the tutorial notebooks in the `notebooks` directory: + +- `feature_engineering.ipynb`: Learn how to create EntitySets and perform feature engineering +- `modeling.ipynb`: Learn how to train and evaluate models +- `visualization.ipynb`: Learn how to visualize your data and results ## 1. Loading the data -The first step we will be to use preprocessed data to create an EntitySet. Depending on the -type of data, we will either the `zephyr_ml.create_pidata_entityset` or `zephyr_ml.create_scada_entityset` -functions. +The first step will be to use preprocessed data to create an EntitySet. Depending on the +type of data, we will use either the `generate_entityset` function with `es_type="pidata"`, `es_type="scada"` or `es_type="vibrations"`. 
**NOTE**: if you cloned the **Zephyr** repository, you will find some demo data inside the -`notebooks/data` folder which has been preprocessed to fit the `create_entityset` data -requirements. +`notebooks/data` folder which has been preprocessed to fit the data requirements. -```python3 +```python import os import pandas as pd -from zephyr_ml import create_scada_entityset +from zephyr_ml import Zephyr data_path = 'notebooks/data' data = { - 'turbines': pd.read_csv(os.path.join(data_path, 'turbines.csv')), - 'alarms': pd.read_csv(os.path.join(data_path, 'alarms.csv')), - 'work_orders': pd.read_csv(os.path.join(data_path, 'work_orders.csv')), - 'stoppages': pd.read_csv(os.path.join(data_path, 'stoppages.csv')), - 'notifications': pd.read_csv(os.path.join(data_path, 'notifications.csv')), - 'scada': pd.read_csv(os.path.join(data_path, 'scada.csv')) + 'turbines': pd.read_csv(os.path.join(data_path, 'turbines.csv')), + 'alarms': pd.read_csv(os.path.join(data_path, 'alarms.csv')), + 'work_orders': pd.read_csv(os.path.join(data_path, 'work_orders.csv')), + 'stoppages': pd.read_csv(os.path.join(data_path, 'stoppages.csv')), + 'notifications': pd.read_csv(os.path.join(data_path, 'notifications.csv')), + 'scada': pd.read_csv(os.path.join(data_path, 'scada.csv')) } -scada_es = create_scada_entityset(data) +zephyr = Zephyr() +scada_es = zephyr.generate_entityset(data, es_type="scada") ``` This will load the turbine, alarms, stoppages, work order, notifications, and SCADA data, and return it @@ -132,15 +134,10 @@ Entityset: SCADA data ## 2. Selecting a Labeling Function -The second step will be to choose an adequate **Labeling Function**. - -We can see the list of available labeling functions using the `zephyr_ml.labeling.get_labeling_functions` -function. - -```python3 -from zephyr_ml import labeling +The second step will be to choose an adequate **Labeling Function**. We can see the list of available labeling functions using the `GET_LABELING_FUNCTIONS` method. 
-labeling.get_labeling_functions() +```python +labeling_functions = zephyr.GET_LABELING_FUNCTIONS() ``` This will return us a dictionary with the name and a short description of each available @@ -158,14 +155,14 @@ amount of power lost over a slice of time. ## 3. Generate Target Times Once we have loaded the data and the Labeling Function, we are ready to start using -the `zephyr_ml.generate_labels` function to generate a Target Times table. +the `generate_label_times` function to generate a Target Times table. - -```python3 -from zephyr_ml import DataLabeler - -data_labeler = DataLabeler(labeling.labeling_functions.total_power_loss) -target_times, metadata = data_labeler.generate_label_times(scada_es) +```python +target_times, metadata = zephyr.generate_label_times( + labeling_fn="total_power_loss", # or any other labeling function name + num_samples=10, + gap="20d" +) ``` This will return us a `compose.LabelTimes` containing the three columns required to start @@ -177,17 +174,16 @@ working on a Machine Learning problem: the turbine ID (COD_ELEMENT), the cutoff ``` ## 4. Feature Engineering -Using EntitySets and LabelTimes allows us to easily use Featuretools for automatic feature generation. -```python3 -import featuretools as ft +Using EntitySets and LabelTimes allows us to easily use Featuretools for automatic feature generation. -feature_matrix, features = ft.dfs( - entityset=scada_es, - target_dataframe_name='turbines', +```python +feature_matrix, features, _ = zephyr.generate_feature_matrix( + target_dataframe_name="turbines", cutoff_time_in_index=True, - cutoff_time=target_times, - max_features=20 + agg_primitives=["count", "sum", "max"], + max_features = 20, + verbose=True ) ``` @@ -195,48 +191,46 @@ Then we get a list of features and the computed `feature_matrix`. ``` TURBINE_PI_ID TURBINE_LOCAL_ID TURBINE_SAP_COD DES_CORE_ELEMENT SITE DES_CORE_PLANT ... 
MODE(alarms.COD_STATUS) MODE(alarms.DES_NAME) MODE(alarms.DES_TITLE) NUM_UNIQUE(alarms.COD_ALARM) NUM_UNIQUE(alarms.COD_ALARM_INT) label -COD_ELEMENT time ... +COD_ELEMENT time ... 0 2022-01-01 TA00 A0 LOC000 T00 LOCATION LOC ... Alarm1 Alarm1 Description of alarm 1 1 1 45801.0 [1 rows x 21 columns] ``` - ## 5. Modeling -Once we have the feature matrix, we can train a model using the Zephyr interface where you can train, infer, and evaluate a pipeline. -First, we need to prepare our dataset for training by creating ``X`` and ``y`` variables and one-hot encoding features. +Once we have the feature matrix, we can train a model using the Zephyr interface. First, we need to prepare our dataset for training by creating a train-test split. -```python3 -y = list(feature_matrix.pop('label')) -X = pd.get_dummies(feature_matrix).values +```python +X_train, X_test, y_train, y_test = zephyr.generate_train_test_split( + test_size=0.2, + random_state=42 +) ``` -In this example, we will use an 'xgb' regression pipeline to predict total power loss. - -```python3 -from zephyr_ml import Zephyr +In this example, we will use an 'xgb' regression pipeline to predict total power loss. To train the pipeline, we simply call the `fit_pipeline` method. -pipeline_name = 'xgb_regressor' +```python +zephyr.fit_pipeline( + pipeline="xgb_regressor", + pipeline_hyperparameters=None, -zephyr = Zephyr(pipeline_name) +) ``` -To train the pipeline, we simply use the `fit` function. -```python3 -zephyr.fit(X, y) +After it finished training, we can make predictions using `predict` + +```python +y_pred = zephyr.predict(X_test) ``` -After it finished training, we can make prediciton using `predict` +We can also use `evaluate` to obtain the performance of the pipeline. -```python3 -y_pred = zephyr.predict(X) +```python +results = zephyr.evaluate() ``` -We can also use ``zephyr.evaluate`` to obtain the performance of the pipeline. - # What's Next? 
If you want to continue learning about **Zephyr** and all its -features please have a look at the tutorials found inside the [notebooks folder]( -https://github.com/signals-dev/zephyr/tree/main/notebooks). +features please have a look at the tutorials found inside the [notebooks folder](https://github.com/signals-dev/zephyr/tree/main/notebooks). diff --git a/notebooks/feature_engineering.ipynb b/notebooks/feature_engineering.ipynb index ea3c726..73667ef 100644 --- a/notebooks/feature_engineering.ipynb +++ b/notebooks/feature_engineering.ipynb @@ -6,10 +6,10 @@ "metadata": {}, "source": [ "# Feature Engineering\n", - "In this tutorial, we will show you how to use zephyr_ml to create EntitySets, generate label times, and do automated feature engineering. This tutorial assumes you have a folder with the mostly pre-processed data in seperate CSVs. If necessary, please update the steps and paths below.\n", + "In this tutorial, we will show you how to use `zephyr_ml`'s `Zephyr` class to create EntitySets, generate label times, and do automated feature engineering. This tutorial assumes you have a folder with the mostly pre-processed data in seperate CSVs. If necessary, please update the steps and paths below.\n", "\n", "## 1) Create EntitySet\n", - "zephyr_ml has strict assumptions about the data passed into its `create_pidata_entityset` and `create_scada_entityset` functions. It's the user's responsibility to apply the necessary pre-processing steps to get data into a format acceptable for zephyr_ml. \n", + "zephyr_ml has strict assumptions about the data passed into its `create_entityset` method. It's the user's responsibility to apply the necessary pre-processing steps to get data into a format acceptable for zephyr_ml. \n", "\n", "For example, the demo PI data needs to be converted to a tabular format instead of a `tag` `value` format. The `turbine` column also needs too be renamed to `COD_ELEMENT` to match the rest of the data." 
] @@ -191,7 +191,7 @@ { "data": { "text/plain": [ - "Entityset: PI data\n", + "Entityset: pidata\n", " DataFrames:\n", " turbines [Rows: 1, Columns: 10]\n", " alarms [Rows: 2, Columns: 10]\n", @@ -213,8 +213,9 @@ } ], "source": [ - "from zephyr_ml import create_pidata_entityset\n", + "from zephyr_ml import Zephyr\n", "\n", + "zephyr = Zephyr()\n", "data = {\n", " 'turbines': pd.read_csv(path.join(data_path, 'turbines.csv')),\n", " 'alarms': pd.read_csv(path.join(data_path, 'alarms.csv')),\n", @@ -224,7 +225,7 @@ " 'pidata': pidata_df\n", "}\n", "\n", - "pidata_es = create_pidata_entityset(data)\n", + "pidata_es = zephyr.generate_entityset(dfs = data, es_type = \"pidata\")\n", "pidata_es" ] }, @@ -258,6 +259,34 @@ { "cell_type": "code", "execution_count": 5, + "id": "f00c300f", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'brake_pad_presence': {'obj': ,\n", + " 'desc': 'Determines if brake pad present in stoppages.'},\n", + " 'converter_replacement_presence': {'obj': ,\n", + " 'desc': 'Calculates the converter replacement presence.'},\n", + " 'gearbox_replace_presence': {'obj': ,\n", + " 'desc': 'Determines if gearbox replacement/exchange is present in stoppages.'},\n", + " 'total_power_loss': {'obj': ,\n", + " 'desc': 'Calculates the total power loss over the data slice.'}}" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "zephyr.GET_LABELING_FUNCTIONS()" + ] + }, + { + "cell_type": "code", + "execution_count": 6, "id": "e0ee16eb", "metadata": {}, "outputs": [ @@ -303,36 +332,47 @@ "0 0 2022-01-01 45801.0" ] }, - "execution_count": 5, + "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "from zephyr_ml import DataLabeler, labeling\n", - "\n", - "data_labeler = DataLabeler(labeling.total_power_loss)\n", - "\n", - "label_times, _ = data_labeler.generate_label_times(pidata_es)\n", + "label_times, _ = 
zephyr.generate_label_times(\"total_power_loss\")\n", "label_times" ] }, { "cell_type": "markdown", - "id": "ab8eefd3", + "id": "e26ac0cb", "metadata": {}, "source": [ - "## 3) Feature Engineering with SigPro\n", + "## 3) Feature Engineering with SigPro and Featuretools\n", "\n", - "Process signals with [SigPro](https://github.com/sintel-dev/SigPro) for PI signals or SCADA signals.\n", + "The feature engineering process in zephyr_ml combines signal processing with SigPro and automated feature generation with Featuretools into a single method, `generate_feature_matrix`. This unified approach allows for efficient processing of both time series signals and relational data." + ] + }, + { + "cell_type": "markdown", + "id": "a9a3f3a6", + "metadata": {}, + "source": [ + "### Signal Processing with SigPro\n", + "To perform signal processing in the `generate_feature_matrix` method, we pass in the following parameters:\n", + "- `signal_aggregations`: the specifications of the aggregation primitives\n", + "- `signal_transformations`: the specifications of the transformation primitives\n", + "- `signal_dataframe_name`: the name of the dataframe whether `pidata` or `scada`.\n", + "- `signal_column`: the name of the signal column in the dataframe.\n", + "- `signal_window_size`: the size of the bin we want to process the signals over, e.g. each month.\n", + "- `signal_replace_dataframe`: an indicator whether we want to replace the current dataframe or add it as a new one.\n", "\n", "To look at some of the primitives readily available, we use `get_primitives` function from `SigPro`." 
] }, { "cell_type": "code", - "execution_count": 6, - "id": "191a123a", + "execution_count": 7, + "id": "5dfdd53e", "metadata": {}, "outputs": [ { @@ -352,11 +392,12 @@ " 'sigpro.transformations.frequency.band.frequency_band',\n", " 'sigpro.transformations.frequency.fft.fft',\n", " 'sigpro.transformations.frequency.fft.fft_real',\n", + " 'sigpro.transformations.frequency.fftfreq.fft_freq',\n", " 'sigpro.transformations.frequency_time.stft.stft',\n", " 'sigpro.transformations.frequency_time.stft.stft_real']" ] }, - "execution_count": 6, + "execution_count": 7, "metadata": {}, "output_type": "execute_result" } @@ -369,7 +410,7 @@ }, { "cell_type": "markdown", - "id": "5b23aff6", + "id": "586cb217", "metadata": {}, "source": [ "Suppose we are interested in finding the amplitude mean for each month of readings in the signal. We first specify the `name` and respective `primitive` we want to apply for both `transformations` and `aggregations`.\n", @@ -379,17 +420,17 @@ }, { "cell_type": "code", - "execution_count": 7, - "id": "961af0ef", + "execution_count": 8, + "id": "bd00c9fc", "metadata": {}, "outputs": [], "source": [ - "aggregations = [{\n", + "signal_aggregations = [{\n", " \"name\":\"mean\",\n", " \"primitive\":\"sigpro.aggregations.amplitude.statistical.mean\"\n", "}]\n", "\n", - "transformations = [{\n", + "signal_transformations = [{\n", " \"name\":\"fft\",\n", " \"primitive\":\"sigpro.transformations.amplitude.identity.identity\"\n", "}]" @@ -397,33 +438,83 @@ }, { "cell_type": "markdown", - "id": "a9a3f3a6", + "id": "2520a27e", "metadata": {}, "source": [ - "We use `process_signals` function to accomplish our goal. We pass the following:\n", - "- `es`: the entityset we are working with.\n", - "- `signal_dataframe_name`: the name of the dataframe whether `pidata` or `scada`.\n", - "- `signal_column`: the name of the signal column in the dataframe.\n", - "- `window_size`: the size of the bin we want to process the signals over, e.g. 
each month.\n", - "- `replace_dataframe`: an indicator whether we want to replace the current dataframe or add it as a new one." + "### Automated Feature Generation with Featuretools\n", + "The `generate_feature_matrix` method also leverages Featuretools to automatically generate features from the previously generated EntitySet and use label times as cutoff times, ensuring temporal validity. For example, we can set interesting categorical values in our EntitySet and use them to generate aggregation features grouped by those interesting values. We can also set which primitives we want to use and control which columns and entities those primitives can be applied to. " ] }, { "cell_type": "code", - "execution_count": 8, - "id": "bea94368", + "execution_count": 9, + "id": "2a14d02c", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "/Users/sarah/anaconda3/envs/Zephyr/lib/python3.8/site-packages/numpy/core/fromnumeric.py:3474: RuntimeWarning: Mean of empty slice.\n", + "/Users/raymondpan/zephyr/Zephyr-repo/venv/lib/python3.8/site-packages/numpy/core/fromnumeric.py:3464: RuntimeWarning: Mean of empty slice.\n", " return _methods._mean(a, axis=axis, dtype=dtype,\n", - "/Users/sarah/anaconda3/envs/Zephyr/lib/python3.8/site-packages/numpy/core/_methods.py:189: RuntimeWarning: invalid value encountered in double_scalars\n", + "/Users/raymondpan/zephyr/Zephyr-repo/venv/lib/python3.8/site-packages/numpy/core/_methods.py:192: RuntimeWarning: invalid value encountered in scalar divide\n", " ret = ret.dtype.type(ret / rcount)\n" ] - }, + } + ], + "source": [ + "feature_matrix, features, processed_es =zephyr.generate_feature_matrix(\n", + " # signal processing parameters\n", + " signal_dataframe_name = \"pidata\",\n", + " signal_column = \"val1\",\n", + " signal_transformations = signal_transformations,\n", + " signal_aggregations = signal_aggregations,\n", + " signal_window_size = \"1m\",\n", + " signal_replace_dataframe = False,\n", + " \n", 
+ " # feature generation parameters\n", + " target_dataframe_name = \"turbines\", \n", + " cutoff_time_in_index=True,\n", + " where_primitives=['count', 'sum'],\n", + " agg_primitives=['count', 'min', 'max', 'sum'],\n", + " trans_primitives=['num_words'],\n", + " ignore_dataframes=['notifications', 'work_orders'],\n", + " add_interesting_values = True,\n", + " interesting_dataframe_name = \"alarms\",\n", + " interesting_values = {'DES_NAME': ['Alarm1', 'Alarm2']}\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "7a77caa2", + "metadata": {}, + "source": [ + "`generate_feature_matrix` returns three outputs: `feature_matrix`, `features`, and `processed_es`. `processed_es` is a deepcopy of our Zephyr instance's original generated entityset, containing the signal processing and interesting values. `feature_matrix` is the generated feature matrix and `features` is a list of the generated features. " + ] + }, + { + "cell_type": "markdown", + "id": "d8474fa4", + "metadata": {}, + "source": [] + }, + { + "cell_type": "markdown", + "id": "3950f656", + "metadata": {}, + "source": [ + "Based on our original observations of `val1`, we now have `pidata_processed` with an entry for each month and the respective mean value of observations we see in that month.\n", + "\n", + "**Note**: in the months we don't have observations, the value becomes null." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "bea94368", + "metadata": {}, + "outputs": [ { "data": { "text/html": [ @@ -484,50 +575,19 @@ "2 2 0 2022-03-31 559" ] }, - "execution_count": 8, + "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "from zephyr_ml.feature_engineering import process_signals\n", - "\n", - "process_signals(es=pidata_es, \n", - " signal_dataframe_name='pidata', \n", - " signal_column='val1', \n", - " transformations=transformations, \n", - " aggregations=aggregations,\n", - " window_size='1m', \n", - " replace_dataframe=False)\n", - "\n", - "pidata_es['pidata_processed']" - ] - }, - { - "cell_type": "markdown", - "id": "fd88812a", - "metadata": {}, - "source": [ - "Based on our original observations of `val1`, we now have `pidata_processed` with an entry for each month and the respective mean value of observations we see in that month.\n", - "\n", - "**Note**: in the months we don't have observations, the value becomes null." - ] - }, - { - "cell_type": "markdown", - "id": "5aacf99b", - "metadata": {}, - "source": [ - "## 4) Feature Engineering with Featuretools\n", - "Using EntitySets and LabelTimes allows us to easily use Featuretools for automatic feature generation. For example, we can set interesting categorical values in our EntitySet and use them to generate aggregation features grouped by those interesting values. We can also set which primitives we want to use and control which columns and entities those primitives can be applied to. Featuretools can also use label times as cutoff times, ensuring that data after the label times is not used in feature generation. 
\n", - "\n", - "For additonal help using Featuretools, please see the documentation: https://featuretools.alteryx.com/en/stable/index.html" + "processed_es[\"pidata_processed\"]" ] }, { "cell_type": "code", - "execution_count": 9, - "id": "ee020300", + "execution_count": 11, + "id": "be788aaf", "metadata": {}, "outputs": [ { @@ -567,10 +627,10 @@ " ,\n", " ,\n", " ,\n", - " ,\n", " ,\n", - " ,\n", + " ,\n", " ,\n", + " ,\n", " ,\n", " ,\n", " ,\n", @@ -582,35 +642,19 @@ " ]" ] }, - "execution_count": 9, + "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "import featuretools as ft\n", - "\n", - "interesting_alarms = ['Alarm1', 'Alarm2']\n", - "pidata_es.add_interesting_values(dataframe_name='alarms', values={'DES_NAME': interesting_alarms})\n", - "\n", - "feature_matrix, features = ft.dfs(\n", - " entityset=pidata_es,\n", - " target_dataframe_name='turbines',\n", - " cutoff_time_in_index=True,\n", - " cutoff_time=label_times,\n", - " where_primitives=['count', 'sum'],\n", - " agg_primitives=['count', 'min', 'max', 'sum'],\n", - " trans_primitives=['num_words'],\n", - " ignore_dataframes=['notifications', 'work_orders'] \n", - ")\n", - "\n", "features" ] }, { "cell_type": "code", - "execution_count": 10, - "id": "bdce0acf", + "execution_count": 12, + "id": "d5b00ee9", "metadata": {}, "outputs": [ { @@ -635,26 +679,26 @@ " \n", " \n", " \n", - " TURBINE_PI_ID\n", - " TURBINE_LOCAL_ID\n", - " TURBINE_SAP_COD\n", - " DES_CORE_ELEMENT\n", - " SITE\n", - " DES_CORE_PLANT\n", - " COD_PLANT_SAP\n", - " PI_COLLECTOR_SITE_NAME\n", - " PI_LOCAL_SITE_NAME\n", " COUNT(alarms)\n", + " MAX(alarms.IND_DURATION)\n", + " MIN(alarms.IND_DURATION)\n", + " SUM(alarms.IND_DURATION)\n", + " COUNT(stoppages)\n", + " MAX(stoppages.COD_WO)\n", + " MAX(stoppages.IND_DURATION)\n", + " MAX(stoppages.IND_LOST_GEN)\n", + " MIN(stoppages.COD_WO)\n", + " MIN(stoppages.IND_DURATION)\n", " ...\n", - " MAX(stoppages.NUM_WORDS(DES_COMMENTS))\n", - " 
MAX(stoppages.NUM_WORDS(DES_DESCRIPTION))\n", - " MAX(stoppages.NUM_WORDS(DES_WO_NAME))\n", - " MIN(stoppages.NUM_WORDS(DES_COMMENTS))\n", - " MIN(stoppages.NUM_WORDS(DES_DESCRIPTION))\n", - " MIN(stoppages.NUM_WORDS(DES_WO_NAME))\n", - " SUM(stoppages.NUM_WORDS(DES_COMMENTS))\n", - " SUM(stoppages.NUM_WORDS(DES_DESCRIPTION))\n", - " SUM(stoppages.NUM_WORDS(DES_WO_NAME))\n", + " TURBINE_PI_ID_TA00\n", + " TURBINE_LOCAL_ID_A0\n", + " TURBINE_SAP_COD_LOC000\n", + " DES_CORE_ELEMENT_T00\n", + " SITE_LOCATION\n", + " DES_CORE_PLANT_LOC\n", + " COD_PLANT_SAP_ABC\n", + " PI_COLLECTOR_SITE_NAME_LOC0\n", + " PI_LOCAL_SITE_NAME_LOC0\n", " label\n", " \n", " \n", @@ -687,26 +731,26 @@ " \n", " 0\n", " 2022-01-01\n", - " TA00\n", - " A0\n", - " LOC000\n", - " T00\n", - " LOCATION\n", - " LOC\n", - " ABC\n", - " LOC0\n", - " LOC0\n", " 1\n", + " NaN\n", + " NaN\n", + " 0.0\n", + " 1\n", + " 12345.0\n", + " NaN\n", + " NaN\n", + " 12345.0\n", + " NaN\n", " ...\n", - " 4.0\n", - " 2.0\n", - " 3.0\n", - " 4.0\n", - " 2.0\n", - " 3.0\n", - " 4.0\n", - " 2.0\n", - " 3.0\n", + " 1\n", + " 1\n", + " 1\n", + " 1\n", + " 1\n", + " 1\n", + " 1\n", + " 1\n", + " 1\n", " 45801.0\n", " \n", " \n", @@ -715,62 +759,54 @@ "" ], "text/plain": [ - " TURBINE_PI_ID TURBINE_LOCAL_ID TURBINE_SAP_COD \\\n", - "COD_ELEMENT time \n", - "0 2022-01-01 TA00 A0 LOC000 \n", + " COUNT(alarms) MAX(alarms.IND_DURATION) \\\n", + "COD_ELEMENT time \n", + "0 2022-01-01 1 NaN \n", "\n", - " DES_CORE_ELEMENT SITE DES_CORE_PLANT \\\n", - "COD_ELEMENT time \n", - "0 2022-01-01 T00 LOCATION LOC \n", + " MIN(alarms.IND_DURATION) SUM(alarms.IND_DURATION) \\\n", + "COD_ELEMENT time \n", + "0 2022-01-01 NaN 0.0 \n", "\n", - " COD_PLANT_SAP PI_COLLECTOR_SITE_NAME \\\n", - "COD_ELEMENT time \n", - "0 2022-01-01 ABC LOC0 \n", - "\n", - " PI_LOCAL_SITE_NAME COUNT(alarms) ... \\\n", - "COD_ELEMENT time ... \n", - "0 2022-01-01 LOC0 1 ... 
\n", + " COUNT(stoppages) MAX(stoppages.COD_WO) \\\n", + "COD_ELEMENT time \n", + "0 2022-01-01 1 12345.0 \n", "\n", - " MAX(stoppages.NUM_WORDS(DES_COMMENTS)) \\\n", - "COD_ELEMENT time \n", - "0 2022-01-01 4.0 \n", + " MAX(stoppages.IND_DURATION) \\\n", + "COD_ELEMENT time \n", + "0 2022-01-01 NaN \n", "\n", - " MAX(stoppages.NUM_WORDS(DES_DESCRIPTION)) \\\n", - "COD_ELEMENT time \n", - "0 2022-01-01 2.0 \n", + " MAX(stoppages.IND_LOST_GEN) MIN(stoppages.COD_WO) \\\n", + "COD_ELEMENT time \n", + "0 2022-01-01 NaN 12345.0 \n", "\n", - " MAX(stoppages.NUM_WORDS(DES_WO_NAME)) \\\n", - "COD_ELEMENT time \n", - "0 2022-01-01 3.0 \n", + " MIN(stoppages.IND_DURATION) ... TURBINE_PI_ID_TA00 \\\n", + "COD_ELEMENT time ... \n", + "0 2022-01-01 NaN ... 1 \n", "\n", - " MIN(stoppages.NUM_WORDS(DES_COMMENTS)) \\\n", - "COD_ELEMENT time \n", - "0 2022-01-01 4.0 \n", + " TURBINE_LOCAL_ID_A0 TURBINE_SAP_COD_LOC000 \\\n", + "COD_ELEMENT time \n", + "0 2022-01-01 1 1 \n", "\n", - " MIN(stoppages.NUM_WORDS(DES_DESCRIPTION)) \\\n", - "COD_ELEMENT time \n", - "0 2022-01-01 2.0 \n", + " DES_CORE_ELEMENT_T00 SITE_LOCATION \\\n", + "COD_ELEMENT time \n", + "0 2022-01-01 1 1 \n", "\n", - " MIN(stoppages.NUM_WORDS(DES_WO_NAME)) \\\n", + " DES_CORE_PLANT_LOC COD_PLANT_SAP_ABC \\\n", "COD_ELEMENT time \n", - "0 2022-01-01 3.0 \n", + "0 2022-01-01 1 1 \n", "\n", - " SUM(stoppages.NUM_WORDS(DES_COMMENTS)) \\\n", - "COD_ELEMENT time \n", - "0 2022-01-01 4.0 \n", + " PI_COLLECTOR_SITE_NAME_LOC0 PI_LOCAL_SITE_NAME_LOC0 \\\n", + "COD_ELEMENT time \n", + "0 2022-01-01 1 1 \n", "\n", - " SUM(stoppages.NUM_WORDS(DES_DESCRIPTION)) \\\n", - "COD_ELEMENT time \n", - "0 2022-01-01 2.0 \n", - "\n", - " SUM(stoppages.NUM_WORDS(DES_WO_NAME)) label \n", - "COD_ELEMENT time \n", - "0 2022-01-01 3.0 45801.0 \n", + " label \n", + "COD_ELEMENT time \n", + "0 2022-01-01 45801.0 \n", "\n", "[1 rows x 48 columns]" ] }, - "execution_count": 10, + "execution_count": 12, "metadata": {}, "output_type": 
"execute_result" } @@ -781,11 +817,8 @@ } ], "metadata": { - "interpreter": { - "hash": "2d6fabd7bf745a21519616ebdce3b2479184204dadf576aa19f086ff78438203" - }, "kernelspec": { - "display_name": "Python 3 (ipykernel)", + "display_name": "venv", "language": "python", "name": "python3" }, @@ -799,7 +832,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.16" + "version": "3.8.0" } }, "nbformat": 4, diff --git a/notebooks/modeling.ipynb b/notebooks/modeling.ipynb index 69d26b3..4534722 100644 --- a/notebooks/modeling.ipynb +++ b/notebooks/modeling.ipynb @@ -7,7 +7,7 @@ "source": [ "# Modeling\n", "\n", - "In this tutorial, we will show you how to use `zephyr_ml` to train models using the `Zephyr` class. This tutorial builds on top of the previous one where we create EntitySets, generate label times, and do automated feature engineering. To do any of these previous steps, please refer to `feature_engineering` notebook.\n", + "In this tutorial, we will show you how to use `zephyr_ml`'s `Zephyr` class to train models. This tutorial builds on top of the previous one where we create EntitySets, generate label times, and do automated feature engineering. To do any of these previous steps, please refer to `feature_engineering` notebook.\n", "\n", "## 1) Load the Feature Matrix\n", "\n", @@ -19,58 +19,572 @@ "execution_count": 1, "id": "4a6724ad", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
COUNT(alarms)MAX(alarms.IND_DURATION)MIN(alarms.IND_DURATION)SUM(alarms.IND_DURATION)COUNT(stoppages)MAX(stoppages.COD_WO)MAX(stoppages.IND_DURATION)MAX(stoppages.IND_LOST_GEN)MIN(stoppages.COD_WO)MIN(stoppages.IND_DURATION)...DES_CORE_ELEMENT_T12DES_CORE_ELEMENT_T13DES_CORE_ELEMENT_T14DES_CORE_ELEMENT_T15SITE_LOCATIONDES_CORE_PLANT_LOCCOD_PLANT_SAP_ABCCOD_PLANT_SAP_XYZPI_COLLECTOR_SITE_NAME_LOC0PI_LOCAL_SITE_NAME_LOC0
01NaNNaN0.0112345.0NaNNaN12345.0NaN...0000111011
10NaNNaN0.0137452.0NaNNaN37452.0NaN...0000111011
20NaNNaN0.0123432.0NaNNaN23432.0NaN...0000111011
30NaNNaN0.0112452.0NaNNaN12452.0NaN...0000111011
40NaNNaN0.0132435.0NaNNaN32435.0NaN...0000111011
50NaNNaN0.0123534.0NaNNaN23534.0NaN...0000111011
60NaNNaN0.0165431.0NaNNaN65431.0NaN...0000111011
70NaNNaN0.0135742.0NaNNaN35742.0NaN...0000110111
80NaNNaN0.0121343.0NaNNaN21343.0NaN...0000110111
90NaNNaN0.0143565.0NaNNaN43565.0NaN...0000110111
100NaNNaN0.0124525.0NaNNaN24525.0NaN...0100110111
110NaNNaN0.0167432.0NaNNaN67432.0NaN...0010110111
120NaNNaN0.0121342.0NaNNaN21342.0NaN...0001110111
\n", + "

13 rows × 101 columns

\n", + "
" + ], + "text/plain": [ + " COUNT(alarms) MAX(alarms.IND_DURATION) MIN(alarms.IND_DURATION) \\\n", + "0 1 NaN NaN \n", + "1 0 NaN NaN \n", + "2 0 NaN NaN \n", + "3 0 NaN NaN \n", + "4 0 NaN NaN \n", + "5 0 NaN NaN \n", + "6 0 NaN NaN \n", + "7 0 NaN NaN \n", + "8 0 NaN NaN \n", + "9 0 NaN NaN \n", + "10 0 NaN NaN \n", + "11 0 NaN NaN \n", + "12 0 NaN NaN \n", + "\n", + " SUM(alarms.IND_DURATION) COUNT(stoppages) MAX(stoppages.COD_WO) \\\n", + "0 0.0 1 12345.0 \n", + "1 0.0 1 37452.0 \n", + "2 0.0 1 23432.0 \n", + "3 0.0 1 12452.0 \n", + "4 0.0 1 32435.0 \n", + "5 0.0 1 23534.0 \n", + "6 0.0 1 65431.0 \n", + "7 0.0 1 35742.0 \n", + "8 0.0 1 21343.0 \n", + "9 0.0 1 43565.0 \n", + "10 0.0 1 24525.0 \n", + "11 0.0 1 67432.0 \n", + "12 0.0 1 21342.0 \n", + "\n", + " MAX(stoppages.IND_DURATION) MAX(stoppages.IND_LOST_GEN) \\\n", + "0 NaN NaN \n", + "1 NaN NaN \n", + "2 NaN NaN \n", + "3 NaN NaN \n", + "4 NaN NaN \n", + "5 NaN NaN \n", + "6 NaN NaN \n", + "7 NaN NaN \n", + "8 NaN NaN \n", + "9 NaN NaN \n", + "10 NaN NaN \n", + "11 NaN NaN \n", + "12 NaN NaN \n", + "\n", + " MIN(stoppages.COD_WO) MIN(stoppages.IND_DURATION) ... \\\n", + "0 12345.0 NaN ... \n", + "1 37452.0 NaN ... \n", + "2 23432.0 NaN ... \n", + "3 12452.0 NaN ... \n", + "4 32435.0 NaN ... \n", + "5 23534.0 NaN ... \n", + "6 65431.0 NaN ... \n", + "7 35742.0 NaN ... \n", + "8 21343.0 NaN ... \n", + "9 43565.0 NaN ... \n", + "10 24525.0 NaN ... \n", + "11 67432.0 NaN ... \n", + "12 21342.0 NaN ... 
\n", + "\n", + " DES_CORE_ELEMENT_T12 DES_CORE_ELEMENT_T13 DES_CORE_ELEMENT_T14 \\\n", + "0 0 0 0 \n", + "1 0 0 0 \n", + "2 0 0 0 \n", + "3 0 0 0 \n", + "4 0 0 0 \n", + "5 0 0 0 \n", + "6 0 0 0 \n", + "7 0 0 0 \n", + "8 0 0 0 \n", + "9 0 0 0 \n", + "10 0 1 0 \n", + "11 0 0 1 \n", + "12 0 0 0 \n", + "\n", + " DES_CORE_ELEMENT_T15 SITE_LOCATION DES_CORE_PLANT_LOC \\\n", + "0 0 1 1 \n", + "1 0 1 1 \n", + "2 0 1 1 \n", + "3 0 1 1 \n", + "4 0 1 1 \n", + "5 0 1 1 \n", + "6 0 1 1 \n", + "7 0 1 1 \n", + "8 0 1 1 \n", + "9 0 1 1 \n", + "10 0 1 1 \n", + "11 0 1 1 \n", + "12 1 1 1 \n", + "\n", + " COD_PLANT_SAP_ABC COD_PLANT_SAP_XYZ PI_COLLECTOR_SITE_NAME_LOC0 \\\n", + "0 1 0 1 \n", + "1 1 0 1 \n", + "2 1 0 1 \n", + "3 1 0 1 \n", + "4 1 0 1 \n", + "5 1 0 1 \n", + "6 1 0 1 \n", + "7 0 1 1 \n", + "8 0 1 1 \n", + "9 0 1 1 \n", + "10 0 1 1 \n", + "11 0 1 1 \n", + "12 0 1 1 \n", + "\n", + " PI_LOCAL_SITE_NAME_LOC0 \n", + "0 1 \n", + "1 1 \n", + "2 1 \n", + "3 1 \n", + "4 1 \n", + "5 1 \n", + "6 1 \n", + "7 1 \n", + "8 1 \n", + "9 1 \n", + "10 1 \n", + "11 1 \n", + "12 1 \n", + "\n", + "[13 rows x 101 columns]" + ] + }, + "execution_count": 1, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "import pandas as pd\n", "\n", - "feature_matrix = pd.read_csv('data/feature_matrix.csv')" + "feature_matrix = pd.read_csv('data/feature_matrix.csv')\n", + "feature_matrix" ] }, { "cell_type": "markdown", - "id": "02e2c90a", + "id": "2be92488", "metadata": {}, "source": [ "## 2) Preparing Model Inputs\n", "\n", - "Prepare the data for modeling. Depending on the data, you might need to: normalize the data, impute missing values, create one-hot encodings for categorical values, etc.\n", + "Prepare the data for modeling. 
Depending on the data, you might need to: normalize the data, impute missing values, etc.\n", "\n", "In this part of the notebook, we do the following:\n", "* create `X` and `y` variables from the feature matrix\n", "* impute missing values using a SimpleImpute\n", - "* split the data into training and testing" + "* pass the data into our `Zephyr` instance and split the data into training and testing" ] }, { "cell_type": "code", "execution_count": 2, - "id": "20da6581", + "id": "b3be626a", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/raymondpan/zephyr/Zephyr-repo/venv/lib/python3.8/site-packages/sklearn/impute/_base.py:555: UserWarning: Skipping features without any observed values: [ 1 2 6 7 9 10 15 16 17 18]. At least one non-missing value is needed for imputation with strategy='mean'.\n", + " warnings.warn(\n", + "Performing set_feature_matrix. You are skipping the following steps:\n", + "0. generate_entityset or set_entityset\n", + "1. 
generate_label_times or set_label_times\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[True, False, True, False, True, False, False, False, False, True, False, False, False]\n" + ] + } + ], "source": [ "from sklearn.impute import SimpleImputer\n", - "from sklearn.model_selection import train_test_split\n", + "from zephyr_ml import Zephyr\n", "\n", "# pop the target labels\n", "y = list(feature_matrix.pop('label'))\n", + "print(y)\n", "X = feature_matrix.values\n", "\n", + "\n", "# impute missing values\n", "imputer = SimpleImputer()\n", - "X = imputer.fit_transform(X)\n", + "X = pd.DataFrame(imputer.fit_transform(X))\n", "\n", - "# create train and test splits\n", - "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=33)" + "zephyr = Zephyr()\n", + "zephyr.set_feature_matrix(X, labels = y)\n", + "X_train, X_test, y_train, y_test = zephyr.generate_train_test_split(test_size=0.2, random_state=33)" ] }, { "cell_type": "markdown", - "id": "32afe1aa", + "id": "3c8b00e2", "metadata": {}, "source": [ "## 3) Train a Model\n", "\n", - "We train a model using the `Zephyr` interface where you can train, infer, and evaluate a pipeline.\n", + "We train a model using `Zephyr`'s `fit_pipeline` method.\n", "In this notebook, we use an `xgb_classifier` pipeline which consists of two primitives:\n", "\n", "```\n", @@ -80,115 +594,163 @@ "\n", "An `XGBClassifier` primitive is an XGB model that returns the probability of each class, and `FindThreshold` primitive creates binary labels from the output of the XGB model by choosing a threshold that produces the best metric value (F1 Score by default)\n", "\n", - "To use a pipeline, we simply pass the name of the pipeline to `Zephyr`\n", + "To use a pipeline, we simply pass the name of the pipeline to our `Zephyr` instance.\n", "Optionally, you can change the default settings of the primitive by passing a hyperparameter dictionary. 
For example, we can change the number of trees in the classifier to be 50 instead of the default value (100)." ] }, { "cell_type": "code", "execution_count": 3, - "id": "b02986d9", + "id": "edffee03", "metadata": {}, "outputs": [], "source": [ - "from zephyr_ml import Zephyr\n", - "\n", "hyperparameters = {\n", " \"xgboost.XGBClassifier#1\": {\n", " \"n_estimators\": 50\n", " }\n", "}\n", "\n", - "zephyr = Zephyr('xgb_classifier', hyperparameters)" + "zephyr.fit_pipeline(pipeline = \"xgb_classifier\", pipeline_hyperparameters = hyperparameters)" ] }, { "cell_type": "markdown", - "id": "a1297396", + "id": "445afd22", "metadata": {}, "source": [ - "Then, training a pipeline can be done using the `fit` function and passing the training data" + "Now that the pipeline is trained, we can use it to predict the values of the test data using `predict` function\n" ] }, { "cell_type": "code", "execution_count": 4, - "id": "442c5258", + "id": "78187756", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "[1, 0, 1]" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "zephyr.fit(X_train, y_train)" + "zephyr.predict()" ] }, { "cell_type": "markdown", - "id": "8d4bf2cc", + "id": "24cda971", "metadata": {}, "source": [ - "Now that the pipeline is trained, we can use it to predict the values of the test data using `predict` function" + "Lastly, we can evaluate the performance of the pipeline using `evaluate` function\n" ] }, { "cell_type": "code", "execution_count": 5, - "id": "83814cd8", + "id": "cd097853", + "metadata": {}, + "outputs": [], + "source": [ + "res = zephyr.evaluate()" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "8df0f26c", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "[1, 0, 1]" + "{'sklearn.metrics.accuracy_score': 0.6666666666666666,\n", + " 'sklearn.metrics.precision_score': 0.5,\n", + " 'sklearn.metrics.f1_score': 
0.6666666666666666,\n", + " 'sklearn.metrics.recall_score': 1.0,\n", + " 'zephyr_ml.primitives.postprocessing.confusion_matrix': (array([[1, 1],\n", + " [0, 1]]),\n", + "
),\n", + " 'zephyr_ml.primitives.postprocessing.roc_auc_score_and_curve': (0.5,\n", + "
)}" ] }, - "execution_count": 5, + "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "zephyr.predict(X_test)" + "res" ] }, { "cell_type": "markdown", - "id": "15f257eb", + "id": "e2657da3", "metadata": {}, "source": [ - "Lastly, we can evaluate the performance of the pipeline using `evaluate` function" + "The `confusion_matrix` and `roc_auc_score_and_curve` evaluation metrics return some `matplotlib.figure.Figure` objects, which we can display, as shown below." ] }, { "cell_type": "code", - "execution_count": 6, - "id": "191a123a", + "execution_count": 7, + "id": "b74c3618", "metadata": {}, "outputs": [ { "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAh4AAAHsCAYAAACdcaTFAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/OQEPoAAAACXBIWXMAAA9hAAAPYQGoP6dpAABGqElEQVR4nO3deVyU5f7/8feALCqYW6i5piYqJJKCUWoILrmcErOOqZnmUm7kVu7mVuaWhoZbWpmalqK5lmUeLUvRtI6mfl0ylxQxN0R2mN8fHucXgTrgcA8wr6ePeRy85pr7/gw+OLz7XNd9j8lsNpsFAABgACd7FwAAABwHwQMAABiG4AEAAAxD8AAAAIYheAAAAMMQPAAAgGEIHgAAwDAEDwAAYBiCB4ACgXsdAoUDwQP4h4MHD+qNN95QcHCw6tWrp+bNm2vs2LE6e/Zsnp3z448/1pNPPql69eopMjLSJsfcs2ePvL29tWfPHpscz5pzeXt764cffsh2zsmTJy1zzp07Z/WxU1JS9M4772jDhg33nOvt7a05c+ZYfWwAxiN4AH+zfPlyderUSZcvX9bQoUO1aNEi9enTR9HR0erYsaOOHj1q83PGx8dr6tSpqlevnhYvXqywsDCbHNfHx0erVq2Sj4+PTY5nDScnJ3311VfZPrd58+ZcHTM2NlaffPKJ0tLS7jl31apVev7553N1HgDGIHgA//Pzzz/r7bffVufOnbVkyRL961//UqNGjfTCCy/os88+k5ubm0aNGmXz816/fl0ZGRlq3ry5AgICVKFCBZsc18PDQ/Xr15eHh4dNjmeNxx57TN988022IWHz5s2qU6dOnp6/fv36Kl++fJ6eA8D9IXgA/7N48WJ5enpqyJAhWZ4rXbq0RowYodDQUCUkJEiS0tPTtXz5cv3rX/9SvXr1FBwcrBkzZig5OdnyuhEjRqh79+5as2aNWrVqJV9fXz377LPauXOnJCkqKkohISGSpFGjRsnb21uSFBISohEjRmSqISoqKtMyRVJSksaPH6+mTZvK19dXTz/9tBYvXmyZn91Sy8GDB9WzZ081atRIjz32mF577TUdP348y2t++uknvfLKK/Lz89OTTz6p6dOnKz09/Z7fwzZt2ujatWvavXt3pvGjR4/qjz/+UOvWrbO85ttvv1Xnzp3l7+9veR/Lly+XJJ07d06hoaGSpJEjR1q+VyNGjNDLL7+st956S4899pjatGmj9PT0TEstAwYM0KOPPqrff//dcq45c+aoTp06io6Ovud7AZA3CB6Abm1c/OGHHxQUFKSiRYtmO6dNmzbq37+/ihUrJk
kaN26cpkyZoubNm2vevHnq0qWLli1bpn79+mXaCHno0CEtXrxY4eHh+uCDD+Ts7KyBAwfq+vXrCg4O1ty5cyVJffv21apVq6yu+Z133tHOnTs1fPhwLV68WKGhoZo2bZrWrFmT7fzdu3frxRdftLx28uTJunDhgjp16qSTJ09mmjts2DA1aNBA8+fPV7t27fThhx/qiy++uGdNNWvW1COPPJJluWXTpk0KDAzUgw8+mGn8P//5j/r37y8fHx9FRkZqzpw5qly5siZOnKhff/1VXl5emb4/t7+WpH379unChQv64IMPNHToUDk7O2c69vjx41WsWDG99dZbkm79O8yfP1+vvPKKAgMD7/leAOSNIvYuAMgPrl69quTkZFWqVMmq+SdOnNDq1as1dOhQ9enTR5L05JNPysvLS2+++aZ27typp556SpJ048YNRUVFqUqVKpKkYsWKqWvXrtq9e7datWplWX6oUqWK6tevb3XN0dHRevLJJ9W2bVtJUqNGjVSsWDGVKVMm2/kzZ85U1apVtXDhQssv6caNG6tFixaKiIjQ+++/b5n7/PPPq3///pKkoKAgffvtt/rPf/6jTp063bOu1q1ba+nSpRo/fryKFLn1fzGbN2/Wa6+9lmXuiRMnFBYWptGjR1vG/P391ahRI+3Zs0d+fn6Zvj9169a1zEtLS9PEiRPvuLRStmxZvfXWWxo8eLC++OILffLJJ6pVq5Zef/31e74HAHmHjgcgWX4RW7OcIMnSqr/9S/+2tm3bytnZOdPyRunSpS2hQ5LlF2ViYuJ91dyoUSN9/vnn6t27t5YtW6azZ8+qf//+Cg4OzjI3ISFBBw8eVOvWrTN1BkqUKKFmzZplWXrw9/fP9Pfy5ctblpju5Z/LLb/++qsuXryoli1bZpnbq1cvvfvuu7p586YOHTqkzZs3a8GCBZJuXc1yNyVLlrznfo42bdqoVatWGjdunM6ePasZM2bI1dXVqvcBIG8QPABJDzzwgIoXL67z58/fcU5CQoKuX78uSZb//efSQZEiRVSqVCnduHHDMvbPpRuTySRJysjIuK+aR48erUGDBuncuXOaNGmSmjdvrk6dOmV75c2NGzdkNptVtmzZLM+VLVs2U72S5O7ununvTk5OVt9H4+GHH1adOnUsyy2bN29W48aN9cADD2SZe+XKFQ0cOFANGzbUCy+8oDlz5ig+Pl7Sve/bUbx4cavqCQsLU0ZGhqpVq6aHH37YqtcAyDsED+B/GjdurD179mTaHPp3n3/+uR5//HH99ttvll+ily5dyjQnNTVVV69eValSpe67nn92X/7ZcXB1dVXfvn21ZcsWbd++3fJf9UOHDs1yLE9PT5lMJv31119Znrt06ZJKlix53/X+XZs2bfTNN98oNTVVX331VZbO0G3Dhg3TwYMH9fHHH+uXX37Rli1bbHrlUGJioqZMmaJatWrp2LFjWrJkic2ODSB3CB7A/7zyyiu6du2aZs+eneW5S5cuacmSJapZs6Z8fHwsmxM3bdqUad6mTZuUnp6uBg0a3FctHh4eiomJyTT2888/W75OSkpSq1atLL9IH3roIXXp0kVt27bNtmtTrFgx+fr6asuWLZkCzY0bN/Sf//znvuv9p9atW+vatWuaP3++rl+/brky5Z9+/vlntWzZUo0aNbIsgdy+4ud2R+ifm0ZzYubMmYqJidGcOXPUtWtXRUREZNlIC8BYbC4F/qd+/fp6/fXXNXv2bJ08eVLt27dXqVKldPz4cS1evFjJycmWUFKzZk2FhYUpIiJCiYmJCggI0JEjRzR37lw1atRITZo0ua9amjVrpgULFmjBggXy8/PTd999l+kSVXd3d/n4+Gju3LlycXGRt7e3Tp06pbVr16pVq1bZHnPo0KHq2bOn+vTpo86dOys1NVULFy5USkqKZSOprVSuXFmPPvqoFixYoBYtWliuBPqnevXqacOGDfLx8VH58uW1f/9+LVy4UCaTybIHxtPTU5
L0008/qUaNGvLz87OqhujoaC1btkyDBw9WtWrVNGjQIH3zzTcaMWKEVq5ceV+BBkDuETyAv+nbt6/q1q2r5cuX65133tH169dVoUIFBQcH67XXXst0c6+3335bVatW1Zo1a7Ro0SJ5eXmpW7du6tevn5yc7q+Z+Oqrr+rKlStavHixUlNTFRwcrLffflt9+/a1zJk4caJmz56tJUuW6NKlSypTpow6dux4x6s2goKC9NFHHykiIkJDhgyRq6urGjZsqKlTp+qRRx65r3qz06ZNGx08ePCOyyyS9O6772rSpEmaNGmSJKlatWqaMGGC1q9fr3379km61f3p0aOHVq1apR07dmjXrl33PHdCQoJGjhypWrVqqWfPnpJu7QkZN26c+vbtqw8//FCvvvqqDd4lgJwymfnkJQAAYBD2eAAAAMMQPAAAgGEIHgAAwDAEDwAAYBiCBwAAMAzBAwAAGIbgAQAADEPwAAAAhiF4AAAAwxA8AACAYQgeAADAMAQPAABgGIIHAAAwDMEDAAAYhuABAAAMQ/AAAACGIXgAAADDEDwAAIBhCB4AAMAwBA8AAKAFCxbopZdeuuucq1evaujQoQoICFBgYKAmTJigxMTEHJ2nyP0UCQAACr7ly5dr9uzZatiw4V3nhYeHKzExUR9//LHi4uI0evRoJSQkaOrUqVafi+ABAICDunjxot566y3t2bNH1apVu+vcAwcOKDo6Wps3b1aNGjUkSRMnTlSvXr00ZMgQlStXzqpzstQCAICD+u233+Ti4qL169fLz8/vrnP37dunBx980BI6JCkwMFAmk0k///yz1eek4wEAQAEWGhp61+e3bdt2x+dCQkIUEhJi1XkuXryoChUqZBpzdXVVyZIldeHCBauOITlQ8CjqP8DeJQAACojEA3Pz9Pi2/J30RGmbHequEhMT5erqmmXczc1NycnJVh/HYYIHAACF0d06Grbk7u6ulJSULOPJyckqVqyY1cdhjwcAAEYzOdnuYZDy5csrNjY201hKSoquXbsmLy8vq49D8AAAwGgmk+0eBgkICFBMTIxOnz5tGYuOjpYkNWjQwOrjEDwAAEAW6enpunTpkpKSkiRJfn5+euyxxzR48GD997//1e7duzVu3Di1b9/e6ktpJYIHAADGKwBLLRcuXFDjxo21efPmWyWbTJo7d64qVaqkl19+WYMGDVLTpk01fvz4HB3XZDabzXlQb77DVS0AAGvl+VUtAUNsdqzEve/Z7FhG4KoWAACMZuCm0PzGcd85AAAwHB0PAACMZuDVKPkNwQMAAKOx1AIAAJD36HgAAGA0lloAAIBhWGoBAADIe3Q8AAAwGkstAADAMCy1AAAA5D06HgAAGI2lFgAAYBgHXmoheAAAYDQHDh6O+84BAIDh6HgAAGA0J/Z4AAAAo7DUAgAAkPfoeAAAYDQupwUAAIZhqQUAACDv0fEAAMBoLLUAAADDsNQCAACQ9+h4AABgNJZaAACAYRx4qYXgAQCA0Ry44+G4kQsAABiOjgcAAEZjqQUAABiGpRYAAIC8R8cDAACjsdQCAAAM48DBw3HfOQAAMBwdDwAAjObAm0sJHgAAGI2lFgAAgLxHxwMAAKOx1AIAAAzjwEstBA8AAIzmwB0Px41cAADAcHQ8AAAwmMmBOx4EDwAADObIwYOlFgAAYBg6HgAAGM1xGx4EDwAAjMZSCwAAgAHoeAAAYDBH7ngQPAAAMJgjBw+WWgAAgGHoeAAAYDBH7ngQPAAAMJrj5g6CBwAARnPkjgd7PAAAgGHoeAAAYDBH7ngQPAAAMJgjBw+WWgAAgGHoeAAAYDBH7ngQPAAAMJrj5g6WWgAAgHHoeAAAYDCWWgAAgGEcOXiw1AIAgIPKyMhQRESEmjRpovr166t37946e/bsHedfvnxZQ4cO1eOPP65GjRpp8ODBunjxYo7OSfAAAMBgJpPJZo/7ERkZqRUrVmjSpElauXKlMjIy1KtXL6WkpGQ7f9CgQT
p//rw++ugjffTRRzp//rz69++fo3MSPAAAMJrJho9cSklJ0ZIlSxQeHq7g4GDVrl1bs2bNUkxMjLZu3ZplflxcnKKjo9W7d2/VqVNHdevWVZ8+fXTw4EFdu3bN6vMSPAAAMFh+6HgcPXpUN2/eVFBQkGWsRIkSqlu3rvbu3Ztlvru7u4oXL65169YpPj5e8fHx+vLLL/Xwww+rRIkSVp+XzaUAABRgoaGhd31+27Zt2Y7HxMRIkipUqJBp3MvLy/Lc37m6uurdd9/VuHHj1LBhQ5lMJnl5eWnZsmVycrK+j0HHAwAAg+WHjkdiYqKkW4Hi79zc3JScnJxlvtls1pEjR+Tv76/ly5frk08+0UMPPaR+/fopPj7e6vPS8QAAwGC2vJz2Th2Ne3F3d5d0a6/H7a8lKTk5WUWLFs0yf8uWLVq2bJm2b98uDw8PSdL8+fPVrFkzrV69Wt27d7fqvHQ8AABwQLeXWGJjYzONx8bGqly5clnm79u3Tw8//LAldEjSAw88oIcfflinT5+2+rwEDwAADJYfllpq164tDw8P7dmzxzIWFxenw4cPKyAgIMv88uXL6/Tp05mWYRISEnTu3DlVq1bN6vPmq+Bx9OhRjRw5Up06ddLFixe1fPnyTN8QAAAKhXxwOa2rq6u6du2qGTNmaNu2bTp69KgGDx6s8uXLq2XLlkpPT9elS5eUlJQkSWrfvr2kW/fyOHr0qI4ePaohQ4bIzc1NHTp0sPq8+SZ4HDp0SC+88ILOnTunQ4cOKSUlRUeOHFHPnj21Y8cOe5cHAEChEx4ero4dO2rMmDF68cUX5ezsrMWLF8vFxUUXLlxQ48aNtXnzZkm3rnZZsWKFzGazXn75ZfXo0UMuLi5asWKFPD09rT6nyWw2m/PqDeVE9+7d5efnp8GDB8vf31/r169X5cqVNWXKFP38889avXr1fR2/qP8AG1UKACjsEg/MzdPjV+y71mbH+nNemM2OZYR81fG43cb5uy5duujkyZPGFwQAQB7JD3s87CXfBA8XF5dsrwO+cOFCtpf1AACAgiffBI/mzZtr9uzZiouLs4ydPHlSb7/9toKDg+1XGAAANkbHIx8YPny4bt68qccff1yJiYnq0KGD2rVrJ2dnZ7355pv2Lg8AANvJB1e12Eu+uXOph4eHVq5cqZ9++kmHDx9WRkaGatWqpSZNmuToHvAAAOR3BbFTYSv57jd6UFCQevbsqeeee06JiYk6f/68vUuCjVX0KqkLO6epSYNH7F0KkK/wswFHkG+Cx7Fjx9SqVSvt3btXcXFxevbZZzVo0CC1adNGu3fvtnd5sJFK5Upqw7z+KulZzN6lAPkKPxuOhT0e+cDUqVNVtWpVVa9eXRs3blRqaqp27Nihnj17avbs2fYuD/fJZDKp678a6afPRsirdAl7lwPkG/xsOCaCRz5w4MABDR8+XGXKlNH333+vp556SuXKlVOHDh109OhRe5eH+/ToIw9pzuhOWrEpWj3HfmLvcoB8g58NOJp8s7nUyclJrq6uSktLU3R0tMaOHStJunnzZqaP60XBdDbmqnyfmaA/Y6+xfg38DT8bjqkgdipsJd8Ej/r162vBggUqXbq0kpOT1bRpU128eFHvvfee6tevb+/ycJ+uxiXoalyCvcsA8h1+NhyU4+aO/LPUMnbsWB0+fFifffaZRo0apdKlS2vhwoU6efIk9/EAAKCQyDcdj6pVqyoqKirTWP/+/TVq1Cg5OzvbqSoAAGyPpRY7seYeHUlJSZKkhx56KK/LAQDAEAQPOwkJCbnnN99sNstkMunIkSMGVQUAAPKKXYPH0qVL7Xl6AADswoEbHvYNHoGBgfY8PQAAdsFSSz6QnJysVatW6dixY0pPT7eMp6Sk6NChQ/r666/tWB1s6fufj6uo/wB7lwHkO/xsOA4Hzh35J3hMnjxZ69atU926dXXw4EH5+/vr9OnTunz5srp3727v8gAAgA3km/t4bNu2TVOmTNGqVa
tUsWJFTZo0Sdu3b1doaKhSU1PtXR4AADbDZ7XkA3FxcXrsscckSTVr1tThw4fl4uKiV199Vdu3b7dzdQAA2I7JZLtHQZNvgkfp0qV1+fJlSVK1atV07NgxSVKpUqX0119/2bM0AABgI/kmeDRt2lQTJkzQ8ePH1aBBA23cuFEHDx7U8uXLVb58eXuXBwCAzTg5mWz2KGjyTfB488035eXlpejoaIWGhqpGjRp6/vnn9emnnyo8PNze5QEAYDOOvNRi16tapk2bpgEDBqhYsWIqUaKEIiMjLc8tXLhQR44cUdmyZeXl5WXHKgEAgK3YtePx0UcfKTExMdNYnz59FBsbK5PJpLp16xI6AACFjiNf1WLXjofZbM4ytnfvXiUnJ9uhGgAAjFEA84LN5Js9HgAAoPDLN3cuBQDAURTEJRJbsXvwcORvPgDAMTny7z67B4/JkyfLzc3N8vfU1FRNnz5dxYsXzzRvypQpRpcGAECecODcYd/gERAQoEuXLmUa8/f319WrV3X16lU7VQUAAPKKXYPHp59+as/TAwBgFyy1AAAAwzhw7uByWgAAYBw6HgAAGIylFgAAYBgHzh0stQAAAOPQ8QAAwGAstQAAAMM4cO5gqQUAABiHjgcAAAZjqQUAABjGgXMHwQMAAKM5cseDPR4AAMAwdDwAADCYAzc8CB4AABiNpRYAAAAD0PEAAMBgDtzwIHgAAGA0lloAAAAMQMcDAACDOXLHg+ABAIDBHDh3sNQCAACMQ8cDAACDsdQCAAAM48C5I3fBIz4+Xjdv3lS5cuWUmpqqTz/9VOfPn1erVq0UEBBg6xoBAChUHLnjkeM9Hr/++quaNWumZcuWSZImT56sadOmaf369Xr55Ze1bds2mxcJAAAKhxwHj9mzZ6tGjRp64YUXlJiYqC+//FKdO3dWdHS0OnbsqPnz5+dFnQAAFBomk+0eBU2uOh59+/ZV5cqVtWvXLiUnJ+vZZ5+VJLVp00bHjx+3eZEAABQmTiaTzR4FTY6Dh5OTk9zc3CRJ33//vUqUKKF69epJurX3w93d3bYVAgCAQiPHm0t9fX31xRdfyN3dXV999ZWCg4NlMpl0+fJlLVq0SL6+vnlRJwAAhUYBbFTYTI47Hm+88YZ+/PFHderUSc7Ozurbt68kqV27dvrjjz80aNAgW9cIAEChYjKZbPa4HxkZGYqIiFCTJk1Uv3599e7dW2fPnr3j/NTUVM2cOdMyv2vXrjpy5EiOzpnj4OHj46NvvvlGq1at0rfffqtq1apJksaPH6+NGzfS8QAAoICIjIzUihUrNGnSJK1cuVIZGRnq1auXUlJSsp0/fvx4RUVF6Z133tGaNWtUunRp9e7dWzdu3LD6nLm6ZbqHh4ceffRRnTlzRjt37lR8fLwaNWqkBx98MDeHAwDAoTiZbPfIrZSUFC1ZskTh4eEKDg5W7dq1NWvWLMXExGjr1q1Z5p89e1Zr1qzR22+/rSZNmqhGjRqaPHmyXF1ddejQIavPm6sbiH355ZeaOXOmYmNj5eTkpC+++EJz5syRi4uLZs6cKVdX19wcFgAAh5AfbiB29OhR3bx5U0FBQZaxEiVKqG7dutq7d6/atWuXaf6uXbvk6emppk2bZpr/3Xff5ei8OQ4emzdv1vDhw/XMM8+oWbNmGjx4sCSpRYsWmjBhgiIjI9nnAQCAQUJDQ+/6/J1u7BkTEyNJqlChQqZxLy8vy3N/d+rUKVWuXFlbt27VwoULdfHiRdWtW1cjRoxQjRo1rK43x0st8+fPV6dOnTRt2jS1bNnSMv7cc89p4MCB2rRpU04PCQCAQ8kPNxBLTEyUpCyrFG5ubkpOTs4yPz4+XqdPn1ZkZKSGDBmiefPmqUiRIurcubMuX75s9Xlz3PE4deqUhg8fnu1zfn5+mjNnTk4PCQCAQzHJdkstuf2oktv33UpJScl0D67k5GQVLVo0y/wiRYooPj5es2bNsnQ4Zs2apaeeekpr165Vr169rD
pvjjseZcqU0cmTJ7N97uTJkypTpkxODwkAgEPJD5tLby+xxMbGZhqPjY1VuXLlsswvX768ihQpkmlZxd3dXZUrV9a5c+esPm+Og0ebNm0UERGhr776ynK5jclk0qFDhxQZGamnn346p4cEAAAGq127tjw8PLRnzx7LWFxcnA4fPpztJ80HBAQoLS1NBw8etIwlJSXp7Nmzqlq1qtXnzfFSy6BBg3Ts2DENGjRITk63cstLL72khIQENWzYUK+//npODwkAgEPJD1e1uLq6qmvXrpoxY4ZKly6tihUravr06Spfvrxatmyp9PR0XblyRZ6ennJ3d1fDhg31xBNPaPjw4Zo4caJKliypiIgIOTs7Wz6zzRo5Dh6urq768MMPtWvXLv3000+6fv26PD09FRgYqKeeeipffDMBAMjP8suvyvDwcKWlpWnMmDFKSkpSQECAFi9eLBcXF507d06hoaGaMmWKOnToIEmaM2eOZsyYoQEDBigpKUmPPfaYli5dqtKlS1t9TpPZbDbn1RvKT4r6D7B3CQCAAiLxwNw8PX77D/fZ7FjrejW02bGMkOOOx9y59/7HGDCAX/IAANxJQfw4e1uxafDw8PCQl5cXwQMAgLtw4NyR8+Bx9OjRLGMJCQnat2+fxo8fr7Fjx9qkMAAAUPjk6kPi/qlYsWJq2rSp+vfvr2nTptnikAAAFFo5+dj7ez0Kmlx9SNydPPTQQ3e8uRgAALilAOYFm7FJ8DCbzYqJidGHH36oihUr2uKQAACgEMpx8Khdu/YdWztms5mlFgAA7oGrWnKgf//+2QYPDw8PBQcHq1q1araoCwCAQstxY0cugsfAgQPzog4AABxGQdwUaitWBY9169bl6KDt27fPRSkAAKCwsyp4jBgxwuoDmkwmggcAAHdxPx9nX9BZFTy2bduW13UAAOAwWGq5h5xcIhsfH5/rYgAAQOGW482lKSkp+uSTTxQdHa2UlBTd/nBbs9mshIQEnThxQr/++qvNCwUAoLBw4IZHzoPHtGnTtGzZMtWqVUtXrlyRm5ubSpcurWPHjik1NZUPiAMA4B4ceaklx5/VsnXrVvXo0UPr169X165d5evrqy+++EJbt25VxYoVlZGRkRd1AgCAQiDHwePKlStq2rSpJKlWrVo6ePCgJKlcuXLq06ePNm/ebNsKAQAoZJxMtnsUNDleavH09FRKSookqWrVqrpw4YLi4+Pl4eGhatWq6cKFCzYvEgCAwoSllhxo2LChPv30UyUmJqpq1aoqWrSovv32W0nSgQMH5OHhYfMiAQBA4ZDj4NG/f3/98ssv6tOnj4oUKaLOnTtr7Nix6tChg95//321atUqL+oEAKDQMNnwUdBYtdQyZ84cdezYURUqVFDt2rW1ZcsWHTt2TJI0dOhQeXh4aP/+/QoJCVGfPn3ytGAAAAo6R/50WpP59o047sLX11cZGRkKCgpSx44d1bx5c7m4uBhRn80U9ecyXwCAdRIPzM3T4/f+/JDNjrXoBV+bHcsIVi217NixQ2+88Yb++usvDR48WE2aNNGUKVN0/PjxvK4PAAAUIlZ1PP7uyJEjWrdunTZu3KgrV67o0Ucf1fPPP682bdqoePHieVXnfaPjAQCwVl53PPp88ZvNjrXweR+bHcsIOQ4et6Wnp2vnzp1at26dtm/fLmdnZz399NPq2LGjGjRoYOs67xvBAwBgrbwOHq+utl3wWNCxYAWPHN/H4zZnZ2c1a9ZMzZo1040bN7Rt2zbNnz9f69at05EjR2xZIwAAKCRyHTxuO3LkiDZs2KCtW7fq3LlzCggIsEVdAAAUWo58VUuugse5c+e0ceNGbdiwQb///rsefPBBhYWFqUOHDqpataqtawQAoFBx4NxhffC4evWqtmzZog0bNuiXX36Rs7OzQkJC9Oabb6pJkyZycsrxvcgAAICDsSp4vPbaa/rhhx+UlpamRx55RMOHD9czzzyj0qVL53V9AAAUOo78WS1WBY99+/
apQ4cO6tixo+rVq5fXNeWJq3vzdocyUBCVCuBqL8AeHHmNwKrgsWvXLrm5ueV1LQAAOARH7nhYFboIHQAAwBbu+3JaAACQM06O2/AgeAAAYDRHDh6OvL8FAAAYjI4HAAAGc+TNpVYFj9q1a1v9TTKZTDp8+PB9FQUAQGHmyEstVgWP/v37O3Q6AwAAtmFV8Bg4cGBe1wEAgMNw5P+Wz9Uej4sXL+rnn39WSkqKZSwjI0OJiYnat2+fZs2aZbMCAQAobPh02hz46quvNGzYMKWlpVmWX8xms+Xr6tWr27ZCAABQaOT4ctr58+fLx8dHUVFR6tChg5599llt2rRJb7zxhpydnTVq1Ki8qBMAgELDyYaPgibHHY9Tp05p5syZqlu3rho1aqQlS5aoRo0aqlGjhv766y/Nnz9fTz75ZF7UCgBAoeDAKy05D0tOTk564IEHJElVq1bV77//royMDElS06ZNdeLECdtWCABAIeNkMtnsUdDkOHhUr15d+/fvt3ydkpKio0ePSpLi4uIybTgFAAD4uxwvtXTq1ElvvfWWEhISNHjwYD3++OMaOXKkOnbsqGXLlsnHxycv6gQAoNAogI0Km8lxx+P555/X6NGjLZ2NSZMmKTk5WW+//bbS0tI0evRomxcJAEBh4mSy3aOgydV9PLp06WL5unLlytqyZYuuXr2q0qVL26wwAABQ+NjkQ+JMJhOhAwAAKxXETaG2kuPgYc0Hxh05ciTXBQEAUNg5cO7IefDI7gPjbt68qf379+vMmTMaNmyYzYoDAACFS46Dx90+MO7NN9/UoUOH9Nxzz91XUQAAFGYFcVOordj0bqthYWHavHmzLQ8JAEChY7Lhn4LGpsHjzJkzSktLs+UhAQBAIZLjpZa5c+dmGcvIyFBMTIw2b96sZs2a2aQwAAAKK0dearFJ8JAkDw8PNW/eXCNHjrzvogAAKMwIHjlw+3NZAABA7tzrthSFWY73eIwcOVJnz57N9rnff/9dr7322n0XBQAACierOh7nz5+3fL127Vo1b95czs7OWebt3LlTP/74o+2qAwCgEGKp5R4mTJignTt3SrrVHhowYEC288xms5588knbVQcAQCHkwCst1gWPiRMn6scff5TZbNaoUaPUt29fValSJdMcJycnlShRQo0aNcqTQgEAQMFnVfAoV66cwsLCJN3qeAQHB6tEiRKW5ZakpCSlpqbK09Mz7yoFAKCQyC8fEpeRkaG5c+fqiy++0I0bNxQQEKBx48apcuXK93zt+vXr9cYbb2jbtm2qVKmS1efM8ebSdu3aafbs2XrhhRcsY/v371dQUJCmTp2qjIyMnB4SAACH4mSy3eN+REZGasWKFZo0aZJWrlypjIwM9erVSykpKXd93Z9//qmJEyfm6pw5Dh5z5szR+vXr1a5dO8tY3bp1NWzYMH3++ef68MMPc1UIAAAwTkpKipYsWaLw8HAFBwerdu3amjVrlmJiYrR169Y7vi4jI0NvvPGGfHx8cnXeHAePDRs2aPjw4erRo4dlrGTJkurevbsGDx6s1atX56oQAAAchclku0duHT16VDdv3lRQUJBlrESJEqpbt6727t17x9fNnz9fqampevXVV3N13hzfQOzq1at3XPupXr26YmJiclUIAACOwsmGH+4WGhp61+e3bduW7fjt39cVKlTINO7l5XXH3+X//e9/tWTJEq1evVoXL17MRbW56HhUr15dX3/9dbbPfffdd6patWquCgEAAMZJTEyUJLm6umYad3NzU3Jycpb5CQkJGjZsmIYNG6Zq1arl+rw57nh069ZNI0aM0LVr19S8eXOVKVNGV65c0fbt27VlyxZNmTIl18UAAOAIbHlRy506Gvfi7u4u6dZej9tfS1JycrKKFi2aZf7kyZP18MMPq1OnTrkr9H9yHDzat2+vmzdvKjIyMtPmk1KlSmncuHF69tln76sgAAAKu/xw59LbSyyxsbGZ7s0VGxsrb2/vLPPXrFkjV1
dX+fv7S5LS09Ml3bra9bXXXrP6I1NyHDwkqUuXLurcubNOnTqla9euqUSJEvL09NQXX3yhkJAQbd++PTeHBQDAIeSH+3jUrl1bHh4e2rNnjyV4xMXF6fDhw+ratWuW+f+80uXXX3/VG2+8oYULF6pWrVpWnzdXwUO6dSOx6tWr6/vvv9fixYu1Y8cOpaWl5egmIgAAwD5cXV3VtWtXzZgxQ6VLl1bFihU1ffp0lS9fXi1btlR6erquXLkiT09Pubu7Z9nDeXsD6kMPPaSSJUtafd5cBY8rV65o9erV+vzzz/Xnn3/Kw8NDYWFhevbZZ9WwYcPcHBIAAIeRDxoekqTw8HClpaVpzJgxSkpKUkBAgBYvXiwXFxedO3dOoaGhmjJlijp06GCzc5rMZrPZ2sm7d+/WqlWr9O233yo9PV0NGjTQvn379MknnygwMNBmReWFpDR7VwDkP6UCsv/AR8DRJR6Ym6fHXxx9xmbH6hlY5d6T8hGrOh4ff/yxVq1apVOnTqlq1arq16+fwsLCVKxYMQUGBsqUX6IbAADI16wKHu+++668vb21dOnSTJ2NGzdu5FlhAAAUVo783+tW3UCsbdu2On36tF599VX169dP33zzjdLSWLsAACA3nGz4KGis6njMnDlT8fHx2rBhg6KiojRw4ECVKlVKzZs3l8lkYqkFAABYJUebS287fvy41qxZow0bNujy5cuqUqWK2rZtq7Zt26pmzZp5Ued9Y3MpkBWbS4Hs5fXm0k/2nbXZsV5umP3np+VXuQoet6WlpWn79u1as2aNfvjhB6Wnp+uRRx7R+vXrbVmjTRA8gKwIHkD28jp4LLVh8OhWwIJHrm8gJklFihRRixYt1KJFC/31119au3at1q5da6vaAABAIWOzfSlly5ZV7969tXnzZlsdEgCAQsnJZLLZo6C5r44HAADIuYIXF2yH4AEAgMEKYKPCZgriJcAAAKCAouMBAIDBHPn+VwQPAAAM5sjLDY783gEAgMHoeAAAYDCWWgAAgGEcN3aw1AIAAAxExwMAAIOx1AIAAAzjyMsNjvzeAQCAweh4AABgMJZaAACAYRw3dhA8AAAwnAM3PNjjAQAAjEPHAwAAgzk58GILwQMAAIOx1AIAAGAAOh4AABjMxFILAAAwCkstAAAABqDjAQCAwbiqBQAAGIalFgAAAAPQ8QAAwGCO3PEgeAAAYDAup81HUlJS5Orqau8yAADIM06Omzvyzx6Pzz77TCEhIapfv77Onj2rt956S5GRkfYuCwAA2FC+CB4bNmzQzJkzFRYWJhcXF0lSjRo1NH/+fC1ZssTO1QEAYFsmG/4paPJF8FiyZIlGjx6tgQMHysnpVkndunXTuHHjtGrVKjtXBwCAbZlMtnsUNPkieJw6dUoNGzbMMt6oUSNduHDBDhUBAIC8kC+CR9myZXXq1Kks4wcOHJCXl5cdKgIAIO+w1GJn//73vzVx4kRt27ZNkvT777/rs88+09tvv60OHTrYuTrYyo+7flDnF55TowZ+at0yRJ98tFhms9neZQH5RkWvkrqwc5qaNHjE3qUgjzmZbPcoaPLF5bS9e/fWjRs3NGTIECUnJ+vVV19VkSJF1KlTJ7322mv2Lg828N9ff9HAfq+pVevW6j/wdR3Y/7NmzZyutLR09ezdx97lAXZXqVxJrY/sr5KexexdCpCn8kXwkKQhQ4aob9++OnHihMxms6pXry4PDw97lwUbiZw7R7Xr1NE7706XJD3ZpKlS09K0eNF8dXmpm9zd3e1cIWAfJpNJXdoFasrgMJkK4k5B5EpBXCKxlXyx1HL+/HmdP39eV69eVZkyZVS2bFnFxcVZxlGwpaSkaN/ePQoJbZFpvEXLVrp586YO7P/ZTpUB9vfoIw9pzuhOWrEpWj3HfmLvcmAQR76qJV90PEJCQu6a9I8cOWJgNbC1c2fPKjU1VVWrVcs0XqVKVUnSH6dOKeiJJ+1QGWB/Z2OuyveZCfoz9hp7O+
AQ8kXwWLp0aaa/p6en69SpU/r44481YsQIO1UFW4mPvyFJWZbOihUvLkm6eTPe8JqA/OJqXIKuxiXYuwwYrAA2KmwmXwSPwMDALGNBQUGqXLmy5syZo5CQEDtUBVvJyMi46/MmU75Y8QMAwzgVxDUSG8kXweNOqlWrpqNHj9q7DNwnD09PSdLNmzczjd+Mv9Xp8PRkEzEAx+K4sSOfBI/sNpDGx8drwYIFqlSpkh0qgi1VrlxFzs7OOnvmdKbxM2fOSJIerl7DHmUBAOwgXwSP7DaXms1mFStWTNOnT7dTVbAVNzc3PdagobZ9+41e7tHT8m/97Tdfy9PTU76P1rNzhQBgMAdueeSL4PHPzaWS5OLiolq1aqn4/zYgomDr/Wpfvdqrh94Y8rrad3hOvxw4oE8+WqzXBw9V0aJF7V0eABjKke/jkW+Cx+DBg1WjBi33wqrR40GaOXuO5n0QoUED+8urXDkNHvamXu7+ir1LAwAYKF8Ej927d8vNzc3eZSCPhTZvodDmLe49EXBQ3/98XEX9B9i7DBjAgS9qyR93Lg0LC9OMGTN0/PhxpaSk2LscAADylMmGj4ImX3Q8duzYoTNnzujrr7/O9nnuXAoAQOFgt+BRp04d/fDDDypTpoz69u1rrzIAADBeQWxV2IjdgofZbLZ8HRYWZq8yAAAwnCNf1ZIv9ngAAADHYNc9Hlu2bMnywWHZad++fd4XAwCAQRz5qha7Bo/Jkyffc47JZCJ4AAAKFQfOHfYNHrt27VKZMmXsWQIAAMZz4ORhtz0e//xsFgAAYKyMjAxFRESoSZMmql+/vnr37q2zZ8/ecf7x48fVp08fNWrUSEFBQQoPD8/2g17vxm7B4+9XtQAA4EhMNvxzPyIjI7VixQpNmjRJK1euVEZGhnr16pXtzTyvXr2qHj16yN3dXZ9++qkWLVqkK1euqFevXkpOTrb6nHYLHmFhYdwmHQDgkEwm2z1yKyUlRUuWLFF4eLiCg4NVu3ZtzZo1SzExMdq6dWuW+d9++60SEhI0bdo01apVS76+vpo+fbpOnjyp/fv3W31euwWPKVOmWHVFCwAAsL2jR4/q5s2bCgoKsoyVKFFCdevW1d69e7PMDwoKUmRkpNzd3S1jTk63YkRcXJzV580Xt0wHAMCR2HKXY2ho6F2f37ZtW7bjMTExkqQKFSpkGvfy8rI893eVKlVSpUqVMo0tXLhQ7u7uCggIsLpebiAGAIDR8sGnxCUmJkqSXF1dM427ublZtWfj008/1bJlyzRs2DCVLl3a6vPS8QAAoAC7U0fjXm4vmaSkpGRaPklOTlbRokXv+Dqz2az3339f8+bNU9++ffXSSy/l6LwEDwAADJYfPqvl9hJLbGysqlSpYhmPjY2Vt7d3tq9JTU3VyJEjtXHjRo0cOVLdu3fP8XlZagEAwGD54aqW2rVry8PDQ3v27LGMxcXF6fDhw3fcs/Hmm2/qq6++0syZM3MVOiQ6HgAAOCRXV1d17dpVM2bMUOnSpVWxYkVNnz5d5cuXV8uWLZWenq4rV67I09NT7u7uioqK0ubNm/Xmm28qMDBQly5dshzr9hxr0PEAAMBg+WBvqSQpPDxcHTt21JgxY/Tiiy/K2dlZixcvlouLiy5cuKDGjRtr8+bNkqSNGzdKkqZNm6bGjRtnetyeY9V7NzvILUST0uxdAZD/lAoYYO8SgHwp8cDcPD3+oT/jbXYs34oF655YLLUAAGCw/LC51F5YagEAAIah4wEAgMEc+QPaCR4AABjMgXMHSy0AAMA4dDwAADCaA7c8CB4AABiMq1oAAAAMQMcDAACDcVULAAAwjAPnDpZaAACAceh4AABgNAdueRA8AAAwmCNf1ULwAADAYI68uZQ9HgAAwDB0PAAAMJgDNzwIHgAAGM6BkwdLLQAAwDB0PAAAMBhXtQAAAMNwVQsAAIAB6HgAAGAwB254EDwAADCcAycPlloAAIBh6HgAAGAwrmoBAACGceSrWg
geAAAYzIFzB3s8AACAceh4AABgMJZaAACAgRw3ebDUAgAADEPHAwAAg7HUAgAADOPAuYOlFgAAYBw6HgAAGIylFgAAYBhHvmU6Sy0AAMAwdDwAADCa4zY8CB4AABjNgXMHwQMAAKM58uZS9ngAAADD0PEAAMBgjnxVC8EDAACjOW7uYKkFAAAYh44HAAAGc+CGB8EDAACjcVULAACAAeh4AABgMK5qAQAAhmGpBQAAwAAEDwAAYBiWWgAAMJgjL7UQPAAAMJgjby5lqQUAABiGjgcAAAZjqQUAABjGgXMHSy0AAMA4dDwAADCaA7c8CB4AABiMq1oAAAAMQMcDAACDcVULAAAwjAPnDpZaAABwVBkZGYqIiFCTJk1Uv3599e7dW2fPnr3j/KtXr2ro0KEKCAhQYGCgJkyYoMTExBydk+ABAIDRTDZ83IfIyEitWLFCkyZN0sqVK5WRkaFevXopJSUl2/nh4eE6ffq0Pv74Y73//vvasWOHxo8fn6NzEjwAADCYyYZ/cislJUVLlixReHi4goODVbt2bc2aNUsxMTHaunVrlvkHDhxQdHS0pk6dKh8fHwUFBWnixIn68ssvdfHiRavPS/AAAMBgJpPtHrl19OhR3bx5U0FBQZaxEiVKqG7dutq7d2+W+fv27dODDz6oGjVqWMYCAwNlMpn0888/W31eNpcCAFCAhYaG3vX5bdu2ZTseExMjSapQoUKmcS8vL8tzf3fx4sUsc11dXVWyZElduHDB6nodJni4O8w7BayXeGCuvUsAHFJ++J10e1Ooq6trpnE3Nzddv3492/n/nHt7fnJystXnzQdvHQAA5NadOhr34u7uLunWXo/bX0tScnKyihYtmu387DadJicnq1ixYlaflz0eAAA4oNvLJrGxsZnGY2NjVa5cuSzzy5cvn2VuSkqKrl27Ji8vL6vPS/AAAMAB1a5dWx4eHtqzZ49lLC4uTocPH1ZAQECW+QEBAYqJidHp06ctY9HR0ZKkBg0aWH1elloAAHBArq6u6tq1q2bMmKHSpUurYsWKmj59usqXL6+WLVsqPT1dV65ckaenp9zd3eXn56fHHntMgwcP1vjx45WQkKBx48apffv22XZI7sRkNpvNefi+AABAPpWenq733ntPUVFRSkpKUkBAgMaNG6dKlSrp3LlzCg0N1ZQpU9ShQwdJ0uXLlzVhwgR9//33cnNz09NPP62RI0fKzc3N6nMSPAAAgGHY4wEAAAxD8AAAAIYheAAAAMMQPAAAgGEIHgAAwDAEDwAAYBiCBwAAMAx3LoXVXnrpJcvtcf/plVde0fDhw+/6+j179qhbt27atm2bKlWqlBclAnY1YsQIrV279q5z/u///s+gaoD8ieCBHGndurVGjx6dZTy7TzIEHM3o0aM1dOhQy98bN26sUaNGqU2bNnasCshfCB7IEXd3dz344IP2LgPIlzw9PeXp6ZlljJ8Z4P9jjwds5vr16xozZoyaNGkiHx8fBQUFacyYMUpMTMx2/h9//KGePXuqQYMG8vf3V8+ePTO1oW/cuKGxY8fq8ccfV4MGDdStWzcdPHjQqLcD2FxUVJRatGihyZMnq0GDBurXr5/27Nkjb29vnTt3zjLvn2Nms1mLFi1SaGio/Pz89Oyzz2r9+vX2ehvAfaHjAZsZMWKELl68qLlz56pMmTLav3+/Ro0apZo1a6p79+5Z5g8ZMkS1a9fWmjVrlJaWpqlTp2rAgAH65ptvZDab1bt3b7m7u2vBggXy8PDQl19+qRdffFGff/656tata/wbBGzgzJkzio2N1bp165SUlKQrV67c8zWzZs3Sxo0bNW7cOFWvXl179+7V+PHjdePGDXXp0sWAqgHbIXggRzZs2KCvv/4601iDBg304Ycf6sknn1RAQIC8vb0lSZUqVdKyZct07NixbI915swZPfHEE6pYsaJcXFz0zjvv6Pfff1dGRob27NmjX375Rbt371bJkiUl3Qoq+/fv19KlS/
Xuu+/m6fsE8lK/fv1UuXJlSbe6G3eTkJCgjz/+WO+9956Cg4MlSVWqVNGff/6pxYsXEzxQ4BA8kCMhISEaNmxYpjF3d3dJUufOnfXdd99p7dq1+uOPP3TixAmdO3dO1atXz/ZYgwcP1jvvvKMVK1YoMDBQTZo0Ubt27eTk5KTffvtNZrNZzZo1y/SalJQUJScn582bAwxSrVo1q+eeOHFCycnJGjp0qJyc/v/qeFpamlJSUpSUlGT5GQQKAoIHcqR48eKqWrVqlvGMjAy9+uqrOn78uNq1a6c2bdrIx8dHY8eOveOxunTpoqefflo7duzQTz/9pIiICM2bN0/r1q1TRkaGPDw8FBUVleV1rq6uNn1PgNHuFRTS09MtX5vNZknS7Nmzsw3x/DygoCF4wCaOHDminTt36vPPP5efn58kKTU1VWfOnLG0lP/u8uXL+uCDD9SnTx916NBBHTp00MWLF9W0aVNFR0erVq1aio+PV2pqqmrWrGl53ZgxY1S7dm117drVsPcG5CUXFxdJUnx8vGXsjz/+sHxdvXp1FSlSROfPn8/UAVy6dKlOnDihiRMnGlYrYAtc1QKbKFu2rIoUKaItW7bo7NmzOnjwoAYNGqRLly4pJSUly/wHHnhA//nPfzRmzBgdOXJEZ8+e1cqVK+Xi4iJfX181adJEderU0eDBg7V7926dPn1aU6ZMUVRUlGrUqGGHdwjkjVq1aqlYsWJauHChzpw5o++//14fffSR5XlPT0916tRJ77//vr788kudPXtWq1ev1vTp0+Xl5WXHyoHcoeMBmyhXrpzeffddzZkzR8uXL9eDDz6o4OBgde/eXd99912W+UWKFNGiRYs0depUde/eXYmJiapTp44WLlyoKlWqSJKWLFmi6dOna9CgQUpMTFSNGjU0d+5cBQUFGf32gDzj4eGh6dOna8aMGWrTpo1q166t4cOHq3///pY5I0eOVKlSpfT+++8rNjZWFSpUUHh4uHr16mXHyoHcMZlvLyACAADkMZZaAACAYQgeAADAMAQPAABgGIIHAAAwDMEDAAAYhuABAAAMQ/AAAACGIXgAAADDEDwAAIBhCB4AAMAwBA8AAGAYggcAADAMwQMAABiG4AEAAAxD8AAAAIYheAAAAMMQPAAAgGEIHgAAwDAEDwAAYBiCBwAAMAzBAwAAGIbgAQAADEPwAAAAhiF4AIWI2Wy2dwkAcFcED+B/XnrpJXl7e2d6+Pr6Kjg4WBMmTND169fz7NxRUVHy9vbWuXPnJElz5syRt7e31a+PiYlRnz599Oeff953LefOnZO3t7eioqKyfX7evHny9vbWf//73zseY9KkSfL391d8fPw9zzdixAiFhITkul4ABUsRexcA5Cd169bVW2+9Zfl7amqqfvvtN7333ns6cuSIPvvsM5lMpjyv4/nnn1eTJk2snv/jjz9qx44deVjR/xcWFqaIiAht2LBB9erVy/J8SkqKNm7cqKeffloeHh6G1ASg4CB4AH/j4eGh+vXrZxoLCAjQzZs3FRERoV9//TXL83mhfPnyKl++fJ6fJzfKly+vxo0ba/PmzRoxYoScnZ0zPb9jxw5du3ZNHTt2tFOFAPIzlloAK/j6+kqSzp8/L+nWssywYcMUHh6u+vXrq0ePHpKk5ORkTZs2TU899ZR8fX31r3/9S5s3b850rIyMDEVGRio4OFh+fn7q169flmWc7JZa1q1bp7CwMPn5+Sk4OFgzZ85USkqKoqKiNHLkSElSaGioRowYYXnNF198obZt21qWjObMmaP09PRMx926daueeeYZ1atXT2FhYTp69Og9vx/PPfec/vrrL/30009Znlu7dq2qV6+uBg0aKD09XQsXLlS7du1Ur1491a9fX506ddLu3bvveGxvb2/NmTPnnt+Pffv2qWvXrvLz81NgYKCGDx+uK1euWJ7PyMjQrFmzFBISIl9fX4WEhGjmzJlKTU295/sDkHcIHoAVTp06JUmqXLmyZW
zLli0qXry45s2bp169eslsNqt///5auXKlevTooXnz5snf31+DBw/WunXrLK+bPn26PvjgA3Xs2FFz585VyZIlNXPmzLuef/ny5Ro+fLh8fHw0d+5c9enTR59++qkmT56s4OBg9e3bV5I0d+5c9evXT5K0YMECjR07VkFBQZo/f766dOmiRYsWaezYsZbjfvfddwoPD5e3t7c++OADtW7dWm+88cY9vx8hISEqVaqUNmzYkGn8ypUr2rlzp6XbMWPGDEVGRurf//63PvzwQ02aNEnXrl3T66+/rsTExHue50727t2r7t27y93dXbNnz9aoUaMUHR2tbt26KSkpSZK0aNEiffbZZ+rfv7+WLFmiF198UYsXL9a8efNyfV4A94+lFuBvzGaz0tLSLH+/fv26oqOjLSHidudDklxcXDRhwgS5urpKknbt2qXvv/9es2bNUps2bSRJTZo0UWJiombMmKF27dopISFBn376qXr06KEBAwZY5sTGxur777/PtqaMjAx98MEHat68uSZPnmwZT0xM1KZNm+Tp6akqVapIkurUqaNKlSrpxo0bll/4Y8aMkSQ1btxYJUuW1JgxY9SjRw898sgj+uCDD1SvXj1Nnz7dUoukewYhV1dXPfPMM1q9erUmTJggd3d3SdKmTZskSe3bt5ckxcbGavDgwXrppZcsr3Vzc9PAgQP1f//3f7letpo5c6YefvhhLViwwLLU4+fnp7Zt22rNmjXq0qWLoqOj5evrq+eee06SFBgYqKJFi8rT0zNX5wRgG3Q8gL/Zu3evfHx8LI8nnnhCQ4YMka+vr2bOnJlpY2n16tUtoUOSfvrpJ5lMJj311FNKS0uzPEJCQnTp0iUdP35cv/zyi1JTU9WsWbNM523duvUdazp16pQuX76sFi1aZBrv2bOnoqKi5OLikuU1Bw4cUFJSkkJCQrLUIt0KSUlJSfrtt99yVMvfPffcc7p586a+++47y9jatWsVHBysMmXKSLoVEF5++WVduXJF+/bt05o1a7R+/XpJtzah5kZiYqJ+/fVXPfXUU5agmJaWpsqVK6tGjRratWuXJKlRo0batWuXOnfurA8//FAnTpxQ165d9eyzz+bqvABsg44H8Dc+Pj6aMGGCJMlkMsnNzU0VKlTI9uqM4sWLZ/r7tWvXZDab9dhjj2V77NjYWMXFxUmSSpUqlem5Bx988I41Xbt2TZIsv8ytcfs1ffr0uWMt169fl9lszlKLl5eXVee4fbnx+vXr1aZNG504cUK//fabXn/9dcucgwcPasKECTp48KCKFi2qmjVr6qGHHpKU+3uOxMXFKSMjQ4sWLdKiRYuyPO/m5iZJ6tWrl4oXL641a9ZoxowZmj59uh555BGNGTNGjz/+eK7ODeD+ETyAvylevLgeffTRXL3W09NTxYoV09KlS7N9vmrVqpZ7X1y+fFnVq1e3PHc7KGSnRIkSkpRp46QkXb16VYcPH5a/v/8dXzNjxgxVq1Yty/Nly5ZVyZIl5eTkpL/++ivTc3er5Z86duyot99+W9euXdO6detUrlw5NW7cWJIUHx+vXr16ydvbW5s2bVL16tXl5OSkHTt26Ouvv77rcf+5ATYhIcHydfHixWUymdS9e3e1bds2y2uLFi0qSXJyclKXLl3UpUsXXb58WTt27ND8+fM1cOBA7dq1K1O3CoBxWGoBbCQwMFAJCQkym8169NFHLY9jx47pgw8+UFpamvz9/eXu7q6vvvoq02u3b99+x+NWr15dpUqVyjLnyy+/VJ8+fZSamionp8w/yn5+fnJxcdHFixcz1VKkSBG99957OnfunNzc3OTv76+tW7dm6j78fenkXtq1aydnZ2dt375dW7ZsUVhYmGXPxe+//65r166pW7duqlmzpqXGnTt3Srq1dyU7Hh4eunjxYqax/fv3Z3q+bt26+v333zO9t0ceeURz5szRnj17JEmdOnWy7IkpU6aMOnTooC5duiguLs6qG5sByBt0PA
AbeeqppxQQEKB+/fqpX79+qlGjhv773/8qIiJCTZo0UenSpSVJ/fr10+zZs1W0aFE9/vjj2rFjx12Dh7OzswYOHKiJEyeqTJkyCgkJ0alTpxQREaEuXbrogQcesHQ4vvnmGzVt2lQ1atRQr1699P777ys+Pl6NGjXSxYsX9f7778tkMql27dqSpCFDhujll1/WgAED9O9//1unTp3S/PnzrX7Pnp6eatGihebPn68///wz0707Hn74YXl4eGj+/PkqUqSIihQpoq+//lqrV6+WpDte1RIcHKxNmzbJz89PVatWVVRUlE6fPp1pzpAhQ9SnTx8NHTpUzzzzjNLT07VkyRL9+uuvlqt6AgICtGTJEpUtW1b+/v66ePGiPvroIwUGBlr+LQDYgRmA2Ww2m7t27Wru2rXrfc29efOm+Z133jE3bdrU7OPjYw4JCTHPnDnTnJSUlGne0qVLzaGhoWZfX1/zSy+9ZF6xYoW5Vq1a5rNnz5rNZrM5IiLCXKtWrUyviYqKMrdt29bs4+NjDg0NNUdGRppTU1PNZrPZHB8fb+7evbvZx8fH3Lt3b8trli1bZm7Tpo3Zx8fH/MQTT5iHDh1q/vPPPzMdd9euXebnnnvO/Oijj5pbt25t/u6778y1atUyr1mzxqrvxU8//WSuVauWuVu3blme2717t7lDhw7mevXqmYOCgsyvvPKKed++fWZ/f3/z1KlTzWaz2Tx8+HBzs2bNLK+5dOmSOTw83Fy/fn1zw4YNzePGjTN//vnnWb4fP/74o7lz587mevXqmRs0aGDu1q2bee/evZbnU1NTzREREebmzZubfX19zUFBQebRo0ebr1y5YtX7ApA3TGYznyoFAACMwR4PAABgGIIHAAAwDMEDAAAYhuABAAAMQ/AAAACGIXgAAADDEDwAAIBhCB4AAMAwBA8AAGAYggcAADAMwQMAABiG4AEAAAzz/wCx9csPgExUTwAAAABJRU5ErkJggg==", "text/plain": [ - "accuracy 0.666667\n", - "f1 0.666667\n", - "recall 1.000000\n", - "precision 0.500000\n", - "dtype: float64" + "
" ] }, - "execution_count": 6, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "%matplotlib inline\n", + "_, conf_matrix_fig = res[\"zephyr_ml.primitives.postprocessing.confusion_matrix\"]\n", + "conf_matrix_fig" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "d59e86b1", + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAjsAAAHJCAYAAABjZPjUAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/OQEPoAAAACXBIWXMAAA9hAAAPYQGoP6dpAABj0UlEQVR4nO3dd1hTZ/8G8DuDsBFQERUHThx1A1Jx4ai1rqqte7XgLlZxttb9qq3yunEgWOtsK9aqdeDeorbWWhVfVxFFhrJnSHJ+f/gzlYKWAMkh4f5cl5fhyZPDnYeEfHnOc86RCIIggIiIiMhEScUOQERERKRPLHaIiIjIpLHYISIiIpPGYoeIiIhMGosdIiIiMmksdoiIiMiksdghIiIik8Zih4iIiEwaix0iIiIyaSx2iKhEBAQEoH79+ggNDc1338yZM+Hj4/PGxw4bNgzDhg3L1/7o0SPMmzcPnTt3RpMmTdChQwdMmTIFkZGRRcr4/PlzBAQEwNPTEy1btsSUKVMQHx//r48bNGgQ6tevn+/fzZs3ddq2SqXCypUr0b59ezRt2hSDBw/GjRs3ivRciKjwJLxcBBEVV1paGry9vVG9enUolUocOXIEEolEe//MmTNx5coVnDx5ssDHvyp0tm3bpm0LDw/H9OnTUbduXQwcOBAuLi6IjY3F1q1bcf/+faxfvx5t2rQpdEaVSoWPPvoI6enpmDJlClQqFQIDA2Fra4u9e/fCzMyswMcJgoCWLVvi448/Rrdu3fLcV69ePVhZWRV624sWLcKePXsQEBCAqlWrYsuWLfjzzz+xb98+1KhRo9DPhYh0JBARFdPOnTuFJk2aCJcuXRLq1asnXLx4Mc/9M2bMEDp27PjGxw8dOlQYOnSo9uuoqCihWbNmwsSJEwWVSpWnb2ZmptC7d2+hTZs2Qk5OTqEzHjhwQKhXr55w7949bdu9e/eE+vXrCz///PMbH/fXX38V+Jx03XZMTIzQsGFDYceOHdo+OTk5QocOHYQvv/yy0M+DiHTH3VhEVGxhYWHw8vJC69atUaNGDezevbtY29u2bRuUSiVmz54NmUyW5z5LS0vMmDED/fr1Q0pKCgBg7969qF+/PiIiIt64zfPnz8PV1RV16tTRttWpUwe1a9fGmTNn3vi4O3fuAADc3NyKte1Lly5BpVKhS5cu2j4KhQIdOnR46/cnouJjsUNExXLv3j3cvHkTffr0AQD06dMHJ06cwPPnz4u8zXPnzqFhw4aoVKlSgfd7eXlh8uTJqFixIgCgQ4cO+P7779GoUaM3bvPBgweoWbNmvvbq1avj0aNHb3zcnTt3YGVlhW+++Qaenp5455134Ofnh4cPH+q07QcPHsDa2lqb+ZUaNWogPj4eGRkZb8xARMXDYoeIiiUsLAz29vbaBcgffvgh1Go19uzZU+RtxsbGwsXFpdD9HR0d0axZM9jY2LyxT1paWoH3W1tbv7XQiIyMRGZmJuzs7LBu3TosWrQIUVFRGDJkCOLi4gq97bf1AYD09PS3P0kiKjIWO0RUZLm5udi/fz86d+6M7OxspKamwtraGi1btsQPP/wAjUYDAHkWKxeGT
CaDWq0u0azCW47FeFu+yZMnY/v27Zg1axZatWqF3r17IyQkBGlpafjuu+8Kve239QEAqZS/jon0RS52ACIyXqdPn8aLFy+wZ8+eAmdyzp07h/bt28PS0hJKpfKN21EqlbC3t9d+XaVKFcTExLyxf25uLlJSUlChQoVCZ7WxsSlwBic9PR22trZvfFxBa3WqVauG2rVraw+BL8y239YHwFszEFHx8E8JIiqysLAwVKtWDd99912ef1u3boWtra12oXKFChWQnJz8xoInNjY2T+Hi7e2N27dvIyEhocD+Z86cQZs2bXDs2LFCZ3V1dcXjx4/ztT9+/Bi1a9cu8DEqlQo//fQTrl+/nu++7OxsODo6FnrbtWrVQnp6OhITE/P0iYqKQtWqVWFhYVHo50JEumGxQ0RFkpCQgHPnzuGDDz6Ap6dnnn+tW7dGt27dcObMGcTFxcHDwwO5ubkFFic3btxAbGwsWrdurW0bMmQIzMzM8J///Cff7qzMzEysXr0aDg4OaNeuXaHzent748GDB7h//7627f79+3jw4MEbz9cjl8uxdu1afPPNN3nab926hcePH8PT07PQ23733XcBAEeOHNH2USqVOH36tE7nCyIi3fGkgkRUJMHBwVi+fDkOHDiAevXq5bv/2rVrGDJkCCZOnIjPPvsM48aNw8WLF+Hr64tWrVpBKpXi9u3b2Lx5M2rUqIFt27blOcz8p59+wpdffokmTZpg4MCBqFy5Mh4/fowtW7YgOjoaISEh8PDwAAAkJibi8ePHqFOnzhsXKSuVSvTq1Qs5OTkICAgAAAQGBsLGxgY//fQT5PKXe/Vv374NhUKhPYx83759mDFjBnr37o3evXsjJiYGq1atgpOTE3788UfIZLJCb3vmzJn45ZdfMGXKFNSsWVN7UsGffvqJJxUk0iMWO0RUJO+//z5kMhkOHjxY4P2CIKBz587Izc3FqVOnoNFosGXLFvzyyy+Ijo6GRqNB1apV0b17d/j6+sLc3DzfNn777Tds3boVN27cwIsXL1CxYkW0aNEC48aNy7Prae/evZg1axa+++477WxLQZ49e4b//Oc/uHDhAszMzNCmTRvMmjULTk5O2j4+Pj6oWrVqnrM5Hzp0CJs3b8bDhw9haWmJLl26YMqUKXnWGRVm20qlEsuXL8fBgweRmZmJRo0aYfr06WjatGmhxpyIiobFDhEREZk0rtkhIiIik8Zih4iIiEwaix0iIiIyaSx2iIiIyKSx2CEiIiKTxmKHiIiITBqLHSIiIjJpvBAoXp78TKPRz+mGpFKJ3rZNf+M4GwbH2TA4zobBcTYcfYy1VCqBRCIpVF8WOwA0GgGJifmvRlxccrkUDg7WSE3NhEqlKfHt00scZ8PgOBsGx9kwOM6Go6+xdnS0hkxWuGKHu7GIiIjIpLHYISIiIpPGYoeIiIhMGosdIiIiMmksdoiIiMiksdghIiIik8Zih4iIiEwaix0iIiIyaSx2iIiIyKTxDMpEZJrUaphdvghpXCw0lZyR2/pdQCYTOxVR2aJWQ375PJCeDLmNPVTuXqK8D0vVzM7GjRsxbNiwt/ZJSkpCQEAA3N3d4eHhgfnz5yMrK8tACYnIGCgO7odjy0aw//AD2I39FPYffgDHlo2gOLhf7GhEZcar96Ftr+7A4MGw7dVdtPdhqSl2duzYgZUrV/5rP39/f0RFReHbb7/FqlWrcObMGcybN0/v+YjIOCgO7ofdp8MgjYnJ0y599gx2nw5jwUNkAKXtfSgRBEHUS77GxcVh7ty5iIiIgLOzMypUqIBt27YV2Pf69esYOHAgDh06hNq1awMAzp8/D19fX5w5cwaVKlUqUga1WqPXC4EmJWXwQnN6xHE2DKMYZ7Uaji0bQRoTAwkAAUCWmbn2bkEigca5MpLOXy21u7TkMinsHayQnJQJlbqUjrMJ4DjrkVoNB+9WkD57hleX6bTMzdHeFiQSaCpXQeKvfxbrffjyQqCFm7MRfc3Or
Vu3YGZmhv3792PdunV4+vTpG/teu3YNFStW1BY6AODh4QGJRIJff/0V3bt3L3IOubzkJ7le/RAK+8OgouE4G4YxjLP88nnI/v8vSQFA/yHf4FeXhvk7rrts2GBEZc2gtXm+bPXkFn7cMQMSABJBgCzmKSyuXoLKu51B4ohe7Pj4+MDHx6dQfePi4lC5cuU8bQqFAvb29nj27FmRM0ilEjg4WBf58f/Gzs5Sb9umv3GcDaNUj3N6svZmlpl5wYUOEZUKtunJgB4/e18nerGji6ysLCgUinzt5ubmyMnJKfJ2NRoBqamZxYlWIJlMCjs7S6SmZkHNaVK94TgbhjGMs9zGHrYFtF9bMwRWudnar9N+2Au1l7fhgulAKpPCzs4CqanZ0JTScTYFHGf9UV84hWXB/dD3jhQeMS9ngl/fjfVKmo09VElFX0JiZ2dpPLuxdGFhYQGlUpmvPScnB1ZWVsXatj7XIKjVmtK7xsGEcJwNozSPs8rdC1ZVqkD6j5leq9xsWOXmaNcKZLR+F1LpP3/1lg5ymQRWCjlyZBKohNKZ0RRwnPUjKvUvjI5dgOut1djvpkbkWsBClbfPq/dhtrsXYKDfJaV353sBnJ2dER8fn6dNqVQiOTkZTk5OIqUiolJDJkP6om8AvPyF+rpXX6cv+rrULk4mMmYHH+xHpx/a4nrCb7CXWmP1YcBcXTreh0ZV7Li7uyM2NhZRUVHatitXrgAAWrZsKVYsIipFlD16ITVkGzTOznnaNZWrIDVkG5Q9eomUjMg0ZauyMevcVHxydChSlSlwd/bEySFX0H7Gdmj+sc5WrPdhqd6NpVarkZiYCFtbW1hYWKBp06Zo0aIFJk+ejHnz5iEzMxNz5sxBnz59inzYORGZHmWPXsjq3E171FXKrjDI27ThjA5RCUvKTkT//b1x8/kNAMBnzSdjpsdsmMnMoOxRDYnvfwCLq5dgm56MNBv7l7uuyvoZlP/p2bNn8Pb2xqFDhwAAEokEa9euhYuLC0aMGIHPP/8c7dq140kFiSi/136h5nqx0CHSB3tzB1S1qYryFuWxu0cYvvKaDzOZ2d8dZLKXh5cPGvTyf5Heh6VqZmfp0qV5vnZxccHdu3fztJUvXx6rV682ZCwiIiL6f1mqLKgFNWzMbCCRSLDKJwjZqmxUtqkidrQ3KtUzO0RERFR63Ev6H7rt8cG005/j1QUYHCwcS3WhA5SymR0iIiIqnX64uwvTz0xBpioDCVnxiMuMhbN15X9/YCnAYoeIiIjeKCM3A1+cm4ZdkdsBAG2rtkdQ52BUsnb+l0eWHix2iIiIqECRiXfgd3QE7iZFQiqRYmqrmZjcchpkUuNa8M9ih4iIiPJRaVQYfmgg/kp9hEpWztjQJQRtqrYVO1aRcIEyERER5SOXyhHYYTU6Ve+Ckx9fMNpCB+DMDhEREf2/W8//xNP0aHSt+T4AoK1Le3hXbQeJxLivH8aZHSIiojJOEAR8d2sL3g/zwZhjn+Jh8n3tfcZe6ACc2SEiIirT0pSpmHp6En66HwYA6OLyHsqZO4icqmSx2CEiIiqj/kj4HX7hI/Eo5SHkUjm+9JyHcc0mQioxrR0/LHaIiIjKoJCbmzD3whdQapRwsamGTV23oJWzh9ix9ILFDhERURkUlfoXlBolurl+gFUd18HBwlHsSHrDYoeIiKiM0Aga7S6q2a3noUnFpuhX92OTWIT8Nqa1U46IiIjyEQQBG2+sQ7+feyJXnQsAUMgU6F9vgMkXOgBndoiIiExaUnYiJp0cjyN/HQIA/HR/Dz6uP0jkVIbFYoeIiMhEXY2NwJjwT/AkPRoKqQILvJfgo3oDxY5lcCx2iIiITIxG0CDo9zVYHDEfKo0KruVqYXPXrXinYlOxo4mCxQ4REZGJmXdxNjbcWAsA+LBOPyzvsAq2CjuRU4mHC5SJiIhMzPCGo1DeojwCO6zGhi6hZbrQATizQ0REZPQ0ggZXYiPQurIXAKCOQ11cG/Ynr
M2sRU5WOnBmh4iIyIglZCZg4MG+6LPvfVx8el7bzkLnb5zZISIiMlLnn57F2GOfIj4zDpZyS8RnxokdqVRisUNERGRk1Bo1/vvrNwi89jU0ggZujg0Q3HUr6ju6iR2tVGKxQ0REZETiMmIx/rgfzj09AwAY7DYMi9sug5WZlcjJSi8WO0REREbkxONjOPf0DKzk1ljWfgU+ql/2ThKoKxY7RERERmSQ21A8Tv0L/esNRB2HumLHMQo8GouIiKgUe5Yeg3HHfJGcnQQAkEgkmOn5FQsdHXBmh4iIqJQ6ERWOiSfG4EX2C0gkEgR1DhY7klFisUNERFTK5KpzseTKQqy9vhIA0KRiM0x1nyluKCPGYoeIiKgUeZIWjTHHPsHV2AgAwKfvjMa8d/8Dc5m5yMmMF4sdIiKiUuJqbASG/PIRknOSYacohxUd16Jn7d5ixzJ6LHaIiIhKiTr2dWFtZgPXcrWwqeu3qGFXU+xIJoHFDhERkYgSMhNQwbICJBIJHCwcEdb7AFxsqkEhU4gdzWTw0HMiIiKRHHjwM7x2tsCuyO3atlrlarPQKWEsdoiIiAwsW5WNmWcD8OnRYUhVpiDs3o8QBEHsWCaLu7GIiIgM6GHKA/gdHYmbz28AACY2/xyzPL6CRCIROZnpYrFDRERkIPvuhWHKaX+k56bB0cIRazttROca74kdy+Sx2CEiIjKAe0n/w5hjn0CAgNaV38WGLiGoYlNV7FhlAosdIiIiA6jrUA8BrWZALagwzf0LyKX8CDYUjjQREZGe7Pnf92jh1BK17OsAAKZ7fCFyorKJR2MRERGVsMzcTHx+cgLGH/eDX/goZKuyxY5UpnFmh4iIqATdTYyE79HhuJsUCQkk6ObaHWZSM7FjlWksdoiIiErI7sgdmHF2CrJUWXCyqoQNXULgXbWd2LHKPBY7RERExZSZm4npZyfjh7u7AADtXTpiXedgOFk5iZyMABY7RERExSaXynE/6X+QSqSY6TEb/i2mQCrhstjSgsUOERFREQiCAI2ggUwqg0KmwMauW/AsPQatq7wrdjT6B5adREREOkpTpmLssU+wJGKhtq2GXU0WOqUUZ3aIiIh0cDPhBnzDR+BRykOYSc0wsvGncLGtJnYsegvO7BARERWCIAgIubkJ74d1wqOUh3CxqYZ9fQ6x0DECnNkhIiL6F6k5KZh8+jMceLAPANCtZnes8gmCg4WjuMGoUFjsEBERvYVao0avfe/j9os/YSY1wxyvBRjdZDwkEonY0aiQuBuLiIjoLWRSGSY080d12xo4+GE4xjSdwELHyHBmh4iI6B+SshPxJP0J3qnQBADwUf2B+KBWL1iZWYmcjIqCMztERESvuRobgU4/tMWQXz7C86zn2nYWOsaLxQ4REREAjaDB2uur0Hvf+3iSHg0LmQUSs16IHYtKgOjFjkajwerVq9G2bVs0a9YMfn5+iI6OfmP/Fy9eICAgAK1bt4anpycmT56MuLg4AyYmIiJT8yLrBYb+8jEWXPoKKo0Kfer0xYmPz6GeY32xo1EJEL3YCQoKws6dO7Fw4ULs3r0bGo0Gvr6+UCqVBfb//PPPERMTgy1btmDLli2IiYnBhAkTDJyaiIhMxeWYi/D5oQ2OPw6Hucwcy9uvwsYuW2CrsBM7GpUQUYsdpVKJ0NBQ+Pv7o0OHDnBzc8OKFSsQGxuL8PDwfP1TU1Nx5coV+Pn5oUGDBmjYsCFGjx6NmzdvIjk52fBPgIiIjN63t0LwLCMGdezr4ki/UxjeaBSPtjIxohY7kZGRyMjIgJeXl7bNzs4ODRs2xNWrV/P1t7CwgLW1Nfbt24f09HSkp6fj559/hqurK+zsWIETEZHulrVfgfHN/BH+0Rk0qtBY7DikB6Ieeh4bGwsAqFy5cp52Jycn7X2vUygUWLp0KebMmYNWrVpBIpHAyckJ27dvh1RavLpNLi/5uk8mk+b5n/SD42wYxjbOco3w922ZVC/vcX0wtnE2Rueiz
2D/g30I7rMRMpkUDlb2WNRusdixTFZpeE2LWuxkZWUBeFnEvM7c3BwpKSn5+guCgDt37qB58+bw9fWFWq3GihUrMH78eOzatQs2NjZFyiGVSuDgYF2kxxaGnZ2l3rZNf+M4G4axjLO5UqW9be9gBSuFcZ1WzFjG2ZioNWosOrsIC84ugEbQoK1rG4xoNkLsWGWGmK9pUd/9FhYWAF6u3Xl1GwBycnJgaZl/UA4fPozt27fj1KlT2sJmw4YN6NixI/bs2YORI0cWKYdGIyA1NbNIj30bmUwKOztLpKZmQa3WlPj26SWOs2EY2zhnKdXa28lJmchRyERMU3jGNs7GIjYjFmOPfoqzT84AAIY2Gob+DftznA1AX69pOzvLQs8WiVrsvNp9FR8fj+rVq2vb4+PjUb9+/sP9rl27BldX1zwzOOXKlYOrqyuioqKKlUWl0t+LXa3W6HX79BLH2TCMZZxVr/1SVak1UKmMa8GpsYyzMTgdfRLjj/vheVYCrOTW+Kb9fzG40RBYK6yRlJHBcTYQMV/Tou4UdnNzg42NDSIiIrRtqampuH37Ntzd3fP1d3Z2RlRUFHJycrRtmZmZePLkCWrWrGmIyEREZEQ23FiLAQc+xPOsBDRwbIRjH53Bx/UHiR2LDEzUYkehUGDo0KFYvnw5Tpw4gcjISEyePBnOzs7o2rUr1Go1EhISkJ2dDQDo06cPgJfn2omMjERkZCSmTJkCc3Nz9O3bV8RnQkREpVFzp1aQSqQY3vATHOl/EnUd6okdiUQg+nJ/f39/9O/fH7Nnz8agQYMgk8kQEhICMzMzPHv2DN7e3jh06BCAl0dp7dy5E4IgYMSIERg1ahTMzMywc+dO2NraivxMiIioNIjL/Pus+p6VW+PswAgs77ASlnIu+i6rJIIgCP/ezbSp1RokJmaU+HblcikcHKyRlMR9wvrEcTYMYxvnrFw12q2+AAA4698GlmbGsUDZ2Ma5NMlV52LJlYUIvRmMo/1Pob6j2xv7cpwNR19j7ehobRwLlImIiErCk7RojDn2Ca7GvlwDGh515K3FDpUtLHaIiMioHXl0CP4nxyI5Jxl2inJY0XEtetbuLXYsKkVY7BARkVFSqpVYeHkuNt5YBwBo7tQCG7tsQc1yriIno9JG9AXKRERERbH9zlZtoTOmyXgc+DCchQ4ViDM7RERklIY3HIUz0acw0G0I3nf9QOw4VIpxZoeIiIxCjjoHa66vRI765Yll5VI5tr6/k4UO/SvO7BARUan3MOUBRoePwh8Jv+NZ+lMsbrtM7EhkRFjsEBFRqfbz/b2YfOozpOemwdHCER2rdRI7EhkZFjtERFQqZamy8NX5WfjudigAwLOyFzZ2CUUVm6oiJyNjw2KHiIhKnYcpD/DJkWG4/eJPSCDBpBYBmO7xBeRSfmyR7viqISKiUkcmkeFJWjQqWFbAuk7B6Fidu66o6FjsEBFRqaDWqCGTvryGWQ27mtj6/k7Usa+LStbOIicjY8dDz4mISHR3EyPh84M3Tj4+rm1rU7UtCx0qESx2iIhIVLsjd6Drnva4k3gLCy7NgUbgVcipZHE3FhERiSI9Nx0zzwbgh7u7AADtXDoiqHMwpBL+HU4li8UOEREZ3O0Xt+B3dATuJf8PUokUM9y/xKSWASx0SC9Y7BARkUE9SnmIbns6IludDWfrytjYJRReVdqIHYtMGIsdIiIyKNdytdCnbj/EZ8ZhbadNqGBZQexIZOJY7BARkd7dfP4HKltX0RY2y9qvhJnUjLutyCD4KiMiIr0RBAGhfwbj/T0++OzEGO2RVuYycxY6ZDCc2SEiIr1IzUnB5NOf4cCDfQAAuVSOLFUWrM2sxQ1GZQ6LHSIiKnHX436F37FReJz6F8ykZvjKaz7GNJkAiUQidjQqg1jsEBFRiREEAZv+CMKCS3OQq8lFddsa2NR1C1pUaiV2NCrDWOwQEVGJyVBlYPPNjcjV5OKDWr2wsuNalDO3FzsWlXEsd
oiIqMTYmNkguOu3+DXuKj5pPJq7rahUYLFDRERFphE0WP/7WlibWWNk408BAM2cWqCZUwuRkxH9jcUOEREVyYusF/A/ORbHoo5CIVWgfbWOcC1XS+xYRPkUudh58OABLly4gPj4eAwbNgzR0dFwc3ODjY1NSeYjIqJS6PKzSxgb/gliMp7CXGaORd5fo6adq9ixiAqkc7Gj0WgwZ84chIWFQRAESCQSvP/++wgKCsLjx4+xfft2ODs76yMrERGJTCNosOa3FVh6ZRHUghq17esguOtWNK7wjtjRiN5I59NXBgUF4cCBA1i0aBEuXLgAQRAAANOmTYNGo8GKFStKPCQREYlPI2gw7NAA/CdiPtSCGv3qfoxj/c+w0KFST+diJywsDP7+/ujXrx/s7e217Q0aNIC/vz8uXLhQkvmIiKiUkEqk8HBuDUu5JVZ2XIegzsGwUdiKHYvoX+m8G+v58+do0KBBgfdVqlQJqampxQ5FRESlg1qjxvPs56hkVQkA8FmLyehZpw9qlastcjKiwtN5ZqdGjRo4c+ZMgfdduXIFNWrUKHYoIiISX1xmHD4+0Acf7++NzNxMAC9nd1jokLHReWZnxIgRmDNnDnJzc9GxY0dIJBJERUUhIiICoaGhmDlzpj5yEhGRAZ2JPoXxx/2QkBUPK7kVbj7/A56VW4sdi6hIdC52PvroIyQmJmL9+vXYtWsXBEHAlClTYGZmBl9fXwwaNEgfOYmIyABUGhWWX12CFb8uhwABDRwbYfN7W1HXoZ7Y0YiKrEjn2RkzZgyGDBmC69evIzk5GXZ2dmjatGmeBctERGRcnqXHYOzxT3Ep5uWBJsMajsQi769hKbcUORlR8ei8ZmfWrFmIjo6GjY0N2rZti549e6J9+/awt7fHw4cPMXbsWH3kJCIiPZt1bhouxVyAtZkNNnQJQWCH1Sx0yCQUamYnJiZGe3vfvn3o3LkzZDJZvn5nz57FxYsXSy4dEREZzOK23yAjNx1ftwtELfs6YschKjGFKnbmz5+Ps2fPar+eOHFigf0EQUCbNm1KJhkREenV07QnOPLXIXz6zmgAQBWbqvix188ipyIqeYUqdhYsWICLFy9CEAR88cUXGDduHKpXr56nj1QqhZ2dHTw9PfUSlIiISs7Rvw7D/8RYJOUkobJ1FXSv1UPsSER6U6hip1KlSvjwww8BABKJBO3bt4ejo6NegxERUclTqpVYdHkeNtxYCwBoVrE5GpZvJHIqIv3S+WisDz/8EDk5Ofjjjz+gVCq118bSaDTIysrCtWvXMHXq1BIPSkRExfM4NQqjw0fit/hfAQCjm4zDV14LYC4zFzkZkX7pXOxERERg0qRJSElJKfB+a2trFjtERKXMkUeH8NnJsUjJSUY5c3us6hjEXVdUZuhc7KxYsQIODg5YuHAh9u/fD6lUir59++Ls2bPYtWsXgoOD9ZGTiIiKIVejREpOMlpWcsemrltQzbb6vz+IyEToXOzcvXsXixYtQpcuXZCWlobdu3ejffv2aN++PXJzc7F+/Xps2rRJH1mJiEgHKo0KcunLX/M9a/fBlm470LVGN5jJzERORmRYOp9UUKPRoFKll1e/rVGjBu7du6e977333sPt27dLLh0RERXJz/f3os2uVojLiNW2fVCrJwsdKpN0LnaqV6+Ou3fvAgBcXV2RlZWFhw8fAgBUKhUyMjJKNiERERVatiob085Mhl/4SDxKeYh1v68WOxKR6HTejdWzZ08sX74cgiBg6NChaNy4MRYuXIhhw4Zhw4YNqFOHZ90kIhLDg+R78D06Erde3AQATGoRgBkeX4qcikh8Os/s+Pr6YuDAgbhx4wYAYO7cubhz5w7Gjx+Phw8fYvr06SUekoiI3m7P/75Hpx/a4daLm6hgWQG7e+zFl63natfsEJVlOr8LpFIpZsyYof36nXfewfHjx/Hw4UPUqlULNjY2JRqQiIjebnfkDvifHAcAaFOlLdZ32Qxn68oipyIqP
XSe2SmIjY0NmjRpgrS0NEyaNKkkNklERIXUo3ZvuDk2QECrGdjTaz8LHaJ/KNTMjlqtxsqVK7F3715IJBL06dMHkydP1l75XKlUIjg4GJs3b0Z2drZeAxMREXDy8XF0qOYDqUQKGzMbhPc/Awu5hdixiEqlQs3srF69GsHBwahWrRrc3NwQEhKiPXngr7/+ih49emDNmjVwcnLChg0b9BqYiKgsy8jNwMQTYzDwYF+s/32ttp2FDtGbFWpm5+jRo+jZsyeWLVsGAAgODsauXbtQv359fPbZZzAzM0NAQABGjhwJMzOew4GISB9uv7gFv6MjcC/5f5BKpFALKrEjERmFQs3sxMXFoUePv6+h0qtXL8TExGD69Olo2bIlfvnlF/j5+bHQISLSA0EQsP32VnTb0xH3kv8HZ+vK2NvrIPxbTBE7GpFRKFSxk5WVBQcHB+3Xjo6OAABPT098++23qFKlSpEDaDQarF69Gm3btkWzZs3g5+eH6OjoN/bPzc1FYGCgtv/QoUNx586dIn9/IqLSLF2ZhnHHP8WU058hW50Nn+qdcfLjC3i3qrfY0YiMRpGOxpJKXz5s5MiRkEgkxQoQFBSEnTt3YuHChdi9ezc0Gg18fX2hVCoL7D9v3jzs3bsXixcvRlhYGBwdHeHn54e0tLRi5SAiKo0eJN/HgQc/QyaRYXbr+dj5wR5UsKwgdiwio1KsQ88tLS2L9c2VSiVCQ0Ph7++PDh06wM3NDStWrEBsbCzCw8Pz9Y+OjkZYWBj+85//oG3btqhduzYWLVoEhUKBP//8s1hZiIhKo6ZOzbG8/Srs63MY/i0mQyopkTOGEJUpxXrXFHdWJzIyEhkZGfDy8tK22dnZoWHDhrh69Wq+/hcuXICtrS3atWuXp//JkyfzbIOIyFil5qRg6N6huJnwh7ZtUIOh8KzcWsRURMat0GdQHjBgQL62fv365WuTSCSFvvJ5bOzLq/FWrpz3BFhOTk7a+1736NEjVKtWDeHh4di0aRPi4uLQsGFDzJw5E7Vr1y7U93wTubzk/1qSyaR5/if94DgbhrGNs1wj/H1bJtXLe7ykXY/7DZ8eGYG/Uh7h16e/4dzgy5BJZWLHMknG9no2ZqVhrAtV7EycOFEv3zwrKwsAoFAo8rSbm5sjJSUlX//09HRERUUhKCgI06dPh52dHdavX4/Bgwfj0KFDKF++fJFySKUSODhYF+mxhWFnV7zdfVQ4HGfDMJZxNlf+fVi2vYMVrBSl9xpRgiBgzZU1mBo+FbmaXNQoVwPffrgFFcrbiR3N5BnL69kUiDnWohY7FhYvT4KlVCq1twEgJyenwPVAcrkc6enpWLFihXYmZ8WKFWjfvj1++ukn+Pr6FimHRiMgNTWzSI99G5lMCjs7S6SmZkGt1pT49ukljrNhGNs4ZynV2tvJSZnIUZTOGZLk7CR8dnwcfnl4EADQo3ZPfNdvK2S5FkhKyhA5nekyttezMdPXWNvZWRZ6tkjUP3Ve7b6Kj49H9erVte3x8fGoX79+vv7Ozs6Qy+V5dllZWFigWrVqePLkSbGyqFT6e7Gr1Rq9bp9e4jgbhrGMs+q1X6oqtQYqVfHWGOrD07Qn6LWvG6LTHkMhVWDeu4swpvk4OFjaICk7wyjG2dgZy+vZFIg51qLurHRzc4ONjQ0iIiK0bampqbh9+zbc3d3z9Xd3d4dKpcLNmze1bdnZ2YiOjkaNGjUMkpmIqKRUtqmCWuVqo4ZdTfzS9xh8m4wt9oEfRJSfqDM7CoUCQ4cOxfLly+Ho6IiqVati2bJlcHZ2RteuXaFWq5GYmAhbW1tYWFigVatWePfddzFjxgwsWLAA9vb2WL16NWQyGXr37i3mUyEiKpTE7BewkFnCyswKUokUG7qEwkwqh515ObGjEZks0Zeh+/v7o3///pg9ezYGDRoEmUyGkJAQmJmZ4dmzZ/D29sahQ4e0/desWQMPDw9MnDgR/fv3R3p6Or777jvtWZ2JiEqri
GeX4fO9N766MFPbVt6yPAsdIj2TCIIg/Hs306ZWa5CYWPILAeVyKRwcrJGUxH3v+sRxNgxjG+esXDXarb4AADjr3waWZuItUNYIGqy9vhJLIhZCLahRq1xthPc/XWCRY2zjbKw4zoajr7F2dLTW7wLlxMREhISE4OLFi0hISMDmzZtx/PhxuLm5oXPnzkXZJBGRSUrITMDEE6NxKvoEAKBv3Y+wvP1K2ChsRU5GVHbovBsrOjoavXr1wg8//IBKlSrhxYsXUKvVePToEfz9/XH69Gk9xCQiMj4Xn56Hzw9tcCr6BCxkFljRYS3Wd97MQofIwHSe2fn6669Rvnx5bNu2DVZWVmjcuDEAIDAwEDk5OdiwYQM6dOhQ0jmJiIxKZm4mfMNH4HlWAuo51Edw161oUL6h2LGIyiSdZ3YuXbqE8ePHw87OLt8hkgMGDMC9e/dKLBwRkbGyMrPCqo7rMNBtCI72P81Ch0hERVqzI5cX/DClUslzRBBRmXX2yWko1TnoXOM9AECXmt3QpWY3kVMRkc4zO61atcLGjRuRmfn35RUkEgk0Gg127dqFFi1alGhAIqLSTq1RY+mVRfhof2+MP+6HJ2nRYkciotfoPLMTEBCAQYMGoWvXrvD09IREIkFISAgePHiAqKgo7Ny5Ux85iYhKpdiMZxh77FNcjDkPAOhRqzccLYp2UWIi0g+dZ3bq1auHPXv2wNPTExEREZDJZLh48SKqV6+O3bt3o0GDBvrISURU6px8fBwdv38XF2POw9rMBus7b8Z/O66BlZmV2NGI6DU6z+yo1Wq4uroiMDBQH3mIiEo9QRDwn8vzsfr6fwEAjcq/g83vfYva9nVFTkZEBdF5Zsfb2xuLFi3KczFOIqKyRCKRIFWZAgAY1dgXh/udYKFDVIrpPLPTo0cPHDlyBDt27ECNGjXQp08f9OzZE1WrVtVHPiKiUiNXnQszmRkAYEGbJehS4z0ebUVkBHSe2fnyyy9x9uxZhIaGolWrVtiyZQu6dOmCoUOH4scff0RaWpo+chIRiUapVmLuhS8x8Jd+UGvUAAALuQULHSIjUaSrnkskEnh5eWHRokU4f/48goKCULlyZcyfPx9t27Yt6YxERKJ5nBqF3vu6Yf2NNTj35DTOPDkpdiQi0lGRip1XVCoVzp8/j0OHDuHs2bMAAC8vrxIJRkQktkMPD6LTj23xa9w1lDO3x7fddsKnehexYxGRjnResyMIAi5fvoxffvkFx44dQ0pKCpo0aQJ/f390794dDg4O+shJRGQwOeocLLj4FYJvbgAAtKzUChu7bEF1uxoiJyOiotC52Gnbti1evHiBKlWqYPDgwejduzdq1qyph2hEROKYdHI89t77EQAwruln+LL1XChkCpFTEVFR6Vzs+Pj4oFevXmjVqpU+8hARie6z5pNxKeYClrVfga413xc7DhEVk87FzoIFC/SRg4hINNmqbFyJvYx2Lh0AAI0qNMaVoTdgLjMXNxgRlYhCFTudOnXCunXr4Obmhk6dOr21r0QiwfHjx0skHBGRvj1Mvg/f8JGITLyN/X2OoJWzBwCw0CEyIYUqdjw8PGBtbQ0AcHd3h0Qi0WsoIiJD2HvvRwScnoSM3HSUtyiPTFWm2JGISA8KVewsWbJEe3vp0qVv7atWq4uXiIhIz7JUWfjy3HRsv7MVAOBVpQ02dA5BZZsqIicjIn3Q+Tw7nTp1QmRkZIH3/fHHH3j33XeLHYqISF/+l3gX3fZ0xPY7WyGBBFNaTUdYrwMsdIhMWKFmdg4ePAiVSgUAePr0KcLDwwsseC5duoTc3NySTUhEVIJOR5/AncTbqGjphKDOwWhfraPYkYhIzwpV7Ny8eRNbt76c7pVIJAgKCnpj31GjRpVMMiIiPfBrMg6pylQMazQKlawqiR2HiAygUMVOQEAAhg8fDkEQ0LlzZ6xduxYNGjTI00cmk8HGxgY2NjZ6CUpEVBRKyV/wCw/Chi7BsFHYQiKRYKr7T
LFjEZEBFarYUSgUqFq1KgDgxIkTcHJygpmZmV6DEREVhyAISJOFI8lsA55FKbE4YgEWt10mdiwiEkGhip21a9fio48+QqVKlfDTTz+9ta9EIsGECRNKJBwRUVGkK9Mw5fTnSFS8vORDe5dOmNxyusipiEgshS522rVrh0qVKmHt2rVv7ctih4jE9Ofzm/ALH4EHyfcBQQp71TB82+2/sFZwNpqorCpUsfP6kVdvOuyciEhs4X8dxqdHhyNHnYPK1lWgSZwEC00jSCU6n2WDiExIifwGSEhIwK1bt3hCQSISVTOnlrBTlEOXGu/hcN9zsNA0EjsSEZUCOhc76enpmDVrFnbs2AEAOHz4MDp27Ij+/fujR48eePbsWYmHJCJ6kydp0drbTlZOONzvBLZ1/x4OFo4ipiKi0kTnYicwMBBHjx5FuXLlAADLly+Hm5sb1q5dC7lcjuXLl5d4SCKifxIEAZv/2IDWO5rjp3t7tO3V7WpwtxUR5aHzb4QTJ05g5syZ6NGjB/788088ffoUfn5+6NSpEyZOnIgLFy7oIycRkVZydhJGHRmKL85Ph1KjxPGocLEjEVEpVqgFyq9LTk5GrVq1AABnzpyBXC5HmzZtAADlypVDTk5OySYkInrNr3FXMSb8EzxOi4KZ1Azz3l0E33fGih2LiEoxnWd2qlatirt37wIAjh8/jmbNmmnPmnzmzBm4uLiUbEIiIrzcbRX0+xr0/Ok9PE6LQg27mvil7zH4NRkHiUQidjwiKsV0LnYGDhyIpUuXonv37rhz5w4GDx4MAJg4cSK+/fZbDBw4sMRDEhFdi7uCeRe/hEqjQs/afXDio3No5tRC7FhEZAR03o01YsQIlC9fHlevXsXEiRPRvXt3AICZmRnmzZuHAQMGlHhIIiJ3Z09MahGAKjZVMbLRp5zNIaJC07nYAYAePXqgR48eedpWrFhRIoGIiABAI2iw8UYQetbuDRfbagCAL1vPFTkVERmjIhU7jx49wurVq3HlyhWkpqbCwcEBrVq1woQJE1C7du2SzkhEZczzrOeYeGI0Tj4+jgMP9mH/h0cglxbp1xURke7Fzv379zFw4EDIZDL4+PigQoUKSEhIwKlTp3D69Gn8+OOPLHiIqMguxVzAmGOfIDbjGSxkFhjcYBhkEpnYsYjIiOlc7CxfvhwuLi7Ytm0bbG1tte1paWkYMWIEVqxY8a8XCyUi+ie1Ro1VvwXim6uLoRE0qGtfD8HvbUXD8rzkAxEVj85HY129ehVjx47NU+gAgK2tLUaPHo2rV6+WWDgiKhsSs19gwMG+WHplETSCBh/XH4SjH51moUNEJULnmR25XA5zc/MC71MoFFAqlcUORURli6XcCs+zEmAlt8LSdoEY6DZE7EhEZEJ0Lnbeeecd7Ny5Ex06dMhz6KcgCNixYwcaN25cogGJyDSpNWoAgEwqg6XcEiHvbYVKo0Z9RzeRkxGRqdG52Jk0aRIGDRqEXr16oVu3bqhYsSISEhJw5MgRPHr0CFu2bNFHTiIyIbEZzzDumC+8XdohoNUMAEBt+7oipyIiU1WkmZ3NmzcjMDAQa9euhSAIkEgkaNy4MYKDg+Hu7q6PnERkIk49PoEJJ/zwPOs5bj7/A5809oODhaPYsYjIhBXpxBWtW7fGjz/+iKysLKSmpsLOzg6WlpYlnY2ITIhKo8LXV/6DVb8FAgAalX8Hm9/7loUOEeldoYudFy9eYO/evYiJiUGNGjXQs2dPlC9fnkUOEf2rmPSnGHPsE0Q8uwQAGNHoUyxsswQWcguRkxFRWVCoYuf+/fsYMmQIUlJStG1BQUFYt24dd1sR0Vtlq7LRPawzYjKewsbMFis6rkHvOn3FjkVEZUihzrOzcuVK2NjYYPv27bhx4wZ++uknuLi4YOHChfrOR0RGzkJugcmtpqFJxWY48fE5FjpEZHCFKnauXbuGKVOmoFWrVjA3N0eDBg3wxRdf4N69e0hMTNR3RiIyMtFpj3Hz+
R/ar4c3HIVDfY/DtVwtEVMRUVlVqGInLS0NVapUydPm5uYGQRDw/PlzvQQjIuN0+NEv8PnBG6MOD0FKTjIAQCKRQCFTiBuMiMqsQhU7arUaMlneC/G9Wpicm5tb8qmIyOgo1UrMPj8DIw4PQkpOMipYVkBGbobYsYiIinboORHR6/5KeYTR4SPxe8J1AMDYphMxu/U8zuYQUalQ7GLn9UtGEFHZc+DBPnx+aiLSlKmwN7fHmk4b8V7N98WORUSkVehiZ8CAAQW29+vXL8/XEokEt2/fLnQAjUaDtWvX4scff0RaWhrc3d0xZ84cVKtW7V8fu3//fkybNg0nTpyAi4tLob8nEZUMQRDww91dSFOmwt3ZExu7hMLF9t/fu0REhlSoYmfixIl6CxAUFISdO3di6dKlcHZ2xrJly+Dr64sDBw5AoXjzFPjTp0+xYMECveUion8nkUiwyicIW/8MxcTmn8NMZiZ2JCKifEQtdpRKJUJDQzF16lR06NABALBixQq0bdsW4eHh6NGjR4GP02g0mDZtGho1aoTLly/rJRsRFSzsfz/iQvR5fN3uv5BIJHC0KI/JraaJHYuI6I0KdTSWvkRGRiIjIwNeXl7aNjs7OzRs2BBXr1594+M2bNiA3NxcjBkzxhAxiQhAlioLow+Mht+RUfj2VgiO/nVY7EhERIUi6tFYsbGxAIDKlSvnaXdyctLe909//PEHQkNDsWfPHsTFxZVYFrm85Os+mUya53/SD46z/v0v8S4+PTICt57/CQkkCPCYjm61u0EuLb1jLtcIf9+WSfXyHtcHvp4Ng+NsOKVhrEUtdrKysgAg39occ3PzPNfheiUzMxNTp07F1KlTUbNmzRIrdqRSCRwcrEtkWwWxs+PFUg2B46wf3934DuN+GYfM3ExUsq6E7X23o3OtzmLH+lfmSpX2tr2DFawUxnWmDb6eDYPjbDhijrWo734Li5dXPFYqldrbAJCTk1Pg1dQXLVoEV1dXDBw4sERzaDQCUlMzS3SbwMsq1s7OEqmpWVCrNSW+fXqJ46w/c8/PxprfVgIA2lXrgO8/3gUrTTkkJZX+kwVmKdXa28lJmchRyN7Su/Tg69kwOM6Go6+xtrOzLPRskajFzqvdV/Hx8ahevbq2PT4+HvXr18/XPywsDAqFAs2bNwfw8szOANCjRw+MHTsWY8eOLXIWlUp/L3a1WqPX7dNLHOeS165qR6z/fS2mtJyOaZ4zUMHGDklJGUYxzqrXfqmq1BqoVMZ1TjC+ng2D42w4Yo51kYqdxMREhISE4OLFi0hISMDmzZtx/PhxuLm5oXPnwk9vu7m5wcbGBhEREdpiJzU1Fbdv38bQoUPz9Q8PD8/z9Y0bNzBt2jRs2rQJ9erVK8pTIaLXCIKA6LTHqG5XAwDQvlpHRAz5HdVsq0NWitfnEBG9jc6/vaKjo9GrVy/88MMPqFSpEl68eAG1Wo1Hjx7B398fp0+fLvS2FAoFhg4diuXLl+PEiROIjIzE5MmT4ezsjK5du0KtViMhIQHZ2dkAgBo1auT5V6lSJQBAlSpVYG9vr+tTIaLXpOemY/xxP3T4/l08THmgba9mW/0tjyIiKv10Lna+/vprlC9fHidOnMDatWshCC+PeAgMDISPjw82bNig0/b8/f3Rv39/zJ49G4MGDYJMJkNISAjMzMzw7NkzeHt749ChQ7rGJCId/Pn8Jrr82A5h935AlioTV59FiB2JiKjE6Lwb69KlS1i8eDHs7Oy0a2ZeGTBgAD7//HOdtieTyTBt2jRMm5b/pGQuLi64e/fuGx/r6en51vuJ6O0EQcB3t7dg9vkZyFHnoIp1VWzoGorWlb3+/cFEREaiSGt25PKCH6ZUKnlhUCIjkaZMxZRT/vj5wV4AQJca72FNpw1wtCgvcjIiopKl826sVq1aYePGjcjM/PtQbYlEAo1Gg127dqFFixYlGpCI9CP4jw34+cFeyKVyzPVahG3dv2ehQ0QmSeeZnYCAA
AwaNAhdu3aFp6cnJBIJQkJC8ODBA0RFRWHnzp36yElEJWxi889x68WfGNd0Ilo5e4gdh4hIb3Se2alXrx7CwsLg6emJiIgIyGQyXLx4EdWrV8fu3bvRoEEDfeQkomJKyUnG0iuLkKvOBQAoZAqEvPcdCx0iMnlFWrNTs2ZNBAYGlnQWItKT3+KuYXT4KDxOi4Jao8aXreeKHYmIyGB0LnZiYmL+tU+VKlWKFIaISpYgCNj4xzosvDQXuZpc1LCriQ9q9RQ7FhGRQelc7Pj4+PzrEVd37twpciAiKhlJ2YnwPzkOR/86DADoWbsPVnRYAzvzciInIyIyLJ2LncWLF+crdjIzM3Ht2jVERERg8eLFJRaOiIrm9/jfMOrIUDxNfwKFVIEF3kswqpEvTw1BRGWSzsVO3759C2wfMmQIlixZggMHDqBDhw7FzUVExWBtZoOk7CS4lquFzV234p2KTcWOREQkmhK96rmPjw/Gjx9fkpskokJSqpVQyBQAgLoO9bCrxx40rvAObBV2IicjIhJXiV7G+MaNG288uzIR6c/lmItovaM5LsVc0LZ5VWnDQoeICEWY2Zk1a1a+No1Gg9jYWFy9ehX9+/cvkWBE9O80ggarfg3E11f/A42gwfJrXyOs136xYxERlSo6FzsREfmvhiyRSGBjYwM/Pz+MHTu2RIIR0dvFZ8ZjwnE/nHlyCgAwoP5gLG3H818REf2TzsVOcHAwateurY8sRFRI556cwbjjvojPjIOV3ApL2wVioNsQsWMREZVKOq/ZGTx4MPbt26eHKERUGL/H/4b++3shPjMObo4NcLT/aRY6RERvofPMjpmZGRwcHPSRhYgKoWnF5uhV+0PYKmyxyPtrWJlZiR2JiKhU07nYmTRpEr755hukpaXBzc0NVlb5f9HychFEJevckzNoUrEpypnbQyKRIKhzMMxkZmLHIiIyCjoXO/PmzYNarca0adPe2IeXiyAqGSqNCt9cWYxVvwWie62eCH1vGyQSCQsdIiId6FzsLFq0SB85iOgfYtKfYuyxT3H52UUAQAXLilBpVCx0iIh0VKhiZ/jw4Zg7dy5q166NDz/8UN+ZiMq841FHMfHEGCRmJ8LGzBYrOq5B7zoFX6qFiIjerlDFzpUrV5CRkaHvLERlXq46F4sjFmDd76sAAE0qNkNw12/hWq6WyMmIiIxXiV4ugoiKJz03DT/d2wMA8H1nDH7pe4yFDhFRMfFCVkSliIOFIzZ23YKEzHj0qN1L7DhERCah0MXOhAkToFAo/rWfRCLB8ePHixWKqKxQqpVYeGkOGlV4R3tiQM/KrUVORURkWgpd7DRs2BCOjo76zEJUpkSl/oXR4SNxPf43WMmt0Kl6V1S0qih2LCIik6PTzE6TJk30mYWozDjw4GdMPjURqcoU2JvbY02njSx0iIj0hGt2iAwoW5WNeRe/ROifwQAAd2dPbOwSChfbaiInIyIyXSx2iAxEqVai10/v4feE6wCAz5pPxkyP2TxJIBGRnhWq2Pnwww958U+iYlLIFOhYvROi0x5jbaeN6FSjq9iRiIjKhEIVO0uWLNF3DiKTlKXKQkpOMpytKwMAprl/gU8aj0Yla2eRkxERlR08qSCRntxPuof3wzph2KGByFHnAADkUjkLHSIiA2OxQ6QHP97djc4/tsPtF3/iafoTPEp5KHYkIqIyiwuUiUpQZm4mZp2bil2R2wEAbau2R1DnYM7mEBGJiMUOUQmJTLwDv6MjcDcpElKJFFNbzcTkltMgk8rEjkZEVKax2CEqITPPBuBuUiQqWTljQ5cQtKnaVuxIREQErtkhKjErO65Dz9p9cPLjCyx0iIhKERY7REV06/mf2PzHBu3XNcu5IuS973jZByKiUoa7sYh0JAgCtt3+Fl+enw6lWok6DvXQoZqP2LGIiOgNWOwQ6SBNmYqppyfhp/thAIDO1bvinQpNRU5FRERvw2KHqJBuJtyAb/gIPEp5CLlUji8952Fcs
4mQSrg3mIioNGOxQ1QI225/i1lnp0KpUcLFpho2dg2Fu7On2LGIiKgQWOwQFYJCqoBSo0S3mt2xyicIDhaOYkciIqJCYrFD9AY56hyYy8wBAAPcBqOilRM6VusEiUQicjIiItIFFxsQ/YMgCNh4Yx3a7HLHi6wX2naf6p1Z6BARGSEWO0SvScpOxIjDg/DVhVl4nPoXdkZuEzsSEREVE3djEf2/q7ERGBP+CZ6kR0MhVWCB9xKMauQrdiwiIiomFjtU5mkEDYJ+X4PFEfOh0qjgWq4WNnfdincq8vw5RESmgMUOlXnrfl+NhZfmAAD61u2P5e1XwUZhK3IqIiIqKVyzQ2XeiIaj4ObYAIEdVmN95xAWOkREJoYzO1TmaAQNfnl4AD1q9YJEIoGdeTmc/PgC5FK+HYiITBFndqhMSchMwIADH+LTo8Ow5dZmbTsLHSIi08Xf8FRmnH96FmOPfYr4zDhYyi1hLbcWOxIRERkAix0yeWqNGv/99RsEXvsaGkEDN8cGCO66FfUd3cSORkREBsBih0xaXEYsxh/3w7mnZwAAg92GYXHbZbAysxI5GRERGQqLHTJpj1If4WLMeVjJrbGs/Qp8VH+g2JGIiMjARF+grNFosHr1arRt2xbNmjWDn58foqOj39j/3r17GD16NDw9PeHl5QV/f3/ExMQYMDEZk9aVvfDfDmtw/KOzLHSIiMoo0YudoKAg7Ny5EwsXLsTu3buh0Wjg6+sLpVKZr29SUhJGjRoFCwsLbNu2DcHBwUhMTISvry9ycnJESE+lzbP0GAw62A//S7yrbRvUYCjqONQVMRUREYlJ1GJHqVQiNDQU/v7+6NChA9zc3LBixQrExsYiPDw8X//jx48jMzMT33zzDerVq4fGjRtj2bJlePDgAX777TcRngGVJsf+CofPD21w4vExTD49EYIgiB2JiIhKAVGLncjISGRkZMDLy0vbZmdnh4YNG+Lq1av5+nt5eSEoKAgWFhbaNqn05VNITU3Vf2AqlXLVuZhxbAYG7O+LF9kv0KRiM6zptAESiUTsaEREVAqIukA5NjYWAFC5cuU87U5OTtr7Xufi4gIXF5c8bZs2bYKFhQXc3d2LlUUuL/m6TyaT5vmfSt6TtGj4HhmJK88iAAB+TcZggfdimMvNRU5meozt9SzX/D2zJ5dJ9fIe1wdjG2djxXE2nNIw1qIWO1lZWQAAhUKRp93c3BwpKSn/+vht27Zh+/btmD17NhwdHYucQyqVwMFBfyeYs7Oz1Nu2y7Jb8bfQfldbJGUnoZx5OYT0CkG/hv3EjmXyjOX1bK5UaW/bO1jBSmFcB58ayzgbO46z4Yg51qK++1/tjlIqlXl2TeXk5MDS8s2DIggCVq1ahfXr12PcuHEYNmxYsXJoNAJSUzOLtY2CyGRS2NlZIjU1C2q1psS3X9Y5yVxQ39ENSrUSewb8iPIyZyQlZYgdy2QZ2+s5S6nW3k5OykSOQiZimsIztnE2Vhxnw9HXWNvZWRZ6tkjUYufV7qv4+HhUr15d2x4fH4/69esX+Jjc3FzMmjULBw8exKxZszBy5MgSyaJS6e/FrlZr9Lr9siQ67TGcrCrBXGYOQIot7+2Eg1U5VHJwQFJSBsfZAIzl9ax67ZeqSq2BSmVca7iMZZyNHcfZcMQca1F3Vrq5ucHGxgYRERHattTUVNy+ffuNa3CmT5+OI0eOIDAwsMQKHTIOBx/sR8fv22DhpTnatvKW5aGQKd7yKCIiKutEndlRKBQYOnQoli9fDkdHR1StWhXLli2Ds7MzunbtCrVajcTERNja2sLCwgJ79+7FoUOHMH36dHh4eCAhIUG7rVd9yPTkqHMw7+KXCLm5CQDwW9yvyFHn/P/sDhER0duJvgzd398f/fv3x+zZszFo0CDIZDKEhITAzMwMz549g7e3Nw4dOgQAOHjwIADgm2++gbe3d55/r/qQaXmY8gAf7O2iLXQmNv8cP/c5zEKHiIgKTfTDE2QyG
aZNm4Zp06blu8/FxQV37/59JtzQ0FBDRiOR/Xx/Lyaf+gzpuWlwtHDE2k4b0bnGe2LHIiIiIyN6sUNUkMTsF5hy2h/puWloXfldbOgSgio2VcWORURERojFDpVKjhblsbLjWvz5/A9Mc/8CcilfqkREVDT8BKFS48e7u1HRygkdqvkAAHrW7oOetfuIG4qIiIweix0SXWZuJmadm4pdkdtRwbICTg+4DCcrJ7FjERGRiWCxQ6K6mxgJ36PDcTcpEhJIMKqxH8pblBc7FhERmRAWOyQKQRCwO3IHZp4LQJYqC05WlbChSwi8q7YTOxoREZkYFjtkcCqNCpNOjseP/9sNAGjv0hHrOgdz1xUREekFix0yOLlUDolEAqlEipkes+HfYgqkEtHPb0lERCaKxQ4ZhCAIyFZnw1L+8mr2X7f7L4Y3/AQelT1FTkZERKaOf06T3qUpUzH22CfwPTocGuHlFW+tzaxZ6BARkUFwZof06mbCDfiGj8CjlIeQS+W4EX8dzSu1FDsWERGVIZzZIb0QBAEhNzfh/bBOeJTyEC421fBzn8MsdIiIyOA4s0MlLiUnGVNO++PAg30AgG41u2OVTxAcLBzFDUZERGUSix0qcZ8cHY5zT07DTGqGOV4LMLrJeEgkErFjERFRGcVih0rcbM+5mJD+FOs6beJuKyIiEh3X7FCxJWUn4kRUuPbr5pVa4tzAKyx0iIioVGCxQ8VyNTYCnX5oi5FHhuDm8z+07TKpTMRUREREf2OxQ0WiETRYe30Veu97H0/So1HZuorYkYiIiArENTuksxdZL+B/ciyORR0FAPSp0xeBHVbDVmEncjIiIqL8WOyQTi4/u4Qx4aPwLCMG5jJz/Mf7GwxrOJJHWxERUanFYod0cunpeTzLiEEd+7oI7roVjSo0FjsSERHRW7HYIZ34t5gCM5kCIxt/ChszG7HjEBER/SsuUKa3uvD0HD4+0AdZqiwAL4+ymth8EgsdIiIyGix2qEBqjRrLri5Bv/09cTr6JNb8tkLsSEREREXC3ViUT1xmHMYf88W5p2cAAIPdhmFi88/FDUVERFRELHYojzPRpzDuuC+eZyXASm6Nb9r/Fx/XHyR2LCIioiJjsUNaO+9sw+RTEyFAQAPHRtj83lbUdagndiwiIqJiYbFDWu1dOsLe3B49a3+Ihd5LYCm3FDsSERFRsbHYKePuJf1PO3tT1dYFZwddQSWrSiKnIiIiKjk8GquMylXnYsGlOfDe5Y4jjw5p21noEBGRqWGxUwY9SYtG733vY+31lRAg4Ne4q2JHIiIi0hvuxipjjjw6BP+TY5Gckww7RTms6LgWPWv3FjsWERGR3rDYKSOUaiUWXp6LjTfWAQCaO7XAxi5bULOcq8jJiIiI9Iu7scqI80/PaAudMU3G48CH4Sx0iIioTODMThnhU70LPms+Ga2cPfC+6wdixyEiIjIYzuyYqBx1DhZdmoe4zDht21de81noEBFRmcOZHRP0MOUBRoePwh8Jv+N6wm/Y0/NnSCQSsWMRERGJgsWOidl3LwxTTvsjPTcNjhaOGNtkPAsdIiIq01jsmIgsVRa+Oj8L390OBQB4VvbCxi6hqGJTVeRkRERE4mKxYwKepEVj6KEBuP3iT0ggwaQWAZju8QXkUv54iYiI+GloAuwtHJCrVqKCZQWs6xSMjtU7iR2JiIio1GCxY6SyVFkwl5lDKpHCxswG376/E7YKWzhbVxY7GhERUanCQ8+N0N3ESLy3pwM23gjSttV1qMdCh4iIqAAsdozM7sgd6LqnPSIT72DTH0HIUmWJHYmIiKhU424sI5Gem46ZZwPww91dAID2Lh2xrnMwLOWWIicjIiIq3VjsGIFbz//E6PCRuJf8P0glUsxw/xKTWgZAKuHEHBER0b9hsVPKJWcnode+bkhTpsLZujI2dgmFV5U2YsciIiIyGix2Sjl7CwdMc5+JM9GnsKbTRlSwrCB2JCIiIqPCYqcUuplwAzKpH
A3LNwIAjGkyAaObjOduKyIioiLgp2cpIggCQv8MxvthnfDp0WFIV6YBACQSCQsdIiKiIuLMTimRmpOCyac/w4EH+wAAdezrQqVRiRuKiIjIBLDYKQWux/0Kv2Oj8Dj1L8ilcszxWoAxTSbwauVEREQlgMWOiARBQPAf6zH/0lfI1eSium0NbOq6BS0qtRI7GhERkclgsSMiAQLCo44iV5OL7q49scpnHcqZ24sdi4iIyKSw2BGRVCJFUOdgHH50EMMbjuJuKyIiIj3gIT76olZDfv4ssGvXy//VamgEDdZdX41Z56ZquzlZOWFEo09Y6BCVNLVae9Ps0oU8XxNR2SJ6saPRaLB69Wq0bdsWzZo1g5+fH6Kjo9/YPykpCQEBAXB3d4eHhwfmz5+PrKzSdTFMxcH9cGzZCLa9ugODB8O2V3do3nXDiC0dMP/SbITc3ITLzy6JHZPIZCkO7oeD999r38oN6gfHlo2gOLhfxFREJBbRi52goCDs3LkTCxcuxO7du6HRaODr6wulUllgf39/f0RFReHbb7/FqlWrcObMGcybN8+wod9CcXA/7D4dBmlMjLbtfHWgVZ84HM3+HeYww7L2K+Hp3FrElESmS/sefPYsT7v02TPYfTqMBQ9RGSTqmh2lUonQ0FBMnToVHTp0AACsWLECbdu2RXh4OHr06JGn//Xr13HlyhUcOnQItWvXBgAsWLAAvr6+mDJlCipVqmTop5CXWg2b2dMBQYAEgFoCLGonw8L2aqilQN0XEnx7riJcPhmKbJVG3KwmRK4RYK5UIUuphkrNcdUXoxhntRoW82YjU65AlplFnrskggBBIoHN7BlIfP8DQCYTKSQRGZqoxU5kZCQyMjLg5eWlbbOzs0PDhg1x9erVfMXOtWvXULFiRW2hAwAeHh6QSCT49ddf0b179yJnkcuLP8klv3wesv+f0REA1JzSDE9sfwcAWKs6IttqPAa/Zwmsu1zs70VEbzBo7RvvkggCZDFPYXH1ElTe7QwYqvBkMmme/0k/OM6GUxrGWtRiJzY2FgBQuXLlPO1OTk7a+14XFxeXr69CoYC9vT2e/WPKWhdSqQQODtZFfrxWenKeL6ulNcVTmztwzB0Da3UXSMBFyESG1urJLVjm5uRps01PBkriPa9HdnaWYkcoEzjOhiPmWIta7LxaWKxQKPK0m5ubIyUlpcD+/+z7qn9OTk6+9sLSaASkpmYW+fGvyG3sYfv/tyUALmzaiqhyCjhlbgSwUdsv7Ye9UHt5F/v70UtSmRR2dhZITc2GprTuXjEBxjDOskvnYftx3zxtlrk5+f7MSLOxhyopw3DBdCCTSWFnZ4nU1CyoS+k4mwKOs+Hoa6zt7CwLPVskarFjYfFyn7pSqdTeBoCcnBxYWuavAC0sLApcuJyTkwMrK6tiZVGVwBoalbsXrKpUgfTZM0j+f91OzZS/8woSCTSVqyCj9buQSjnLU1LkMgmsFHLkyCRQCRxXfTGKcW79Lswrlte+B//p1Xsw290LKOXr5tRqTYn8XqK34zgbjphjLerOyle7pOLj4/O0x8fHF7jY2NnZOV9fpVKJ5ORkODk56S9oYclkSF/0DYCXv1Rf9+rr9EVfc2Ekkb7wPUhEBRC12HFzc4ONjQ0iIiK0bampqbh9+zbc3d3z9Xd3d0dsbCyioqK0bVeuXAEAtGzZUv+BC0HZoxdSQ7ZB84+1RZrKVZAasg3KHr1ESkZUNvA9SET/JOpuLIVCgaFDh2L58uVwdHRE1apVsWzZMjg7O6Nr165Qq9VITEyEra0tLCws0LRpU7Ro0QKTJ0/GvHnzkJmZiTlz5qBPnz7iH3b+GmWPXkh8/wNYXL0E2/RkpNnYv5w251+TRAbx6j1odvkipHGx0FRyRm7rd/keJCqjRL82lr+/P1QqFWbPno3s7Gy4u7sjJCQEZmZmePLkCTp16oQlS5agb9++kEgkWLt2LebPn48RI0bA3Nwc3bp1w6xZs8R+GvnJZ
C8PbXWwfrkQkvuEiQxLJkNum7ZipyCiUkAiCAWs4itj1GoNEhNL/sgMuVwKBwdrJCVlcAGcHnGcDYPjbBgcZ8PgOBuOvsba0dG60Edj8WxKREREZNJY7BAREZFJY7FDREREJo3FDhEREZk0FjtERERk0ljsEBERkUljsUNEREQmjcUOERERmTSeVBCAIAjQaPQzDDKZtEQvaU8F4zgbBsfZMDjOhsFxNhx9jLVUKoHkHxf8fRMWO0RERGTSuBuLiIiITBqLHSIiIjJpLHaIiIjIpLHYISIiIpPGYoeIiIhMGosdIiIiMmksdoiIiMiksdghIiIik8Zih4iIiEwaix0iIiIyaSx2iIiIyKSx2CEiIiKTxmKHiIiITBqLnWLQaDRYvXo12rZti2bNmsHPzw/R0dFv7J+UlISAgAC4u7vDw8MD8+fPR1ZWlgETGyddx/nevXsYPXo0PD094eXlBX9/f8TExBgwsXHSdZxft3//ftSvXx9PnjzRc0rjp+s45+bmIjAwUNt/6NChuHPnjgETGyddx/nFixcICAhA69at4enpicmTJyMuLs6AiU3Dxo0bMWzYsLf2EeOzkMVOMQQFBWHnzp1YuHAhdu/eDY1GA19fXyiVygL7+/v7IyoqCt9++y1WrVqFM2fOYN68eYYNbYR0GeekpCSMGjUKFhYW2LZtG4KDg5GYmAhfX1/k5OSIkN546Pp6fuXp06dYsGCBgVIaP13Hed68edi7dy8WL16MsLAwODo6ws/PD2lpaQZOblx0HefPP/8cMTEx2LJlC7Zs2YKYmBhMmDDBwKmN244dO7By5cp/7SfKZ6FARZKTkyM0b95c2LFjh7YtJSVFaNKkiXDgwIF8/X/77TehXr16wv3797Vt586dE+rXry/ExsYaJLMx0nWcf/jhB6F58+ZCVlaWti0mJkaoV6+ecPHiRYNkNka6jvMrarVaGDRokDB8+HChXr16QnR0tCHiGi1dx/nx48dC/fr1hVOnTuXp37FjR76e30LXcU5JSRHq1asnnDhxQtt2/PhxoV69ekJSUpIhIhu12NhYYcyYMUKzZs2Ebt26CUOHDn1jX7E+CzmzU0SRkZHIyMiAl5eXts3Ozg4NGzbE1atX8/W/du0aKlasiNq1a2vbPDw8IJFI8OuvvxokszHSdZy9vLwQFBQECwsLbZtU+vJlnpqaqv/ARkrXcX5lw4YNyM3NxZgxYwwR0+jpOs4XLlyAra0t2rVrl6f/yZMn82yD8tJ1nC0sLGBtbY19+/YhPT0d6enp+Pnnn+Hq6go7OztDRjdKt27dgpmZGfbv34+mTZu+ta9Yn4VyvW3ZxMXGxgIAKleunKfdyclJe9/r4uLi8vVVKBSwt7fHs2fP9BfUyOk6zi4uLnBxccnTtmnTJlhYWMDd3V1/QY2cruMMAH/88QdCQ0OxZ88erm0oJF3H+dGjR6hWrRrCw8OxadMmxMXFoWHDhpg5c2aeDwvKS9dxVigUWLp0KebMmYNWrVpBIpHAyckJ27dv1/6xRG/m4+MDHx+fQvUV67OQP8UierWYSqFQ5Gk3NzcvcG1IVlZWvr5v608v6TrO/7Rt2zZs374dU6dOhaOjo14ymgJdxzkzMxNTp07F1KlTUbNmTUNENAm6jnN6ejqioqIQFBSEKVOmYP369ZDL5Rg8eDBevHhhkMzGSNdxFgQBd+7cQfPmzbFjxw5s3boVVapUwfjx45Genm6QzGWFWJ+FLHaK6NVukn8udsvJyYGlpWWB/QtaGJeTkwMrKyv9hDQBuo7zK4IgYOXKlVi0aBHGjRv3r0cHlHW6jvOiRYvg6uqKgQMHGiSfqdB1nOVyOdLT07FixQp4e3ujSZMmWLFiBQDgp59+0n9gI6XrOB8+fBjbt2/HsmXL0LJlS3h4eGDDhg14+vQp9uzZY5DMZYVYn4Usdoro1TRcfHx8nvb4+HhUqlQpX39nZ+d8fZVKJZKTk+Hk5KS/oEZO13EGXh6qO23aN
GzYsAGzZs3C559/ru+YRk/XcQ4LC8PFixfRvHlzNG/eHH5+fgCAHj16YMOGDfoPbKSK8ntDLpfn2WVlYWGBatWq8TD/t9B1nK9duwZXV1fY2Nho28qVKwdXV1dERUXpN2wZI9ZnIYudInJzc4ONjQ0iIiK0bampqbh9+3aBa0Pc3d0RGxub541z5coVAEDLli31H9hI6TrOADB9+nQcOXIEgYGBGDlypIGSGjddxzk8PBwHDx7Evn37sG/fPixatAjAy/VRnO15s6L83lCpVLh586a2LTs7G9HR0ahRo4ZBMhsjXcfZ2dkZUVFReXajZGZm4smTJ9xNW8LE+izkAuUiUigUGDp0KJYvXw5HR0dUrVoVy5Ytg7OzM7p27Qq1Wo3ExETY2trCwsICTZs2RYsWLTB58mTMmzcPmZmZmDNnDvr06fPGGQrSfZz37t2LQ4cOYfr06fDw8EBCQoJ2W6/6UH66jvM/P2hfLfqsUqUK7O3tRXgGxkHXcW7VqhXeffddzJgxAwsWLIC9vT1Wr14NmUyG3r17i/10Si1dx7lPnz4ICQnB559/jkmTJgEAVq5cCXNzc/Tt21fkZ2PcSs1nod4Oai8DVCqV8M033witW7cWmjVrJvj5+WnPMxIdHS3Uq1dPCAsL0/Z//vy58NlnnwnNmjUTPD09hblz5wrZ2dlixTcauozzqFGjhHr16hX47/WfBeWn6+v5dZcvX+Z5dgpJ13FOS0sT5s6dK3h6egpNmzYVRo0aJdy7d0+s+EZD13G+f/++MGbMGMHDw0No3bq1MHHiRL6ei2DGjBl5zrNTWj4LJYIgCPorpYiIiIjExTU7REREZNJY7BAREZFJY7FDREREJo3FDhEREZk0FjtERERk0ljsEBERkUljsUNEREQmjcUOEZV5pnS6MVN6LkQlhcUOkYmYOXMm6tev/8Z/R44c0WlbPj4+ekz79/f5Z85GjRrB29sb06ZNw7Nnz0r0+z158gT169fH3r17Aby8XtL06dNx7do1bZ9hw4Zh2LBhJfp9C/Kmn1fz5s3Rs2dPbNmyRedt3rt3D4MGDdJDWiLjxmtjEZmQihUrYu3atQXeV1ovaPjPzCqVCo8ePcLy5ctx/fp1HDx4sMSuaebk5ITvv/8e1atXBwDcuXMHP//8M/r166ftM3fu3BL5XoXxz+cuCAKeP3+O3bt3Y+nSpTA3N8fgwYMLvb0jR47g+vXr+ohKZNRY7BCZEIVCgWbNmokdQycFZW7VqhXMzMwwY8YMnDhxAh988IHevtc/1alTp0S+V2G8KU+HDh3QuXNn7N27V6dih4gKxt1YRGWMWq3Gpk2b0KNHDzRp0gTNmjXDwIEDcfny5Tc+5s8//8SIESPQsmVLNG/eHCNHjsTvv/+ep8+1a9cwdOhQNG3aFB4eHpgxYwYSExOLnPOdd94BADx9+lTbduHCBQwePBgtW7aEp6cnAgIC8uzq0mg0WLFiBXx8fNC4cWP4+PggMDAQubm5APLuxoqIiMDw4cMBAMOHD9fuunp9N9Ynn3xS4FWvx48fj169euntuZuZmcHS0hISiUTblp2djcDAQHTt2hWNGzdGixYtMGrUKNy5cwcAsGbNGu0sUf369bFmzRrtmGzatAldunRB48aN8d5772Hbtm1FzkZkjFjsEJkYlUqV79/ri1aXL1+OoKAgDBgwAJs3b8bChQuRnJyMSZMmISsrK9/20tPT4evrCwcHB6xZswYrVqxAVlYWPv30U6SlpQEArl69ipEjR8LCwgIrV67EF198gStXrmD48OHIzs4u0vN49OgRAGh3Oe3btw+ffPIJKleujP/+97+YNWsWrl+/jgEDBuDFixcAgODgYOzatQsTJkxAaGgoBg0ahJCQEKxfvz7f9hs1aoQ5c+YAAObMmVPg7qtevXrh1q1biIqK0ralpqbi7Nmz6N27d4k899d/TkqlEk+ePMGSJUvw6NEj9OnTR9tv+vTpCAsLw+jRoxEaGopZs
2bh3r17CAgIgCAI+Oijj9C/f38AwPfff4+PPvoIADBv3jysXr0avXr1woYNG9CtWzcsXrwY69at+9dsRKaCu7GITMjTp0/RqFGjfO0BAQEYPXo0ACA+Ph6TJ0/OswjX3Nwcn332Ge7evZtvt8r9+/eRlJSE4cOHo0WLFgCAWrVq4fvvv0dGRgZsbW0RGBgIV1dXbNy4ETKZDADQtGlTfPDBBwgLC8OQIUPemlulUmlvp6en4+bNm1iyZAlcXFzQoUMHaDQaLF++HN7e3ggMDNT2bdGiBbp3746QkBBMnz4dV65cQePGjbVrcDw8PGBpaQlbW9t839PGxka7y6pOnToF7r7q2rUr5s+fj4MHD2LChAkAgPDwcKjVavTo0QMAivXc3/TzqlmzJubOnatdbKxUKpGRkYHZs2eje/fu2ueWnp6OpUuX4vnz53B2doazszMAaH+Gjx49wg8//IApU6Zof/7e3t6QSCTYuHEjBg8eDAcHhzfmIzIVLHaITEjFihULnMV49SEIQFssJCYm4uHDh4iKisKpU6cAvPxQ/ae6devC0dERY8eORbdu3dC2bVu0adMG06ZNAwBkZWXhxo0b+PTTTyEIgrZwqVatGmrXro0LFy4U6QO/adOmWLBgASwsLPDgwQMkJCQgICAgT5/q1aujefPmuHLlCgDA09MTgYGBGDx4MHx8fNChQwcMHTr0rWP2NlZWVujcuTMOHTqkLXZ++eUXeHl5oVKlSsV+7q//vFJTUxEUFITHjx9j6dKlaN68ubafQqFASEgIACAuLg6PHj3CX3/99dafGwBcvnwZgiDAx8cnT0Hp4+OD9evX49dff0Xnzp2LPD5ExoLFDpEJUSgU2rUub3Lz5k3Mnz8fN2/ehKWlJerUqYMqVaoAKPgcLdbW1tixYwfWr1+Pw4cP4/vvv4eFhQV69+6N2bNnIzU1FRqNBsHBwQgODs73eHNz87fm+WeBplAo4OzsjHLlymnbkpOTAQAVKlTI9/gKFSrg9u3bAABfX19YW1sjLCwMy5cvx7Jly1C3bl3Mnj0brVu3fmuON+nduzf279+PyMhIVKhQAREREVi8eDEAFPu5//Pn1aJFC/Tr1w9+fn748ccf4erqqr3v3LlzWLx4MR4+fAhra2u4ubnBysoKwJvPrfNq3N60wDsuLu6t+YhMBYsdojLk1fqb+vXr45dffkGtWrUglUpx5swZHD169I2Pq1WrFpYtWwa1Wo0//vgDP//8M3bt2oXq1atj4MCBkEgkGDlyZIEfqpaWlm/NVJgCzd7eHgDw/PnzfPclJCRod8VIpVIMGTIEQ4YMwYsXL3DmzBls2LABn332GS5cuPDW7/EmXl5eqFixIg4fPoyKFSvC3NwcXbt2BfCyECzOcy+o/9KlSzFgwADMmjULu3btgkQiwePHjzFhwgR07twZGzduRLVq1SCRSLBjxw6cO3fujduzs7MDAGzduhXW1tb57n9V5BKZOi5QJipDHj58iOTkZAwfPhx16tSBVPryV8DZs2cBvDxy55+OHDmC1q1bIyEhATKZDM2bN8e8efNgZ2eHmJgY2NjYoGHDhnj48CHeeecd7b+6detizZo1iIiIKHZuV1dXVKxYEQcPHszTHh0djd9//127lmjgwIFYtGgRAKB8+fLo27cvhgwZgtTUVKSnp+fb7qs1Nm8jk8nQs2dPnDp1CkeOHEHnzp21Myr6eO5NmjTBxx9/jOvXr2Pfvn0AXh4Nl5OTg9GjR6N69erao7ReFTqvZnZe/TxfadWqFQAgKSkpT77ExESsWrVKO/NDZOo4s0NUhri6usLGxgYbNmyAXC6HXC7H0aNHsWfPHgAo8GisFi1aQKPRYMKECRg9ejSsra1x+PBhpKWlaWc4Xi2ADQgIQK9evaBWqxEaGoobN25g/Pjxxc4tlUoxZcoUzJo1S/s9kpKSsHbtWpQrVw6jRo0CALi7uyM0NBQVKlRA8+bNERcXhy1btsDDwwOOjo7Iz
MzMs91XC5dPnz6NcuXKwc3NrcDv37t3b4SGhkIqlebbXaWP5/7555/j8OHDCAwMRJcuXdCoUSPI5XIsW7YMn3zyCZRKJfbu3YvTp08DgPZ5vZrJOXjwIJo2bYr69eujV69e+Oqrr/D06VM0btwYjx49wooVK+Di4lJqTzRJVNI4s0NUhtja2iIoKAiCIGDSpEmYPn06YmJisH37dlhbW+e5bMIrTk5O2Lx5M2xtbfHll19izJgxuHXrFtasWaNdB+Pt7Y2QkBDExsbC398f06dPh0wmw5YtW0rsJId9+/bF6tWr8ejRI0yYMEG7iHfPnj2oWLEiAGDSpEkYO3YswsLC4Ovri6VLl8Lb2xurV68ucJt169ZFjx49sGPHDkydOvWN39vNzQ316tVD+fLl4eXllec+fTx3BwcHTJo0CQkJCVi3bh1q1KiBwMBAxMXFYdy4cdpD5rdt2waJRKL9uXXt2hXvvPMOZs6cqV3QvGTJEowaNQq7d++Gr68vNmzYgO7duyM0NLRQM1tEpkAi8KpxREREZMI4s0NEREQmjcUOERERmTQWO0RERGTSWOwQERGRSWOxQ0RERCaNxQ4RERGZNBY7REREZNJY7BAREZFJY7FDREREJo3FDhEREZk0FjtERERk0v4PDD/utfWv110AAAAASUVORK5CYII=", + "text/plain": [ + "
" + ] + }, + "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "zephyr.evaluate(X_test, y_test)" + "\n", + "_, roc_fig = res[\"zephyr_ml.primitives.postprocessing.roc_auc_score_and_curve\"]\n", + "\n", + "roc_fig\n" ] } ], "metadata": { - "interpreter": { - "hash": "2d6fabd7bf745a21519616ebdce3b2479184204dadf576aa19f086ff78438203" - }, "kernelspec": { - "display_name": "Python 3 (ipykernel)", + "display_name": "venv", "language": "python", "name": "python3" }, @@ -202,7 +764,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.16" + "version": "3.8.0" } }, "nbformat": 4, diff --git a/notebooks/visualization.ipynb b/notebooks/visualization.ipynb index 4a6091d..da2627b 100644 --- a/notebooks/visualization.ipynb +++ b/notebooks/visualization.ipynb @@ -7,7 +7,7 @@ "source": [ "# Visualization\n", "\n", - "In this tutorial, we will show you how to use Zephyr class to obtain intermediate results of the pipeline for visualization purposes. To know more about pipelines and Zephyr class please refer to the modeling notebook. We also used a demo feature matrix, to know how you can create features, please refer to feature_engineering notebook.\n", + "In this tutorial, we will show you how to use Zephyr class to obtain intermediate results of the pipeline for visualization purposes during the fitting stage. To know more about pipelines and Zephyr class please refer to the modeling notebook. We also used a demo feature matrix, to know how you can create features, please refer to feature_engineering notebook.\n", "\n", "## Load the Feature Matrix\n", "\n", @@ -16,7 +16,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 10, "id": "d6f954db", "metadata": {}, "outputs": [], @@ -33,18 +33,91 @@ "source": [ "## Prepare data\n", "\n", - "Prepare the data for training by creating a `y` variable to hold the labels, imputing missing values, and normlizing the data." 
+ "Prepare the data for training by creating a `y` variable to hold the labels, imputing missing values, and normlizing the data. We then initialize a `Zephyr` instance, set our data, and split it into training and testing." ] }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 11, "id": "23ec49dd", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/raymondpan/zephyr/Zephyr-repo/venv/lib/python3.8/site-packages/sklearn/impute/_base.py:555: UserWarning: Skipping features without any observed values: [ 1 2 6 7 9 10 15 16 17 18]. At least one non-missing value is needed for imputation with strategy='mean'.\n", + " warnings.warn(\n", + "Performing set_feature_matrix. You are skipping the following steps:\n", + "0. generate_entityset or set_entityset\n", + "1. generate_label_times or set_label_times\n" + ] + }, + { + "data": { + "text/plain": [ + "( 0 1 2 3 4 5 6 7 8 9 ... \\\n", + " 10 -0.288675 0.0 0.0 -0.463185 -0.463185 -0.463185 0.0 0.0 0.0 0.0 ... \n", + " 5 -0.288675 0.0 0.0 -0.521570 -0.521570 -0.521570 0.0 0.0 0.0 0.0 ... \n", + " 3 -0.288675 0.0 0.0 -1.174466 -1.174466 -1.174466 0.0 0.0 0.0 0.0 ... \n", + " 11 -0.288675 0.0 0.0 2.064680 2.064680 2.064680 0.0 0.0 0.0 0.0 ... \n", + " 1 -0.288675 0.0 0.0 0.298409 0.298409 0.298409 0.0 0.0 0.0 0.0 ... \n", + " 9 -0.288675 0.0 0.0 0.658556 0.658556 0.658556 0.0 0.0 0.0 0.0 ... \n", + " 2 -0.288675 0.0 0.0 -0.527579 -0.527579 -0.527579 0.0 0.0 0.0 0.0 ... \n", + " 8 -0.288675 0.0 0.0 -0.650653 -0.650653 -0.650653 0.0 0.0 0.0 0.0 ... \n", + " 7 -0.288675 0.0 0.0 0.197664 0.197664 0.197664 0.0 0.0 0.0 0.0 ... \n", + " 4 -0.288675 0.0 0.0 0.002832 0.002832 0.002832 0.0 0.0 0.0 0.0 ... 
\n", + " \n", + " 80 81 82 83 84 85 86 87 88 89 \n", + " 10 0.0 3.464102 -0.288675 -0.288675 0.0 0.0 -1.080123 1.080123 0.0 0.0 \n", + " 5 0.0 -0.288675 -0.288675 -0.288675 0.0 0.0 0.925820 -0.925820 0.0 0.0 \n", + " 3 0.0 -0.288675 -0.288675 -0.288675 0.0 0.0 0.925820 -0.925820 0.0 0.0 \n", + " 11 0.0 -0.288675 3.464102 -0.288675 0.0 0.0 -1.080123 1.080123 0.0 0.0 \n", + " 1 0.0 -0.288675 -0.288675 -0.288675 0.0 0.0 0.925820 -0.925820 0.0 0.0 \n", + " 9 0.0 -0.288675 -0.288675 -0.288675 0.0 0.0 -1.080123 1.080123 0.0 0.0 \n", + " 2 0.0 -0.288675 -0.288675 -0.288675 0.0 0.0 0.925820 -0.925820 0.0 0.0 \n", + " 8 0.0 -0.288675 -0.288675 -0.288675 0.0 0.0 -1.080123 1.080123 0.0 0.0 \n", + " 7 0.0 -0.288675 -0.288675 -0.288675 0.0 0.0 -1.080123 1.080123 0.0 0.0 \n", + " 4 0.0 -0.288675 -0.288675 -0.288675 0.0 0.0 0.925820 -0.925820 0.0 0.0 \n", + " \n", + " [10 rows x 90 columns],\n", + " 0 1 2 3 4 5 6 7 8 9 ... \\\n", + " 6 -0.288675 0.0 0.0 1.946791 1.946791 1.946791 0.0 0.0 0.0 0.0 ... \n", + " 12 -0.288675 0.0 0.0 -0.650711 -0.650711 -0.650711 0.0 0.0 0.0 0.0 ... \n", + " 0 3.464102 0.0 0.0 -1.180770 -1.180770 -1.180770 0.0 0.0 0.0 0.0 ... 
\n", + " \n", + " 80 81 82 83 84 85 86 87 88 89 \n", + " 6 0.0 -0.288675 -0.288675 -0.288675 0.0 0.0 0.925820 -0.925820 0.0 0.0 \n", + " 12 0.0 -0.288675 -0.288675 3.464102 0.0 0.0 -1.080123 1.080123 0.0 0.0 \n", + " 0 0.0 -0.288675 -0.288675 -0.288675 0.0 0.0 0.925820 -0.925820 0.0 0.0 \n", + " \n", + " [3 rows x 90 columns],\n", + " 10 False\n", + " 5 False\n", + " 3 False\n", + " 11 False\n", + " 1 False\n", + " 9 True\n", + " 2 True\n", + " 8 False\n", + " 7 False\n", + " 4 True\n", + " Name: label, dtype: bool,\n", + " 6 False\n", + " 12 False\n", + " 0 True\n", + " Name: label, dtype: bool)" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "from sklearn.impute import SimpleImputer\n", "from sklearn.preprocessing import StandardScaler\n", + "from zephyr_ml import Zephyr\n", "\n", "# pop the target labels\n", "y = list(feature_matrix.pop('label'))\n", @@ -56,7 +129,12 @@ "\n", "# normalize the data\n", "scaler = StandardScaler()\n", - "X = scaler.fit_transform(X)" + "X = pd.DataFrame(scaler.fit_transform(X))\n", + "\n", + "zephyr = Zephyr()\n", + "zephyr.set_feature_matrix(feature_matrix=X, labels = y)\n", + "zephyr.generate_train_test_split(test_size=0.2, random_state=33)\n", + "\n" ] }, { @@ -78,25 +156,13 @@ " \"variable\": \"zephyr_ml.primitives.postprocessing.FindThreshold#1.threshold\"\n", " },\n", " {\n", - " \"name\": \"predictions\",\n", - " \"variable\": \"zephyr_ml.primitives.postprocessing.FindThreshold#1.predictions\"\n", + " \"name\": \"scores\",\n", + " \"variable\": \"zephyr_ml.primitives.postprocessing.FindThreshold#1.scores\"\n", " }\n", "]\n", "```\n", "\n", - "Where we have a _name_ and a _variable_ defining the intermediate outputs." 
- ] - }, - { - "cell_type": "code", - "execution_count": 3, - "id": "24511f3c", - "metadata": {}, - "outputs": [], - "source": [ - "from zephyr_ml import Zephyr\n", - "\n", - "zephyr = Zephyr('xgb_classifier')" + "Where we have a _name_ and a _variable_ defining the intermediate outputs. " ] }, { @@ -106,22 +172,22 @@ "source": [ "## Visualize\n", "\n", - "When training the pipeline using the `fit` function, you can specify `zephyr.fit(.., visual=True)` to indicate you are interested in obtaining the intermediate outputs." + "When training the pipeline using the `fit` function, you can specify `zephyr.fit_pipeline(.., visual=True)` to indicate you are interested in obtaining the intermediate outputs." ] }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 12, "id": "683393df", "metadata": {}, "outputs": [], "source": [ - "output = zephyr.fit(X, y, visual=True)" + "output = zephyr.fit_pipeline(pipeline = \"xgb_classifier\", visual=True)" ] }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 13, "id": "13221b40", "metadata": {}, "outputs": [ @@ -131,7 +197,7 @@ "dict_keys(['threshold', 'scores'])" ] }, - "execution_count": 5, + "execution_count": 13, "metadata": {}, "output_type": "execute_result" } @@ -150,13 +216,13 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 14, "id": "c7a88d5b", "metadata": {}, "outputs": [ { "data": { - "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAjsAAAHJCAYAAABjZPjUAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjYuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/P9b71AAAACXBIWXMAAA9hAAAPYQGoP6dpAABWmElEQVR4nO3de1hU1f4/8PdcGAaBESgRFEmyBDERU1AUFK2oU1qd8ldZaKKgWUbe0DyZec8UJJEwMdG0zM5Rs/T4PcdjmZUVillZplneULmoXEau41x+f8BsmbjIwMzeOL5fz+Mj7NmzWbOYy5vPWnttmclkMoGIiIjIQcmlbgARERGRPTHsEBERkUNj2CEiIiKHxrBDREREDo1hh4iIiBwaww4RERE5NIYdIiIicmgMO0REROTQGHaI2hix1/l0lHVFHeVxSI39SI6IYYeoEaNHj0ZgYKDFv379+mHMmDE4ePCgzX+eTqfDkiVLsHPnzlYfa/To0Rg9enST++Tn52PChAm4cOGCVcd+9dVXMWzYsNY0zyrDhg3Dq6++2ujtWq0WM2fORE5OjrCtOY/fnrZv347AwECcP3++1ccKDAzEqlWrmtzHVo/3X//6F956660b7nf06FGMHj0affr0QWRkJFasWAGdTtfkfYxGI9atW4eYmBiEhITg0UcfxWeffVZvv+3bt2P48OHo1asX7rvvPqSnp0Ov17f4MREBDDtETQoODsbHH3+Mjz/+GJs3b8bSpUvh5OSE8ePH4+TJkzb9WYWFhXj//fdFe2P/9ttvsX//fqvv9+KLLyI9Pd0OLWqZ3377DZ9++imMRqPUTbnprV69GiUlJU3uk5ubi7i4ODg7O+Ptt9/GuHHjsH79eixatKjJ+61cuRKpqakYOXIk1qxZg4EDByIpKQm7du0S9nn//fcxe/Zs3HnnnUhPT8fLL7+M7du3Y+rUqbZ4eHQLU0rdAKK2zM3NDaGhoRbbBg4ciIiICGzfvh2zZs2SpmES8vf3l7oJJKG1a9fC1dUVGRkZUKlUGDJkCNRqNRYuXIgXXngBnTp1qnefyspKbNy4EaNHj8aECRMAABEREfj111+xadMmDB8+HAaDARkZGRg0aBDS0tKE+wYHB2PEiBE4cOAABg0aJNrjJMfCyg6RlVxcXODs7AyZTGaxfe/evXjiiSfQq1cvDBo0CIsWLUJFRYVwe1VVFebNm4fBgwfjnnvuwUMPPYR169YBAM6fP4/77rsPADB79uwmh4mqq6vxzjvv4KGHHkKvXr0QExODzMzMBisb77zzDgYOHIg+ffrgxRdfRG5uLoCaoYLZs2cDAO677z5hmKiqqgopKSmIiYnBPffcg3vvvRdxcXH47bffhGP+dRhr2LBhSEtLw1tvvYWBAwciJCQE48ePx5kzZyzakpOTg9jYWPTu3Rvh4eGYNWsWioqKLPY5fvw44uLi0KdPHwwdOrTBYY66srOzMWbMGADAmDFjLIZyTCYT1q5di+joaISEhODpp5/Gzz//LNy+atUqPPDAA0hPT0d4eDgiIyNRWloKoGY455FHHsE999yD6OhorFq1CgaDQbhvUVERpk+fjkGDBqFXr1547LHHsGPHjnrt++mnn/DMM8+gV69eiI6OxnvvvWdx+9WrV/Hmm2/i/vvvR69evTB8+HBs3bq1ycd88eJFTJ48GX379sWgQYOwfv36Jvc3O378OCZPnowBAwagZ8+eiIqKwqJFi1BVVQWg5vd44cIFfPLJJ00OwX3zzTcYMmQIVCqVsO2hhx6C0WjEN9980+B9VCoVPvroI4wbN85iu5OTE6qrqwEAly9fRklJCaKjoy326d69Ozw9PfHll18263ESNYSVHaImmEwmYVjJZDKhpKQE77//PnQ6HZ588klhv507d2LGjBkYMWIEpkyZggsXLiA1NRV//PEH1q9fD5lMhiVLluCbb77BrFmzcPvtt+Orr77CsmXL4OHhgREjRiA9PR2TJ0/GpEmTEBM
T02h7XnjhBfz444+YPHkygoKCkJ2djbfffhu5ublYuHChsO/hw4dx5coVzJ07FwaDASkpKRgzZgx27tyJ6OhoTJo0CatXr0Z6ejoCAwMBQJj7Mm3aNPj7++Ps2bNYuXIlpk+fjn//+9/1Ap7Zxo0b0bdvX7z55psoLS3F4sWLMWvWLHz88ccAgEOHDiEuLg4DBgzA22+/jdLSUqxcuRJjxozB1q1boVarUVBQgNjYWHTt2hXLly9HWVkZkpOTceXKlUZ/Pz179sTcuXOxYMECzJ07F/3797d4/DqdDq+//jr0ej2WLl2KSZMmYf/+/VAqa976Ll68iP379yM1NRUlJSVo37491qxZg9TUVMTGxmL27Nn47bffsGrVKuTl5WHJkiUAgKSkJFy5cgXz58+Hm5sbPv30U8yaNQs+Pj4YMGCA0IZ58+YhMTERr7zyCv75z39i+fLl6NatG4YOHYqqqio8++yzuHLlChITE9G5c2fs3bsXr732Gi5fvowXXnih3uOtqKhAbGwslEolFi5cCLlcjrS0NJw7dw59+vRptJ8KCwvx3HPPITQ0FEuXLoVKpcJXX32F9evXw9vbGxMmTEB6ejomTJiA4OBgvPjii/D29q53nKqqKly4cAEBAQEW2728vODm5obTp083+PMVCgWCgoIA1DyHr1y5gu3bt+Pbb7/FggULAAAajQZKpRIXL160uG9paSm0Wq0Q1IlagmGHqAmHDh1Cz549622fNm0aunXrBqDmzTs5ORlRUVFITk4W9unatSvGjh2L/fv3Izo6GgcPHsSgQYPwyCOPAAD69++Pdu3a4bbbboNKpUKPHj0A1AwTBQcHN9ier776Ct9++y1WrFghHGfQoEFQq9VCeLj77rsB1HzAZGVlwcfHBwBw55134vHHH8eOHTsQGxsrDEf16NEDfn5+0Ol0KC8vx5w5c/Dwww8DAMLDw1FWVoalS5fi8uXL6NChQ4Pt0mg0yMjIgEKhAACcO3cOq1atQnFxMTw9PZGSkoKAgACsWbNG2Kd379545JFHsG3bNjz33HPYsGEDDAYDMjMz4eXlBQAICAjAU0891ejvx83NDXfddRcA4K677hK+BmqqCZmZmfDw8ABQM5F5zpw5+OOPP4QPXr1ej1mzZqFfv34AaiotGRkZePrppzFnzhwAQGRkJDw8PDBnzhzExcXh7rvvxsGDB/HSSy/h/vvvF/rJw8PDotoB1DxPRo0aBQAIDQ3F//73P3z//fcYOnQotm/fjt9//x1btmwRgkpUVBT0ej0yMjLwzDPPCG03++STT3Dx4kXs2rVLeKy9e/fGAw880GgfAcDvv/+OHj16YOXKlXBzcwNQMxx74MABZGdnCyFHpVLBy8ur3tCt2dWrV4V+/ytXV1eUlZU12Q4A+Pe//43p06cDAKKjo/Hoo48CqKmY/u1vf8MHH3yAu+66Cw888ACuXLmCxYsXQ6FQoLKy8obHJmoMww5RE3r27In58+cDqAk1Wq0WX331FVJTU1FRUYGpU6fi1KlTyM/Px8SJEy0mF4eFhcHNzQ0HDhxAdHQ0+vfvjy1btiA/Px9DhgzBkCFD8NJLL1nVnoMHD0KpVOKhhx6y2P7oo49i5cqVOHjwoBB27r33XiHoADWhpkuXLjh06BBiY2PrHVulUgnDagUFBTh9+jTOnDmDffv2AUCTZ9v06tVLCDEAhJ9bWVkJtVqNn376CePHj7eolHXp0gXdunXDgQMH8Nxzz+Hw4cMIDQ0Vgg5Q80He0ByQ5rjrrrsswoKfnx+A6x/YZuaQCQBHjhxBVVUVhg0bZvG7NA/bHThwAHfffTf69++PVatW4dixY4iKisKQIUManL9lDlFAzYf57bffDq1WC6Dmd9m5c+d6FZlHH30UW7duxU8//YQhQ4ZY3JaTkwN/f3+LUOfr69toODGLjIxEZGQkrl27hj/++ANnz57F77//jqKionqBqik3mgTeWOWvrpCQEHzwwQc4ceIEVq5
cifj4eGzatAkymQzz58+HSqXCnDlz8Nprr0GtViMhIQHl5eVwcXFpdjuJ/ophh6gJrq6u6NWrl8W2yMhIVFRU4L333sOYMWOEs1fmz58vBKO6CgsLAQCvvfYafHx88Nlnn2HhwoVYuHAh+vTpg3nz5gmVhhspLS2Fp6enRbAAIFRc6n6Q33777fXuf9tttwkftg35+uuvsWTJEpw6dQqurq4ICgpCu3btADS9/spfP4jk8prpgEajEVqtFkajEWvXrsXatWvr3dfZ2Vl4bOZA0tBjs5a53Q21qS5XV1fha/Pv0jyJ9q/Mv8vU1FS8++67+L//+z/897//hVwux8CBA7FgwQJ07txZ2L+hfjH3Y2lpaYOPzfx7a+j3ZP79/1WHDh1w+fLlBtsM1DzmFStW4MMPP0RFRQV8fX0REhIi9H1zmSs65eXl9W4rKyuDu7v7DY/h7+8Pf39/4Y+BWbNmIScnB2FhYXB1dcWSJUvw2muv4eLFi+jUqRNcXV2xdetW3HHHHVa1laguhh2iFrjnnnvwr3/9C+fPn4dGowFQM98lPDy83r7t27cHUFM5mTRpEiZNmoSLFy9i3759yMjIEObDNEf79u1RXFwMg8FgEXjMH8J1PwjNk23runTpUqNzO86dOycMzaxZswZdunSBTCbDhx9+iK+//rpZ7WuIq6srZDIZxo4dKwy91WUOBJ6eng1+YN/oVGhbMv8uk5OT0bVr13q3m4OIu7s7kpKSkJSUhFOnTuHzzz9HRkYG5s+fj8zMzGb9rPbt2+Ps2bP1tl+6dAkAGgw1np6eDd7nRn2UmZmJDRs2YP78+YiJiRFCyciRI5vVVjNXV1d07NixXhuuXLmC8vJyYWj3r4qKivDVV18hKioKt912m7DdPFxrfv7u27cPGo0Gffv2FSqUV65cQX5+fqNDu0TNwbOxiFrg559/hkKhQJcuXXDnnXfitttuw/nz59GrVy/hX8eOHZGSkoJjx46hqqoKDz74ILKysgAAnTp1wnPPPYdHHnlEmJD512pNQ8LDw6HX6/Gf//zHYrv5rKW+ffsK2w4fPmxR6fnpp59w4cIFYQKtudJh9ssvv6C6uhoTJkyAv7+/MCRhDjotXVnXzc0NwcHBOHXqlEX/3H333Vi1ahWys7MBAAMGDMCRI0dQUFAg3PePP/644cTU5vRbc/Xu3RtOTk4oKCiwaKtSqcSKFStw/vx5XLhwAUOGDBF+B3feeScSEhIwcODAepNrmxIWFoYLFy7gyJEjFts/++wzODk5ISQkpN59BgwYgPPnz+Po0aPCtqKiIvz4449N/qzDhw/jrrvuwpNPPikEnYKCAvz+++8Wla6/PicaMmjQIHz55ZcWw5r//e9/oVAoLCZn11VVVYVZs2bVO9PswIEDACBMkN+yZQuWLVtmsc/7778PhUKBoUOH3rBtRI1hZYeoCWVlZRYfJDqdDl988QW2bduGp59+WphfMnXqVMydO1d4U9ZqtcjIyEBBQQF69uwJtVqNnj17Ij09HU5OTggMDMTp06fxySef4MEHHwQA4UPou+++Q7du3dC7d+967Rk8eDD69++POXPmoKCgAEFBQTh48CDWrl2Lv//97xZzOYxGIyZMmIAXXngBxcXFSElJQffu3YUJoeYqxv/+9z8MHjwYPXv2hFKpxPLlyzFu3DjodDps375dOOW37mn01po2bRomTJiA6dOn49FHH4XBYEBWVhZ++uknvPjiiwCA559/Hlu3bsX48ePx8ssvw2AwIDU1FU5OTk0e29xvX375Jdq3b9/sIcGGeHp6Ij4+HitXrkRZWRn69++PgoICrFy5EjKZDEFBQXB3d4ePjw8WLVqEsrIy+Pv745dffsH+/fsxceLEZv+sJ554Aps3b8ZLL72ExMRE+Pn5Cc+tyZMnC7+fuh577DFs3LgRkydPxtSpU+Hm5obVq1ffcC5NSEgIMjIykJmZidDQUJw9exZr1qyBTqezmPir0Whw7NgxHDx
4ECEhIVCr1fWOFR8fj3//+9+Ij49HXFwczpw5gxUrVuCpp54S5lfpdDocO3YMPj4+8PHxQadOnfDkk0/inXfegVKpRHBwMHJycpCZmYmRI0cKz9vRo0dj/PjxWLJkCYYNG4bvvvsOa9asQUJCAtd3olZh2CFqwrFjx/D0008L3zs7O8Pf3x9Tp07F+PHjhe3/7//9P7i6uuK9997Dxx9/jHbt2uHee+9FcnIyunTpAgBYsGAB3n77bWRlZeHSpUu47bbbMHLkSLzyyisAaiogcXFx+Pjjj7F//34cOHCg3ge9TCbDmjVrkJaWhg0bNqCoqAh+fn6YNm0a4uLiLPa9//770alTJyQlJUGv12Po0KF47bXXhHka/fv3x8CBA5GSkoLvvvsOmZmZSElJQXp6OiZNmoT27dsjNDQUmzZtwujRo5GTkyP8BW6tyMhIrFu3Dunp6UhMTISTkxN69uyJ9evXC5NrPT098dFHH2Hx4sV49dVX4erqivj4eOzevbvJY999990YPny4MNxWd0XelpgyZQo6dOiAzZs347333kP79u0RERGBadOmCcEqPT0dK1aswMqVK1FcXAxfX19Mnjy50bk+DXFxccGmTZuQkpIihKs777wTixcvbnR4SaVS4f3338eSJUuwePFiyGQyPPXUU+jSpUuTp+hPnDgRxcXF2LhxI9555x34+vriscceE55PWq0WGo0G48aNw5IlSzB+/HisX7/eYoK1Wbdu3ZCVlYVly5YhMTERnp6eGDt2LBITE4V9CgsL8fTTT2Py5Ml4+eWXAdScht+lSxf885//xIULF+Dr64vExESL11FkZCRSUlKwevVqfPzxx+jUqRPmzJkj6aU/yDHITLzqGxERETkwztkhIiIih8awQ0RERA6NYYeIiIgcGsMOEREROTSGHSIiInJoDDtERETk0Bh2iIiIyKFxUUHULINvNNpnuSG5XGa3Y5Ml9rWNGY1Q5J4DABi6+AN1LiXAvhYP+1o87Gvx2KKv5XKZcFmbG2HYAWA0mlBUVP8qvq2lVMrh6ekKrbYCen3Ty7lT67Cv7aC8HB1qL+xYdDoPqL06OPtaPOxr8bCvxWOrvvbycoVC0byww2EsIiIicmgMO0REROTQGHaIiIjIoTHsEBERkUNj2CEiIiKHxrBDREREDo1hh4iIiBwaww4RERE5NIYdIiIicmgMO0REROTQGHaIiIjIoTHsEBERkUNj2CEiIiKHxqueE9ENrdj3J3RqFwCATCaDs7MS1dV6mEwmAIC7sxKjw/ygUTtJ2UwiogYx7BDRDe04mo9KlbrJfW53VeHpezuL1CIiouZj2CGiG3oo2BsdvD0BAHK5DC5qFSqrdDAaTfjudBGO5l1FUeU1iVtJRNQwhh0iuqEnevki6M6OAAClUg5PT1cUF5dDrzfimsGIo3lXUV6tl7iVREQN4wRlIrohjVrR6G2uqpq/mcp0BrGaQ0RkFYYdImqQ3mgUvnZzbrwI7OZcE4RY2SGitophh4gapK26Hl7cVE2EHVZ2iKiNY9ghogaV1Qk7crms0f1cWdkhojaOYYeIGqRtZqXGXNkpZ2WHiNoohh0iatDVquadSm6u7JSxskNEbRTDDhE16GpV88KLefIyww4RtVUMO0TUoKvNDC/mYSydwQSd3niDvYmIxMewQ0QNam5lp53q+ho85TpWd4io7WHYIaIGaaubN+FYIZehnVPtGVmcpExEbRDDDhE1qKy6+de64iRlImrLGHaIqEHaZg5jATz9nIjaNoYdImpQc+fsAKzsEFHbxrBDRA1q7tlYQJ1LRjRzng8RkZgYdoioQVprwo75khE8G4uI2iDJw47RaERaWhqioqIQGhqKhIQE5ObmNrjvqlWrEBgY2OC/2bNni9xyIsdlMpksro11I66s7BBRGyZ52MnIyMDmzZuxcOFCbNmyBUajEfHx8dDpdPX2HTduHL755huLf+PHj0e7du0wduxY8RtP5KCq9UboDKZ
m7+/Kyg4RtWGShh2dToesrCwkJiYiOjoaQUFBSE1NRX5+Pvbs2VNvf1dXV3To0EH4d+nSJWzcuBFz585FYGCgBI+AyDFZcyYWwDk7RNS2SRp2jh8/jvLyckRERAjbNBoNgoODcejQoRvef8GCBejXrx/+/ve/27OZRLcca+brADwbi4jaNqWUPzw/Px8A4Ovra7Hd29tbuK0x+/btw5EjR7Bjxw6btEWptH3uUyjkFv+T/bCvbav8mmWFRqmUA0rLPq7b1xoXJwBAxTWDXV5Ltyo+r8XDvhaPFH0tadiprKwEAKhUKovtzs7OKC0tbfK+69evx9ChQ9GjR49Wt0Mul8HT07XVx2mMRuNit2OTJfa1bRgvXrX43tPTFXC1fI3U7Wsfr5rbqgwmu76WblV8XouHfS0eMfta0rCjVqsB1MzdMX8NANXV1XBxabwTLl68iOzsbGRmZtqkHUajCVpthU2OVZdCIYdG4wKtthIGA68GbU/sa9vKu1Jm8X1xcTlQe85Ag32trxm+Kq3Q1exLNsHntXjY1+KxVV9rNC7Nrg5JGnbMw1eFhYXw9/cXthcWFjY54Xjv3r3w8vLCoEGDbNYWvd5+T26DwWjX49N17GvbKC63vC6WXm8E/tKvdfvapfYN52qVnv1vB3xei4d9LR4x+1rSwcmgoCC4ubkhOztb2KbVanHs2DGEhYU1er+cnByEh4dDqZQ0qxE5LOsnKPPaWETUdkmaFlQqFWJjY5GcnAwvLy907twZy5cvh4+PD2JiYmAwGFBUVAR3d3eLYa5jx47hySeflLDlRI7NmutiAYCb6vo6OyaTCTKZzB7NIiJqEcmnnScmJmLkyJGYM2cORo0aBYVCgXXr1sHJyQl5eXmIjIzE7t27Le5z6dIleHh4SNNgoluAturajXeqw622smM01ZyRRUTUlkg+DqRQKJCUlISkpKR6t/n5+eHEiRP1tv/0009iNI3olmXtooLOSjkUchkMRhPKqw3C5SOIiNoCySs7RNT2WHPFcwCQyWTCUFYZLxlBRG0Mww4R1WNtZQeoM0mZl4wgojaGYYeI6mlR2GFlh4jaKIYdIrJgMplw1coJysD1Scqs7BBRW8OwQ0QWynUGGEzW30+o7PBioETUxjDsEJEF8+RklcK6tXLMlZ0yLixIRG0Mww4RWTDP13FXW3f6uLCwICs7RNTGMOwQkQXzgoLWhh1XVnaIqI1i2CEiC+ZLRbhbuTAgKztE1FYx7BCRhdLasKNhZYeIHATDDhFZuNrSOTvOPBuLiNomhh0isqCtDStuzk5W3c+tdtirnJUdImpjGHaIyIJ5grJGrbDqfq6s7BBRG8WwQ0QWhGEsZ2snKLOyQ0RtE8MOEVnQtjDssLJDRG0Vww4RWdAKZ2O1bM5Otd4IvcFo83YREbUUww4RWRAmKFt76rnq+hwfnn5ORG0Jww4RWTDP2dGorJugrFTIoVbWvKVwKIuI2hKGHSISGIwm4UKg7i7WVXaA6xcD5SRlImpLGHaISFC3IqOxcoIycH0oi5UdImpLGHaISGCu6rg4yaGQW//2wMoOEbVFDDtEJCht4ZlYZqzsEFFbxLBDRIKrwurJ1g9hAazsEFHbxLBDRIKWLihoxsoOEbVFDDtEJLi+oGDrKjtl1azsEFHbwbBDRALzBOWWh52ayk65jpUdImo7GHaISNDSS0WYuarMlR2GHSJqOxh2iEigbfUEZXNlh8NYRNR2MOwQkaD1E5Rrz8ZiZYeI2hCGHSIS2GrODi8ESkRtCcMOEQlsdTYWKztE1JYw7BCRQBjGau0EZVZ2iKgNYdghIoF5gnL71k5QrtbDZDLZrF1ERK0hedgxGo1IS0tDVFQUQkNDkZCQgNzc3Eb3v3btGlJSUoT9Y2Nj8dtvv4nYYiLHpDcYUXnNCKD1E5QNJqBKb7RZ24iIWkPysJORkYHNmzdj4cKF2LJlC4xGI+Lj46HT6Rrcf968edi+fTuWLFmCbdu2wcvLCwk
JCbh69arILSdyLNo682zcWhh2XJzkkMtqvua8HSJqKyQNOzqdDllZWUhMTER0dDSCgoKQmpqK/Px87Nmzp97+ubm52LZtGxYvXoyoqCh069YNixYtgkqlwi+//CLBIyByHNrK66edK8yJxUoymazOwoKct0NEbUPL/nyzkePHj6O8vBwRERHCNo1Gg+DgYBw6dAjDhw+32P/AgQNwd3fH4MGDLfb/4osvRGsz0ZkrFfjxQqnUzbC5i9oqAIB7C+frmLk5K3C1Wo8yXjKCiNoIScNOfn4+AMDX19diu7e3t3BbXadPn0aXLl2wZ88eZGZmoqCgAMHBwXj11VfRrVu3VrVFqbR9kUuhkFv8T/YjVl+bTCZM2vozLpc1PMzqCLzaOdW8Huq8Jup+f6O+rhkCq0aV3miX19WthO8h4mFfi0eKvpY07FRWVgIAVCqVxXZnZ2eUltb/y7msrAxnz55FRkYGZs6cCY1Gg9WrV+PZZ5/F7t27cdttt7WoHXK5DJ6eri26b3NoNC52OzZZsndf5xZV4HKZDkq5DNGB3nb9WVJQyIHYAXfUvB7qvCw9PV0BV8vXSGN97eGqAi6Vw+SktOvr6lbC9xDxsK/FI2ZfSxp21Go1gJq5O+avAaC6uhouLvU7QalUoqysDKmpqUIlJzU1FUOGDMEnn3yC+Pj4FrXDaDRBq61o0X2bolDIodG4QKuthMHAM1PsSay+PnjyEgDgrg6uWP5oD7v9HKkVF5cD5eXwrPt9bTHrRn2tVtTM9ykoKq+5H7UY30PEw74Wj636WqNxaXZ1SNKwYx6+KiwshL+/v7C9sLAQgYGB9fb38fGBUqm0GLJSq9Xo0qULzp8/36q26O14mqzBYLTr8ek6e/f1b3k1Z/3dfbur4/9O6zw+vd5o8T3QeF+3c6pZa0dbec3x+0gkfA8RD/taPGL2taSDk0FBQXBzc0N2drawTavV4tixYwgLC6u3f1hYGPR6PY4ePSpsq6qqQm5uLu644w5R2ky3tpOXaioVd3u7SdyStuv6JSN4NhYRtQ2SVnZUKhViY2ORnJwMLy8vdO7cGcuXL4ePjw9iYmJgMBhQVFQEd3d3qNVq9OvXDwMHDsSsWbOwYMECeHh4IC0tDQqFAo899piUD4VuEb9fKgMAdO/AuSiNcVWZLwbKs7GIqG2QfNp5YmIiRo4ciTlz5mDUqFFQKBRYt24dnJyckJeXh8jISOzevVvYf9WqVQgPD8fkyZMxcuRIlJWVYePGjfDy8pLwUdCtQFt1DXnaagBA9w6s7DTGXNkp46KCRNRGSFrZAQCFQoGkpCQkJSXVu83Pzw8nTpyw2Obm5oZ58+Zh3rx5IrWQqIZ5CKuTxrnVa9E4MvOiguW8GCgRtRGSV3aIbha/m+frsKrTJPPFQFnZIaK2gmGHqJl+L6ydr+PN+TpNYWWHiNoahh2iZjIPY3G+TtNY2SGitoZhh6gZrhmMOHXFfNo5KztNceOFQImojeEsS6JmOFNUgWsGE1xVCnTSqG98h1uYa21lp6TyGpbuPSlsVysVeLZvZ3i7O0vVNCK6RTHsEDXD74W1Q1jebpDJZBK3pm3zcHGCUi6D3mjCtp/yLG5TKmSYHBUgUcuI6FbFsEPUDFxMsPncnJVY8fee+KX20hoAcDi3BIdzS1HOeTxEJAGGHaJm+J2Tk60S0dULEV2vL/QpA3A4txQGk0m6RhHRLYsTlIluwGQy4SRPO28Vhbxm6E9vYNghIvEx7BDdQGGZDqVVeijkMgTcxrDTEsrasMPKDhFJgWGH6AbMiwl29XKBs5IvmZZgZYeIpMR3bqIbuD45mfN1Wkopr3mrYWWHiKTAsEN0A8LKyd4MOy2lVLCyQ0TSYdghugHhmlg87bzFlLVrE+mNDDtEJD6GHaImlOv0yC2pAsBhrNYwV3YMDDtEJAGGHaIm/FE7hOXtpoJHOyeJW3PzUgiVHaPELSGiWxHDDlETfud8HZtgZYeIpMSwQ9SEs0U
VAIAAr3YSt+TmpuCcHSKSEMMOURPytdUAAN/2vNJ5awhnYzHsEJEEGHaImpCnrZmc7KtxlrglNzdhUUGGHSKSAMMOURPyr9ZUdnw0rOy0hnC5CIYdIpIAww5RI8p1emir9ABY2Wkt8wrKrOwQkRQYdogakVc7X0ejVsJVpZS4NTc3DmMRkZQYdogakV87X8fHnVWd1uIwFhFJiWGHqBHmyo4v5+u0Gis7RCQlhh2iRgiVHc7XaTVWdohISgw7RI1gZcd2rld2eLkIIhIfww5RI/K5xo7NsLJDRFJi2CFqhLmywzV2Wk/JOTtEJCGGHaIG6PRGXC7XAWBlxxZY2SEiKTHsEDWgoHblZGelHB4uThK35uZnXlTQaAKMJgYeIhIXww5RA+peE0tWe8VuajnzBGWA1R0iEh/DDlED8jlfx6bMVz0HOG+HiMQnedgxGo1IS0tDVFQUQkNDkZCQgNzc3Eb3/+yzzxAYGFjv3/nz50VsNTk6Xu3cthR1qmN6A8MOEYlL8gv+ZGRkYPPmzVi6dCl8fHywfPlyxMfHY+fOnVCpVPX2P3HiBMLDw7FixQqL7V5eXmI1mW4BeVe5xo4t1a3scBiLiMQmaWVHp9MhKysLiYmJiI6ORlBQEFJTU5Gfn489e/Y0eJ/ff/8dgYGB6NChg8U/hUIhcuvJkXH1ZNuSy2Qwxx0uLEhEYpO0snP8+HGUl5cjIiJC2KbRaBAcHIxDhw5h+PDh9e5z4sQJDBs2zOZtUSptn/sUCrnF/47KZDLho8MXcPpKhWRtUMhleKq/P+72dLHJ8cxzdvw8Xezy3Lgp1HncSqVc+L6lz2ulQoZrBhMgl926fWqlW+U9pC1gX4tHir6WNOzk5+cDAHx9fS22e3t7C7fVVVpaioKCAuTk5GDz5s0oLi5GSEgIkpKSEBAQ0OJ2yOUyeHq6tvj+N6LR2OYDuK3KPnUFyz//Q+pm4NC5EnwxI7rVxzEYTcKp50FdvODp4di/v0bVGUX29HQFXC1fI9Y+r5VyOa4ZDHB1c4GnZztbtPCW4ejvIW0J+1o8Yva1pGGnsrISAOrNzXF2dkZpaWm9/U+ePAmgppLw5ptvoqqqCqtXr8azzz6LnTt34vbbb29RO4xGE7Ra21clFAo5NBoXaLWVMBgct3T/r4PnAAChnTUYECDB3CkTsPbbszh1uRwncovh7VZ/rpc1CrRV0BtNUMplUBkMKC4ut1FDbzLl5fCs/bK4uByoWWOxxc9r8x9xRSXlcJdz3k5z3CrvIW0B+1o8tuprjcal2dUhScOOWl0z+VOn0wlfA0B1dTVcXOonvn79+uG7776Dp6ensPZJeno6oqOjsX37dkyYMKHFbdHr7ffkNhiMdj2+lPQGI/aeuAQAGD/AHwO6SjNR/NvTRfj5ohbfnbqCR4I7tupY54trQri3mwomo+nWPVW6znNWrzdafA9Y/7yuWVjQgCqdwWFfD/biyO8hbQ37Wjxi9rWkg5Pm4avCwkKL7YWFhejYseEPLC8vL4tF3lxcXODn54eCggL7NZQadSi3BCWV1+Dp4oR+/p43voOdhN9R87Ozzxa3+li8JpZ9KHjJCCKSiKRhJygoCG5ubsjOzha2abVaHDt2DGFhYfX2//jjj9G/f39UVFwfciorK8OZM2dw1113idJmsvTf4zVVnfu63y5c/0gK/bvWhp0zxTC18nIEXGPHPngxUCKSiqRhR6VSITY2FsnJyfj8889x/PhxTJ06FT4+PoiJiYHBYMClS5dQVVXz4TN48GAYjUbMnDkTJ0+exNGjR/Hyyy/Dy8sLTzzxhJQP5ZZUrTfiy5OXAQAPBnlL2paQThq4OClQVHENf15u3fwrrp5sHwqGHSKSiOTn2CUmJmLkyJGYM2cORo0aBYVCgXXr1sHJyQl5eXmIjIzE7t27AdQMe23YsAEVFRUYNWoUxo4dC3d3d2zcuBHOzvwrXGzfni5Cuc4AbzcVQjprJG2LSilHWO3k6IPnWje
UxcqOffDK50QkFclXUFYoFEhKSkJSUlK92/z8/HDixAmLbT179kRWVpZYzaMm7KkdwooJ8oa8DVwsM/Ku2/DV75dw6FwJnu3r1+LjsLJjH9crO5z8SUTikryyQzenCp0BX5+6AgCICeogcWtqDLqrZumBH3JLoW/h6Ywmk6lOZYdhx5ZY2SEiqTDsUIt89ecVVOuN6OKhRpC3m9TNAQD08NHAw8UJFdcM+DX/aouOUVqpR1XtqZAd3TmMZUucoExEUmHYoRbZc7xmuYCYIG+LpQCkJJfLEH6HBwDg4NmSFh0j72pNVec2VxWceUkDm2Jlh4ikwndzslpp5TV8d6ZmEnBbGcIy61+73k5LJymb19jh5GTbY2WHiKQi+QRlR2UwmvDliUuoMl1CRYXOof6a/b2wDHqjCXd3cMWdt9nvmmItYV5v52jeVVToDGinUlh1/7zS2qudu3O+jq1xUUEikgrDjp3k5JYgacevUjfDrh4IbFtVHQDo7OGCTu3VuFhahSPnSzHoTusuX8HTzu2n5nIRrOwQkfgYduykp487nr63E65eM0KnM7R6Vd+2pr3aCf8vtJPUzWhQuL8HdhzNx8FzxVaHHfNp577tWdmxNeHUc4NjvRaIqO1j2LETN2clXn2gOzw9XVFcXM4Ly4ko/A5P7Diaj0PnSqy+Lys79iPM2XGw4E9EbR/DDjmcfl3aAwBOXirHjp/zoLLirKoL5jk7XGPH5ljZISKpMOyQw/Fsp0L3Dq74/VI5Fv/vZIuOwcqO7QmnnrOyQ0QiY9ghh5Q45E58dPhCi878Cb/DA64qvjRs7Xplh0O6RCQuvqOTQ+p/h6ew5g61DVxUkIikwkUFiUgUCi4qSEQSYdghIlGwskNEUmHYISJRKBVcVJCIpMGwQ0SiUMg4jEVE0mDYISJRKBUcxiIiaTDsEJEoOEGZiKTCsENEouAEZSKSCsMOEYlCuDaWkYsKEpG4GHaISBSs7BCRVFq8gnJpaSlycnJQWFiIBx98ECUlJQgICICs9owLIqK6OGeHiKTSorCzevVqrFmzBlVVVZDJZAgJCcHbb7+N4uJiZGVlQaPR2LqdRHSTY2WHiKRi9TDWBx98gFWrViEuLg7//Oc/Yaq9gnFsbCxyc3OxcuVKmzeSiG5+SlZ2iEgiVoedTZs2YcKECXjllVfQs2dPYfuQIUMwZcoUfPHFFzZtIBE5Bg5jEZFUrA47Fy9eRHh4eIO33Xnnnbh8+XKrG0VEjkcp5+UiiEgaVocdX19fHDlypMHbfvnlF/j6+ra6UUTkeFjZISKpWD1BeeTIkVi1ahXUajWio6MBABUVFfjvf/+LNWvWIC4uztZtJCIHwAnKRCQVq8NOQkICzp8/j+TkZCQnJwMAxowZAwAYMWIEJk6caNsWEpFDECo7BoYdIhJXi049X7BgAcaNG4fvv/8eJSUlcHd3R1hYGLp3727r9hGRgxAqOyaGHSISl9VhZ8SIEZg+fTqGDh2Krl272qFJROSIrld2eLkIIhKX1ROU8/Ly4OLiYo+2EJEDY2WHiKRiddgZMWIENmzYgMLCQps0wGg0Ii0tDVFRUQgNDUVCQgJyc3Obdd/PPvsMgYGBOH/+vE3aQkT2o1Rwzg4RScPqYawzZ84gJycHQ4YMgYeHB9q1a2dxu0wmw969e5t9vIyMDGzevBlLly6Fj48Pli9fjvj4eOzcuRMqlarR+124cAELFiywtvlEJBGFjJUdIpKG1WHH19cXI0aMsMkP1+l0yMrKwowZM4TT2FNTUxEVFYU9e/Zg+PDhDd7PaDQiKSkJPXv2xPfff2+TthCRfSkVtYsKsrJDRCKzOuy8+eabNvvhx48fR3l5OSIiIoRtGo0GwcHBOHToUKNh591338W1a9cwefJkhh2im4RSxkUFiUgaLTr1HAC++uorHDx4EFqtFp6enujXrx+ioqKsOkZ+fj4A1Ft12dvbW7jtr37++WdkZWVh69atKCgoaFnjG6BUWj1
96YYUtX/Jmv8n+2Ff20Gd14RSKRe+b2lfO6tq9jeYTHZ5vTkiPq/Fw74WjxR9bXXY0el0ePHFF/HNN99AoVDA09MTxcXFyMzMxIABA7BmzZom59rUVVlZCQD19nd2dkZpaWm9/SsqKjBjxgzMmDEDXbt2tVnYkctl8PR0tcmxGqLR8Ow1sbCvbajOy9LT0xVwtXyNWNvXntdqKjoGo8murzdHxOe1eNjX4hGzr60OO6tWrcLhw4exbNkyPPLII1AoFNDr9di1axfmz5+P1atX45VXXmnWsdRqNYCaAGX+GgCqq6sbPL190aJFCAgIwDPPPGNts5tkNJqg1VbY9JhATWrVaFyg1VbCwLVF7Ip9bQfl5fCs/bK4uBzQ1Xzd0r6uKKsCUDOMVVxcbuPGOiY+r8XDvhaPrfpao3FpdnXI6rCza9cuTJ48GY8++uj1gyiVePzxx3HlyhV89NFHzQ475uGrwsJC+Pv7C9sLCwsRGBhYb/9t27ZBpVKhT58+AACDwQAAGD58OF544QW88MIL1j4cgV5vvye3wWC06/HpOva1DdXpR73eaPE90IK+rj0LS8/fkdX4vBYP+1o8Yva11WGnqKgIwcHBDd4WHBxs1dBSUFAQ3NzckJ2dLYQdrVaLY8eOITY2tt7+e/bssfj+p59+QlJSEjIzM3mpCqI2jhcCJSKpWB12/P39cfjwYYszqMwOHTpUb7JxU1QqFWJjY5GcnAwvLy907twZy5cvh4+PD2JiYmAwGFBUVAR3d3eo1WrccccdFvc3T2Lu1KkTPDw8rH0oRCSi6ysoAyaTCbLas7OIiOzN6qnQzzzzDNasWYP33nsPeXl5uHbtGvLy8rB27VqsXbsWTz75pFXHS0xMxMiRIzFnzhyMGjUKCoUC69atg5OTE/Ly8hAZGYndu3db20wiamPM18YCWN0hInHJTCbrljM1Go14/fXXsW3bNou/zEwmE/7+979jyZIlN91fbAaDEUVFtp8wqVTK4enpiuLico4B2xn72g7Ky9EhoKZSe+l0nnA2Vkv7ukJnwJBVBwAAXycOgtpJYfs2Oxg+r8XDvhaPrfray8vVfhOU5XI5Fi9ejHHjxuHgwYMoLS1F+/btER4ejm7dulndWCK6NdSt7HBhQSISU4sWFTx8+DC+//57vPTSSwCAY8eOIS0tDQkJCbjnnnts2kAicgxKhh0ikojVc3b279+P559/Ht98842wTSaT4cyZM3j22WeRk5Nj0wYSkWOok3UYdohIVFaHnVWrVuGRRx7B5s2bhW09evTAp59+ir/97W9YsWKFTRtIRI5BJpPx9HMikoTVYefPP//E448/3uAk5McffxzHjx+3ScOIyPGY5+3ojZwASkTisTrsuLu74/Tp0w3elpubi3bt2rW6UUTkmK5XdiRuCBHdUqwOOw888ABWrlyJffv2WWz/+uuvsXLlSjzwwAM2axwRORYlKztEJAGrz8aaOnUqjh49ikmTJsHJyQkeHh4oKSmBXq9H7969MX36dHu0k4gcgIJzdohIAlaHHTc3N2zZsgX79+/HDz/8gJKSEri7u6Nfv36Ijo6GXG51sYiIbhHXKzsMO0QknhatsyOXyzF06FAMHToUAKDX61FWVsagQ0RNEsKOgWGHiMRjdTrR6/VIT0/Hzp07AQDZ2dkYNGgQIiIi8Pzzz6O0tNTmjSQix6CsXdqdw1hEJCarw05aWhpWr14NrVYLAFi0aBE8PDwwe/ZsnDt3DikpKTZvJBE5BoWMw1hEJD6rw86///1vTJs2Dc899xz+/PNPnDx5EpMmTcKYMWMwdepUfPHFF/ZoJxE5AKWCE5SJSHxWh53CwkL07t0bAPDll19CLpdj8ODBAAAfHx9cvXrVti0kIofByg4RScHqsOPt7Y3z588DAL744gv06NEDXl5eAIAjR47Ax8fHti0kIodhruww7BCRmKwOO8OHD8ebb76J8ePH4/Dhw3jyyScBAIsXL8aqVaswYsQImze
SiByDubJj4KKCRCQiq089nzJlCtq1a4dDhw5h+vTpePbZZwEAR48exbhx4/Diiy/avJFE5BhY2SEiKVgddmQyGSZOnIiJEydabN+yZYvNGkVEjomLChKRFLgKIBGJRsGwQ0QSYNghItEo5VxUkIjEx7BDRKJhZYeIpMCwQ0SiUfKq50QkAYYdIhINKztEJAWGHSISDSs7RCQFhh0iEs31yg4XFSQi8TRrnZ1hw4ZBVrvy6Y3IZDLs3bu3VY0iIsfEyg4RSaFZYScuLg5vvfUW3NzcMHToUHu3iYgcFBcVJCIpNCvsjB49Gl5eXpg+fTruu+8+3H///fZuFxE5IGEYy8CwQ0TiafacnUceeQRPPfUU3nzzTRgMBnu2iYgclLCooIlhh4jEY9W1saZMmQK1Wo1z584hICDAXm0iIgelrP3zipUdIhJTs8JOdXU1nJ2d4eXlhX/84x/2bhMROShWdohICs0axho2bBiOHDkCAEhPT0dBQYFdG0VEjolzdohICs0KO1evXkVhYSEA4J133mHYIaIWEc7GYmWHiETUrGGsXr16Yfr06XjrrbdgMpnw0ksvQaVSNbivtevsGI1GpKen41//+heuXr2KsLAwzJ07F126dGlw/19//RXLli3Dzz//DGdnZ8TExCApKQnu7u7N/plEJI3rlR0uKkhE4mlW2FmxYgU2bNiAkpIS7NixA8HBwfDy8rJJAzIyMrB582YsXboUPj4+WL58OeLj47Fz5856gery5cuIi4vD/fffj3nz5qG4uBivv/46Xn31Vbzzzjs2aQ8R2c/1RQUlbggR3VKaFXY6duyIWbNmAQCys7MxdepUBAUFtfqH63Q6ZGVlYcaMGYiOjgYApKamIioqCnv27MHw4cMt9r9w4QIiIyOxYMECKJVKBAQE4KmnnkJqamqr20JE9qdU8HIRRCQ+q6+N9cUXX9gk6ADA8ePHUV5ejoiICGGbRqNBcHAwDh06VG//3r17Y8WKFVAqazLan3/+iU8//RSDBg2ySXuIyL4UMl4ugojEZ9U6O7aWn58PAPD19bXY7u3tLdzWmAcffBBnzpxB586dkZ6e3uq2KJW2vyaqQiG3+J/sh31tB3VeE0qlXPi+NX2tclIAAAwm+7zmHA2f1+JhX4tHir6WNOxUVlYCQL25Oc7OzigtLW3yvsnJyaisrMTy5csxZswYfPrpp3B1dW1RO+RyGTw9W3bf5tBoXOx2bLLEvrahOi9LT09X4C+vr5b0dXt3NQBAprDva87R8HktHva1eMTsa0nDjlpd88an0+mEr4GaRQxdXJruhF69egGoWfdnyJAh+N///ofHH3+8Re0wGk3QaitadN+mKBRyaDQu0GorYeCMTLtiX9tBeTk8a78sLi4HdDVft6avqytrDlJVra85JjWJz2vxsK/FY6u+1mhcml0dkjTsmIevCgsL4e/vL2wvLCxEYGBgvf1PnTqFc+fOCZOZgZrJ0x4eHq1e+0evt9+T22Aw2vX4dB372obq9KNeb7T4HmhdX18zmPh7sgKf1+JhX4tHzL6WdHAyKCgIbm5uyM7OFrZptVocO3YMYWFh9fb/9ttvkZiYCK1WK2w7d+4ciouL0a1bN1HaTEQtd/3Uc05QJiLxSBp2VCoVYmNjkZycjM8//xzHjx/H1KlT4ePjg5iYGBgMBly6dAlVVVUAgOHDh8PDwwNJSUk4efIkcnJykJiYiJCQEAwdOlTKh0JEzSAsKsiwQ0QiknzaeWJiIkaOHIk5c+Zg1KhRUCgUWLduHZycnJCXl4fIyEjs3r0bAODh4YH3338fADBq1Ci89NJLCA4Oxrp166BQKKR8GETUDKzsEJEUJJ2zAwAKhQJJSUlISkqqd5ufnx9OnDhhsS0gIABr1qwRq3lEZEPCtbG4qCARiUjyyg4R3TpY2SEiKTDsEJFolJyzQ0QSYNghItFwgjIRSYFhh4hEo5TXvOVwGIuIxMSwQ0SiYWWHiKTAsENEouEEZSK
SAsMOEYmGlR0ikgLDDhGJhmdjEZEUGHaISDRKxfVhLJOJgYeIxMGwQ0SiUchkwtcGZh0iEgnDDhGJxlzZAQC9gZeMICJxMOwQkWjqVnY4b4eIxMKwQ0SiUSquv+Xw9HMiEgvDDhGJps4oFis7RCQahh0iEo1MJhPW2mFlh4jEwrBDRKLiWjtEJDaGHSISFS8ZQURiY9ghIlHxkhFEJDaGHSISFSs7RCQ2hh0iEtX1OTtcVJCIxMGwQ0Si4jAWEYmNYYeIRMVhLCISG8MOEYmKlR0iEhvDDhGJSimvedth2CEisTDsEJGoWNkhIrEx7BCRqDhnh4jExrBDRKJiZYeIxMawQ0SiYmWHiMTGsENEouKigkQkNoYdIhKVMIxlYGWHiMTBsENEouIwFhGJjWGHiETFCcpEJDaGHSISlXlRQVZ2iEgskocdo9GItLQ0REVFITQ0FAkJCcjNzW10/5MnT2LChAno378/IiIikJiYiIsXL4rYYiJqDUXtuw4rO0QkFsnDTkZGBjZv3oyFCxdiy5YtMBqNiI+Ph06nq7dvcXEx4uLioFarsWnTJqxduxZFRUWIj49HdXW1BK0nImspFazsEJG4JA07Op0OWVlZSExMRHR0NIKCgpCamor8/Hzs2bOn3v579+5FRUUFli1bhu7du+Oee+7B8uXL8eeff+KHH36Q4BEQkbWUMs7ZISJxSRp2jh8/jvLyckRERAjbNBoNgoODcejQoXr7R0REICMjA2q1Wtgmrx3/12q19m8wEbWaUsGzsYhIXEopf3h+fj4AwNfX12K7t7e3cFtdfn5+8PPzs9iWmZkJtVqNsLCwVrVFqbR97lPUluvN/5P9sK/toM5rQqmUC9+3tq+dao9jhMkurztHwue1eNjX4pGiryUNO5WVlQAAlUplsd3Z2RmlpaU3vP+mTZvwwQcfYM6cOfDy8mpxO+RyGTw9XVt8/xvRaFzsdmyyxL62oTovS09PV8DV8jXS0r52dak5sFLlZNfXnSPh81o87GvxiNnXkoYd83CUTqezGJqqrq6Gi0vjnWAymbBy5UqsXr0akyZNwujRo1vVDqPRBK22olXHaIhCIYdG4wKtthIGA5fGtyf2tR2Ul8Oz9svi4nKg9pyB1va14ZoeAFBWUV1zXGoUn9fiYV+Lx1Z9rdG4NLs6JGnYMQ9fFRYWwt/fX9heWFiIwMDABu9z7do1zJ49G7t27cLs2bMxduxYm7RFr7ffk9tgMNr1+HQd+9qG6vSjXm+0+B5oeV/Lav+/pufvqrn4vBYP+1o8Yva1pIOTQUFBcHNzQ3Z2trBNq9Xi2LFjjc7BmTlzJv7zn/8gJSXFZkGHiMSj5ArKRCQySSs7KpUKsbGxSE5OhpeXFzp37ozly5fDx8cHMTExMBgMKCoqgru7O9RqNbZv347du3dj5syZCA8Px6VLl4RjmfchoraNl4sgIrFJPu08MTERI0eOxJw5czBq1CgoFAqsW7cOTk5OyMvLQ2RkJHbv3g0A2LVrFwBg2bJliIyMtPhn3oeI2jZeLoKIxCZpZQcAFAoFkpKSkJSUVO82Pz8/nDhxQvg+KytLzKYRkR2wskNEYpO8skNEtxbznB1WdohILAw7RCQqTlAmIrEx7BCRqBSs7BCRyBh2iEhU1ys7XMuEiMTBsENEohImKBtY2SEicTDsEJGohAnKJoYdIhIHww4RiUrJyg4RiYxhh4hEpTAvKsjKDhGJhGGHiETFyg4RiY1hh4hExTk7RCQ2hh0iEpVSwcoOEYmLYYeIRKWQsbJDROJi2CEiUV2v7HBRQSISB8MOEYmKVz0nIrEx7BCRqHghUCISG8MOEYmKFwIlIrEx7BCRqJS1iwqyskNEYmHYISJRKWrfdRh2iEgsDDtEJCpzZYfDWEQkFoYdIhJV3QnKJq61Q0QiYNghIlGZJygDAIs7RCQGhh0iEpWyTtjhvB0iEgPDDhGJyjLscBVlIrI/hh0iElXdsMNJykQkBoYdIhK
VgsNYRCQyhh0iEpVMJkPttUBZ2SEiUTDsEJHoeDFQIhITww4RiY4LCxKRmBh2iEh0QmXHwLBDRPbHsENEohNWUeYKykQkAoYdIhKdsnaGsoGVHSISAcMOEYlOITNPUOaigkRkfww7RCQ6c2WHZ2MRkRgkDztGoxFpaWmIiopCaGgoEhISkJub26z7xcfHY9WqVSK0kohs6Xplh2GHiOxP8rCTkZGBzZs3Y+HChdiyZYsQYnQ6XaP30el0+Mc//oGvv/5axJYSka0Ic3YYdohIBJKGHZ1Oh6ysLCQmJiI6OhpBQUFITU1Ffn4+9uzZ0+B9fvjhBzzxxBPIycmBRqMRucVEZAus7BCRmCQNO8ePH0d5eTkiIiKEbRqNBsHBwTh06FCD99m/fz+ioqKwY8cOuLu7i9VUIrIhpYKLChKReJRS/vD8/HwAgK+vr8V2b29v4ba/mjp1ql3aolTaPvcpat/Qzf+T/bCv7aDOa0KplAvf26KvzevsmGT2ee05Cj6vxcO+Fo8UfS1p2KmsrAQAqFQqi+3Ozs4oLS0VrR1yuQyenq52O75G42K3Y5Ml9rUN1XlZenq6Aq6Wr5HW9LXaueatR+3ibNfXnqPg81o87GvxiNnXkoYdtVoNoGbujvlrAKiuroaLi3idYDSaoNVW2Py4CoUcGo0LtNpKGAxcT8Se2Nd2UF4Oz9ovi4vLgdpzBmzS17Xr65Reraw5NjWIz2vxsK/FY6u+1mhcml0dkjTsmIevCgsL4e/vL2wvLCxEYGCgqG3R6+335DYYjHY9Pl3HvrahOv2o1xstvgda19fy2gnK1dcM/H01A5/X4mFfi0fMvpZ0cDIoKAhubm7Izs4Wtmm1Whw7dgxhYWEStoyI7Mk8Z4cTlIlIDJJWdlQqFWJjY5GcnAwvLy907twZy5cvh4+PD2JiYmAwGFBUVAR3d3eLYS4iurkJFwJl2CEiEUg+7TwxMREjR47EnDlzMGrUKCgUCqxbtw5OTk7Iy8tDZGQkdu/eLXUziciGWNkhIjFJWtkBAIVCgaSkJCQlJdW7zc/PDydOnGj0vl988YU9m0ZEdqJgZYeIRCR5ZYeIbj2s7BCRmBh2iEh0rOwQkZgYdohIdEp5zVsPww4RiYFhh4hEx7OxiEhMDDtEJDoF5+wQkYgYdohIdEJlx8CwQ0T2x7BDRKK7PkGZy/ITkf0x7BCR6HjqORGJiWGHiETHU8+JSEwMO0QkOlZ2iEhMDDtEJDpWdohITAw7RCQ6paLmrYeVHSISA8MOEYmOiwoSkZgYdohIdFxUkIjExLBDRKJjZYeIxMSwQ0SiU3JRQSISEcMOEYmOp54TkZgYdohIdDz1nIjExLBDRKJjZYeIxMSwQ0SiY2WHiMTEsENEouPZWEQkJoYdIhKdUl7z1sOwQ0RiYNghItFxUUEiEhPDDhGJjsNYRCQmhh0iEp0wQdnARQWJyP4YdohIdMKp5yzsEJEIGHaISHSs7BCRmBh2iEh01ys7LO0Qkf0x7BCR6K5Xdhh2iMj+GHaISHSs7BCRmBh2iEh0SlZ2iEhEDDtEJDrzCsomAEZWd4jIzhh2iEh0SoVM+JrVHSKyN8nDjtFoRFpaGqKiohAaGoqEhATk5uY2un9xcTGmT5+OsLAwhIeHY/78+aisrBSxxUTUWuYJygBXUSYi+5M87GRkZGDz5s1YuHAhtmzZAqPRiPj4eOh0ugb3T0xMxNmzZ7FhwwasXLkS+/fvx7x588RtNBG1irJO2OH1sYjI3iQNOzqdDllZWUhMTER0dDSCgoKQmpqK/Px87Nmzp97+R44cwcGDB/HWW2+hZ8+eiIiIwIIFC/Dpp5+ioKBAgkdARC1hWdnhwoJEZF9KKX/48ePHUV5ejoiICGGbRqNBcHAwDh06hOHDh1vsn5OTgw4dOqBbt27CtvDwcMhkMhw+fBgPP/xwi9uiVNo+9ykUcov/yX7Y13ZQ5zWhVMqF723V13IZYDQBaV+fhlqpaNWxHJV
MJoNKpYBOZ4CJE7ntin1tP85KOUb17YzOHi4ApHm/ljTs5OfnAwB8fX0ttnt7ewu31VVQUFBvX5VKBQ8PD+Tl5bW4HXK5DJ6eri2+/41oNC52OzZZYl/bkOr6l56eroCr5WuktX3t2U6FK+U67PqFVVkiR+fprsaMBwMtton5fi1p2DFPLFapVBbbnZ2dUVpa2uD+f93XvH91dXWL22E0mqDVVrT4/o1RKOTQaFyg1VbCwGsA2RX72g7Ky+FZ+2VxcTlQO43OVn2d8vee+P5Mcevb6cDkMhmcnZ1QXX2Np+jbGfvaftRKOYb39K55H4Ht3kM0GpdmV4ckDTtqtRpAzdwd89cAUF1dDReX+olPrVY3OHG5uroa7dq1a1Vb9Hr7fUAaDEa7Hp+uY1/bUJ1+1OuNFt8Dre/rnh3d0bOje4vvfytQKuXw9HRFcXE5n9d2xr62v7/2q5jv15JOcDAPSRUWFlpsLywsRMeOHevt7+PjU29fnU6HkpISeHt726+hREREdNOSNOwEBQXBzc0N2dnZwjatVotjx44hLCys3v5hYWHIz8/H2bNnhW0HDx4EAPTt29f+DSYiIqKbjqTDWCqVCrGxsUhOToaXlxc6d+6M5cuXw8fHBzExMTAYDCgqKoK7uzvUajV69+6Ne++9F1OnTsW8efNQUVGBuXPn4vHHH2+wEkREREQk+Xm6iYmJGDlyJObMmYNRo0ZBoVBg3bp1cHJyQl5eHiIjI7F7924ANacGpqenw8/PD88//zymTJmCwYMHc1FBIiIiapTMxAUFYDAYUVRUbvPjcsKbeNjXdlBejg4BNfPqLp3OE049Z1+Lh30tHva1eGzV115ers0+G0vyyg4RERGRPTHsEBERkUNj2CEiIiKHxrBDREREDo1hh4iIiBwaww4RERE5NIYdIiIicmgMO0REROTQuKggAJPJBKPRPt2gUMhbdQl7aj72tY0ZjVDkngMAGLr4A/Lrfxuxr8XDvhYP+1o8tuhruVwGmUzWrH0ZdoiIiMihcRiLiIiIHBrDDhERETk0hh0iIiJyaAw7RERE5NAYdoiIiMihMewQERGRQ2PYISIiIofGsENEREQOjWGHiIiIHBrDDhERETk0hh0iIiJyaAw7RERE5NAYdoiIiMihMey0gtFoRFpaGqKiohAaGoqEhATk5uY2un9xcTGmT5+OsLAwhIeHY/78+aisrBSxxTcva/v65MmTmDBhAvr374+IiAgkJibi4sWLIrb45mVtX9f12WefITAwEOfPn7dzKx2DtX197do1pKSkCPvHxsbit99+E7HFNy9r+/rKlSuYPn06BgwYgP79+2Pq1KkoKCgQscWOYc2aNRg9enST+4jx2ciw0woZGRnYvHkzFi5ciC1btsBoNCI+Ph46na7B/RMTE3H27Fls2LABK1euxP79+zFv3jxxG32Tsqavi4uLERcXB7VajU2bNmHt2rUoKipCfHw8qqurJWj9zcXa57XZhQsXsGDBApFa6Ris7et58+Zh+/btWLJkCbZt2wYvLy8kJCTg6tWrIrf85mNtX0+ZMgUXL17E+vXrsX79ely8eBEvvfSSyK2+uX344Yd4++23b7ifKJ+NJmqR6upqU58+fUwffvihsK20tNQUEhJi2rlzZ739f/jhB1P37t1Nf/zxh7Dt66+/NgUGBpry8/NFafPNytq+/uc//2nq06ePqbKyUth28eJFU/fu3U3ffvutKG2+WVnb12YGg8E0atQo05gxY0zdu3c35ebmitHcm5q1fX3u3DlTYGCgad++fRb7Dx06lM/rG7C2r0tLS03du3c3ff7558K2vXv3mrp3724qLi4Wo8k3tfz8fNPEiRNNoaGhpoceesgUGxvb6L5ifTaystNCx48fR3l5OSIiIoRtGo0GwcHBOHToUL39c3Jy0KFDB3Tr1k3YFh4eDplMhsOHD4vS5puVtX0dERGBjIwMqNVqYZtcXvNU12q19m/wTczavjZ79913ce3
aNUycOFGMZjoEa/v6wIEDcHd3x+DBgy32/+KLLyyOQfVZ29dqtRqurq7YsWMHysrKUFZWhk8//RQBAQHQaDRiNv2m9Ouvv8LJyQmfffYZevfu3eS+Yn02Km12pFtMfn4+AMDX19diu7e3t3BbXQUFBfX2ValU8PDwQF5env0a6gCs7Ws/Pz/4+flZbMvMzIRarUZYWJj9GuoArO1rAPj555+RlZWFrVu3ck6DFazt69OnT6NLly7Ys2cPMjMzUVBQgODgYLz66qsWHxRUn7V9rVKpsHTpUsydOxf9+vWDTCaDt7c3PvjgA+EPJ2rcsGHDMGzYsGbtK9ZnI39rLWSePKVSqSy2Ozs7NzgvpLKyst6+Te1P11nb13+1adMmfPDBB5gxYwa8vLzs0kZHYW1fV1RUYMaMGZgxYwa6du0qRhMdhrV9XVZWhrNnzyIjIwPTpk3D6tWroVQq8eyzz+LKlSuitPlmZW1fm0wm/Pbbb+jTpw8+/PBDvP/+++jUqRNefPFFlJWVidLmW4VYn40MOy1kHiL56+S26upquLi4NLh/QxPhqqur0a5dO/s00kFY29dmJpMJb7/9NhYtWoRJkybd8IwAsr6vFy1ahICAADzzzDOitM+RWNvXSqUSZWVlSE1NRWRkJEJCQpCamgoA+OSTT+zf4JuYtX39f//3f/jggw+wfPly9O3bF+Hh4Xj33Xdx4cIFbN26VZQ23yrE+mxk2Gkhc9mtsLDQYnthYSE6duxYb38fH596++p0OpSUlMDb29t+DXUA1vY1UHOKblJSEt59913Mnj0bU6ZMsXczHYK1fb1t2zZ8++236NOnD/r06YOEhAQAwPDhw/Huu+/av8E3sZa8hyiVSoshK7VajS5duvBU/xuwtq9zcnIQEBAANzc3YVv79u0REBCAs2fP2rextxixPhsZdlooKCgIbm5uyM7OFrZptVocO3aswXkhYWFhyM/Pt3ihHDx4EADQt29f+zf4JmZtXwPAzJkz8Z///AcpKSkYO3asSC29+Vnb13v27MGuXbuwY8cO7NixA4sWLQJQM0eK1Z6mteQ9RK/X4+jRo8K2qqoq5Obm4o477hClzTcra/vax8cHZ8+etRhGqaiowPnz5zlca2NifTZygnILqVQqxMbGIjk5GV5eXujcuTOWL18OHx8fxMTEwGAwoKioCO7u7lCr1ejduzfuvfdeTJ06FfPmzUNFRQXmzp2Lxx9/vNHqBNWwtq+3b9+O3bt3Y+bMmQgPD8elS5eEY5n3oYZZ29d//ZA1T/bs1KkTPDw8JHgENw9r+7pfv34YOHAgZs2ahQULFsDDwwNpaWlQKBR47LHHpH44bZq1ff34449j3bp1mDJlCl555RUAwNtvvw1nZ2c88cQTEj+am5tkn402O4n9FqTX603Lli0zDRgwwBQaGmpKSEgQ1hfJzc01de/e3bRt2zZh/8uXL5tefvllU2hoqKl///6mN954w1RVVSVV828q1vR1XFycqXv37g3+q/v7oIZZ+7yu6/vvv+c6O1awtq+vXr1qeuONN0z9+/c39e7d2xQXF2c6efKkVM2/qVjb13/88Ydp4sSJpvDwcNOAAQNMkydP5vO6BWbNmmWxzo5Un40yk8lksl10IiIiImpbOGeHiIiIHBrDDhERETk0hh0iIiJyaAw7RERE5NAYdoiIiMihMewQERGRQ2PYISIiIofGsENEdsWlvIhIagw7RGQ3n3/+OWbNmgUAyM7ORmBgoMX1icQ2bNgwvPrqq60+zqpVqxAYGNjkPm3h8RJRDV4bi4jsZsOGDVI3gYiIlR0iIiJybAw7RGQXo0ePxsGDB3Hw4EGLIZ9Tp05h/Pjx6N27NwYNGoTk5GTo9Xrh9sDAQKSnp+OJJ55ASEgI0tPTAQAXL17EtGnTEB4ejt69e+P555/HsWPHLH7mrl278OijjyIkJAQDBgzAjBkzUFBQYLHPtWvXsGzZMgwaNAihoaEYN24czp49a7HPgQM
H8Oyzz6Jv377o378/pk+fjry8vCYf75YtW/Dggw8iJCQEsbGxuHjxYov6jYhsj2GHiOzijTfeQHBwMIKDg/Hxxx+jrKwMAPDmm2+ib9++ePfdd/G3v/0Na9euxZYtWyzu++6772LEiBFIS0vDgw8+iKKiIjzzzDP49ddf8frrryMlJQVGoxHPPfcc/vzzTwDA4cOHMXPmTMTExGDt2rWYPXs2vv/+e0yfPt3i2Lt378bJkyexdOlSvPHGG/jll18wdepU4fYdO3Zg3Lhx8PX1xYoVKzB79mwcOXIETz/9NK5cudLgY/3ggw/wxhtvYMiQIcjIyEDv3r3x+uuv27I7iagVOGeHiOzirrvugpubGwAgNDRUmKg7ZswYvPjiiwCAAQMGYO/evfj+++8RGxsr3Ldfv36Ii4sTvk9NTUVJSQk++ugjdO7cGQAwePBgPPzww1i5ciXS0tJw+PBhqNVqTJgwASqVCgDg4eGBo0ePwmQyQSaTAQA6duyIjIwMODk5AQDOnj2L1atXo6ysDO3atUNycjIiIyORkpIi/Px7770XDz/8MNatW4eZM2daPE6TyYSMjAw8/PDD+Mc//gEAiIyMRFlZWb0QR0TSYGWHiETVr18/4WuZTIbOnTtDq9Va7NOjRw+L77/77jv06NEDHTt2hF6vh16vh1wux+DBg/Htt98CAMLCwlBZWYnhw4cjJSUFOTk5iIyMxOTJk4WgAwAhISFC0AEAPz8/AIBWq8Xp06dx6dIlDB8+3OLn+/v7o0+fPjh48GC9x3Pq1ClcuXIFQ4cOtdj+t7/9zZpuISI7YmWHiETl4uJi8b1cLq+3Fk+7du0svi8pKcHZs2fRs2fPBo9ZWVmJPn36IDMzExs2bMD69euRmZmJ22+/HS+88AJGjx7d6LHl8pq/+YxGI0pKSgAAt99+e72fcfvtt9ebIwQApaWlAABPT0+L7R06dGiwrUQkPoYdImrz3N3dER4eXm8Iycw8bBUVFYWoqChUVlbi+++/x8aNG7Fo0SL07t0bISEhN/w5Hh4eAIDLly/Xu+3SpUv1Ag1wPeT8dT6POTgRkfQ4jEVEdmOumrRWeHg4Tp8+jYCAAPTq1Uv49+mnn2Lr1q1QKBR466238OSTT8JkMsHFxQVDhw4VFjRs7plRAQEB6NChA3bt2mWxPTc3Fz/++CPuvffeevfp2rUrfH198Z///Mdi+759+1r4aInI1hh2iMhuNBoNTp8+je+++67evBxrjB07FkajEWPHjsXu3bvx3Xff4fXXX8emTZsQEBAAoGay86+//opXX30VBw4cwJdffolFixbBw8MDAwYMaNbPkcvlmDZtGr755htMnz4d+/fvx44dOxAXF4f27dtbTJo2k8lkmDFjBvbt24c5c+bgm2++QXp6Oj766KMWP14isi2GHSKym+eeew5OTk5ISEhAVVVVi4/TsWNHbNmyBZ07d8a8efPwwgsv4Oeff8bixYsxduxYAMCQIUOQnJyMkydPYvLkyZg2bRpcXFywceNGYXiqOZ544gmkpaXh9OnTeOmll7B06VL06dMHW7dubXQezvDhw5Gamooff/wRkyZNwr59+7BgwYIWP14isi2ZiVfpIyIiIgfGyg4RERE5NIYdIiIicmgMO0REROTQGHaIiIjIoTHsEBERkUNj2CEiIiKHxrBDREREDo1hh4iIiBwaww4RERE5NIYdIiIicmgMO0REROTQ/j9MhQWg0HdyfwAAAABJRU5ErkJggg==\n", + "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAjoAAAHJCAYAAACMppPqAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/OQEPoAAAACXBIWXMAAA9hAAAPYQGoP6dpAABLmklEQVR4nO3de1xUdf7H8fcwA4IgAusF87LZRUlTsRQ18dpmbVlr5VYWmprXMvJG1q6VqZXl/bJ4y0s313bNtXL97VZb2d3bttWuWVZm3gAVBLkIwszvD5zRCTQG55zDjK/n4+FDOHNm+M5nRubt93ZsLpfLJQAAgCAUYnUDAAAAjELQAQAAQYugAwAAghZBBwAABC2CDgAACFoEHQAAELQIOgAAIGgRdAAAQNAi6AA1jNl7eAbLnqHB8jysRh0RbAg6wFkMHDhQLVu29PrToUMHDRo0SFu3bvX7zyspKdHTTz+tN99887wfa+DAgRo4cOA5z8nIyNCIESN04MABnx77kUceUe/evc+neT7p3bu3HnnkkbPenpeXp4cffljbt2/3HKvK8zfS+vXr1bJlS+3fv/+8H6tly5ZauHDhOc/x1/P961//qmefffYXz/vqq680cOBAtW/fXsnJyZozZ45KSkqq/HMyMjLUoUMHbdmypcJt7733nvr37682bdqoe/fuevrpp1VQUODT8wDORNABzqFVq1Z69dVX9eqrr2rNmjWaMWOGQkNDdd9992n37t1+/VlZWVl64YUXVFpa6tfHPZtPPvlEmzdv9vl+999/vxYtWmRAi6rn66+/1uuvvy6n02l1UwLe4sWLdezYsXOes2/fPg0ZMkS1atXSvHnzNHToUK1atUrTp0+v0s84dOiQhg4dquPHj1e47e2339bo0aNVu3ZtzZs3T3/4wx/02WefafDgwab9u0DwcVjdAKAmi4qKUmJiotexa665Rl26dNH69es1adIkaxpmoWbNmlndBFho+fLlioyMVHp6usLCwtSjRw+Fh4dr2rRpGjVqlC666KJK7+d0OrVhw4Zz9hgtXLhQl156qZ5//nmFhYVJkjp06KDrrrtO69ev1x133GHIc0Jwo0cH8FFERIRq1aolm83mdfydd97RbbfdpjZt2qhr166aPn26CgsLPbefOHFCU6ZMUffu3XXllVfqhhtu0IoVKyRJ+/fv17XXXitJevTRR885NFRcXKw//elPuuGGG9SmTRv16dNHy5Ytq7RH409/+pOuueYatW/fXvfff7/27dsnqXxo5dFHH5UkXXvttZ6hoRMnTmj27Nnq06ePrrzySl111VUaMmSIvv76a89j/nzoqnfv3lqwYIGeffZZXXPNNWrbtq3uu+8+/fjjj15t2b59u1JSUtSuXTslJSVp0qRJys7O9jpn165dGjJkiNq3b69evXrpjTfeOGsdJGnLli0aNGiQJGnQoEFewzcul0vLly9Xz5491bZtW91555368ssvPbcvXLhQ1113nRYtWqSkpCQlJycrNzdXUvkQzk033aQrr7xSPXv21MKFC1VWVua5b3Z2tiZMmKCuXbuqTZs2+t3vfqcNGzZUaN8XX3yhu+66S23atFHPnj31/PPPe91+/PhxPfPMM/rNb36jNm3aqG/fvlq3bt05n/PBgwc1ZswYXX311eratatWrVp1zvPddu3apTFjxqhz585q3bq1unXrpunTp+vEiROSyl/HAwcO6G9/+9s5h90++ugj9ejRwxNEJOmGG26Q0+nURx99dNaf/8033+iJJ55Qv3799Nxzz1V6zg8//KDk5GSvx65Xr54uueQSvf/++1V6nsDP0aMDnIPL5fJ0mbtcLh07dkwvvPCCSkpKdPvtt3vOe/PNNzVx4kTdfPPNGjt2rA4cOKC5c+fqu+++06pVq2Sz2fT000/ro48+0qRJk1SvXj198MEHeu655xQTE6Obb75ZixYt0pgxYzR69Gj16dPnrO0ZNWqU/vOf/2jMmDFKSEjQli1
bNG/ePO3bt0/Tpk3znLtjxw4dPXpUjz/+uMrKyjR79mwNGjRIb775pnr27KnRo0dr8eLFWrRokVq2bClJnrku48ePV7NmzbR3717Nnz9fEyZM0N///vcK4c7txRdf1NVXX61nnnlGubm5euqppzRp0iS9+uqrkqRt27ZpyJAh6ty5s+bNm6fc3FzNnz9fgwYN0rp16xQeHq7MzEylpKTo4osv1syZM5Wfn69Zs2bp6NGjZ319Wrdurccff1xTp07V448/rk6dOnk9/5KSEj322GMqLS3VjBkzNHr0aG3evFkOR/mvvoMHD2rz5s2aO3eujh07prp162rp0qWaO3euUlJS9Oijj+rrr7/WwoULdejQIT399NOSpLS0NB09elRPPvmkoqKi9Prrr2vSpEmKj49X586dPW2YMmWKUlNT9dBDD+kvf/mLZs6cqUsvvVS9evXSiRMndPfdd+vo0aNKTU1V48aN9c477+iPf/yjjhw5olGjRlV4voWFhUpJSZHD4dC0adMUEhKiBQsW6KefflL79u3PWqesrCzdc889SkxM1IwZMxQWFqYPPvhAq1atUoMGDTRixAgtWrRII0aMUKtWrXT//ferQYMGFR7nxIkTOnDggJo3b+51PC4uTlFRUdqzZ89Z29CoUSO9/fbbio+Pr3RujiTFxMTo4MGDXsdOnjypQ4cO+TQHCDgTQQc4h23btql169YVjo8fP16XXnqppPLwMWvWLHXr1k2zZs3ynHPxxRdr8ODB2rx5s3r27KmtW7eqa9euuummmyRJnTp1Uu3atfWrX/1KYWFhuuKKKySVDw21atWq0vZ88MEH+uSTTzRnzhzP43Tt2lXh4eGe4HD55ZdLkux2u1auXKn4+HhJ0iWXXKJ+/fppw4YNSklJ8QxBXXHFFWrSpIlKSkpUUFCgyZMn68Ybb5QkJSUlKT8/XzNmzNCRI0dUv379StsVHR2t9PR02e12SdJPP/2khQsXKicnR7GxsZo9e7aaN2+upUuXes5p166dbrrpJr322mu65557tHr1apWVlWnZsmWKi4uTJDVv3vycwxVRUVG67LLLJEmXXXaZ52tJCgsL07JlyxQTEyOpfNLy5MmT9d133ykhIUGSVFpaqkmTJqlDhw6SyntY0tPTdeedd2ry5MmSpOTkZMXExGjy5MkaMmSILr/8cm3dulUPPPCAfvOb33jqFBMT49UTIZW/TwYMGCBJSkxM1Ntvv63PPvtMvXr10vr16/Xtt99q7dq1npDSrVs3lZaWKj09XXfddZen7W5/+9vfdPDgQW3cuNHzXNu1a6frrrvurDWSpG+//VZXXHGF5s+fr6ioKEnlQ7Aff/yxtmzZ4gk4YWFhiouLqzBc6+aeV+N+jDNFRkYqPz//rG34+XOpzO23364lS5Zo2bJl6t+/v06cOKF58+bp+PHjql279i/eH6gMQQc4h9atW+vJJ5+UVB5o8vLy9MEHH2ju3LkqLCzUuHHj9MMPPygjI0MjR470mjDZsWNHRUVF6eOPP1bPnj3VqVMnrV27VhkZGerRo4d69OihBx54wKf2bN26VQ6HQzfccIPX8VtuuUXz58/X1q1bPUHnqquu8oQcqTzQNG3aVNu2bVNKSkqFxw4LC/MMpWVmZmrPnj368ccf9d5770nSOf9H3aZNG0+AkeT5uUVFRQoPD9cXX3yh++67z6uHrGnTprr00kv18ccf65577tGOHTuUmJjoCTlS+Yf42eZ8/JLLLrvM68O1SZMmklRhEqw7YErS559/rhMnTqh3795er6V7qO7jjz/W5Zdfrk6dOmnhwoXauXOnunXrph49elQ6X8sdoKTyIc969eopLy9PUvlr2bhx4wo9MbfccovWrVunL774Qj169PC6bfv27WrWrJlXoGvUqNFZg4lbcnKykpOTdfLkSX333Xfau3evvv32W2VnZ1cpgLj90oTvs/X4VdWDDz6osrIyLViwQLNnz1ZoaKh+//vf69prr9X
3339/Xo+NCxdBBziHyMhItWnTxutYcnKyCgsL9fzzz2vQoEGeVSpPPvmkJxSdKSsrS5L0xz/+UfHx8XrjjTc0bdo0TZs2Te3bt9eUKVM8PQy/JDc3V7GxsV6hQpKnp+XMD/F69epVuP+vfvUrzwdtZT788EM9/fTT+uGHHxQZGamEhATP/6TPtb9KRESE1/chIeXT/5xOp/Ly8uR0OrV8+XItX768wn1r1arleW7uMFLZc/PVz3sAzmzTmSIjIz1fu1/LESNGVPqY7tdy7ty5WrJkif7v//5P//znPxUSEqJrrrlGU6dOVePGjT3nV1YXdx1zc3MrfW7u162y18n9+v9c/fr1deTIkUrbLJU/5zlz5uiVV15RYWGhGjVqpLZt23pqX1XunpzKlnvn5+erTp06Pj3ezzkcDk2cOFEPPvig9u3bpwYNGig6Olr33HOP6tate16PjQsXQQeohiuvvFJ//etftX//fkVHR0sqn9+SlJRU4Vz3L+iwsDCNHj1ao0eP1sGDB/Xee+8pPT3dM/+lKurWraucnByVlZV5hR33B/CZH4LuibVnOnz48Fnncvz000+e4ZilS5eqadOmstlseuWVV/Thhx9WqX2ViYyMlM1m0+DBgz3DbWdyh4HY2NhKP6x/abmzP7lfy1mzZuniiy+ucLs7hNSpU0dpaWlKS0vTDz/8oH/9619KT0/Xk08+qWXLllXpZ9WtW1d79+6tcPzw4cOSVGmgiY2NrfQ+v1SjZcuWafXq1XryySfVp08fTyDp379/ldrqFhkZqYYNG1Zow9GjR1VQUOAZzq2uLVu2qKSkRN26dfP0WpWWlurbb7/Vrbfeel6PjQsXq66Aavjyyy9lt9vVtGlTXXLJJfrVr36l/fv3q02bNp4/DRs21OzZs7Vz506dOHFC119/vVauXClJuuiii3TPPffopptu8ky+/HkvTWWSkpJUWlqqf/zjH17H3auTrr76as+xHTt2ePXwfPHFFzpw4IBnsqy7h8Ptv//9r4qLizVixAg1a9bMMwzhDjnV3TE3KipKrVq10g8//OBVn8svv1wLFy70TEzt3LmzPv/8c2VmZnru+91333lWip1NVepWVe3atVNoaKgyMzO92upwODRnzhzt379fBw4cUI8ePTyvwSWXXKLhw4frmmuuqTCR9lw6duyoAwcO6PPPP/c6/sYbbyg0NFRt27atcJ/OnTtr//79+uqrrzzHsrOz9Z///OecP2vHjh267LLLdPvtt3tCTmZmpr799luvHq6fvycq07VrV73//vteQ5n//Oc/ZbfbvSZiV8c///lPPfbYYzp58qTn2Guvvaa8vDzPfCjAV/ToAOeQn5/v9SFSUlKid999V6+99pruvPNOz3yScePG6fHHH5fdblevXr2Ul5en9PR0ZWZmqnXr1goPD1fr1q21aNEihYaGqmXLltqzZ4/+9re/6frrr5ckzwfQp59+qksvvVTt2rWr0J7u3burU6dOmjx5sjIzM5WQkKCtW7dq+fLluvXWW73mbjidTo0YMUKjRo1STk6OZs+erRYtWuiWW26RdLr34u2331b37t3VunVrORwOzZw5U0OHDlVJSYnWr1/vWdZ75lJ5X40fP14jRozQhAkTdMstt6isrEwrV67UF198ofvvv1+SdO+992rdunW67777PHM15s6dq9DQ0HM+trtu77//vurWrVvlYcDKxMbGatiwYZo/f77y8/PVqVMnZWZmav78+bLZbEpISFCdOnUUHx+v6dOnKz8/X82aNdN///tfbd68WSNHjqzyz7rtttu0Zs0aPfDAA0pNTVWTJk08760xY8Z4Xp8z/e53v9OLL76oMWPGaNy4cYqKitLixYt/ce5M27ZtlZ6ermXLlikxMVF79+7V0qVLVVJSoqKiIs950dHR2rlzp7Zu3aq2bdsqPDy8wmMNGzZMf//73zVs2DANGTJEP/74o+bMmaM77rjDM5+qpKREO3fuVHx8vNc8sV9
y11136S9/+YseeeQR9e/fX7t27dLs2bN14403VtpbClQFQQc4h507d+rOO+/0fF+rVi01a9ZM48aN03333ec5/vvf/16RkZF6/vnn9eqrr6p27dq66qqrNGvWLDVt2lSSNHXqVM2bN08rV67U4cOH9atf/Ur9+/fXQw89JKm852PIkCF69dVXtXnzZn388ccVPuRtNpuWLl2qBQsWaPXq1crOzlaTJk00fvx4DRkyxOvc3/zmN7rooouUlpam0tJS9erVS3/84x898zI6deqka665RrNnz9ann36qZcuWafbs2Vq0aJFGjx6tunXrKjExUS+99JIGDhyo7du3e5ah+yo5OVkrVqzQokWLlJqaqtDQULVu3VqrVq3yTKSNjY3Vn//8Zz311FN65JFHFBkZqWHDhmnTpk3nfOzLL79cffv29Qyxbdy4sVptdBs7dqzq16+vNWvW6Pnnn1fdunXVpUsXjR8/3hOqFi1apDlz5mj+/PnKyclRo0aNNGbMmLPO7alMRESEXnrpJc2ePdsTrC655BI99dRTZx1SCgsL0wsvvKCnn35aTz31lGw2m+644w41bdr0nMvwR44cqZycHL344ov605/+pEaNGul3v/ud5/2Ul5en6OhoDR06VE8//bTuu+8+rVq1ymsytdull16qlStX6rnnnlNqaqpiY2M1ePBgpaames7JysrSnXfeqTFjxujBBx+sck1atGihpUuXavbs2Ro1apTq1aunUaNG+RQggZ+zubiCGwAACFLM0QEAAEGLoAMAAIIWQQcAAAQtgg4AAAhaBB0AABC0CDoAACBoEXQAAEDQYsNAlW9t73Qas51QSIjNsMe+IDmdsu/7SZJU1rSZdMaW9dTaPNTaPNTaPNTaPP6odUiIzXOpmnMh6EhyOl3Kzq54Nd7z5XCEKDY2Unl5hSotPfcW7aiiggLVP3XhwOw9h6RTV56m1uah1uah1uah1ubxV63j4iJlt/9y0GHoCgAABC2CDgAACFoEHQAAELQIOgAAIGgRdAAAQNAi6AAAgKBF0AEAAEGLoAMAAIIWQQcAAAQtgg4AAAhaBB0AABC0CDoAACBoEXQAAEDQ4urlCFhz3vteJeERkiSbzaZatRwqLi6Vy+WyuGWVc4TY1D/xIl0cV9vqpgDABYOgg4ByZojZ8FWGisLCLWyN744VndT0m66wuhkAcMEg6CCgFJ4s83w9OKmJympHSpJCQmyKCA9T0YkSOZ01r0fnu8MFenf3ER0rOml1UwDggkLQQUDJKTypi099PbTzr6XI8qDjcIQoNjZSOTkFKi11Wta+s3l/9xG9u/uICkvKfvlkAIDfMBkZASW7MDB7RGqH2SVJBQQdADAVQQcBJSdAh34iTwUdenQAwFwEHQSUnIISq5tQLbXDykeJz5xjBAAwHkEHASVQJ/OeOXRVU5e/A0AwIuggoATqHB330FWZ06WSMoIOAJiFoIOAEqhzdCJC7Z6vC0tKLWwJAFxYCDoIKDmFgTlHxx5iU7ij/J8bK68AwDwEHQSUQO3RkU7P02HlFQCYh6CDgJIToHN0JJaYA4AVCDoIGMWlTuUXB25IcC8xL2CJOQCYhqCDgBGo83PcGLoCAPMRdBAwAnVpudvpoStWXQGAWQg6CBiBPD9HkmqHcr0rADAbQQcB42iQDF0RdADAPAQdBIzsAL3OlRtzdADAfAQdBIxA3kNHYnk5AFiBoIOAcTTge3ROLS9nMjIAmIagg4AR8JOR6dEBANMRdBAwAn55+alVV4VsGAgApiHoIGBkB8mqK3p0AMA8BB0EhDKnS8cCfDIyy8sBwHyWBx2n06kFCxaoW7duSkxM1PDhw7Vv376znv/GG2+oZcuWFf7s37/fxFbDbLknTsrpsroV54dVVwBgPofVDUhPT9eaNWs0Y8YMxcfHa+bMmRo2bJjefPNNhYWFVTj/m2++UVJSkubMmeN1PC4uzqwmwwLu+Tl1wy1/y1abe9UVQQcAzGNpj05
JSYlWrlyp1NRU9ezZUwkJCZo7d64yMjL01ltvVXqfb7/9Vi1btlT9+vW9/tjtdpNbDzO5NwuMjQy1uCXV55mjc7JMTleAd08BQICw9L/Hu3btUkFBgbp06eI5Fh0drVatWmnbtm3q27dvhft888036t27t9/b4nD4P/PZ7SFef6P6covL956Jq326l8/hCJEc3jWuybWuG3E6pJ10uhRZKzDDeSDUOlhQa/NQa/OYXWtLg05GRoYkqVGjRl7HGzRo4LntTLm5ucrMzNT27du1Zs0a5eTkqG3btkpLS1Pz5s2r3Y6QEJtiYyOrff9fEh0dYdhjXyiKXDZJUsO6p2sZGxspRXq/bjW51jEul0JsktMlhdaupdjocKubdF5qcq2DDbU2D7U2j1m1tjToFBUVSVKFuTi1atVSbm5uhfN3794tSXK5XHrmmWd04sQJLV68WHfffbfefPNN1atXr1rtcDpdyssrrNZ9z8VuD1F0dITy8opUVub0++NfSA4cyZckRYae/h9ATk6BdGrFeaDUunaoXfklZTqYdVxhZYE5VydQah0MqLV5qLV5/FXr6OiIKvUKWRp0wsPL/0dbUlLi+VqSiouLFRFRMel16NBBn376qWJjY2Wzlf8Pf9GiRerZs6fWr1+vESNGVLstpaXGvbHLypyGPv6F4Eh+sSQp9ozJyKWlTulnda3pta4dVh508opO1uh2VkVNr3UwodbmodbmMavWlg5GuoessrKyvI5nZWWpYcOGld4nLi7OE3IkKSIiQk2aNFFmZqZxDYXl3KuuYiICdzKyxKaBAGA2S4NOQkKCoqKitGXLFs+xvLw87dy5Ux07dqxw/quvvqpOnTqpsPD0MFN+fr5+/PFHXXbZZaa0GdZwB53Y2hW3HAgkpy/sSdABADNYGnTCwsKUkpKiWbNm6V//+pd27dqlcePGKT4+Xn369FFZWZkOHz6sEydOSJK6d+8up9Ophx9+WLt379ZXX32lBx98UHFxcbrtttusfCowmGd5ee0g6dE5yRXMAcAMlq+jS01NVf/+/TV58mQNGDBAdrtdK1asUGhoqA4dOqTk5GRt2rRJUvlQ1+rVq1VYWKgBAwZo8ODBqlOnjl588UXVqlXL4mcCo7hcLuWcuvxDXO3A3TBQkqIYugIAU1n+qWG325WWlqa0tLQKtzVp0kTffPON17HWrVtr5cqVZjUPNUBBSZmKT01YC/yhK4IOAJjJ8h4d4JfknJqfExEaonBHYG6y51Y7lAt7AoCZCDqo8bILy+fnxAV4b47E9a4AwGwEHdR47hVXcQE+EVniCuYAYDaCDmq84OrRYegKAMxE0EGNl13g3kMn8Ht0WF4OAOYi6KDG8/ToRAZ+jw5DVwBgLoIOajz3HJ1fBVGPDkNXAGAOgg5qvJxC967Igd+j415eTo8OAJiDoIMa72hQrbpieTkAmImggxovxxN0gqBHxzMZmaADAGYg6KBGKyl16nhx+QqlYOjRcQed4lKnSp0ui1sDAMGPoIMazb3iyh5iU51wyy/Ndt7cq64kqbCEJeYAYDSCDmq001ctD1WIzWZxa85fqD1Eofby58E8HQAwHkEHNZp7s8BgmJ/jxoU9AcA8BB3UaNmepeWBPz/HjU0DAcA8gT/poYb6OvO4ntj0jQpOlsnpcknMO62WolOrk4Jhs0C38iuYFxN0AMAEBB2DHMw9oT3ZhVY3I2i0io+2ugl+49kdmSXmAGA4go5Brm1RX38fFS1bWJjyjheprNRpdZMCVkSoXU1jI6xuht949tJh1RUAGI6gY6CL6kYoNjZSOTl2lRJ0cApzdADAPExGBkzGqisAMA9BBzBZbXp0AMA0BB3AZAxdAYB5CDqAyWqfuoI5q64AwHgEHcBkDF0BgHkIOoDJIlleDgCmIegAJnOvuqJHBwCMR9ABTObZGZmgAwCGI+gAJmPVFQCYh6ADmMy96qqQVVcAYDiCDmCyM4euXC4uaw8ARiLoACZzD12VOV0
qKSPoAICRCDqAySJOrbqSWGIOAEYj6AAms4fYFO4o/6fHyisAMBZBB7AAuyMDgDkIOoAFWGIOAOYg6AAW4MKeAGAOgg5gAYauAMAcBB3AAlzYEwDMQdABLOC+sCerrgDAWAQdwAIMXQGAOQg6gAUIOgBgDoIOYAHPHB1WXQGAoQg6gAU8y8vp0QEAQxF0AAswdAUA5iDoABaIDGV5OQCYgaADWMDdo8PQFQAYi6ADWIChKwAwB0EHsACrrgDAHAQdwALuVVf06ACAsSwPOk6nUwsWLFC3bt2UmJio4cOHa9++fVW67xtvvKGWLVtq//79BrcS8K8zh65cLpfFrQGA4GV50ElPT9eaNWs0bdo0rV27Vk6nU8OGDVNJSck573fgwAFNnTrVpFYC/uUeunJJKjrptLYxABDELA06JSUlWrlypVJTU9WzZ08lJCRo7ty5ysjI0FtvvXXW+zmdTqWlpal169Ymthbwn3BHiEJs5V+zxBwAjGNp0Nm1a5cKCgrUpUsXz7Ho6Gi1atVK27ZtO+v9lixZopMnT2rkyJFmNBPwO5vNpgiuYA4AhnNY+cMzMjIkSY0aNfI63qBBA89tP/fll19q5cqVWrdunTIzM/3WFofD/5nPbg/x+ht+cMbr5HCEeL4PxFpHhtlVUFKm/qu2y2Z1Y3xlU/m429lutkl3XtVYD//mctOaFIwC8X0dqKi1ecyutaVBp6ioSJIUFhbmdbxWrVrKzc2tcH5hYaEmTpyoiRMn6uKLL/Zb0AkJsSk2NtIvj1WZ6OgIwx77gnPGWyU2NlKK9H7dAqnWyZfX1/rPD0g6Z2aomX6hwS6X9N7uo3rm94mmNCfYBdL7OtBRa/OYVWtLg054eLik8rk67q8lqbi4WBERFQswffp0NW/eXHfddZdf2+F0upSXV+jXx5TK02p0dITy8opUVsaEU78oKFDsqS9zcgqkU3PWA7HWk6+7TA90/XXAhRx7SIii6oQr//gJlTkr1nrP0QIN//MXKiktK3+NUG2B+L4OVNTaPP6qdXR0RJV6hSwNOu4hq6ysLDVr1sxzPCsrSy1btqxw/muvvaawsDC1b99eklRWVj63oW/fvho1apRGjRpV7baUlhr3xi4rcxr6+BeUM+pYWur0+l4KvFpH17L0n2C1OBwhiq1TS47S0kprXbdWqCSp1OkKqNeiJgu093Ugo9bmMavWlv6WTUhIUFRUlLZs2eIJOnl5edq5c6dSUlIqnP/zlVhffPGF0tLStGzZMrVo0cKUNgM4N8ep5WRlzkDrqwIQjCwNOmFhYUpJSdGsWbMUFxenxo0ba+bMmYqPj1efPn1UVlam7Oxs1alTR+Hh4fr1r3/tdX/3hOWLLrpIMTExFjwDAD9nPxV0Sgk6AGoAy6eXp6amqn///po8ebIGDBggu92uFStWKDQ0VIcOHVJycrI2bdpkdTMBVJG7R6eUeQ4AagDLJwjY7XalpaUpLS2twm1NmjTRN998c9b7durU6Zy3AzCfw35q6MoluVwu2WwBt3geQBCxvEcHQHCxnxFsmKcDwGoEHQB+5e7RkZinA8B6BB0AfuUIOf1rhaADwGoEHQB+5Z6MLBF0AFiPoAPAr87IOQQdAJYj6ADwK5vNxhJzADUGQQeA3znYNBBADUHQAeB3nr10CDoALEbQAeB37r106NEBYDWCDgC/c9jLf7UQdABYjaADwO+YowOgpiDoAPA7d9Bhjg4AqxF0APid3dOjw/JyANYi6ADwu9P76NCjA8BaBB0AfucZunIRdABYi6ADwO/s9OgAqCEIOgD8zn0Fc1ZdAbAaQQeA37l3RiboALAaQQeA37G8HEBNQdAB4HcsLwdQUxB0APgdy8sB1BQEHQB+xyUgANQUBB0AfudedcUcHQBWI+gA8LtTFy+nRweA5Qg6APzOYWcfHQA1A0EHgN+xvBxATUHQAeB3DpaXA6ghCDoA/I5VVwBqCoIOAL/jop4AagqCDgC/8ywvdxF0AFi
LoAPA7xzu5eX06ACwGEEHgN+5e3SYowPAagQdAH7nsLPqCkDNQNAB4HfsowOgpiDoAPA7O8vLAdQQBB0Afsc+OgBqCoIOAL9zsI8OgBqCoAPA79hHB0BNQdAB4HfsjAygpiDoAPA7LuoJoKYg6ADwO/c+OiwvB2A1gg4Av7PbWHUFoGYg6ADwu9M7IxN0AFiLoAPA79hHB0BN4ajuHXNzc7V9+3ZlZWXp+uuv17Fjx9S8eXPZTnVZA7hwcVFPADVFtYLO4sWLtXTpUp04cUI2m01t27bVvHnzlJOTo5UrVyo6Otrf7QQQQOxc6wpADeHz0NXLL7+shQsXasiQIfrLX/4i16kNwVJSUrRv3z7Nnz/f740EEFhO74zM8nIA1vI56Lz00ksaMWKEHnroIbVu3dpzvEePHho7dqzeffddvzYQQOBhMjKAmsLnoHPw4EElJSVVetsll1yiI0eOnHejAAQ2JiMDqCl8DjqNGjXS559/Xult//3vf9WoUaPzbhSAwMYcHQA1hc9Bp3///lqyZIlWrFihH3/8UZJUWFiof/7zn1q6dKluvfVWnx7P6XRqwYIF6tatmxITEzV8+HDt27fvrOf/73//07333qv27durc+fOevzxx3X8+HFfnwYAA7HqCkBN4XPQGT58uG699VbNmjVLffv2lSQNGjRIY8eOVc+ePTVy5EifHi89PV1r1qzRtGnTtHbtWjmdTg0bNkwlJSUVzj1y5IiGDBmixo0ba/369UpPT9eOHTv0yCOP+Po0ABiIoSsANUW1lpdPnTpVQ4cO1WeffaZjx46pTp066tixo1q0aOHT45SUlGjlypWaOHGievbsKUmaO3euunXrprfeessTpNwOHDig5ORkTZ06VQ6HQ82bN9cdd9yhuXPnVudpADCInaADoIbwOejcfPPNmjBhgnr16qWLL774vH74rl27VFBQoC5duniORUdHq1WrVtq2bVuFoNOuXTvNmTPH8/3333+v119/XV27dj2vdgDwL8cZc3RcLhcbiQKwjM9B59ChQ4qIiPDLD8/IyJCkChOYGzRo4LntbK6//nr9+OOPaty4sRYtWnTebXE4/H81DLs9xOtv+MEZr5PDEeL5nlqbpyq1Dg+zn/4mxCYHr0u18L42D7U2j9m1rlaPzurVq3XJJZeoQYMG5/XDi4qKJElhYWFex2vVqqXc3Nxz3nfWrFkqKirSzJkzNWjQIL3++uuKjIysVjtCQmyKja3efasiOto/wRCSznirxMZGSj97zam1ec5V69DapafPq1tb4aH2s56LX8b72jzU2jxm1drnoPPjjz9q+/bt6tGjh2JiYlS7dm2v2202m955550qPVZ4eLik8rk67q8lqbi4+Bd7jdq0aSNJWrRokXr06KG3335b/fr18+GZnOZ0upSXV1it+56L3R6i6OgI5eUVqYwdYv2joECxp77MySmQTs1Zp9bmqUqtT5ws83x9+Gi+ompV+7J6FzTe1+ah1ubxV62joyOq1Cvk82+fRo0a6eabb65Woyp7LEnKyspSs2bNPMezsrLUsmXLCuf/8MMP+umnnzwTlyWpYcOGiomJUWZm5nm1pbTUuDd2WZnT0Me/oJxRx9JSp9f3ErU20zlrfcYc5BMlZQpnOOC88L42D7U2j1m19jnoPPPMM3774QkJCYqKitKWLVs8QScvL087d+5USkpKhfM/+eQTPffcc/roo488Fw796aeflJOTo0svvdRv7QJwfuxnzD1m5RUAK1W7P/mDDz7Q1q1blZeXp9jYWHXo0EHdunXz6THCwsKUkpKiWbNmKS4uTo0bN9bMmTMVHx+vPn36qKysTNnZ2apTp47Cw8PVt29fLVu2TGlpaZo4caJyc3M1ffp0tW3bVr169aruUwHgZzabTY4Qm0qdLnZHBmApn4NOSUmJ7r//fn300Uey2+2KjY1VTk6Oli1bps6dO2vp0qUVJhefS2pqqkpLSzV58mSdOHFCHTt21IoVKxQ
aGqr9+/fr2muv1TPPPKPbbrtNMTExeuGFFzRjxgwNGDBAdrtd1157rR555BHZ7Ux2BGoS+6mgU+pkGACAdXwOOgsXLtSOHTv03HPP6aabbpLdbldpaak2btyoJ598UosXL9ZDDz1U5cez2+1KS0tTWlpahduaNGmib775xutY8+bNtXTpUl+bDcBkjhCbiiWVltGjA8A6Ps8Q3Lhxo8aMGaNbbrnF04vicDjUr18/jRkzRm+++abfGwkg8HAZCAA1gc9BJzs7W61atar0tlatWp336icAwcG9SSBzdABYyeeg06xZM+3YsaPS27Zt21Zhl2MAFyb3yit6dABYyec5OnfddZdmzJih8PBw3XTTTapXr56OHDmijRs3avny5RozZowR7QQQYNw9OgQdAFbyOegMGDBAO3fu1KxZszR79mzPcZfLpVtvvVUjRozwawMBBKYzL+wJAFbxOeiEhIToqaee0tChQ7V161bl5uaqbt26SkpKYtM+AB52z2RklpcDsE61NgzcsWOHPvvsMz3wwAOSpJ07d2rBggUaPny4rrzySr82EEBgYtUVgJrA58nImzdv1r333quPPvrIc8xms+nHH3/U3Xffre3bt/u1gQACkyfosI8OAAv5HHQWLlyom266SWvWrPEcu+KKK/T666/rt7/9rebMmePXBgIITI4QlpcDsJ7PQef7779Xv379ZLPZKtzWr18/7dq1yy8NAxDYHKd+uzB0BcBKPgedOnXqaM+ePZXetm/fPtWuXfu8GwUg8NlZXg6gBvA56Fx33XWaP3++3nvvPa/jH374oebPn6/rrrvOb40DELgcrLoCUAP4vOpq3Lhx+uqrrzR69GiFhoYqJiZGx44dU2lpqdq1a6cJEyYY0U4AAYZ9dADUBD4HnaioKK1du1abN2/Wv//9bx07dkx16tRRhw4d1LNnT4WE+NxJBCAIsbwcQE1QrX10QkJC1KtXL/Xq1UuSVFpaqvz8fEIOAA+WlwOoCXxOJqWlpVq0aJHefPNNSdKWLVvUtWtXdenSRffee69yc3P93kgAgcdOjw6AGsDnoLNgwQItXrxYeXl5kqTp06crJiZGjz76qH766Sev618BuHC5L+rJHB0AVvI56Pz973/X+PHjdc899+j777/X7t27NXr0aA0aNEjjxo3Tu+++a0Q7AQQYh40eHQDW8znoZGVlqV27dpKk999/XyEhIerevbskKT4+XsePH/dvCwEEJIed5eUArOdz0GnQoIH2798vSXr33Xd1xRVXKC4uTpL0+eefKz4+3r8tBBCQWF4OoCbwOej07dtXzzzzjO677z7t2LFDt99+uyTpqaee0sKFC3XzzTf7vZEAAg+TkQHUBD4vLx87dqxq166tbdu2acKECbr77rslSV999ZWGDh2q+++/3++NBBB42EcHQE3gc9Cx2WwaOXKkRo4c6XV87dq1fmsUgMDHPjoAagJ2+ANgCMepDUTLXAQdANYh6AAwhJ0eHQA1AEEHgCG4ejmAmoCgA8AQp/fRoUcHgHUIOgAMwT46AGoCgg4AQ7CPDoCagKADwBDsowOgJqjSPjq9e/eW7dQF+n6JzWbTO++8c16NAhD43MvLCToArFSloDNkyBA9++yzioqKUq9evYxuE4AgwNAVgJqgSkFn4MCBiouL04QJE3TttdfqN7/5jdHtAhDgPJORy1heDsA6VZ6jc9NNN+mOO+7QM888o7KyMiPbBCAIMEcHQE3g07Wuxo4dq/DwcP30009q3ry5UW0CEATYRwdATVCloFNcXKxatWopLi5Of/jDH4xuE4AgwBwdADVBlYauevfurc8//1yStGjRImVmZhraKACBj6ErADVBlYLO8ePHlZWVJUn605/+RNAB8Is8Vy8n6ACwUJWGrtq0aaMJEybo2Weflcvl0gMPPKCwsLBKz2UfHQASPToAaoYqBZ05c+Zo9erVOnbsmDZs2KBWrVopLi7O6LYBCGCeOTosLwdgoSoFnYY
NG2rSpEmSpC1btmjcuHFKSEgwtGEAAhs9OgBqAp+Wl0vSu+++a0Q7AAQZ9/Jy5ugAsBIX9QRgCK51BaAmIOgAMIT91G8Xgg4AKxF0ABiCHh0ANQFBB4AhPBf1dLrkchF2AFiDoAPAEO7l5ZJURs4BYBGCDgBDOM4IOuylA8AqBB0AhvAKOszTAWARy4OO0+nUggUL1K1bNyUmJmr48OHat2/fWc/fvXu3RowYoU6dOqlLly5KTU3VwYMHTWwxgKpw2E//emEvHQBWsTzopKena82aNZo2bZrWrl0rp9OpYcOGqaSkpMK5OTk5GjJkiMLDw/XSSy9p+fLlys7O1rBhw1RcXGxB6wGcjf10hw49OgAsY2nQKSkp0cqVK5WamqqePXsqISFBc+fOVUZGht56660K57/zzjsqLCzUc889pxYtWujKK6/UzJkz9f333+vf//63Bc8AwNnYbLbT17si6ACwiKVBZ9euXSooKFCXLl08x6Kjo9WqVStt27atwvldunRRenq6wsPDPcdCTu3VkZeXZ3yDAfjk9PWumIwMwBo+X+vKnzIyMiRJjRo18jreoEEDz21natKkiZo0aeJ1bNmyZQoPD1fHjh3Pqy0Oh/8zn/3UHAW73fIRwuBxxuvkcIR4vqfW5vGl1qEhNhVLks1myL+xYMf72jzU2jxm19rSoFNUVCRJCgsL8zpeq1Yt5ebm/uL9X3rpJb388suaPHmy4uLiqt2OkBCbYmMjq33/XxIdHWHYY19wznirxMZGSpHerxu1Nk9Vau1whEglZYqMCjf031iw431tHmptHrNqbWnQcQ9BlZSUeA1HFRcXKyLi7AVwuVyaP3++Fi9erNGjR2vgwIHn1Q6n06W8vMLzeozK2O0hio6OUF5ekcrYR8Q/CgoUe+rLnJwC6dScdWptHl9q7Z6QfPRYoeqF8T9lX/G+Ng+1No+/ah0dHVGlXiFLg457yCorK0vNmjXzHM/KylLLli0rvc/Jkyf16KOPauPGjXr00Uc1ePBgv7SltNS4N3ZZmdPQx7+gnFHH0lKn1/cStTZTVWptt5UnneKSMl6X88D72jzU2jxm1drS/2IlJCQoKipKW7Zs8RzLy8vTzp07zzrn5uGHH9Y//vEPzZ49228hB4Ax3HvpsI8OAKtY2qMTFhamlJQUzZo1S3FxcWrcuLFmzpyp+Ph49enTR2VlZcrOzladOnUUHh6u9evXa9OmTXr44YeVlJSkw4cPex7LfQ6AmsPB8nIAFrN80Dw1NVX9+/fX5MmTNWDAANntdq1YsUKhoaE6dOiQkpOTtWnTJknSxo0bJUnPPfeckpOTvf64zwFQc9hZXg7AYpb26EiS3W5XWlqa0tLSKtzWpEkTffPNN57vV65caWbTAJwnd48OQ1cArGJ5jw6A4MXQFQCrEXQAGMYTdMoIOgCsQdABYBh6dABYjaADwDB2lpcDsBhBB4BhHDZ6dABYi6ADwDAOO8vLAViLoAPAMMzRAWA1gg4Aw7CPDgCrEXQAGMZOjw4AixF0ABiGfXQAWI2gA8AwjpBTy8tdBB0A1iDoADCMnR4dABYj6AAwjIOrlwOwGEEHgGFO76NDjw4AaxB0ABiGfXQAWI2gA8AwdvbRAWAxgg4Aw9CjA8BqBB0AhnEvL2fVFQCrEHQAGMbTo8M+OgAsQtABYJjT++iwvByANQg6AAzDRT0BWI2gA8Aw7KMDwGoEHQCGYdUVAKsRdAAYxk7QAWAxgg4Aw3iWlxN0AFiEoAPAMExGBmA1gg4Aw5xeXk7QAWANgg4Aw5yejMw+OgCsQdABYBiWlwOwGkEHgGGYowPAagQdAIZheTkAqxF0ABiG5eUArEbQAWAYBxf1BGAxgg4Aw3jm6NChA8AiBB0AhrHTowPAYgQdAIbhop4ArEbQAWAY9tEBYDWCDgDD2G3sowPAWgQdAIZx2E8vL3e5CDsAzEfQAWAY9xwdiZVXAKxB0AFgGK+gw/AVAAsQdAAY5sygwxX
MAViBoAPAMF5Bh7ErABYg6AAwjN2rR4egA8B8BB0AhrHZbJ6wwxwdAFYg6AAwFLsjA7ASQQeAoQg6AKxE0AFgqNNBh1VXAMxH0AFgKOboALASQQeAoRi6AmAly4OO0+nUggUL1K1bNyUmJmr48OHat29fle43bNgwLVy40IRWAqguT9BhHx0AFrA86KSnp2vNmjWaNm2a1q5d6wkwJSUlZ71PSUmJ/vCHP+jDDz80saUAqsN9YU+GrgBYwdKgU1JSopUrVyo1NVU9e/ZUQkKC5s6dq4yMDL311luV3uff//63brvtNm3fvl3R0dEmtxiAr+w2hq4AWMfSoLNr1y4VFBSoS5cunmPR0dFq1aqVtm3bVul9Nm/erG7dumnDhg2qU6eOWU0FUE0OO6uuAFjHYeUPz8jIkCQ1atTI63iDBg08t/3cuHHjDGmLw+H/zGc/1WXv/ht+cMbr5HCEeL6n1ubxtdbuoOOy2Qz5dxbMeF+bh1qbx+xaWxp0ioqKJElhYWFex2vVqqXc3FzT2hESYlNsbKRhjx8dHWHYY19wznirxMZGSpHerxu1Nk9Vax0eVv5rJjwizNB/Z8GM97V5qLV5zKq1pUEnPDxcUvlcHffXklRcXKyICPPebE6nS3l5hX5/XLs9RNHREcrLK1JZGd32flFQoNhTX+bkFEin5qxTa/P4XOtTc3OO5RWVv2aoMt7X5qHW5vFXraOjI6rUK2Rp0HEPWWVlZalZs2ae41lZWWrZsqWpbSktNe6NXVbmNPTxLyhn1LG01On1vUStzVTVWp8auVLJSV6b6uJ9bR5qbR6zam3pYGRCQoKioqK0ZcsWz7G8vDzt3LlTHTt2tLBlAPzFEVL+a4ZVVwCsYGmPTlhYmFJSUjRr1izFxcWpcePGmjlzpuLj49WnTx+VlZUpOztbderU8RraAhA43JOR2UcHgBUsn16empqq/v37a/LkyRowYIDsdrtWrFih0NBQHTp0SMnJydq0aZPVzQRQTaf30WE4AID5LO3RkSS73a60tDSlpaVVuK1Jkyb65ptvznrfd99918imAfCD0/vo0KMDwHyW9+gACG5c1BOAlQg6AAzlDjrM0QFgBYIOAEPZ6dEBYCGCDgBDeZaXlxF0AJiPoAPAUJ45Oi6CDgDzEXQAGMozdEWPDgALEHQAGOr0qiv20QFgPoIOAEOxMzIAKxF0ABiKfXQAWImgA8BQLC8HYCWCDgBDcfVyAFYi6AAwlGfoqozJyADMR9ABYKjTl4CwuCEALkgEHQCGsrO8HICFCDoADMWqKwBWIugAMBT76ACwEkEHgKFYXg7ASgQdAIZieTkAKxF0ABjKwUU9AViIoAPAUJ7l5S6CDgDzEXQAGMrOhoEALETQAWAolpcDsBJBB4Ch3MvLCToArEDQAWAo96or9tEBYAWCDgBDsY8OACsRdAAYijk6AKxE0AFgKAerrgBYiKADwFDsowPASgQdAIayszMyAAsRdAAYijk6AKxE0AFgKIed5eUArEPQAWAoh628R8clwg4A8xF0ABjKvTOyxPAVAPMRdAAYyj1HR5JKnSwxB2Augg4AQ50ZdBi6AmA2gg4AQ9lDGLoCYB2CDgBD2Ww2uafpsJcOALMRdAAYzr3EnB4dAGYj6AAwnOcyEAQdACYj6AAwnJ3dkQFYhKADwHCnLwPB8nIA5iLoADAc17sCYBWCDgDDMUcHgFUIOgAM55mjw/JyACYj6AAwnCOE5eUArEHQAWA494U9GboCYDaCDgDD2W1MRgZgDYIOAMO5e3RYXg7AbAQdAIZjeTkAqxB0ABiO5eUArGJ50HE6nVqwYIG6deumxMREDR8+XPv27Tvr+Tk5OZowYYI6duyopKQkPfnkkyoqKjKxxQB8xSUgAFjF8qCTnp6uNWvWaNq0aVq7dq2cTqeGDRumkpKSSs9PTU3V3r17tXr1as2fP1+bN2/WlClTzG00AJ94lpezjw4Ak1kadEpKSrRy5UqlpqaqZ8+
eSkhI0Ny5c5WRkaG33nqrwvmff/65tm7dqmeffVatW7dWly5dNHXqVL3++uvKzMy04BkAqAqudQXAKg4rf/iuXbtUUFCgLl26eI5FR0erVatW2rZtm/r27et1/vbt21W/fn1deumlnmNJSUmy2WzasWOHbrzxxmq3xeHwf+az20O8/oYfnPE6ORwhnu+ptXmqU+vQU6/TP3Yd1u4jhYa0KxjZbDaFhdlVUlIml4veMCNRa+NE1bJrYFJTxdUOk2T+72tLg05GRoYkqVGjRl7HGzRo4LntTJmZmRXODQsLU0xMjA4dOlTtdoSE2BQbG1nt+/+S6OgIwx77ghN2+svY2Egp0vt1o9bm8aXWDWLKz/18f64+359rVJMA1FAJjWN0V1Izr2Nm/b62NOi4JxGHhYV5Ha9Vq5Zycyv+MiwqKqpwrvv84uLiarfD6XQpL8///8u020MUHR2hvLwilZXRZe8XBQWKPfVlTk6BdGoqF7U2T3VqPbhDY9WPcKi4lNfGFyE2m2rVClVx8Uk56WUwFLU2Tp1aDiU3q1v+O1v++30dHR1RpV4hS4NOeHi4pPK5Ou6vJam4uFgRERWTXnh4eKWTlIuLi1W7du3zakupgb+Ay8qchj7+BeWMOpaWOr2+l6i1mXypdWx4qFKubmJwi4KPwxGi2NhI5eQU8L42GLU23s/ratbva0snNLiHobKysryOZ2VlqWHDhhXOj4+Pr3BuSUmJjh07pgYNGhjXUAAAEJAsDToJCQmKiorSli1bPMfy8vK0c+dOdezYscL5HTt2VEZGhvbu3es5tnXrVknS1VdfbXyDAQBAQLF06CosLEwpKSmaNWuW4uLi1LhxY82cOVPx8fHq06ePysrKlJ2drTp16ig8PFzt2rXTVVddpXHjxmnKlCkqLCzU448/rn79+lXaAwQAAC5slq/FTU1NVf/+/TV58mQNGDBAdrtdK1asUGhoqA4dOqTk5GRt2rRJUvnyv0WLFqlJkya69957NXbsWHXv3p0NAwEAQKVsLjYMUFmZU9nZBX5/XCa3GaCgQPWbl8/tOrznkGd5ObU2D7U2D7U2D7U2j79qHRcXWaVVV5b36AAAABiFoAMAAIIWQQcAAAQtgg4AAAhaBB0AABC0CDoAACBoEXQAAEDQIugAAICgxYaBklwul5xOY8pgt4ec12Xo8TNOp+z7fpIklTVtJoWczurU2jzU2jzU2jzU2jz+qHVIiE02m+0XzyPoAACAoMXQFQAACFoEHQAAELQIOgAAIGgRdAAAQNAi6AAAgKBF0AEAAEGLoAMAAIIWQQcAAAQtgg4AAAhaBB0AABC0CDoAACBoEXQAAEDQIugAAICgRdA5D06nUwsWLFC3bt2UmJio4cOHa9++fWc9PycnRxMmTFDHjh2VlJSkJ598UkVFRSa2OHD5Wuvdu3drxIgR6tSpk7p06aLU1FQdPHjQxBYHLl9rfaY33nhDLVu21P79+w1uZXDwtdYnT57U7NmzPeenpKTo66+/NrHFgcvXWh89elQTJkxQ586d1alTJ40bN06ZmZkmtjg4LF26VAMHDjznOUZ/NhJ0zkN6errWrFmjadOmae3atXI6nRo2bJhKSkoqPT81NVV79+7V6tWrNX/+fG3evFlTpkwxt9EBypda5+TkaMiQIQoPD9dLL72k5cuXKzs7W8OGDVNxcbEFrQ8svr6v3Q4cOKCpU6ea1Mrg4Gutp0yZovXr1+vpp5/Wa6+9pri4OA0fPlzHjx83ueWBx9dajx07VgcPHtSqVau0atUqHTx4UA888IDJrQ5sr7zyiubNm/eL5xn+2ehCtRQXF7vat2/veuWVVzzHcnNzXW3btnW9+eabFc7/97//7WrRooXru+++8xz78MMPXS1btnRlZGSY0uZA5Wut//KXv7jat2/vKioq8hw7ePCgq0WLFq5PPvnElDYHKl9r7VZWVuYaMGCAa9CgQa4WLVq49u3bZ0ZzA5qvtf7pp59cLVu2dL3
33nte5/fq1Yv39S/wtda5ubmuFi1auP71r395jr3zzjuuFi1auHJycsxockDLyMhwjRw50pWYmOi64YYbXCkpKWc914zPRnp0qmnXrl0qKChQly5dPMeio6PVqlUrbdu2rcL527dvV/369XXppZd6jiUlJclms2nHjh2mtDlQ+VrrLl26KD09XeHh4Z5jISHlb/W8vDzjGxzAfK2125IlS3Ty5EmNHDnSjGYGBV9r/fHHH6tOnTrq3r271/nvvvuu12OgIl9rHR4ersjISG3YsEH5+fnKz8/X66+/rubNmys6OtrMpgek//3vfwoNDdUbb7yhdu3anfNcMz4bHX55lAtQRkaGJKlRo0Zexxs0aOC57UyZmZkVzg0LC1NMTIwOHTpkXEODgK+1btKkiZo0aeJ1bNmyZQoPD1fHjh2Na2gQ8LXWkvTll19q5cqVWrduHXMYfOBrrffs2aOmTZvqrbfe0rJly5SZmalWrVrpkUce8fqQQEW+1josLEwzZszQ448/rg4dOshms6lBgwZ6+eWXPf9pwtn17t1bvXv3rtK5Znw28opVk3uiVFhYmNfxWrVqVToPpKioqMK55zofp/la65976aWX9PLLL2vixImKi4szpI3BwtdaFxYWauLEiZo4caIuvvhiM5oYNHytdX5+vvbu3av09HSNHz9eixcvlsPh0N13362jR4+a0uZA5WutXS6Xvv76a7Vv316vvPKKXnjhBV100UW6//77lZ+fb0qbLxRmfDYSdKrJPSzy84lsxcXFioiIqPT8yia9FRcXq3bt2sY0Mkj4Wms3l8ulefPmafr06Ro9evQvzvyH77WePn26mjdvrrvuusuU9gUTX2vtcDiUn5+vuXPnKjk5WW3bttXcuXMlSX/729+Mb3AA87XW//d//6eXX35ZM2fO1NVXX62kpCQtWbJEBw4c0Lp160xp84XCjM9Ggk41ubvasrKyvI5nZWWpYcOGFc6Pj4+vcG5JSYmOHTumBg0aGNfQIOBrraXyZbhpaWlasmSJHn30UY0dO9boZgYFX2v92muv6ZNPPlH79u3Vvn17DR8+XJLUt29fLVmyxPgGB7Dq/A5xOBxew1Th4eFq2rQpy/l/ga+13r59u5o3b66oqCjPsbp166p58+bau3evsY29wJjx2UjQqaaEhARFRUVpy5YtnmN5eXnauXNnpfNAOnbsqIyMDK9/JFu3bpUkXX311cY3OID5WmtJevjhh/WPf/xDs2fP1uDBg01qaeDztdZvvfWWNm7cqA0bNmjDhg2aPn26pPI5UfTynFt1foeUlpbqq6++8hw7ceKE9u3bp1//+temtDlQ+Vrr+Ph47d2712vopLCwUPv372eI1s/M+GxkMnI1hYWFKSUlRbNmzVJcXJwaN26smTNnKj4+Xn369FFZWZmys7NVp04dhYeHq127drrqqqs0btw4TZkyRYWFhXr88cfVr1+/s/ZKoJyvtV6/fr02bdqkhx9+WElJSTp8+LDnsdznoHK+1vrnH7DuiZ0XXXSRYmJiLHgGgcPXWnfo0EHXXHONJk2apKlTpyomJkYLFiyQ3W7X7373O6ufTo3ma6379eunFStWaOzYsXrooYckSfPmzVOtWrV02223WfxsApsln41+WaR+gSotLXU999xzrs6dO7sSExNdw4cP9+wfsm/fPleLFi1cr732muf8I0eOuB588EFXYmKiq1OnTq4nnnjCdeLECauaH1B8qfWQIUNcLVq0qPTPma8HKufr+/pMn332Gfvo+MDXWh8/ftz1xBNPuDp16uRq166da8iQIa7du3db1fyA4mutv/vuO9fIkSNdSUlJrs6dO7vGjBnD+7oaJk2a5LWPjhWfjTaXy+XyT2QCAACoWZijAwAAghZBBwAABC2CDgAACFoEHQAAELQIOgAAIGgRdAAAQNAi6AAAgKBF0AFgKLbqAmAlgg4Aw/zrX//SpEmTJElbtmxRy5Ytva43ZLbevXvrkUceOe/
HWbhwoVq2bHnOc2rC8wXAta4AGGj16tVWNwHABY4eHQAAELQIOgAMMXDgQG3dulVbt271Gub54YcfdN9996ldu3bq2rWrZs2apdLSUs/tLVu21KJFi3Tbbbepbdu2WrRokSTp4MGDGj9+vJKSktSuXTvde++92rlzp9fP3Lhxo2655Ra1bdtWnTt31sSJE5WZmel1zsmTJ/Xcc8+pa9euSkxM1NChQ7V3716vcz7++GPdfffduvrqq9WpUydNmDBBhw4dOufzXbt2ra6//nq1bdtWKSkpOnjwYLXqBsC/CDoADPHEE0+oVatWatWqlV599VXl5+dLkp555hldffXVWrJkiX77299q+fLlWrt2rdd9lyxZoptvvlkLFizQ9ddfr+zsbN1111363//+p8cee0yzZ8+W0+nUPffco++//16StGPHDj388MPq06ePli9frkcffVSfffaZJkyY4PXYmzZt0u7duzVjxgw98cQT+u9//6tx48Z5bt+wYYOGDh2qRo0aac6cOXr00Uf1+eef684779TRo0crfa4vv/yynnjiCfXo0UPp6elq166dHnvsMX+WE0A1MUcHgCEuu+wyRUVFSZISExM9k3IHDRqk+++/X5LUuXNnvfPOO/rss8+UkpLiuW+HDh00ZMgQz/dz587VsWPH9Oc//1mNGzeWJHXv3l033nij5s+frwULFmjHjh0KDw/XiBEjFBYWJkmKiYnRV199JZfLJZvNJklq2LCh0tPTFRoaKknau3evFi9erPz8fNWuXVuzZs1ScnKyZs+e7fn5V111lW688UatWLFCDz/8sNfzdLlcSk9P14033qg//OEPkqTk5GTl5+dXCHAAzEePDgBTdejQwfO1zWZT48aNlZeX53XOFVdc4fX9p59+qiuuuEINGzZUaWmpSktLFRISou7du+uTTz6RJHXs2FFFRUXq27evZs+ere3btys5OVljxozxhBxJatu2rSfkSFKTJk0kSXl5edqzZ48OHz6svn37ev38Zs2aqX379tq6dWuF5/PDDz/o6NGj6tWrl9fx3/72t76UBYBB6NEBYKqIiAiv70NCQirstVO7dm2v748dO6a9e/eqdevWlT5mUVGR2rdvr2XLlmn16tVatWqVli1bpnr16mnUqFEaOHDgWR87JKT8/3tOp1PHjh2TJNWrV6/Cz6hXr16FOUGSlJubK0mKjY31Ol6/fv1K2wrAXAQdADVenTp1lJSUVGHYyM09VNWtWzd169ZNRUVF+uyzz/Tiiy9q+vTpateundq2bfuLPycmJkaSdOTIkQq3HT58uEKYkU4HnJ/P33GHJgDWYugKgGHcvSXnKykpSXv27FHz5s3Vpk0bz5/XX39d69atk91u17PPPqvbb79dLpdLERER6tWrl2ezwqqugGrevLnq16+vjRs3eh3ft2+f/vOf/+iqq66qcJ+LL75YjRo10j/+8Q+v4++99141ny0AfyLoADBMdHS09uzZo08//bTCPBxfDB48WE6nU4MHD9amTZv06aef6rHHHtNLL72k5s2bSyqf2Py///1PjzzyiD7++GO9//77mj59umJiYtS5c+cq/ZyQkBCNHz9eH330kSZMmKDNmzdrw4YNGjJkiOrWres1QdrNZrNp4sSJeu+99zR58mR99NFHWrRokf785z9X+/kC8B+CDgDD3HPPPQoNDdXw4cN14sSJaj9Ow4YNtXbtWjVu3FhTpkzRqFGj9OWXX+qpp57S4MGDJUk9evTQrFmztHv3bo0ZM0bjx49XRESEXnzxRc+QVFXcdtttWrBggfbs2aMHHnhAM2bMUPv27bVu3bqzzrvp27ev5s6dq//85z8aPXq03nvvPU2dOrXazxeA/9hcXHEPAAAEKXp0AABA0CLoAACAoEXQAQAAQYugAwAAghZBBwAABC2CDgAACFoEHQAAELQIOgAAIGgRdAAAQNAi6AAAgKBF0AEAAEHr/wGtYmJ5hLGwVgAAAABJRU5ErkJ
ggg==", "text/plain": [ "
" ] @@ -182,11 +248,19 @@ "plt.title(f\"Best obtained threshold at {threshold}\")\n", "plt.show()" ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "aa0b487c", + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { "kernelspec": { - "display_name": "Python 3 (ipykernel)", + "display_name": "venv", "language": "python", "name": "python3" }, @@ -200,7 +274,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.16" + "version": "3.8.0" } }, "nbformat": 4, diff --git a/setup.py b/setup.py index a6b7217..c4a2158 100644 --- a/setup.py +++ b/setup.py @@ -54,9 +54,9 @@ 'Sphinx>=3,<3.3', 'sphinx_rtd_theme>=0.2.4,<0.5', 'autodocsumm>=0.1.10', - 'mistune>=0.7,<2', + 'mistune>=0.7,<3.1', 'Jinja2>=2,<3.1', - + # fails on Sphinx < v3.4 'alabaster<=0.7.12', # fails on Sphins < v5.0 @@ -65,7 +65,7 @@ 'sphinxcontrib-htmlhelp<2.0.5', 'sphinxcontrib-serializinghtml<1.1.10', 'sphinxcontrib-qthelp<1.0.7', - + # style check 'flake8>=3.7.7,<4', 'isort>=4.3.4,<5', diff --git a/tests/test_core.py b/tests/test_core.py index f8240cb..1627cf6 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -1,13 +1,7 @@ -import os -import pickle - -from mlblocks import MLBlock import numpy as np import pandas as pd -import pytest - +from mlblocks import MLBlock from zephyr_ml.core import DEFAULT_METRICS, Zephyr -import logging class TestZephyr: @@ -50,8 +44,7 @@ def base_dfs(): 'COD_MAT_DOC': [77889900, 12345690], 'DES_MEDIUM': ['Description of notification 1', 'Description of notification 2'], 'COD_NOTIF': [567890123, 32109877], - 'DAT_MALF_START': [pd.Timestamp('2021-12-25 18:07:10'), - pd.Timestamp('2022-02-28 06:04:00')], + 'DAT_MALF_START': [pd.Timestamp('2021-12-25 18:07:10'), pd.Timestamp('2022-02-28 06:04:00')], 'DAT_MALF_END': [pd.Timestamp('2022-01-08 11:07:17'), pd.Timestamp('2022-03-01 17:00:13')], 'IND_BREAKDOWN_DUR': [14.1378, 2.4792], 'FUNCT_LOC_DES': ['location description 1', 'location description 
2'], @@ -61,19 +54,15 @@ def base_dfs(): work_orders_df = pd.DataFrame({ 'COD_ELEMENT': [0, 0], 'COD_ORDER': [12345, 67890], - 'DAT_BASIC_START': [pd.Timestamp('2022-01-01 00:00:00'), - pd.Timestamp('2022-03-01 00:00:00')], - 'DAT_BASIC_END': [pd.Timestamp('2022-01-09 00:00:00'), - pd.Timestamp('2022-03-02 00:00:00')], + 'DAT_BASIC_START': [pd.Timestamp('2022-01-01 00:00:00'), pd.Timestamp('2022-03-01 00:00:00')], + 'DAT_BASIC_END': [pd.Timestamp('2022-01-09 00:00:00'), pd.Timestamp('2022-03-02 00:00:00')], 'COD_EQUIPMENT': [98765, 98765], 'COD_MAINT_PLANT': ['ABC', 'ABC'], 'COD_MAINT_ACT_TYPE': ['XYZ', 'XYZ'], 'COD_CREATED_BY': ['A1234', 'B6789'], 'COD_ORDER_TYPE': ['A', 'B'], - 'DAT_REFERENCE': [pd.Timestamp('2022-01-01 00:00:00'), - pd.Timestamp('2022-03-01 00:00:00')], - 'DAT_CREATED_ON': [pd.Timestamp('2022-03-01 00:00:00'), - pd.Timestamp('2022-04-18 00:00:00')], + 'DAT_REFERENCE': [pd.Timestamp('2022-01-01 00:00:00'), pd.Timestamp('2022-03-01 00:00:00')], + 'DAT_CREATED_ON': [pd.Timestamp('2022-03-01 00:00:00'), pd.Timestamp('2022-04-18 00:00:00')], 'DAT_VALID_END': [pd.NaT, pd.NaT], 'DAT_VALID_START': [pd.NaT, pd.NaT], 'COD_SYSTEM_STAT': ['ABC XYZ', 'LMN OPQ'], @@ -117,7 +106,6 @@ def base_train_test_split(self): 'feature 2': [0] * 150 + [1] * 150, }) y_train = X_train['feature 2'].to_list() - X_test = pd.DataFrame({ 'feature 1': np.random.random((100)), 'feature 2': [0] * 25 + [1] * 50 + [0] * 25, @@ -132,43 +120,40 @@ def setup_class(cls): 'feature 2': [0] * 150 + [1] * 150, }) cls.train_y = cls.train['feature 2'].to_list() - cls.test = pd.DataFrame({ 'feature 1': np.random.random((100)), 'feature 2': [0] * 25 + [1] * 50 + [0] * 25, }) cls.test_y = cls.test['feature 2'].to_list() - cls.random = pd.DataFrame({ 'feature 1': list(range(100)), 'feature 2': np.random.random(100), 'feature 3': np.random.random(100), }) cls.random_y = [1 if x > 0.5 else 0 for x in np.random.random(100)] - cls.kwargs = { - "generate_entityset": {"dfs": TestZephyr.base_dfs(), 
"es_type": "pidata"}, - "generate_label_times": {"labeling_fn": "brake_pad_presence", "num_samples": 10, "gap": "20d"}, - "generate_feature_matrix_and_labels": {"target_dataframe_name": "turbines", "cutoff_time_in_index": True, "agg_primitives": ["count", "sum", "max"], "verbose": True}, + "generate_entityset": { + "dfs": TestZephyr.base_dfs(), + "es_type": "pidata"}, + "generate_label_times": { + "labeling_fn": "brake_pad_presence", + "num_samples": 10, + "gap": "20d"}, + "generate_feature_matrix": { + "target_dataframe_name": "turbines", + "cutoff_time_in_index": True, + "agg_primitives": [ + "count", + "sum", + "max"], + "verbose": True}, "generate_train_test_split": {}, "fit_pipeline": {}, - "evaluate": {} - } + "evaluate": {}} def test_initialize_class(self): - zephyr = Zephyr() - assert zephyr.entityset is None - assert zephyr.labeling_function is None - assert zephyr.label_times is None - assert zephyr.pipeline is None - assert zephyr.pipeline_hyperparameters is None - assert zephyr.feature_matrix_and_labels is None - assert zephyr.X_train is None - assert zephyr.X_test is None - assert zephyr.y_train is None - assert zephyr.y_test is None - assert zephyr.is_fitted is None - assert zephyr.results is None + _ = Zephyr() + def test_generate_entityset(self): zephyr = Zephyr() @@ -188,15 +173,19 @@ def test_generate_feature_matrix_and_labels(self): zephyr = Zephyr() zephyr.generate_entityset(**self.__class__.kwargs["generate_entityset"]) zephyr.generate_label_times(**self.__class__.kwargs["generate_label_times"]) - zephyr.generate_feature_matrix_and_labels(**self.__class__.kwargs["generate_feature_matrix_and_labels"]) - feature_matrix_and_labels = zephyr.get_feature_matrix_and_labels() - assert feature_matrix_and_labels is not None + zephyr.generate_feature_matrix( + **self.__class__.kwargs["generate_feature_matrix"]) + feature_matrix, label_col_name, features= zephyr.get_feature_matrix() + assert feature_matrix is not None + assert label_col_name in 
feature_matrix.columns + assert features is not None def test_generate_train_test_split(self): zephyr = Zephyr() zephyr.generate_entityset(**self.__class__.kwargs["generate_entityset"]) zephyr.generate_label_times(**self.__class__.kwargs["generate_label_times"]) - zephyr.generate_feature_matrix_and_labels(**self.__class__.kwargs["generate_feature_matrix_and_labels"]) + zephyr.generate_feature_matrix( + **self.__class__.kwargs["generate_feature_matrix"]) zephyr.generate_train_test_split(**self.__class__.kwargs["generate_train_test_split"]) train_test_split = zephyr.get_train_test_split() assert train_test_split is not None @@ -208,9 +197,6 @@ def test_generate_train_test_split(self): def test_set_train_test_split(self): zephyr = Zephyr() - zephyr.generate_entityset(**self.__class__.kwargs["generate_entityset"]) - zephyr.generate_label_times(**self.__class__.kwargs["generate_label_times"]) - zephyr.generate_feature_matrix_and_labels(**self.__class__.kwargs["generate_feature_matrix_and_labels"]) zephyr.set_train_test_split(*self.base_train_test_split()) train_test_split = zephyr.get_train_test_split() assert train_test_split is not None @@ -222,9 +208,6 @@ def test_set_train_test_split(self): def test_fit_pipeline_no_visual(self): zephyr = Zephyr() - zephyr.generate_entityset(**self.__class__.kwargs["generate_entityset"]) - zephyr.generate_label_times(**self.__class__.kwargs["generate_label_times"]) - zephyr.generate_feature_matrix_and_labels(**self.__class__.kwargs["generate_feature_matrix_and_labels"]) zephyr.set_train_test_split(*self.base_train_test_split()) output = zephyr.fit_pipeline(**self.__class__.kwargs["fit_pipeline"]) assert output is None @@ -233,14 +216,10 @@ def test_fit_pipeline_no_visual(self): def test_fit_pipeline_visual(self): zephyr = Zephyr() - zephyr.generate_entityset(**self.__class__.kwargs["generate_entityset"]) - zephyr.generate_label_times(**self.__class__.kwargs["generate_label_times"]) - 
zephyr.generate_feature_matrix_and_labels(**self.__class__.kwargs["generate_feature_matrix_and_labels"]) zephyr.set_train_test_split(*self.base_train_test_split()) output = zephyr.fit_pipeline(visual=True, **self.__class__.kwargs["fit_pipeline"]) assert isinstance(output, dict) assert list(output.keys()) == ['threshold', 'scores'] - pipeline = zephyr.get_fitted_pipeline() assert pipeline is not None @@ -250,14 +229,10 @@ def test_predict_no_visual(self): zephyr.fit_pipeline(**self.__class__.kwargs["fit_pipeline"]) predicted = zephyr.predict() _, _, _, test_y = self.base_train_test_split() - print(predicted) assert predicted == test_y def test_predict_visual(self): zephyr = Zephyr() - zephyr.generate_entityset(**self.__class__.kwargs["generate_entityset"]) - zephyr.generate_label_times(**self.__class__.kwargs["generate_label_times"]) - zephyr.generate_feature_matrix_and_labels(**self.__class__.kwargs["generate_feature_matrix_and_labels"]) zephyr.set_train_test_split(*self.base_train_test_split()) zephyr.fit_pipeline(**self.__class__.kwargs["fit_pipeline"]) predicted, output = zephyr.predict(visual=True) @@ -268,9 +243,6 @@ def test_predict_visual(self): def test_evaluate(self): zephyr = Zephyr() - zephyr.generate_entityset(**self.__class__.kwargs["generate_entityset"]) - zephyr.generate_label_times(**self.__class__.kwargs["generate_label_times"]) - zephyr.generate_feature_matrix_and_labels(**self.__class__.kwargs["generate_feature_matrix_and_labels"]) zephyr.set_train_test_split(*self.base_train_test_split()) zephyr.fit_pipeline(**self.__class__.kwargs["fit_pipeline"]) scores = zephyr.evaluate(metrics=[ @@ -279,7 +251,6 @@ def test_evaluate(self): "sklearn.metrics.f1_score", "sklearn.metrics.recall_score" ]) - assert isinstance(scores, dict) assert all(metric in scores for metric in [ "sklearn.metrics.accuracy_score", @@ -291,16 +262,10 @@ def test_evaluate(self): def test_get_entityset_types(self): zephyr = Zephyr() entityset_types = zephyr.GET_ENTITYSET_TYPES() - 
- # Check that it returns a dictionary assert isinstance(entityset_types, dict) - - # Check that it contains expected keys assert "pidata" in entityset_types assert "scada" in entityset_types assert "vibrations" in entityset_types - - # Check structure of returned data for es_type, info in entityset_types.items(): assert isinstance(info, dict) assert "obj" in info @@ -311,14 +276,8 @@ def test_get_entityset_types(self): def test_get_labeling_functions(self): zephyr = Zephyr() labeling_functions = zephyr.GET_LABELING_FUNCTIONS() - - # Check that it returns a dictionary assert isinstance(labeling_functions, dict) - - # Check that it contains expected labeling functions assert "brake_pad_presence" in labeling_functions - - # Check structure of returned data for func_name, info in labeling_functions.items(): assert isinstance(info, dict) assert "obj" in info @@ -329,17 +288,10 @@ def test_get_labeling_functions(self): def test_get_evaluation_metrics(self): zephyr = Zephyr() evaluation_metrics = zephyr.GET_EVALUATION_METRICS() - - # Check that it returns a dictionary assert isinstance(evaluation_metrics, dict) - - # Check that it contains expected metrics expected_metrics = DEFAULT_METRICS - for metric in expected_metrics: assert metric in evaluation_metrics - - # Check structure of returned data for metric_name, info in evaluation_metrics.items(): assert isinstance(info, dict) assert "obj" in info @@ -347,18 +299,3 @@ def test_get_evaluation_metrics(self): assert isinstance(info["obj"], MLBlock) assert hasattr(info["obj"], "metadata") assert isinstance(info["desc"], str) - - # def test_guide_handler_warnings(self): - # zephyr = Zephyr() - - # # Test skipping steps warning - # with pytest.warns(UserWarning, match="You are skipping the following steps"): - # zephyr.generate_feature_matrix_and_labels(**self.kwargs["generate_feature_matrix_and_labels"]) - - # # Test stale data warning - # with pytest.warns(UserWarning, match="This data is potentially stale"): - # 
zephyr.get_entityset() - - # # Test inconsistent state warning - # with pytest.warns(UserWarning, match="Unable to perform"): - # zephyr.get_label_times() diff --git a/tests/test_entityset.py b/tests/test_entityset.py index 6d7901f..ab3ec76 100644 --- a/tests/test_entityset.py +++ b/tests/test_entityset.py @@ -118,11 +118,15 @@ def scada_dfs(base_dfs): }) return {**base_dfs, 'scada': scada_df} + def create_pidata_entityset(pidata_dfs): - return _create_entityset(pidata_dfs, es_type = "pidata") + return _create_entityset(pidata_dfs, es_type="pidata") + def create_scada_entityset(scada_dfs): - return _create_entityset(scada_dfs, es_type = "scada") + return _create_entityset(scada_dfs, es_type="scada") + + def test_create_pidata_missing_entities(pidata_dfs): error_msg = 'Missing dataframes for entities notifications.' diff --git a/zephyr_ml/core.py b/zephyr_ml/core.py index 87179c6..92086d5 100644 --- a/zephyr_ml/core.py +++ b/zephyr_ml/core.py @@ -1,11 +1,6 @@ from zephyr_ml.entityset import _create_entityset, VALIDATE_DATA_FUNCTIONS -from zephyr_ml.labeling import get_labeling_functions, get_labeling_functions_map, LABELING_FUNCTIONS -# from zephyr_ml.entityset import _create_entityset, VALIDATE_DATA_FUNCTIONS -from zephyr_ml.labeling import ( - get_labeling_functions, - get_labeling_functions_map, - LABELING_FUNCTIONS, -) +from zephyr_ml.labeling import get_labeling_functions +from zephyr_ml.labeling import get_labeling_functions_map from zephyr_ml.feature_engineering import process_signals import composeml as cp from inspect import getfullargspec @@ -15,12 +10,11 @@ from sklearn.model_selection import train_test_split import os import json -from mlblocks import MLPipeline, MLBlock, get_primitives_paths, add_primitives_path -from itertools import chain +from mlblocks import MLPipeline, MLBlock +from functools import wraps import logging -import matplotlib.pyplot as plt from functools import wraps -import inspect +import copy DEFAULT_METRICS = [ 
"sklearn.metrics.accuracy_score", "sklearn.metrics.precision_score", @@ -43,7 +37,7 @@ def __init__(self, producers_and_getters, set_methods): self.producer_to_step_map = {} self.getter_to_step_map = {} - + self.terms = [] self.skipped = [] for idx, (producers, getters) in enumerate(self.producers_and_getters): @@ -64,13 +58,13 @@ def get_necessary_steps(self, actual_next_step): option_strs.append(opt.__name__) step_strs.append(f"{step}. {' or '.join(option_strs)}") return "\n".join(step_strs) - + def get_get_steps_in_between(self, cur_step, next_step): step_strs = [] for step in range(cur_step + 1, next_step): step_strs.append(f"{step} {self.producers_and_getters[step][1][0]}") return step_strs - + def get_last_up_to_date(self, next_step): latest_up_to_date = 0 for step in range(next_step): @@ -78,53 +72,51 @@ def get_last_up_to_date(self, next_step): latest_up_to_date = step return latest_up_to_date - def join_steps(self, step_strs): return "\n".join(step_strs) - + def get_steps_in_between(self, cur_step, next_step): step_strs = [] - for step in range(cur_step+1, next_step): + for step in range(cur_step + 1, next_step): option_strs = [] for opt in self.producers_and_getters[step][0]: option_strs.append(opt.__name__) step_strs.append(f"{step}. {' or '.join(option_strs)}") return step_strs - + def perform_producer_step(self, zephyr, method, *method_args, **method_kwargs): step_num = self.producer_to_step_map[method.__name__] res = method(zephyr, *method_args, **method_kwargs) self.current_step = step_num self.terms[step_num] = self.cur_term return res - - + def try_log_skipping_steps_warning(self, name, next_step): steps_skipped = self.get_steps_in_between(self.current_step, next_step) if len(steps_skipped) > 0: for step in range(self.current_step + 1, next_step): self.skipped[step] = True necc_steps = self.join_steps(steps_skipped) - LOGGER.warning(f"Performing {name}. You are skipping the following steps:\n{necc_steps}") - + LOGGER.warning( + f"Performing {name}. 
You are skipping the following steps:\n{necc_steps}") def try_log_using_stale_warning(self, name, next_step): latest_up_to_date = self.get_last_up_to_date(next_step) - steps_needed = self.get_steps_in_between(latest_up_to_date-1, next_step) - if len(steps_needed) >0: + steps_needed = self.get_steps_in_between(latest_up_to_date - 1, next_step) + if len(steps_needed) > 0: necc_steps = self.join_steps(steps_needed) LOGGER.warning(f"Performing {name}. You are in a stale state and \ using potentially stale data to perform this step. \ Re-run the following steps to return to a present state:\n: \ {steps_needed}") - def try_log_making_stale_warning(self, name, next_step): next_next_step = next_step + 1 prod_steps = f"{next_next_step}. {' or '.join(self.producers_and_getters[next_next_step][0])}" # add later set methods - get_steps = self.join_steps(self.get_get_steps_in_between(next_step, self.current_step + 1)) - + get_steps = self.join_steps( + self.get_get_steps_in_between( + next_step, self.current_step + 1)) LOGGER.warning(f"Performing {name}. You are beginning a new iteration. Any data returned \ by the following get methods will be considered stale:\n{get_steps}. To continue with this iteration, please perform:\n{prod_steps}") @@ -132,25 +124,25 @@ def try_log_making_stale_warning(self, name, next_step): # stale must be before b/c user must have regressed with progress that contains skips # return set method, and next possible up to date key method def try_log_inconsistent_warning(self, name, next_step): - set_method_str= f"{self.producers_and_getters[next_step][0][1].__name__}" + set_method_str = f"{self.producers_and_getters[next_step][0][1].__name__}" latest_up_to_date = self.get_last_up_to_date(next_step) LOGGER.warning(f"Unable to perform {name} because some steps have been skipped. 
\ You can call the corresponding set method: {set_method_str} or re run steps \ starting at or before {latest_up_to_date}") - + def log_get_inconsistent_warning(self, name, next_step): prod_steps = f"{next_step}. {' or '.join(self.producers_and_getters[next_step][0])}" latest_up_to_date = self.get_last_up_to_date(next_step) - LOGGER.warning(f"Unable to perform {name} because {prod_steps} has not been run yet. Run steps starting at or before {latest_up_to_date} ") - + LOGGER.warning( + f"Unable to perform {name} because {prod_steps} has not been run yet. Run steps starting at or before {latest_up_to_date} ") def log_get_stale_warning(self, name, next_step): latest_up_to_date = self.get_last_up_to_date(next_step) LOGGER.warning(f"Performing {name}. This data is potentially stale. \ Re-run steps starting at or before {latest_up_to_date} to ensure data is up to date.") - - # tries to perform step if possible -> warns that data might be stale + # tries to perform step if possible -> warns that data might be stale + def try_perform_forward_producer_step(self, zephyr, method, *method_args, **method_kwargs): name = method.__name__ next_step = self.producer_to_step_map[name] @@ -159,52 +151,49 @@ def try_perform_forward_producer_step(self, zephyr, method, *method_args, **meth # next_step == 0, set method (already warned), or previous step is up to term res = self.perform_producer_step(zephyr, method, *method_args, **method_kwargs) return res - # next_step == 0, set method, or previous step is up to term + def try_perform_backward_producer_step(self, zephyr, method, *method_args, **method_kwargs): name = method.__name__ next_step = self.producer_to_step_map[name] self.try_log_making_stale_warning(next_step) - self.cur_term +=1 + self.cur_term += 1 for i in range(0, next_step): if self.terms[i] != -1: self.terms[i] = self.cur_term res = self.perform_producer_step(zephyr, method, *method_args, **method_kwargs) return res - def try_perform_producer_step(self, zephyr, method, 
*method_args, **method_kwargs): name = method.__name__ next_step = self.producer_to_step_map[name] if next_step >= self.current_step: - res = self.try_perform_forward_producer_step(zephyr, method, *method_args, **method_kwargs) + res = self.try_perform_forward_producer_step( + zephyr, method, *method_args, **method_kwargs) return res else: - res = self.try_perform_backward_producer_step(zephyr, method, *method_args, **method_kwargs) + res = self.try_perform_backward_producer_step( + zephyr, method, *method_args, **method_kwargs) return res - # dont update current step or terms - def try_perform_stale_or_inconsistent_producer_step(self, zephyr, method, *method_args, **method_kwargs): + + def try_perform_stale_or_inconsistent_producer_step( + self, zephyr, method, *method_args, **method_kwargs): name = method.__name__ next_step = self.producer_to_step_map[name] - if self.terms[next_step-1] == -1: #inconsistent + if self.terms[next_step - 1] == -1: # inconsistent self.try_log_inconsistent_warning(name, next_step) else: # need to include a case where performing using stale data that was skipped in current iteration # overwrite current iteration's ? 
# no not possible b/c if there is a current iteration after this step, it must have updated this step's iteration - # + # self.try_log_using_stale_warning(name, next_step) res = self.perform_producer_step(zephyr, method, *method_args, **method_kwargs) return res - - - - - def try_perform_getter_step(self, zephyr, method, *method_args, **method_kwargs): name = method.__name__ # either inconsistent, stale, or up to date @@ -220,21 +209,17 @@ def try_perform_getter_step(self, zephyr, method, *method_args, **method_kwargs) res = method(zephyr, *method_args, **method_kwargs) return res - - - - - def guide_step(self, zephyr, method, *method_args, **method_kwargs): method_name = method.__name__ if method_name in self.producer_to_step_map: - #up-todate + # up-todate next_step = self.producer_to_step_map[method_name] - if method_name in self.set_methods or next_step == 0 or self.terms[next_step-1] == self.cur_term: + if method_name in self.set_methods or next_step == 0 or self.terms[next_step - 1] == self.cur_term: res = self.try_perform_producer_step(zephyr, method, *method_args, **method_kwargs) return res - else: #stale or inconsistent - res = self.try_perform_stale_or_inconsistent_producer_step(zephyr, method, *method_args, **method_kwargs) + else: # stale or inconsistent + res = self.try_perform_stale_or_inconsistent_producer_step( + zephyr, method, *method_args, **method_kwargs) return res elif method_name in self.getter_to_step_map: res = self.try_perform_getter_step(zephyr, method, *method_args, **method_kwargs) @@ -243,17 +228,6 @@ def guide_step(self, zephyr, method, *method_args, **method_kwargs): print(f"Method {method_name} does not need to be wrapped") - - - - - - - - - - - def guide(method): @wraps(method) @@ -262,38 +236,45 @@ def guided_step(self, *method_args, **method_kwargs): return guided_step + class Zephyr: def __init__(self): - self.entityset = None - self.labeling_function = None - self.label_times = None - self.pipeline = None - 
self.pipeline_hyperparameters = None - self.feature_matrix_and_labels = None - self.X_train = None - self.X_test = None - self.y_train = None - self.y_test = None - self.is_fitted = None - self.results = None + self._entityset = None + self._label_times = None + self._label_times_meta = None + + self._label_col_name = "label" + self._feature_matrix = None + + self._pipeline = None + + self._X_train = None + self._X_test = None + self._y_train = None + self._y_test = None - # tuple of 2 arrays: producers and attributes step_order = [ - ([self.generate_entityset, self.set_entityset], [self.get_entityset]), - ([self.generate_label_times, self.set_label_times], [self.get_label_times]), - ([self.generate_feature_matrix_and_labels, self.set_feature_matrix_and_labels], [self.get_feature_matrix_and_labels]), - ([self.generate_train_test_split, self.set_train_test_split], [self.get_train_test_split]), - ([self.fit_pipeline, self.set_fitted_pipeline], [self.get_fitted_pipeline]), - ([self.predict, self.evaluate], []) - ] - set_methods = set([self.set_entityset.__name__, self.set_label_times.__name__, self.set_feature_matrix_and_labels.__name__, self.set_train_test_split.__name__, self.set_fitted_pipeline.__name__]) - self.guide_handler = GuideHandler(step_order, set_methods) - - - + ([ + self.generate_entityset, self.set_entityset], [ + self.get_entityset]), ([ + self.generate_label_times, self.set_label_times], [ + self.get_label_times]), ([ + self.generate_feature_matrix, self.set_feature_matrix], [ + self.get_feature_matrix]), ([ + self.generate_train_test_split, self.set_train_test_split], [ + self.get_train_test_split]), ([ + self.fit_pipeline, self.set_fitted_pipeline], [ + self.get_fitted_pipeline]), ([ + self.predict, self.evaluate], [])] + set_methods = set([self.set_entityset.__name__, + self.set_label_times.__name__, + self.set_feature_matrix.__name__, + self.set_train_test_split.__name__, + self.set_fitted_pipeline.__name__]) + self.guide_handler = 
GuideHandler(step_order, set_methods) def GET_ENTITYSET_TYPES(self): """ @@ -304,21 +285,30 @@ def GET_ENTITYSET_TYPES(self): info_map[es_type] = {"obj": es_type, "desc": " ".join((val_fn.__doc__.split()))} return info_map - + def GET_LABELING_FUNCTIONS(self): return get_labeling_functions() - + def GET_EVALUATION_METRICS(self): info_map = {} for metric in DEFAULT_METRICS: primitive = self._get_ml_primitive(metric) - info_map[metric] = {"obj": primitive, "desc": primitive.metadata["description"] } + info_map[metric] = {"obj": primitive, "desc": primitive.metadata["description"]} return info_map @guide - def generate_entityset(self, dfs, es_type, custom_kwargs_mapping=None, - signal_dataframe_name = None, signal_column = None, signal_transformations = None, - signal_aggregations = None, signal_window_size = None, signal_replace_dataframe = False, **sigpro_kwargs): + def generate_entityset( + self, + dfs, + es_type, + custom_kwargs_mapping=None, + signal_dataframe_name=None, + signal_column=None, + signal_transformations=None, + signal_aggregations=None, + signal_window_size=None, + signal_replace_dataframe=False, + **sigpro_kwargs): """ Generate an entityset @@ -334,73 +324,60 @@ def generate_entityset(self, dfs, es_type, custom_kwargs_mapping=None, """ entityset = _create_entityset(dfs, es_type, custom_kwargs_mapping) - #perform signal processing - if signal_dataframe_name is not None and signal_column is not None: + # perform signal processing + if signal_dataframe_name is not None and signal_column is not None: if signal_transformations is None: signal_transformations = [] if signal_aggregations is None: signal_aggregations = [] - process_signals(entityset, signal_dataframe_name, signal_column, signal_transformations, - signal_aggregations, signal_window_size, signal_replace_dataframe, **sigpro_kwargs) - - self.entityset = entityset - return self.entityset + process_signals( + entityset, + signal_dataframe_name, + signal_column, + signal_transformations, + 
signal_aggregations, + signal_window_size, + signal_replace_dataframe, + **sigpro_kwargs) + + self._entityset = entityset + return self._entityset @guide - def set_entityset(self, entityset=None, es_type=None, entityset_path = None, custom_kwargs_mapping=None): + def set_entityset( + self, + entityset=None, + es_type=None, + entityset_path=None, + custom_kwargs_mapping=None): if entityset_path is not None: entityset = ft.read_entityset(entityset_path) if entityset is None: - raise ValueError("No entityset passed in. Please pass in an entityset object via the entityest parameter or an entityset path via the entityset_path parameter.") - + raise ValueError( + "No entityset passed in. Please pass in an entityset object via the entityest parameter or an entityset path via the entityset_path parameter.") + dfs = entityset.dataframe_dict validate_func = VALIDATE_DATA_FUNCTIONS[es_type] validate_func(dfs, custom_kwargs_mapping) - self.entityset = entityset + self._entityset = entityset @guide def get_entityset(self): - if self.entityset is None: + if self._entityset is None: raise ValueError("No entityset has been created or set in this instance.") - return self.entityset - - - - - # @guide - # def set_labeling_function(self, name=None, func=None): - # if name is not None: - # labeling_fn_map = get_labeling_functions_map() - # if name in labeling_fn_map: - # self.labeling_function = labeling_fn_map[name] - # return - # else: - # raise ValueError( - # f"Unrecognized name argument:{name}. 
Call get_predefined_labeling_functions to view predefined labeling functions" - # ) - # elif func is not None: - # if callable(func): - # self.labeling_function = func - # return - # else: - # raise ValueError(f"Custom function is not callable") - # raise ValueError("No labeling function given.") - - # @guide - # def get_labeling_function(self): - # return self.labeling_function - + return self._entityset + @guide def generate_label_times( self, labeling_fn, num_samples=-1, subset=None, column_map={}, verbose=False, **kwargs ): - assert self.entityset is not None, "entityset has not been set" - - if isinstance(labeling_fn, str): # get predefined labeling function + assert self._entityset is not None, "entityset has not been set" + + if isinstance(labeling_fn, str): # get predefined labeling function labeling_fn_map = get_labeling_functions_map() if labeling_fn in labeling_fn_map: labeling_fn = labeling_fn_map[labeling_fn] @@ -409,11 +386,9 @@ def generate_label_times( f"Unrecognized name argument:{labeling_fn}. 
Call get_predefined_labeling_functions to view predefined labeling functions" ) - assert callable(labeling_fn), "Labeling function is not callable" - - labeling_function, df, meta = labeling_fn(self.entityset, column_map) + labeling_function, df, meta = labeling_fn(self._entityset, column_map) data = df if isinstance(subset, float) or isinstance(subset, int): @@ -442,69 +417,122 @@ def generate_label_times( if thresh is not None: label_times = label_times.threshold(thresh) - self.label_times = label_times + self._label_times = label_times + self._label_col_name = "label" + self._label_times_meta = meta return label_times, meta - + @guide - def set_label_times(self, label_times): - assert(isinstance(label_times, cp.LabelTimes)) - self.label_times = label_times + def set_label_times(self, label_times, label_col_name, meta = None): + assert (isinstance(label_times, cp.LabelTimes)) + self._label_times = label_times + self._label_col_name = label_col_name + self._label_times_meta = meta @guide - def get_label_times(self, visualize = True): + def get_label_times(self, visualize=True): if visualize: - cp.label_times.plots.LabelPlots(self.label_times).distribution() - return self.label_times + cp.label_times.plots.LabelPlots(self._label_times).distribution() + return self._label_times, self._label_times_meta @guide - def generate_feature_matrix_and_labels(self, target_dataframe_name = None, instance_ids = None, - agg_primitives = None, trans_primitives = None, groupby_trans_primitives = None, - allowed_paths = None, max_depth = 2, ignore_dataframes = None, ignore_columns=None, - primitive_options=None, seed_features=None, - drop_contains=None, drop_exact=None, where_primitives=None, max_features=-1, - cutoff_time_in_index=False, save_progress=None, features_only=False, training_window=None, - approximate=None, chunk_size=None, n_jobs=1, dask_kwargs=None, verbose=False, return_types=None, - progress_callback=None, include_cutoff_time=True, - - signal_dataframe_name = None, 
signal_column = None, signal_transformations = None, - signal_aggregations = None, signal_window_size = None, signal_replace_dataframe = False, **sigpro_kwargs): + def generate_feature_matrix( + self, + target_dataframe_name=None, + instance_ids=None, + agg_primitives=None, + trans_primitives=None, + groupby_trans_primitives=None, + allowed_paths=None, + max_depth=2, + ignore_dataframes=None, + ignore_columns=None, + primitive_options=None, + seed_features=None, + drop_contains=None, + drop_exact=None, + where_primitives=None, + max_features=-1, + cutoff_time_in_index=False, + save_progress=None, + features_only=False, + training_window=None, + approximate=None, + chunk_size=None, + n_jobs=1, + dask_kwargs=None, + verbose=False, + return_types=None, + progress_callback=None, + include_cutoff_time=True, + + add_interesting_values = False, + max_interesting_values = 5, + interesting_dataframe_name = None, + interesting_values = None, + + signal_dataframe_name=None, + signal_column=None, + signal_transformations=None, + signal_aggregations=None, + signal_window_size=None, + signal_replace_dataframe=False, + **sigpro_kwargs): + + entityset_copy = copy.deepcopy(self._entityset) # perform signal processing - if signal_dataframe_name is not None and signal_column is not None: + if signal_dataframe_name is not None and signal_column is not None: + # first make copy of entityset if signal_transformations is None: signal_transformations = [] if signal_aggregations is None: signal_aggregations = [] - process_signals(self.entityset, signal_dataframe_name, signal_column, signal_transformations, - signal_aggregations, signal_window_size, signal_replace_dataframe, **sigpro_kwargs) + process_signals( + entityset_copy, + signal_dataframe_name, + signal_column, + signal_transformations, + signal_aggregations, + signal_window_size, + signal_replace_dataframe, + **sigpro_kwargs) + # add interesting values for where primitives + if add_interesting_values: + 
entityset_copy.add_interesting_values(max_values = max_interesting_values, verbose = verbose,dataframe_name = interesting_dataframe_name, values = interesting_values) + + feature_matrix, features = ft.dfs( - entityset=self.entityset, cutoff_time=self.label_times, - target_dataframe_name = target_dataframe_name, instance_ids =instance_ids, - agg_primitives = agg_primitives, trans_primitives = trans_primitives, groupby_trans_primitives = groupby_trans_primitives, - allowed_paths = allowed_paths, max_depth = max_depth, ignore_dataframes = ignore_dataframes, ignore_columns=ignore_columns, - primitive_options=primitive_options, seed_features=seed_features, - drop_contains=drop_contains, drop_exact=drop_exact, where_primitives=where_primitives, max_features=max_features, - cutoff_time_in_index=cutoff_time_in_index, save_progress=save_progress, features_only=features_only, training_window=training_window, - approximate=approximate, chunk_size=chunk_size, n_jobs=n_jobs, dask_kwargs=dask_kwargs, verbose=verbose, return_types=return_types, - progress_callback=progress_callback, include_cutoff_time=include_cutoff_time, + entityset=entityset_copy, cutoff_time=self._label_times, + target_dataframe_name=target_dataframe_name, instance_ids=instance_ids, + agg_primitives=agg_primitives, trans_primitives=trans_primitives, groupby_trans_primitives=groupby_trans_primitives, + allowed_paths=allowed_paths, max_depth=max_depth, ignore_dataframes=ignore_dataframes, ignore_columns=ignore_columns, + primitive_options=primitive_options, seed_features=seed_features, + drop_contains=drop_contains, drop_exact=drop_exact, where_primitives=where_primitives, max_features=max_features, + cutoff_time_in_index=cutoff_time_in_index, save_progress=save_progress, features_only=features_only, training_window=training_window, + approximate=approximate, chunk_size=chunk_size, n_jobs=n_jobs, dask_kwargs=dask_kwargs, verbose=verbose, return_types=return_types, + progress_callback=progress_callback, 
include_cutoff_time=include_cutoff_time, ) - self.feature_matrix_and_labels = self._clean_feature_matrix(feature_matrix) - self.features = features - return self.feature_matrix_and_labels, features + self._feature_matrix = self._clean_feature_matrix(feature_matrix, label_col_name=self._label_col_name) + self._features = features + + return self._feature_matrix, self._features, entityset_copy @guide - def get_feature_matrix_and_labels(self): - return self.feature_matrix_and_labels - + def get_feature_matrix(self): + return self._feature_matrix, self._label_col_name, self._features @guide - def set_feature_matrix_and_labels(self, feature_matrix, label_col_name="label"): - assert label_col_name in feature_matrix.columns - self.feature_matrix_and_labels = self._clean_feature_matrix( + def set_feature_matrix(self, feature_matrix, labels = None,label_col_name="label"): + assert isinstance(feature_matrix, pd.DataFrame) and (labels is not None or label_col_name in feature_matrix.columns ) + if labels is not None: + feature_matrix[label_col_name] = labels + self._feature_matrix = self._clean_feature_matrix( feature_matrix, label_col_name=label_col_name ) + self._label_col_name = label_col_name @guide def generate_train_test_split( @@ -515,14 +543,15 @@ def generate_train_test_split( shuffle=True, stratify=False, ): - feature_matrix, labels = self.feature_matrix_and_labels - + feature_matrix = self._feature_matrix.copy() + labels = feature_matrix.pop(self._label_col_name) + if not isinstance(stratify, list): if stratify: stratify = labels else: stratify = None - + X_train, X_test, y_train, y_test = train_test_split( feature_matrix, labels, @@ -532,91 +561,89 @@ def generate_train_test_split( shuffle=shuffle, stratify=stratify, ) - self.X_train = X_train - self.X_test = X_test - self.y_train = y_train - self.y_test = y_test - return + self._X_train = X_train + self._X_test = X_test + self._y_train = y_train + self._y_test = y_test + + return X_train, X_test, y_train, y_test 
@guide def set_train_test_split(self, X_train, X_test, y_train, y_test): - self.X_train = X_train - self.X_test = X_test - self.y_train = y_train - self.y_test = y_test + self._X_train = X_train + self._X_test = X_test + self._y_train = y_train + self._y_test = y_test @guide def get_train_test_split(self): - if self.X_train is None or self.X_test is None or self.y_train is None or self.y_test is None: + if self._X_train is None or self._X_test is None or self._y_train is None or self._y_test is None: return None - return self.X_train, self.X_test, self.y_train, self.y_test + return self._X_train, self._X_test, self._y_train, self._y_test - - @guide def set_fitted_pipeline(self, pipeline): - self.pipeline = pipeline - + self._pipeline = pipeline + @guide def fit_pipeline( - self, pipeline = "xgb_classifier", pipeline_hyperparameters=None, X=None, y=None, visual=False, **kwargs + self, pipeline="xgb_classifier", pipeline_hyperparameters=None, X=None, y=None, visual=False, **kwargs ): # kwargs indicate the parameters of the current pipeline - self.pipeline = self._get_mlpipeline(pipeline, pipeline_hyperparameters) + self._pipeline = self._get_mlpipeline(pipeline, pipeline_hyperparameters) if X is None: - X = self.X_train + X = self._X_train if y is None: - y = self.y_train - + y = self._y_train if visual: outputs_spec, visual_names = self._get_outputs_spec(False) else: outputs_spec = None - outputs = self.pipeline.fit(X, y, output_=outputs_spec, **kwargs) + outputs = self._pipeline.fit(X, y, output_=outputs_spec, **kwargs) if visual and outputs is not None: return dict(zip(visual_names, outputs)) - + @guide def get_fitted_pipeline(self): - return self.pipeline + return self._pipeline - @guide - def get_pipeline_hyperparameters(self): - return self.pipeline_hyperparameters @guide def predict(self, X=None, visual=False, **kwargs): if X is None: - X = self.X_test + X = self._X_test if visual: outputs_spec, visual_names = self._get_outputs_spec() else: outputs_spec = 
"default" - - outputs = self.pipeline.predict(X, output_=outputs_spec, **kwargs) - print(outputs) + outputs = self._pipeline.predict(X, output_=outputs_spec, **kwargs) if visual and visual_names: prediction = outputs[0] - return prediction, dict(zip(visual_names, outputs[-len(visual_names) :])) + return prediction, dict(zip(visual_names, outputs[-len(visual_names):])) return outputs - - - @guide - def evaluate(self, X=None, y=None,metrics=None, global_args = None, local_args = None, global_mapping = None, local_mapping = None): + def evaluate( + self, + X=None, + y=None, + metrics=None, + global_args=None, + local_args=None, + global_mapping=None, + local_mapping=None): if X is None: - X = self.X_test + X = self._X_test if y is None: - y = self.y_test + y = self._y_test - final_context = self.pipeline.predict(X, output_=-1) + final_context = self._pipeline.predict(X, output_=-1) # remap items, if any if global_mapping is not None: @@ -625,25 +652,23 @@ def evaluate(self, X=None, y=None,metrics=None, global_args = None, local_args = cur_item = final_context.pop(cur) final_context[new] = cur_item - if metrics is None: metrics = DEFAULT_METRICS if global_args is None: global_args = {} - + if local_args is None: local_args = {} if local_mapping is None: local_mapping = {} - results = {} for metric in metrics: try: metric_primitive = self._get_ml_primitive(metric) - + if metric in local_mapping: metric_context = {} metric_mapping = local_mapping[metric] @@ -653,20 +678,20 @@ def evaluate(self, X=None, y=None,metrics=None, global_args = None, local_args = else: metric_context = final_context - if metric in local_args: metric_args = local_args[metric] else: metric_args = {} - res = metric_primitive.produce(y_true = self.y_test, **metric_context, **metric_args) + res = metric_primitive.produce(y_true=self._y_test, **metric_context, **metric_args) results[metric_primitive.name] = res except Exception as e: - LOGGER.error(f"Unable to run evaluation metric: 
{metric_primitive.name}", exc_info = e) - self.results = results + LOGGER.error( + f"Unable to run evaluation metric: {metric_primitive.name}", + exc_info=e) + self._results = results return results - def _clean_feature_matrix(self, feature_matrix, label_col_name="label"): labels = feature_matrix.pop(label_col_name) @@ -678,7 +703,9 @@ def _clean_feature_matrix(self, feature_matrix, label_col_name="label"): string_cols = feature_matrix.select_dtypes(include="category").columns feature_matrix = pd.get_dummies(feature_matrix, columns=string_cols) - return feature_matrix, labels + feature_matrix[label_col_name] = labels + + return feature_matrix def _get_mlpipeline(self, pipeline, hyperparameters=None): if isinstance(pipeline, str) and os.path.isfile(pipeline): @@ -705,7 +732,7 @@ def _get_outputs_spec(self, default=True): outputs_spec = ["default"] if default else [] try: - visual_names = self.pipeline.get_output_names("visual") + visual_names = self._pipeline.get_output_names("visual") outputs_spec.append("visual") except ValueError: visual_names = [] @@ -713,8 +740,6 @@ def _get_outputs_spec(self, default=True): return outputs_spec, visual_names - - if __name__ == "__main__": obj = Zephyr() print(obj.GET_EVALUATION_METRICS()) @@ -866,13 +891,12 @@ def _get_outputs_spec(self, default=True): # ) # obj.set_entityset(entityset_path = "/Users/raymondpan/zephyr/Zephyr-repo/brake_pad_es", es_type = 'scada') - + # obj.set_labeling_function(name="brake_pad_presence") # obj.generate_label_times(labeling_fn="brake_pad_presence", num_samples=10, gap="20d") # # print(obj.get_label_times()) - # obj.generate_feature_matrix_and_labels( # target_dataframe_name="turbines", # cutoff_time_in_index=True, @@ -888,5 +912,4 @@ def _get_outputs_spec(self, default=True): # ) # obj.set_and_fit_pipeline() - # obj.evaluate() diff --git a/zephyr_ml/core_prev.py b/zephyr_ml/core_prev.py index 124b70c..ca16567 100644 --- a/zephyr_ml/core_prev.py +++ b/zephyr_ml/core_prev.py @@ -155,7 +155,7 @@ 
def predict(self, X: pd.DataFrame, visual: bool = False, **kwargs) -> pd.Series: if visual and visual_names: prediction = outputs[0] - return prediction, dict(zip(visual_names, outputs[-len(visual_names) :])) + return prediction, dict(zip(visual_names, outputs[-len(visual_names):])) return outputs diff --git a/zephyr_ml/entityset.py b/zephyr_ml/entityset.py index 620abb5..5d189d9 100644 --- a/zephyr_ml/entityset.py +++ b/zephyr_ml/entityset.py @@ -2,7 +2,7 @@ import featuretools as ft -from zephyr_ml.metadata import get_mapped_kwargs, get_es_types +from zephyr_ml.metadata import get_mapped_kwargs # def _create_entityset(entities, es_type, es_kwargs): @@ -109,7 +109,6 @@ def _validate_data(dfs, es_type, es_kwargs): if not isinstance(es_type, list): es_type = [es_type] - entities = set( chain( [ @@ -209,7 +208,7 @@ def _validate_data(dfs, es_type, es_kwargs): def validate_scada_data(dfs, new_kwargs_mapping=None): """ - SCADA data is signal data from the Original Equipment Manufacturer Supervisory Control + SCADA data is signal data from the Original Equipment Manufacturer Supervisory Control And Data Acquisition (OEM-SCADA) system, a signal data source. """ entity_kwargs = get_mapped_kwargs("scada", new_kwargs_mapping) @@ -257,7 +256,7 @@ def validate_vibrations_data(dfs, new_kwargs_mapping=None): def _create_entityset(entities, es_type, new_kwargs_mapping=None): - + validate_func = VALIDATE_DATA_FUNCTIONS[es_type] es_kwargs = validate_func(entities, new_kwargs_mapping) diff --git a/zephyr_ml/feature_engineering.py b/zephyr_ml/feature_engineering.py index 7683200..34c35d8 100644 --- a/zephyr_ml/feature_engineering.py +++ b/zephyr_ml/feature_engineering.py @@ -2,7 +2,7 @@ def process_signals(es, signal_dataframe_name, signal_column, transformations, aggregations, - window_size = None, replace_dataframe=False, **kwargs): + window_size=None, replace_dataframe=False, **kwargs): ''' Process signals using SigPro. 
@@ -54,6 +54,7 @@ def process_signals(es, signal_dataframe_name, signal_column, transformations, a signal_dataframe_name, time_index=time_index, index='_index') + else: df_name = '{}_processed'.format(signal_df.ww.name) diff --git a/zephyr_ml/labeling/__init__.py b/zephyr_ml/labeling/__init__.py index ec58696..72b2014 100644 --- a/zephyr_ml/labeling/__init__.py +++ b/zephyr_ml/labeling/__init__.py @@ -41,8 +41,6 @@ def get_labeling_functions_map(): return functions - - def get_helper_functions(): functions = {} for function in UTIL_FUNCTIONS: diff --git a/zephyr_ml/primitives/jsons/zephyr_ml.primitives.postprocessing.confusion_matrix.json b/zephyr_ml/primitives/jsons/zephyr_ml.primitives.postprocessing.confusion_matrix.json index 5770c5b..766ca5f 100644 --- a/zephyr_ml/primitives/jsons/zephyr_ml.primitives.postprocessing.confusion_matrix.json +++ b/zephyr_ml/primitives/jsons/zephyr_ml.primitives.postprocessing.confusion_matrix.json @@ -1,53 +1,49 @@ { - "name": "zephyr_ml.primitives.postprocessing.confusion_matrix", - "contributors": [ - "Raymond Pan " + "name": "zephyr_ml.primitives.postprocessing.confusion_matrix", + "contributors": ["Raymond Pan "], + "description": "Create and plot confusion matrix.", + "classifiers": { + "type": "helper" + }, + "modalities": [], + "primitive": "zephyr_ml.primitives.postprocessing.confusion_matrix", + "produce": { + "args": [ + { + "name": "y_true", + "type": "ndarray" + }, + { + "name": "y_pred", + "type": "ndarray" + } ], - "description": "Create and plot confusion matrix.", - "classifiers": { - "type": "helper" - }, - "modalities": [], - "primitive": "zephyr_ml.primitives.postprocessing.confusion_matrix", - "produce": { - "args": [ - { - "name": "y_true", - "type": "ndarray" - }, - { - "name": "y_pred", - "type": "ndarray" - } - ], - "output": [ - { - "name": "confusion_matrix", - "type": "ndarray" - } - ] + "output": [ + { + "name": "confusion_matrix", + "type": "ndarray" + }, + { + "name": "figure", + "type": 
"matplotlib.figure.Figure" + } + ] + }, - }, - - "hyperparameters": { - "fixed": { - "labels": { - "type": "ndarray", - "default": null - }, - "sample_weight": { - "type": "ndarray", - "default": null - }, - "normalize": { - "type": "str", - "default": null - }, - "show_plot": { - "type": "bool", - "default": true - } - } + "hyperparameters": { + "fixed": { + "labels": { + "type": "ndarray", + "default": null + }, + "sample_weight": { + "type": "ndarray", + "default": null + }, + "normalize": { + "type": "str", + "default": null + } } - -} \ No newline at end of file + } +} diff --git a/zephyr_ml/primitives/jsons/zephyr_ml.primitives.postprocessing.roc_auc_score_and_curve.json b/zephyr_ml/primitives/jsons/zephyr_ml.primitives.postprocessing.roc_auc_score_and_curve.json index 343c1d3..778bde9 100644 --- a/zephyr_ml/primitives/jsons/zephyr_ml.primitives.postprocessing.roc_auc_score_and_curve.json +++ b/zephyr_ml/primitives/jsons/zephyr_ml.primitives.postprocessing.roc_auc_score_and_curve.json @@ -1,52 +1,49 @@ { - "name": "zephyr_ml.primitives.postprocessing.roc_auc_score_and_curve", - "contributors": [ - "Raymond Pan " + "name": "zephyr_ml.primitives.postprocessing.roc_auc_score_and_curve", + "contributors": ["Raymond Pan "], + "description": "Calculate ROC AUC score and plot curve.", + "classifiers": { + "type": "helper" + }, + "modalities": [], + "primitive": "zephyr_ml.primitives.postprocessing.roc_auc_score_and_curve", + "produce": { + "args": [ + { + "name": "y_true", + "type": "ndarray" + }, + { + "name": "y_proba", + "type": "ndarray" + } ], - "description": "Calculate ROC AUC score and plot curve.", - "classifiers": { - "type": "helper" - }, - "modalities": [], - "primitive": "zephyr_ml.primitives.postprocessing.roc_auc_score_and_curve", - "produce": { - "args": [ - { - "name": "y_true", - "type": "ndarray" - }, - { - "name": "y_proba", - "type": "ndarray" - } - ], - "output": [ - { - "name": "score", - "type": "float" - } - ] - }, - - "hyperparameters": { - 
"fixed": { - "pos_label": { - "type": "int, float, bool or str", - "default": null - }, - "sample_weight": { - "type": "ndarray", - "default": null - }, - "drop_intermediate": { - "type": "bool", - "default": true - }, - "show_plot": { - "type": "bool", - "default": true - } - } + "output": [ + { + "name": "score", + "type": "float" + }, + { + "name": "figure", + "type": "matplotlib.figure.Figure" + } + ] + }, + + "hyperparameters": { + "fixed": { + "pos_label": { + "type": "int, float, bool or str", + "default": null + }, + "sample_weight": { + "type": "ndarray", + "default": null + }, + "drop_intermediate": { + "type": "bool", + "default": true + } } - -} \ No newline at end of file + } +} diff --git a/zephyr_ml/primitives/postprocessing.py b/zephyr_ml/primitives/postprocessing.py index 7498509..4e47bfe 100644 --- a/zephyr_ml/primitives/postprocessing.py +++ b/zephyr_ml/primitives/postprocessing.py @@ -86,28 +86,33 @@ def apply_threshold(self, y_proba): return binary, self._threshold, self._scores -def confusion_matrix(y_true, y_pred, labels=None, sample_weight=None, normalize=None, show_plot = True): +def confusion_matrix( + y_true, + y_pred, + labels=None, + sample_weight=None, + normalize=None): conf_matrix = metrics.confusion_matrix( y_true, y_pred, labels=labels, sample_weight=sample_weight, normalize=normalize ) - if show_plot: - ax = sns.heatmap(conf_matrix, annot=True, cmap="Blues") - ax.set_title("Confusion Matrix\n") - ax.set_xlabel("\nPredicted Values") - ax.set_ylabel("Actual Values") - - ax.xaxis.set_ticklabels(["False", "True"]) - ax.yaxis.set_ticklabels(["False", "True"]) - - plt.show() - return conf_matrix + fig = plt.figure() + ax = fig.add_axes(sns.heatmap(conf_matrix, annot=True, cmap="Blues")) + + ax.set_title("Confusion Matrix\n") + ax.set_xlabel("\nPredicted Values") + ax.set_ylabel("Actual Values") + + ax.xaxis.set_ticklabels(["False", "True"]) + ax.yaxis.set_ticklabels(["False", "True"]) + + return conf_matrix, fig def 
roc_auc_score_and_curve( - y_true, y_proba, pos_label=None, sample_weight=None, drop_intermediate=True, show_plot = True + y_true, y_proba, pos_label=None, sample_weight=None, drop_intermediate=True ): if y_proba.ndim > 1: - y_proba = y_proba[:,1] + y_proba = y_proba[:, 1] fpr, tpr, _ = metrics.roc_curve( y_true, y_proba, @@ -124,17 +129,16 @@ def roc_auc_score_and_curve( drop_intermediate=drop_intermediate, ) - auc = metrics.roc_auc_score(y_true, y_proba) - if show_plot: - _, _ = plt.subplots(1, 1) - plt.plot(fpr, tpr, "ro") - plt.plot(fpr, tpr) - plt.plot(ns_fpr, ns_tpr, linestyle="--", color="green") - - plt.ylabel("True Positive Rate") - plt.xlabel("False Positive Rate") - plt.title("AUC: %.3f" % auc) - plt.show() - - return auc + fig, ax = plt.subplots(1, 1) + + ax.plot(fpr, tpr, "ro") + ax.plot(fpr, tpr) + ax.plot(ns_fpr, ns_tpr, linestyle="--", color="green") + + ax.set_ylabel("True Positive Rate") + ax.set_xlabel("False Positive Rate") + ax.set_title("AUC: %.3f" % auc) + + + return auc, fig From 4e635ac6bb8cd8002f1c0df922ef37cc76fc2b27 Mon Sep 17 00:00:00 2001 From: Raymond Pan Date: Tue, 13 May 2025 14:36:14 -0400 Subject: [PATCH 24/28] fix lint --- setup.py | 2 +- tests/labeling/test_helpers.py | 12 +- tests/primitives/test_postprocessing.py | 3 +- tests/test___init__.py | 3 +- tests/test_core.py | 67 ++++--- tests/test_feature_engineering.py | 18 +- tests/test_metadata.py | 3 +- zephyr_ml/__init__.py | 13 +- zephyr_ml/core.py | 176 +++++++++++------- zephyr_ml/entityset.py | 119 +----------- zephyr_ml/feature_engineering.py | 4 +- zephyr_ml/labeling/__init__.py | 9 +- .../labeling_functions/brake_pad_presence.py | 3 +- .../labeling_functions/planet_bearing.py | 3 +- zephyr_ml/labeling/utils.py | 12 +- zephyr_ml/primitives/postprocessing.py | 9 +- 16 files changed, 207 insertions(+), 249 deletions(-) diff --git a/setup.py b/setup.py index c4a2158..91aa469 100644 --- a/setup.py +++ b/setup.py @@ -54,7 +54,7 @@ 'Sphinx>=3,<3.3', 
'sphinx_rtd_theme>=0.2.4,<0.5', 'autodocsumm>=0.1.10', - 'mistune>=0.7,<3.1', + 'mistune>=0.7,<2.0', 'Jinja2>=2,<3.1', # fails on Sphinx < v3.4 diff --git a/tests/labeling/test_helpers.py b/tests/labeling/test_helpers.py index 080c312..a43ca9b 100644 --- a/tests/labeling/test_helpers.py +++ b/tests/labeling/test_helpers.py @@ -19,7 +19,8 @@ def test_merge_labeling_and_true(): lambda df: True, lambda df: True ] - assert 1 == merge_binary_labeling_functions(functions, and_connected=True)(pd.DataFrame()) + assert 1 == merge_binary_labeling_functions( + functions, and_connected=True)(pd.DataFrame()) def test_merge_labeling_and_false(): @@ -27,7 +28,8 @@ def test_merge_labeling_and_false(): lambda df: True, lambda df: False ] - assert 0 == merge_binary_labeling_functions(functions, and_connected=True)(pd.DataFrame()) + assert 0 == merge_binary_labeling_functions( + functions, and_connected=True)(pd.DataFrame()) def test_merge_labeling_or_true(): @@ -35,7 +37,8 @@ def test_merge_labeling_or_true(): lambda df: False, lambda df: True ] - assert 1 == merge_binary_labeling_functions(functions, and_connected=False)(pd.DataFrame()) + assert 1 == merge_binary_labeling_functions( + functions, and_connected=False)(pd.DataFrame()) def test_merge_labeling_or_false(): @@ -43,7 +46,8 @@ def test_merge_labeling_or_false(): lambda df: False, lambda df: False ] - assert 0 == merge_binary_labeling_functions(functions, and_connected=False)(pd.DataFrame()) + assert 0 == merge_binary_labeling_functions( + functions, and_connected=False)(pd.DataFrame()) def test_categorical_presence_true(): diff --git a/tests/primitives/test_postprocessing.py b/tests/primitives/test_postprocessing.py index 2b1c65e..696813c 100644 --- a/tests/primitives/test_postprocessing.py +++ b/tests/primitives/test_postprocessing.py @@ -23,7 +23,8 @@ def _run(self, y, y_hat, value): threshold.fit(y, y_hat) assert threshold._threshold == value - binary_y_hat, detected_threshold, scores = threshold.apply_threshold(y_hat) + 
binary_y_hat, detected_threshold, scores = threshold.apply_threshold( + y_hat) np.testing.assert_allclose(binary_y_hat, y) def test_1d(self): diff --git a/tests/test___init__.py b/tests/test___init__.py index 72b810e..9a67a96 100644 --- a/tests/test___init__.py +++ b/tests/test___init__.py @@ -96,7 +96,8 @@ def merge_work_orders_notifications_data(): changed_wo_data['WTG'] = ['A001', 'A001'] changed_notif_data = NOTIFICATIONS_DATA.copy() # matching the output of the merge - changed_notif_data['Functional location_y'] = changed_notif_data.pop('Functional location') + changed_notif_data['Functional location_y'] = changed_notif_data.pop( + 'Functional location') changed_notif_data['Functional location description_y'] = ( changed_notif_data.pop('Functional location description')) # matching the notifications update diff --git a/tests/test_core.py b/tests/test_core.py index 1627cf6..1925e36 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -1,6 +1,7 @@ import numpy as np import pandas as pd from mlblocks import MLBlock + from zephyr_ml.core import DEFAULT_METRICS, Zephyr @@ -10,8 +11,10 @@ class TestZephyr: def base_dfs(): alarms_df = pd.DataFrame({ 'COD_ELEMENT': [0, 0], - 'DAT_START': [pd.Timestamp('2022-01-01 00:00:00'), pd.Timestamp('2022-03-01 11:12:13')], - 'DAT_END': [pd.Timestamp('2022-01-01 13:00:00'), pd.Timestamp('2022-03-02 11:12:13')], + 'DAT_START': [pd.Timestamp('2022-01-01 00:00:00'), + pd.Timestamp('2022-03-01 11:12:13')], + 'DAT_END': [pd.Timestamp('2022-01-01 13:00:00'), + pd.Timestamp('2022-03-02 11:12:13')], 'IND_DURATION': [0.5417, 1.0], 'COD_ALARM': [12345, 98754], 'COD_ALARM_INT': [12345, 98754], @@ -20,8 +23,10 @@ def base_dfs(): }) stoppages_df = pd.DataFrame({ 'COD_ELEMENT': [0, 0], - 'DAT_START': [pd.Timestamp('2022-01-01 00:00:00'), pd.Timestamp('2022-03-01 11:12:13')], - 'DAT_END': [pd.Timestamp('2022-01-08 11:07:17'), pd.Timestamp('2022-03-01 17:00:13')], + 'DAT_START': [pd.Timestamp('2022-01-01 00:00:00'), + 
pd.Timestamp('2022-03-01 11:12:13')], + 'DAT_END': [pd.Timestamp('2022-01-08 11:07:17'), + pd.Timestamp('2022-03-01 17:00:13')], 'DES_WO_NAME': ['stoppage name 1', 'stoppage name 2'], 'DES_COMMENTS': ['description of stoppage 1', 'description of stoppage 2'], 'COD_WO': [12345, 67890], @@ -40,12 +45,15 @@ def base_dfs(): 'COD_ORDER': [12345, 67890], 'IND_QUANTITY': [1, -20], 'COD_MATERIAL_SAP': [36052411, 67890], - 'DAT_POSTING': [pd.Timestamp('2022-01-01 00:00:00'), pd.Timestamp('2022-03-01 00:00:00')], + 'DAT_POSTING': [pd.Timestamp('2022-01-01 00:00:00'), + pd.Timestamp('2022-03-01 00:00:00')], 'COD_MAT_DOC': [77889900, 12345690], 'DES_MEDIUM': ['Description of notification 1', 'Description of notification 2'], 'COD_NOTIF': [567890123, 32109877], - 'DAT_MALF_START': [pd.Timestamp('2021-12-25 18:07:10'), pd.Timestamp('2022-02-28 06:04:00')], - 'DAT_MALF_END': [pd.Timestamp('2022-01-08 11:07:17'), pd.Timestamp('2022-03-01 17:00:13')], + 'DAT_MALF_START': [pd.Timestamp('2021-12-25 18:07:10'), + pd.Timestamp('2022-02-28 06:04:00')], + 'DAT_MALF_END': [pd.Timestamp('2022-01-08 11:07:17'), + pd.Timestamp('2022-03-01 17:00:13')], 'IND_BREAKDOWN_DUR': [14.1378, 2.4792], 'FUNCT_LOC_DES': ['location description 1', 'location description 2'], 'COD_ALARM': [12345, 12345], @@ -54,15 +62,19 @@ def base_dfs(): work_orders_df = pd.DataFrame({ 'COD_ELEMENT': [0, 0], 'COD_ORDER': [12345, 67890], - 'DAT_BASIC_START': [pd.Timestamp('2022-01-01 00:00:00'), pd.Timestamp('2022-03-01 00:00:00')], - 'DAT_BASIC_END': [pd.Timestamp('2022-01-09 00:00:00'), pd.Timestamp('2022-03-02 00:00:00')], + 'DAT_BASIC_START': [pd.Timestamp('2022-01-01 00:00:00'), + pd.Timestamp('2022-03-01 00:00:00')], + 'DAT_BASIC_END': [pd.Timestamp('2022-01-09 00:00:00'), + pd.Timestamp('2022-03-02 00:00:00')], 'COD_EQUIPMENT': [98765, 98765], 'COD_MAINT_PLANT': ['ABC', 'ABC'], 'COD_MAINT_ACT_TYPE': ['XYZ', 'XYZ'], 'COD_CREATED_BY': ['A1234', 'B6789'], 'COD_ORDER_TYPE': ['A', 'B'], - 'DAT_REFERENCE': 
[pd.Timestamp('2022-01-01 00:00:00'), pd.Timestamp('2022-03-01 00:00:00')], - 'DAT_CREATED_ON': [pd.Timestamp('2022-03-01 00:00:00'), pd.Timestamp('2022-04-18 00:00:00')], + 'DAT_REFERENCE': [pd.Timestamp('2022-01-01 00:00:00'), + pd.Timestamp('2022-03-01 00:00:00')], + 'DAT_CREATED_ON': [pd.Timestamp('2022-03-01 00:00:00'), + pd.Timestamp('2022-04-18 00:00:00')], 'DAT_VALID_END': [pd.NaT, pd.NaT], 'DAT_VALID_START': [pd.NaT, pd.NaT], 'COD_SYSTEM_STAT': ['ABC XYZ', 'LMN OPQ'], @@ -86,7 +98,8 @@ def base_dfs(): 'PI_LOCAL_SITE_NAME': ['LOC0'] }) pidata_df = pd.DataFrame({ - 'time': [pd.Timestamp('2022-01-02 13:21:01'), pd.Timestamp('2022-03-08 13:21:01')], + 'time': [pd.Timestamp('2022-01-02 13:21:01'), + pd.Timestamp('2022-03-08 13:21:01')], 'COD_ELEMENT': [0, 0], 'val1': [9872.0, 559.0], 'val2': [10.0, -7.0] @@ -153,40 +166,47 @@ def setup_class(cls): def test_initialize_class(self): _ = Zephyr() - def test_generate_entityset(self): zephyr = Zephyr() - zephyr.generate_entityset(**self.__class__.kwargs["generate_entityset"]) + zephyr.generate_entityset( + **self.__class__.kwargs["generate_entityset"]) es = zephyr.get_entityset() assert es is not None assert es.id == 'pidata' def test_generate_label_times(self): zephyr = Zephyr() - zephyr.generate_entityset(**self.__class__.kwargs["generate_entityset"]) - zephyr.generate_label_times(**self.__class__.kwargs["generate_label_times"]) + zephyr.generate_entityset( + **self.__class__.kwargs["generate_entityset"]) + zephyr.generate_label_times( + **self.__class__.kwargs["generate_label_times"]) label_times = zephyr.get_label_times(visualize=False) assert label_times is not None def test_generate_feature_matrix_and_labels(self): zephyr = Zephyr() - zephyr.generate_entityset(**self.__class__.kwargs["generate_entityset"]) - zephyr.generate_label_times(**self.__class__.kwargs["generate_label_times"]) + zephyr.generate_entityset( + **self.__class__.kwargs["generate_entityset"]) + zephyr.generate_label_times( + 
**self.__class__.kwargs["generate_label_times"]) zephyr.generate_feature_matrix( **self.__class__.kwargs["generate_feature_matrix"]) - feature_matrix, label_col_name, features= zephyr.get_feature_matrix() + feature_matrix, label_col_name, features = zephyr.get_feature_matrix() assert feature_matrix is not None assert label_col_name in feature_matrix.columns assert features is not None def test_generate_train_test_split(self): zephyr = Zephyr() - zephyr.generate_entityset(**self.__class__.kwargs["generate_entityset"]) - zephyr.generate_label_times(**self.__class__.kwargs["generate_label_times"]) + zephyr.generate_entityset( + **self.__class__.kwargs["generate_entityset"]) + zephyr.generate_label_times( + **self.__class__.kwargs["generate_label_times"]) zephyr.generate_feature_matrix( **self.__class__.kwargs["generate_feature_matrix"]) - zephyr.generate_train_test_split(**self.__class__.kwargs["generate_train_test_split"]) + zephyr.generate_train_test_split( + **self.__class__.kwargs["generate_train_test_split"]) train_test_split = zephyr.get_train_test_split() assert train_test_split is not None X_train, X_test, y_train, y_test = train_test_split @@ -217,7 +237,8 @@ def test_fit_pipeline_no_visual(self): def test_fit_pipeline_visual(self): zephyr = Zephyr() zephyr.set_train_test_split(*self.base_train_test_split()) - output = zephyr.fit_pipeline(visual=True, **self.__class__.kwargs["fit_pipeline"]) + output = zephyr.fit_pipeline( + visual=True, **self.__class__.kwargs["fit_pipeline"]) assert isinstance(output, dict) assert list(output.keys()) == ['threshold', 'scores'] pipeline = zephyr.get_fitted_pipeline() diff --git a/tests/test_feature_engineering.py b/tests/test_feature_engineering.py index 17d72e6..5baf7dd 100644 --- a/tests/test_feature_engineering.py +++ b/tests/test_feature_engineering.py @@ -170,8 +170,10 @@ def test_process_signals_pidata(pidata_es, transformations, aggregations): "fft.mean.mean_value": [9872, None, 559] }) expected['COD_ELEMENT'] = 
expected['COD_ELEMENT'].astype('category') - expected['fft.mean.mean_value'] = expected['fft.mean.mean_value'].astype('float64') - processed['fft.mean.mean_value'] = processed['fft.mean.mean_value'].astype('float64') + expected['fft.mean.mean_value'] = expected['fft.mean.mean_value'].astype( + 'float64') + processed['fft.mean.mean_value'] = processed['fft.mean.mean_value'].astype( + 'float64') assert pidata_es['pidata_processed'].shape[0] == 3 assert pidata_es['pidata_processed'].shape[1] == 4 @@ -202,8 +204,10 @@ def test_process_signals_pidata_replace(pidata_es, transformations, aggregations "fft.mean.mean_value": [9872, None, 559] }) expected['COD_ELEMENT'] = expected['COD_ELEMENT'].astype('category') - expected['fft.mean.mean_value'] = expected['fft.mean.mean_value'].astype('float64') - processed['fft.mean.mean_value'] = processed['fft.mean.mean_value'].astype('float64') + expected['fft.mean.mean_value'] = expected['fft.mean.mean_value'].astype( + 'float64') + processed['fft.mean.mean_value'] = processed['fft.mean.mean_value'].astype( + 'float64') assert pidata_es['pidata'].shape[0] == 3 assert pidata_es['pidata'].shape[1] == 4 @@ -233,7 +237,8 @@ def test_process_signals_scada(scada_es, transformations, aggregations): "fft.mean.mean_value": [1002, None, 56.8] }) expected['COD_ELEMENT'] = expected['COD_ELEMENT'].astype('category') - expected['fft.mean.mean_value'] = expected['fft.mean.mean_value'].astype('float64') + expected['fft.mean.mean_value'] = expected['fft.mean.mean_value'].astype( + 'float64') after = scada_es['scada'].copy() assert scada_es['scada_processed'].shape[0] == 3 @@ -263,7 +268,8 @@ def test_process_signals_scada_replace(scada_es, transformations, aggregations): "fft.mean.mean_value": [1002, None, 56.8] }) expected['COD_ELEMENT'] = expected['COD_ELEMENT'].astype('category') - expected['fft.mean.mean_value'] = expected['fft.mean.mean_value'].astype('float64') + expected['fft.mean.mean_value'] = expected['fft.mean.mean_value'].astype( + 
'float64') assert scada_es['scada'].shape[0] == 3 assert scada_es['scada'].shape[1] == 4 diff --git a/tests/test_metadata.py b/tests/test_metadata.py index ddb816a..8d8f923 100644 --- a/tests/test_metadata.py +++ b/tests/test_metadata.py @@ -10,7 +10,8 @@ def test_default_scada_mapped_kwargs(): def test_default_pidata_mapped_kwargs(): - expected = {**DEFAULT_ES_KWARGS, 'pidata': DEFAULT_ES_TYPE_KWARGS['pidata']} + expected = {**DEFAULT_ES_KWARGS, + 'pidata': DEFAULT_ES_TYPE_KWARGS['pidata']} actual = get_mapped_kwargs('pidata') assert expected == actual diff --git a/zephyr_ml/__init__.py b/zephyr_ml/__init__.py index 5fa5b87..b633a49 100644 --- a/zephyr_ml/__init__.py +++ b/zephyr_ml/__init__.py @@ -9,16 +9,9 @@ import os from zephyr_ml.core import Zephyr -from zephyr_ml.entityset import ( - # create_pidata_entityset, - # create_scada_entityset, - _create_entityset, - VALIDATE_DATA_FUNCTIONS, -) +from zephyr_ml.entityset import VALIDATE_DATA_FUNCTIONS, _create_entityset from zephyr_ml.labeling import DataLabeler -MLBLOCKS_PRIMITIVES = os.path.join(os.path.dirname(__file__), "primitives", "jsons") +MLBLOCKS_PRIMITIVES = os.path.join( + os.path.dirname(__file__), "primitives", "jsons") MLBLOCKS_PIPELINES = os.path.join(os.path.dirname(__file__), "pipelines") -# import os, sys - -# sys.path.append(os.path.dirname(os.path.realpath(__file__))) diff --git a/zephyr_ml/core.py b/zephyr_ml/core.py index 92086d5..b04615a 100644 --- a/zephyr_ml/core.py +++ b/zephyr_ml/core.py @@ -1,20 +1,21 @@ -from zephyr_ml.entityset import _create_entityset, VALIDATE_DATA_FUNCTIONS -from zephyr_ml.labeling import get_labeling_functions -from zephyr_ml.labeling import get_labeling_functions_map -from zephyr_ml.feature_engineering import process_signals -import composeml as cp +import copy +import json +import logging +import os +from functools import wraps from inspect import getfullargspec + +import composeml as cp import featuretools as ft import numpy as np import pandas as pd +from 
mlblocks import MLBlock, MLPipeline from sklearn.model_selection import train_test_split -import os -import json -from mlblocks import MLPipeline, MLBlock -from functools import wraps -import logging -from functools import wraps -import copy + +from zephyr_ml.entityset import VALIDATE_DATA_FUNCTIONS, _create_entityset +from zephyr_ml.feature_engineering import process_signals +from zephyr_ml.labeling import get_labeling_functions, get_labeling_functions_map + DEFAULT_METRICS = [ "sklearn.metrics.accuracy_score", "sklearn.metrics.precision_score", @@ -62,7 +63,8 @@ def get_necessary_steps(self, actual_next_step): def get_get_steps_in_between(self, cur_step, next_step): step_strs = [] for step in range(cur_step + 1, next_step): - step_strs.append(f"{step} {self.producers_and_getters[step][1][0]}") + step_strs.append( + f"{step} {self.producers_and_getters[step][1][0]}") return step_strs def get_last_up_to_date(self, next_step): @@ -102,9 +104,9 @@ def try_log_skipping_steps_warning(self, name, next_step): def try_log_using_stale_warning(self, name, next_step): latest_up_to_date = self.get_last_up_to_date(next_step) - steps_needed = self.get_steps_in_between(latest_up_to_date - 1, next_step) + steps_needed = self.get_steps_in_between( + latest_up_to_date - 1, next_step) if len(steps_needed) > 0: - necc_steps = self.join_steps(steps_needed) LOGGER.warning(f"Performing {name}. You are in a stale state and \ using potentially stale data to perform this step. \ Re-run the following steps to return to a present state:\n: \ @@ -112,34 +114,41 @@ def try_log_using_stale_warning(self, name, next_step): def try_log_making_stale_warning(self, name, next_step): next_next_step = next_step + 1 - prod_steps = f"{next_next_step}. {' or '.join(self.producers_and_getters[next_next_step][0])}" + prod_steps = f"{next_next_step}. 
\ + {' or '.join(self.producers_and_getters[next_next_step][0])}" # add later set methods get_steps = self.join_steps( self.get_get_steps_in_between( next_step, self.current_step + 1)) - LOGGER.warning(f"Performing {name}. You are beginning a new iteration. Any data returned \ - by the following get methods will be considered stale:\n{get_steps}. To continue with this iteration, please perform:\n{prod_steps}") + LOGGER.warning(f"Performing {name}. You are beginning a new iteration.\ + Any data returned by the following get methods will be \ + considered stale:\n{get_steps}. To continue with this \ + iteration, please perform:\n{prod_steps}") # stale must be before b/c user must have regressed with progress that contains skips # return set method, and next possible up to date key method def try_log_inconsistent_warning(self, name, next_step): - set_method_str = f"{self.producers_and_getters[next_step][0][1].__name__}" + set_method_str = self.producers_and_getters[next_step][0][1].__name__ latest_up_to_date = self.get_last_up_to_date(next_step) - LOGGER.warning(f"Unable to perform {name} because some steps have been skipped. \ - You can call the corresponding set method: {set_method_str} or re run steps \ - starting at or before {latest_up_to_date}") + LOGGER.warning(f"Unable to perform {name} because some steps have been\ + skipped. You can call the corresponding set method: \ + {set_method_str} or re run steps starting at or before \ + {latest_up_to_date}") def log_get_inconsistent_warning(self, name, next_step): - prod_steps = f"{next_step}. {' or '.join(self.producers_and_getters[next_step][0])}" + prod_steps = f"{next_step}. \ + {' or '.join(self.producers_and_getters[next_step][0])}" latest_up_to_date = self.get_last_up_to_date(next_step) - LOGGER.warning( - f"Unable to perform {name} because {prod_steps} has not been run yet. 
Run steps starting at or before {latest_up_to_date} ") + LOGGER.warning(f"Unable to perform {name} because {prod_steps} has not \ + been run yet. Run steps starting at or before \ + {latest_up_to_date} ") def log_get_stale_warning(self, name, next_step): latest_up_to_date = self.get_last_up_to_date(next_step) LOGGER.warning(f"Performing {name}. This data is potentially stale. \ - Re-run steps starting at or before {latest_up_to_date} to ensure data is up to date.") + Re-run steps starting at or before \ + {latest_up_to_date} to ensure data is up to date.") # tries to perform step if possible -> warns that data might be stale @@ -149,7 +158,8 @@ def try_perform_forward_producer_step(self, zephyr, method, *method_args, **meth if name in self.set_methods: self.try_log_skipping_steps_warning(name, next_step) # next_step == 0, set method (already warned), or previous step is up to term - res = self.perform_producer_step(zephyr, method, *method_args, **method_kwargs) + res = self.perform_producer_step( + zephyr, method, *method_args, **method_kwargs) return res # next_step == 0, set method, or previous step is up to term @@ -162,7 +172,8 @@ def try_perform_backward_producer_step(self, zephyr, method, *method_args, **met for i in range(0, next_step): if self.terms[i] != -1: self.terms[i] = self.cur_term - res = self.perform_producer_step(zephyr, method, *method_args, **method_kwargs) + res = self.perform_producer_step( + zephyr, method, *method_args, **method_kwargs) return res def try_perform_producer_step(self, zephyr, method, *method_args, **method_kwargs): @@ -186,12 +197,9 @@ def try_perform_stale_or_inconsistent_producer_step( if self.terms[next_step - 1] == -1: # inconsistent self.try_log_inconsistent_warning(name, next_step) else: - # need to include a case where performing using stale data that was skipped in current iteration - # overwrite current iteration's ? 
- # no not possible b/c if there is a current iteration after this step, it must have updated this step's iteration - # self.try_log_using_stale_warning(name, next_step) - res = self.perform_producer_step(zephyr, method, *method_args, **method_kwargs) + res = self.perform_producer_step( + zephyr, method, *method_args, **method_kwargs) return res def try_perform_getter_step(self, zephyr, method, *method_args, **method_kwargs): @@ -214,15 +222,18 @@ def guide_step(self, zephyr, method, *method_args, **method_kwargs): if method_name in self.producer_to_step_map: # up-todate next_step = self.producer_to_step_map[method_name] - if method_name in self.set_methods or next_step == 0 or self.terms[next_step - 1] == self.cur_term: - res = self.try_perform_producer_step(zephyr, method, *method_args, **method_kwargs) + if (method_name in self.set_methods or next_step == 0 or + self.terms[next_step - 1] == self.cur_term): + res = self.try_perform_producer_step( + zephyr, method, *method_args, **method_kwargs) return res else: # stale or inconsistent res = self.try_perform_stale_or_inconsistent_producer_step( zephyr, method, *method_args, **method_kwargs) return res elif method_name in self.getter_to_step_map: - res = self.try_perform_getter_step(zephyr, method, *method_args, **method_kwargs) + res = self.try_perform_getter_step( + zephyr, method, *method_args, **method_kwargs) return res else: print(f"Method {method_name} does not need to be wrapped") @@ -278,11 +289,13 @@ def __init__(self): def GET_ENTITYSET_TYPES(self): """ - Returns the supported entityset types (PI/SCADA/Vibrations) and the required dataframes and their columns + Returns the supported entityset types (PI/SCADA/Vibrations) + and the required dataframes and their columns """ info_map = {} for es_type, val_fn in VALIDATE_DATA_FUNCTIONS.items(): - info_map[es_type] = {"obj": es_type, "desc": " ".join((val_fn.__doc__.split()))} + info_map[es_type] = {"obj": es_type, + "desc": " ".join((val_fn.__doc__.split()))} 
return info_map @@ -293,7 +306,8 @@ def GET_EVALUATION_METRICS(self): info_map = {} for metric in DEFAULT_METRICS: primitive = self._get_ml_primitive(metric) - info_map[metric] = {"obj": primitive, "desc": primitive.metadata["description"]} + info_map[metric] = {"obj": primitive, + "desc": primitive.metadata["description"]} return info_map @guide @@ -355,7 +369,9 @@ def set_entityset( if entityset is None: raise ValueError( - "No entityset passed in. Please pass in an entityset object via the entityest parameter or an entityset path via the entityset_path parameter.") + "No entityset passed in. Please pass in an entityset object\ + via the entityset parameter or an entityset path via the \ + entityset_path parameter.") dfs = entityset.dataframe_dict @@ -367,7 +383,8 @@ def set_entityset( @guide def get_entityset(self): if self._entityset is None: - raise ValueError("No entityset has been created or set in this instance.") + raise ValueError( + "No entityset has been created or set in this instance.") return self._entityset @@ -383,7 +400,9 @@ def generate_label_times( labeling_fn = labeling_fn_map[labeling_fn] else: raise ValueError( - f"Unrecognized name argument:{labeling_fn}. Call get_predefined_labeling_functions to view predefined labeling functions" + f"Unrecognized name argument:{labeling_fn}. 
\ + Call get_predefined_labeling_functions to \ + view predefined labeling functions" ) assert callable(labeling_fn), "Labeling function is not callable" @@ -424,7 +443,7 @@ def generate_label_times( return label_times, meta @guide - def set_label_times(self, label_times, label_col_name, meta = None): + def set_label_times(self, label_times, label_col_name, meta=None): assert (isinstance(label_times, cp.LabelTimes)) self._label_times = label_times self._label_col_name = label_col_name @@ -467,10 +486,10 @@ def generate_feature_matrix( progress_callback=None, include_cutoff_time=True, - add_interesting_values = False, - max_interesting_values = 5, - interesting_dataframe_name = None, - interesting_values = None, + add_interesting_values=False, + max_interesting_values=5, + interesting_dataframe_name=None, + interesting_values=None, signal_dataframe_name=None, signal_column=None, @@ -479,7 +498,6 @@ def generate_feature_matrix( signal_window_size=None, signal_replace_dataframe=False, **sigpro_kwargs): - entityset_copy = copy.deepcopy(self._entityset) # perform signal processing @@ -498,26 +516,38 @@ def generate_feature_matrix( signal_window_size, signal_replace_dataframe, **sigpro_kwargs) - + # add interesting values for where primitives if add_interesting_values: - entityset_copy.add_interesting_values(max_values = max_interesting_values, verbose = verbose,dataframe_name = interesting_dataframe_name, values = interesting_values) - + entityset_copy.add_interesting_values( + max_values=max_interesting_values, + verbose=verbose, + dataframe_name=interesting_dataframe_name, + values=interesting_values) feature_matrix, features = ft.dfs( entityset=entityset_copy, cutoff_time=self._label_times, - target_dataframe_name=target_dataframe_name, instance_ids=instance_ids, - agg_primitives=agg_primitives, trans_primitives=trans_primitives, groupby_trans_primitives=groupby_trans_primitives, - allowed_paths=allowed_paths, max_depth=max_depth, ignore_dataframes=ignore_dataframes, 
ignore_columns=ignore_columns, + target_dataframe_name=target_dataframe_name, + instance_ids=instance_ids, agg_primitives=agg_primitives, + trans_primitives=trans_primitives, + groupby_trans_primitives=groupby_trans_primitives, + allowed_paths=allowed_paths, max_depth=max_depth, + ignore_dataframes=ignore_dataframes, ignore_columns=ignore_columns, primitive_options=primitive_options, seed_features=seed_features, - drop_contains=drop_contains, drop_exact=drop_exact, where_primitives=where_primitives, max_features=max_features, - cutoff_time_in_index=cutoff_time_in_index, save_progress=save_progress, features_only=features_only, training_window=training_window, - approximate=approximate, chunk_size=chunk_size, n_jobs=n_jobs, dask_kwargs=dask_kwargs, verbose=verbose, return_types=return_types, - progress_callback=progress_callback, include_cutoff_time=include_cutoff_time, + drop_contains=drop_contains, drop_exact=drop_exact, + where_primitives=where_primitives, max_features=max_features, + cutoff_time_in_index=cutoff_time_in_index, + save_progress=save_progress, features_only=features_only, + training_window=training_window, approximate=approximate, + chunk_size=chunk_size, n_jobs=n_jobs, + dask_kwargs=dask_kwargs, verbose=verbose, + return_types=return_types, progress_callback=progress_callback, + include_cutoff_time=include_cutoff_time ) - self._feature_matrix = self._clean_feature_matrix(feature_matrix, label_col_name=self._label_col_name) + self._feature_matrix = self._clean_feature_matrix( + feature_matrix, label_col_name=self._label_col_name) self._features = features - + return self._feature_matrix, self._features, entityset_copy @guide @@ -525,8 +555,9 @@ def get_feature_matrix(self): return self._feature_matrix, self._label_col_name, self._features @guide - def set_feature_matrix(self, feature_matrix, labels = None,label_col_name="label"): - assert isinstance(feature_matrix, pd.DataFrame) and (labels is not None or label_col_name in feature_matrix.columns ) + 
def set_feature_matrix(self, feature_matrix, labels=None, label_col_name="label"): + assert isinstance(feature_matrix, pd.DataFrame) and ( + labels is not None or label_col_name in feature_matrix.columns) if labels is not None: feature_matrix[label_col_name] = labels self._feature_matrix = self._clean_feature_matrix( @@ -578,7 +609,8 @@ def set_train_test_split(self, X_train, X_test, y_train, y_test): @guide def get_train_test_split(self): - if self._X_train is None or self._X_test is None or self._y_train is None or self._y_test is None: + if (self._X_train is None or self._X_test is None or + self._y_train is None or self._y_test is None): return None return self._X_train, self._X_test, self._y_train, self._y_test @@ -588,9 +620,11 @@ def set_fitted_pipeline(self, pipeline): @guide def fit_pipeline( - self, pipeline="xgb_classifier", pipeline_hyperparameters=None, X=None, y=None, visual=False, **kwargs - ): # kwargs indicate the parameters of the current pipeline - self._pipeline = self._get_mlpipeline(pipeline, pipeline_hyperparameters) + self, pipeline="xgb_classifier", pipeline_hyperparameters=None, + X=None, y=None, visual=False, **kwargs): + # kwargs indicate the parameters of the current pipeline + self._pipeline = self._get_mlpipeline( + pipeline, pipeline_hyperparameters) if X is None: X = self._X_train @@ -611,7 +645,6 @@ def fit_pipeline( def get_fitted_pipeline(self): return self._pipeline - @guide def predict(self, X=None, visual=False, **kwargs): if X is None: @@ -683,7 +716,8 @@ def evaluate( else: metric_args = {} - res = metric_primitive.produce(y_true=self._y_test, **metric_context, **metric_args) + res = metric_primitive.produce( + y_true=self._y_test, **metric_context, **metric_args) results[metric_primitive.name] = res except Exception as e: LOGGER.error( @@ -890,11 +924,13 @@ def _get_outputs_spec(self, default=True): # "pidata", # ) - # obj.set_entityset(entityset_path = "/Users/raymondpan/zephyr/Zephyr-repo/brake_pad_es", es_type = 'scada') 
+ # obj.set_entityset(entityset_path = + # "/Users/raymondpan/zephyr/Zephyr-repo/brake_pad_es", es_type = 'scada') # obj.set_labeling_function(name="brake_pad_presence") - # obj.generate_label_times(labeling_fn="brake_pad_presence", num_samples=10, gap="20d") + # obj.generate_label_times(labeling_fn="brake_pad_presence", + # num_samples=10, gap="20d") # # print(obj.get_label_times()) # obj.generate_feature_matrix_and_labels( diff --git a/zephyr_ml/entityset.py b/zephyr_ml/entityset.py index 5d189d9..514c325 100644 --- a/zephyr_ml/entityset.py +++ b/zephyr_ml/entityset.py @@ -5,105 +5,6 @@ from zephyr_ml.metadata import get_mapped_kwargs -# def _create_entityset(entities, es_type, es_kwargs): - -# # filter out stated logical types for missing columns -# for entity, df in entities.items(): -# es_kwargs[entity]["logical_types"] = { -# col: t -# for col, t in es_kwargs[entity]["logical_types"].items() -# if col in df.columns -# } - -# turbines_index = es_kwargs["turbines"]["index"] -# work_orders_index = es_kwargs["work_orders"]["index"] - -# relationships = [ -# ("turbines", turbines_index, "alarms", turbines_index), -# ("turbines", turbines_index, "stoppages", turbines_index), -# ("turbines", turbines_index, "work_orders", turbines_index), -# ("turbines", turbines_index, es_type, turbines_index), -# ("work_orders", work_orders_index, "notifications", work_orders_index), -# ] - -# es = ft.EntitySet() - -# for name, df in entities.items(): -# es.add_dataframe(dataframe_name=name, dataframe=df, **es_kwargs[name]) - -# for relationship in relationships: -# parent_df, parent_column, child_df, child_column = relationship -# es.add_relationship(parent_df, parent_column, child_df, child_column) - -# return es - - -# def create_pidata_entityset(dfs, new_kwargs_mapping=None): -# """Generate an entityset for PI data datasets - -# Args: -# data_paths (dict): Dictionary mapping entity names ('alarms', 'notifications', -# 'stoppages', 'work_orders', 'pidata', 'turbines') to the 
pandas dataframe for -# that entity. -# **kwargs: Updated keyword arguments to be used during entityset creation -# """ -# entity_kwargs = get_mapped_kwargs("pidata", new_kwargs_mapping) -# _validate_data(dfs, "pidata", entity_kwargs) - -# es = _create_entityset(dfs, "pidata", entity_kwargs) -# es.id = "PI data" - -# return es - - -# def create_scada_entityset(dfs, new_kwargs_mapping=None): -# """Generate an entityset for SCADA data datasets - -# Args: -# data_paths (dict): Dictionary mapping entity names ('alarms', 'notifications', -# 'stoppages', 'work_orders', 'scada', 'turbines') to the pandas dataframe for -# that entity. -# """ -# entity_kwargs = get_mapped_kwargs("scada", new_kwargs_mapping) -# _validate_data(dfs, "scada", entity_kwargs) - -# es = _create_entityset(dfs, "scada", entity_kwargs) -# es.id = "SCADA data" - -# return es - - -# def create_vibrations_entityset(dfs, new_kwargs_mapping=None): -# """Generate an entityset for Vibrations data datasets - -# Args: -# data_paths (dict): Dictionary mapping entity names ('alarms', 'notifications', -# 'stoppages', 'work_orders', 'vibrations', 'turbines') to the pandas -# dataframe for that entity. Optionally 'pidata' and 'scada' can be included. 
-# """ -# entities = ["vibrations"] - -# pidata_kwargs, scada_kwargs = {}, {} -# if "pidata" in dfs: -# pidata_kwargs = get_mapped_kwargs("pidata", new_kwargs_mapping) -# entities.append("pidata") -# if "scada" in dfs: -# scada_kwargs = get_mapped_kwargs("scada", new_kwargs_mapping) -# entities.append("scada") - -# entity_kwargs = { -# **pidata_kwargs, -# **scada_kwargs, -# **get_mapped_kwargs("vibrations", new_kwargs_mapping), -# } -# _validate_data(dfs, entities, entity_kwargs) - -# es = _create_entityset(dfs, "vibrations", entity_kwargs) -# es.id = "Vibrations data" - -# return es - - def _validate_data(dfs, es_type, es_kwargs): """Validate data by checking for required columns in each entity""" if not isinstance(es_type, list): @@ -127,10 +28,12 @@ def _validate_data(dfs, es_type, es_kwargs): extra = set(dfs.keys()).difference(entities) msg = [] if missing: - msg.append("Missing dataframes for entities {}.".format(", ".join(missing))) + msg.append("Missing dataframes for entities {}.".format( + ", ".join(missing))) if extra: msg.append( - "Unrecognized entities {} included in dfs.".format(", ".join(extra)) + "Unrecognized entities {} included in dfs.".format( + ", ".join(extra)) ) raise ValueError(" ".join(msg)) @@ -188,7 +91,8 @@ def _validate_data(dfs, es_type, es_kwargs): ) ) - secondary_time_indices = es_kwargs[entity].get("secondary_time_index", {}) + secondary_time_indices = es_kwargs[entity].get( + "secondary_time_index", {}) for time_index, cols in secondary_time_indices.items(): if time_index not in df.columns: raise ValueError( @@ -290,14 +194,3 @@ def _create_entityset(entities, es_type, new_kwargs_mapping=None): es.add_relationship(parent_df, parent_column, child_df, child_column) return es - - -# CREATE_ENTITYSET_FUNCTIONS = { -# "scada": create_scada_entityset, -# "pidata": create_pidata_entityset, -# "vibrations": create_vibrations_entityset, -# } - - -# def get_create_entityset_functions(): -# return CREATE_ENTITYSET_FUNCTIONS.copy() diff --git 
a/zephyr_ml/feature_engineering.py b/zephyr_ml/feature_engineering.py index 34c35d8..3310e32 100644 --- a/zephyr_ml/feature_engineering.py +++ b/zephyr_ml/feature_engineering.py @@ -38,7 +38,8 @@ def process_signals(es, signal_dataframe_name, signal_column, transformations, a old_relationship = relationship groupby_index = relationship.child_column.name - pipeline = SigPro(transformations, aggregations, values_column_name=signal_column, **kwargs) + pipeline = SigPro(transformations, aggregations, + values_column_name=signal_column, **kwargs) processed_df, f_cols = pipeline.process_signal( signal_df, @@ -54,7 +55,6 @@ def process_signals(es, signal_dataframe_name, signal_column, transformations, a signal_dataframe_name, time_index=time_index, index='_index') - else: df_name = '{}_processed'.format(signal_df.ww.name) diff --git a/zephyr_ml/labeling/__init__.py b/zephyr_ml/labeling/__init__.py index 72b2014..60cb5d7 100644 --- a/zephyr_ml/labeling/__init__.py +++ b/zephyr_ml/labeling/__init__.py @@ -1,11 +1,7 @@ from zephyr_ml.labeling import utils from zephyr_ml.labeling.data_labeler import DataLabeler from zephyr_ml.labeling.labeling_functions import ( - brake_pad_presence, - converter_replacement_presence, - gearbox_replace_presence, - total_power_loss, -) + brake_pad_presence, converter_replacement_presence, gearbox_replace_presence, total_power_loss) LABELING_FUNCTIONS = [ brake_pad_presence, @@ -28,7 +24,8 @@ def get_labeling_functions(): functions = {} for function in LABELING_FUNCTIONS: name = function.__name__ - functions[name] = {"obj": function, "desc": function.__doc__.split("\n")[0]} + functions[name] = {"obj": function, "desc": function.__doc__.split("\n")[ + 0]} return functions diff --git a/zephyr_ml/labeling/labeling_functions/brake_pad_presence.py b/zephyr_ml/labeling/labeling_functions/brake_pad_presence.py index bcaf7ee..fe32c33 100644 --- a/zephyr_ml/labeling/labeling_functions/brake_pad_presence.py +++ 
b/zephyr_ml/labeling/labeling_functions/brake_pad_presence.py @@ -36,7 +36,8 @@ def label(ds, **kwargs): a = ds[comments] a = a.fillna('') a = a.str.lower() - f = any(a.apply(lambda d: ('brake' in d) and ('pad' in d) and ('yaw' not in d))) + f = any(a.apply(lambda d: ('brake' in d) + and ('pad' in d) and ('yaw' not in d))) return f meta = { diff --git a/zephyr_ml/labeling/labeling_functions/planet_bearing.py b/zephyr_ml/labeling/labeling_functions/planet_bearing.py index 283e5cd..36a5412 100644 --- a/zephyr_ml/labeling/labeling_functions/planet_bearing.py +++ b/zephyr_ml/labeling/labeling_functions/planet_bearing.py @@ -35,7 +35,8 @@ def gearbox_replace_presence(es, column_map={}): def label(ds, **kwargs): label_strings = ['Gearbox replace*', 'Gearbox exchange'] comments_lower = ds[comments].fillna('').str.lower() - f = any(comments_lower.str.contains('|'.join(label_strings), case=False)) + f = any(comments_lower.str.contains( + '|'.join(label_strings), case=False)) return f meta = { diff --git a/zephyr_ml/labeling/utils.py b/zephyr_ml/labeling/utils.py index 23bf0c4..a326e9c 100644 --- a/zephyr_ml/labeling/utils.py +++ b/zephyr_ml/labeling/utils.py @@ -144,7 +144,8 @@ def categorical_function(df): """ return int(df[categorical_column].isin([value]).sum() > 0) - categorical_function.__doc__ = categorical_function.__doc__.format(categorical_column, value) + categorical_function.__doc__ = categorical_function.__doc__.format( + categorical_column, value) return categorical_function @@ -177,7 +178,8 @@ def keyword_function(df): return int(mask.sum() != 0) - keyword_function.__doc__ = keyword_function.__doc__.format(keyword, columns) + keyword_function.__doc__ = keyword_function.__doc__.format( + keyword, columns) return keyword_function @@ -203,7 +205,8 @@ def numerical_function(df): series = df[numerical_column] return int(len(series[series > threshold]) > 0) - numerical_function.__doc__ = numerical_function.__doc__.format(numerical_column, threshold) + 
numerical_function.__doc__ = numerical_function.__doc__.format( + numerical_column, threshold) return numerical_function @@ -228,5 +231,6 @@ def duration_function(df): """ return ((df[end_time] - df[start_time]).dt.total_seconds()).sum() - duration_function.__doc__ = duration_function.__doc__.format(start_time, end_time) + duration_function.__doc__ = duration_function.__doc__.format( + start_time, end_time) return duration_function diff --git a/zephyr_ml/primitives/postprocessing.py b/zephyr_ml/primitives/postprocessing.py index 4e47bfe..2ae0af1 100644 --- a/zephyr_ml/primitives/postprocessing.py +++ b/zephyr_ml/primitives/postprocessing.py @@ -4,11 +4,11 @@ import logging +import matplotlib.pyplot as plt import numpy as np +import seaborn as sns import sklearn from sklearn import metrics -import seaborn as sns -import matplotlib.pyplot as plt LOGGER = logging.getLogger(__name__) @@ -97,7 +97,7 @@ def confusion_matrix( ) fig = plt.figure() ax = fig.add_axes(sns.heatmap(conf_matrix, annot=True, cmap="Blues")) - + ax.set_title("Confusion Matrix\n") ax.set_xlabel("\nPredicted Values") ax.set_ylabel("Actual Values") @@ -131,7 +131,7 @@ def roc_auc_score_and_curve( auc = metrics.roc_auc_score(y_true, y_proba) fig, ax = plt.subplots(1, 1) - + ax.plot(fpr, tpr, "ro") ax.plot(fpr, tpr) ax.plot(ns_fpr, ns_tpr, linestyle="--", color="green") @@ -139,6 +139,5 @@ def roc_auc_score_and_curve( ax.set_ylabel("True Positive Rate") ax.set_xlabel("False Positive Rate") ax.set_title("AUC: %.3f" % auc) - return auc, fig From c7190338806187978ea13da2edab23d9b9c69cf7 Mon Sep 17 00:00:00 2001 From: Raymond Pan Date: Tue, 13 May 2025 15:45:34 -0400 Subject: [PATCH 25/28] update docstrings --- zephyr_ml/core.py | 305 ++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 270 insertions(+), 35 deletions(-) diff --git a/zephyr_ml/core.py b/zephyr_ml/core.py index b04615a..7dda038 100644 --- a/zephyr_ml/core.py +++ b/zephyr_ml/core.py @@ -249,8 +249,15 @@ def guided_step(self, 
*method_args, **method_kwargs): class Zephyr: + """Zephyr Class. + + The Zephyr Class supports all the steps of the predictive engineering workflow + for wind farm operations data. It manages user state and handles entityset creation, labeling, + feature engineering, model training and evaluation. + """ def __init__(self): + """Initialize a new Zephyr instance.""" self._entityset = None self._label_times = None @@ -288,9 +295,11 @@ def __init__(self): self.guide_handler = GuideHandler(step_order, set_methods) def GET_ENTITYSET_TYPES(self): - """ - Returns the supported entityset types (PI/SCADA/Vibrations) - and the required dataframes and their columns + """Get the supported entityset types and their required dataframes/columns. + + Returns: + dict: A dictionary mapping entityset types (PI/SCADA/Vibrations) to their + descriptions and value. """ info_map = {} for es_type, val_fn in VALIDATE_DATA_FUNCTIONS.items(): @@ -300,9 +309,21 @@ def GET_ENTITYSET_TYPES(self): return info_map def GET_LABELING_FUNCTIONS(self): + """Get the available predefined labeling functions. + + Returns: + dict: A dictionary mapping labeling function names to their + descriptions and implementations. + """ return get_labeling_functions() def GET_EVALUATION_METRICS(self): + """Get the available evaluation metrics. + + Returns: + dict: A dictionary mapping metric names to their descriptions + and MLBlock instances. + """ info_map = {} for metric in DEFAULT_METRICS: primitive = self._get_ml_primitive(metric) @@ -323,18 +344,26 @@ def generate_entityset( signal_window_size=None, signal_replace_dataframe=False, **sigpro_kwargs): - """ - Generate an entityset + """Generate an entityset from input dataframes with optional signal processing. 
Args: - dfs ( dict ): Dictionary mapping entity names to the pandas - dataframe for that that entity - es_type (str): type of signal data , either SCADA or PI - custom_kwargs_mapping ( dict ): Updated keyword arguments to be used - during entityset creation + dfs (dict): Dictionary mapping entity names to pandas DataFrames. + es_type (str): Type of signal data, either 'SCADA' or 'PI'. + custom_kwargs_mapping (dict, optional): Custom keyword arguments + for entityset creation. + signal_dataframe_name (str, optional): Name of dataframe containing + signal data to process. + signal_column (str, optional): Name of column containing signal values to process. + signal_transformations (list[dict], optional): List of transformation + primitives to apply. + signal_aggregations (list[dict], optional): List of aggregation primitives to apply. + signal_window_size (str, optional): Size of window for signal binning (e.g. '1h'). + signal_replace_dataframe (bool, optional): Whether to replace + original signal dataframe. + **sigpro_kwargs: Additional keyword arguments for signal processing. + Returns: - featuretools.EntitySet that contains the data passed in and - their relationships + featuretools.EntitySet: EntitySet containing the processed data and relationships. """ entityset = _create_entityset(dfs, es_type, custom_kwargs_mapping) @@ -358,12 +387,19 @@ def generate_entityset( return self._entityset @guide - def set_entityset( - self, - entityset=None, - es_type=None, - entityset_path=None, - custom_kwargs_mapping=None): + def set_entityset(self, entityset=None, es_type=None, entityset_path=None, + custom_kwargs_mapping=None): + """Set the entityset for this Zephyr instance. + + Args: + entityset (featuretools.EntitySet, optional): An existing entityset to use. + es_type (str, optional): The type of entityset (pi/scada/vibrations). + entityset_path (str, optional): Path to a saved entityset to load. 
+ custom_kwargs_mapping (dict, optional): Custom keyword arguments for validation. + + Raises: + ValueError: If no entityset is provided through any of the parameters. + """ if entityset_path is not None: entityset = ft.read_entityset(entityset_path) @@ -382,6 +418,14 @@ def set_entityset( @guide def get_entityset(self): + """Get the current entityset. + + Returns: + featuretools.EntitySet: The current entityset. + + Raises: + ValueError: If no entityset has been set. + """ if self._entityset is None: raise ValueError( "No entityset has been created or set in this instance.") @@ -390,8 +434,48 @@ def get_entityset(self): @guide def generate_label_times( - self, labeling_fn, num_samples=-1, subset=None, column_map={}, verbose=False, **kwargs + self, labeling_fn, num_samples=-1, subset=None, column_map={}, verbose=False, thresh=None, + window_size=None, minimum_data=None, maximum_data=None, gap=None, drop_empty=True, **kwargs ): + """Generate label times using a labeling function. + + This method applies a labeling function to the entityset to generate labels at specific + timestamps. The labeling function can be either a predefined one (specified by name) or + a custom callable. + + Args: + labeling_fn (callable or str): Either a custom labeling function or the + name of a predefined function (e.g. 'brake_pad_presence'). + Predefined functions like brake_pad_presence analyze specific patterns + in the data (e.g. brake pad mentions in stoppage comments) and + return a tuple containing: + 1) A label generation function that processes data slices + 2) A denormalized dataframe containing the source data + 3) Metadata about the labeling process (e.g. target entity, time index) + num_samples (int, optional): Number of samples to generate. -1 for all. Defaults to -1. + subset (int or float, optional): Number or fraction of samples to randomly select. + column_map (dict, optional): Mapping of column names for the labeling function. 
+ verbose (bool, optional): Whether to display progress. Defaults to False. + thresh (float, optional): Threshold for label binarization. If None, tries to + use threshold value from labeling function metadata, if any. + window_size (str, optional): Size of the window for label generation (e.g. '1h'). + If None, tries to use window size value from labeling function metadata, if any. + minimum_data (str, optional): Minimum data required before cutoff time. + maximum_data (str, optional): Maximum data required after cutoff time. + gap (str, optional): Minimum gap between consecutive labels. + drop_empty (bool, optional): Whether to drop windows with no events. Defaults to True. + **kwargs: Additional arguments passed to the label generation function. + + Returns: + tuple: (composeml.LabelTimes, dict) The generated label times and metadata. + Label times contain the generated labels at specific timestamps. + Metadata contains information about the labeling process. + + Raises: + ValueError: If labeling_fn is a string but not a recognized predefined function. + AssertionError: If entityset has not been generated or set or labeling_fn is + not a string and not callable. 
+ """ assert self._entityset is not None, "entityset has not been set" if isinstance(labeling_fn, str): # get predefined labeling function @@ -415,8 +499,10 @@ def generate_label_times( target_entity_index = meta.get("target_entity_index") time_index = meta.get("time_index") - thresh = kwargs.get("thresh") or meta.get("thresh") - window_size = kwargs.get("window_size") or meta.get("window_size") + thresh = meta.get("thresh") if thresh is None else thresh + window_size = meta.get( + "window_size") if window_size is None else window_size + label_maker = cp.LabelMaker( labeling_function=labeling_function, target_dataframe_name=target_entity_index, @@ -431,7 +517,8 @@ def generate_label_times( if kwargs.get(k) is not None } label_times = label_maker.search( - data.sort_values(time_index), num_samples, verbose=verbose, **kwargs + data.sort_values(time_index), num_samples, minimum_data=minimum_data, + maximum_data=maximum_data, gap=gap, drop_empty=drop_empty, verbose=verbose, **kwargs ) if thresh is not None: label_times = label_times.threshold(thresh) @@ -444,13 +531,28 @@ def generate_label_times( @guide def set_label_times(self, label_times, label_col_name, meta=None): + """Set the label times for this Zephyr instance. + + Args: + label_times (composeml.LabelTimes): Label times. + label_col_name (str): Name of the label column. + meta (dict, optional): Additional metadata about the labels. + """ assert (isinstance(label_times, cp.LabelTimes)) self._label_times = label_times self._label_col_name = label_col_name self._label_times_meta = meta @guide - def get_label_times(self, visualize=True): + def get_label_times(self, visualize=False): + """Get the current label times. + + Args: + visualize (bool, optional): Whether to display a distribution plot. Defaults to False. + + Returns: + tuple: (composeml.LabelTimes, dict) The label times and metadata. 
+ """ if visualize: cp.label_times.plots.LabelPlots(self._label_times).distribution() return self._label_times, self._label_times_meta @@ -485,12 +587,10 @@ def generate_feature_matrix( return_types=None, progress_callback=None, include_cutoff_time=True, - add_interesting_values=False, max_interesting_values=5, interesting_dataframe_name=None, interesting_values=None, - signal_dataframe_name=None, signal_column=None, signal_transformations=None, @@ -498,7 +598,56 @@ def generate_feature_matrix( signal_window_size=None, signal_replace_dataframe=False, **sigpro_kwargs): + """Generate a feature matrix using automated feature engineering. + Note that this method creates a deepcopy + of the generated or set entityset within the Zephyr instance + before performing any signal processing or feature generation. + Args: + target_dataframe_name (str, optional): Name of target entity for feature engineering. + instance_ids (list, optional): List of specific instances to generate features for. + agg_primitives (list, optional): Aggregation primitives to apply. + trans_primitives (list, optional): Transform primitives to apply. + groupby_trans_primitives (list, optional): Groupby transform primitives to apply. + allowed_paths (list, optional): Allowed entity paths for feature generation. + max_depth (int, optional): Maximum allowed depth of entity relationships. + Defaults to 2. + ignore_dataframes (list, optional): Dataframes to ignore during feature generation. + ignore_columns (dict, optional): Columns to ignore per dataframe. + primitive_options (dict, optional): Options for specific primitives. + seed_features (list, optional): Seed features to begin with. + drop_contains (list, optional): Drop features containing these substrings. + drop_exact (list, optional): Drop features exactly matching these names. + where_primitives (list, optional): Primitives to use in where clauses. + max_features (int, optional): Maximum number of features to return. -1 for all. 
+ cutoff_time_in_index (bool, optional): Include cutoff time in the index. + save_progress (str, optional): Path to save progress. + features_only (bool, optional): Return only features without calculating values. + training_window (str, optional): Data window to use for training. + approximate (str, optional): Approximation method to use. + chunk_size (int, optional): Size of chunks for parallel processing. + n_jobs (int, optional): Number of parallel jobs. Defaults to 1. + dask_kwargs (dict, optional): Arguments for dask computation. + verbose (bool, optional): Whether to display progress. Defaults to False. + return_types (list, optional): Types of features to return. + progress_callback (callable, optional): Callback for progress updates. + include_cutoff_time (bool, optional): Include cutoff time features. Defaults to True. + add_interesting_values (bool, optional): Add interesting values. Defaults to False. + max_interesting_values (int, optional): Maximum interesting values per column. + interesting_dataframe_name (str, optional): Dataframe for interesting values. + interesting_values (dict, optional): Pre-defined interesting values. + signal_dataframe_name (str, optional): Name of dataframe containing signal data. + signal_column (str, optional): Name of column containing signal values. + signal_transformations (list, optional): Signal transformations to apply. + signal_aggregations (list, optional): Signal aggregations to apply. + signal_window_size (str, optional): Window size for signal processing. + signal_replace_dataframe (bool, optional): Replace original signal dataframe. + **sigpro_kwargs: Additional arguments for signal processing. + + Returns: + tuple: (pd.DataFrame, list, featuretools.EntitySet) + Feature matrix, feature definitions, and the processed entityset. 
+ """ entityset_copy = copy.deepcopy(self._entityset) # perform signal processing if signal_dataframe_name is not None and signal_column is not None: @@ -552,12 +701,27 @@ def generate_feature_matrix( @guide def get_feature_matrix(self): + """Get the current feature matrix. + + Returns: + tuple: (pd.DataFrame, str, list) The feature matrix, label column name, + and feature definitions. + """ return self._feature_matrix, self._label_col_name, self._features @guide def set_feature_matrix(self, feature_matrix, labels=None, label_col_name="label"): + """Set the feature matrix for this Zephyr instance. + + Args: + feature_matrix (pd.DataFrame): The feature matrix to use. + labels (array-like, optional): Labels to add to the feature matrix. + label_col_name (str, optional): Name of the label column. Defaults to "label". + """ assert isinstance(feature_matrix, pd.DataFrame) and ( - labels is not None or label_col_name in feature_matrix.columns) + labels is not None or + label_col_name in feature_matrix.columns + ) if labels is not None: feature_matrix[label_col_name] = labels self._feature_matrix = self._clean_feature_matrix( @@ -574,6 +738,20 @@ def generate_train_test_split( shuffle=True, stratify=False, ): + """Generate a train-test split of the feature matrix. + + Args: + test_size (float or int, optional): Proportion or absolute size of test set. + train_size (float or int, optional): Proportion or absolute size of training set. + random_state (int, optional): Random seed for reproducibility. + shuffle (bool, optional): Whether to shuffle before splitting. Defaults to True. + stratify (bool or list, optional): Whether to maintain label distribution. + If True, uses labels for stratification. If list, uses those columns. + Defaults to False. + + Returns: + tuple: (X_train, X_test, y_train, y_test) The split feature matrices and labels. 
+ """ feature_matrix = self._feature_matrix.copy() labels = feature_matrix.pop(self._label_col_name) @@ -602,6 +780,14 @@ def generate_train_test_split( @guide def set_train_test_split(self, X_train, X_test, y_train, y_test): + """Set the train-test split for this Zephyr instance. + + Args: + X_train (pd.DataFrame): Training features. + X_test (pd.DataFrame): Testing features. + y_train (array-like): Training labels. + y_test (array-like): Testing labels. + """ self._X_train = X_train self._X_test = X_test self._y_train = y_train @@ -609,6 +795,11 @@ def set_train_test_split(self, X_train, X_test, y_train, y_test): @guide def get_train_test_split(self): + """Get the current train-test split. + + Returns: + tuple or None: (X_train, X_test, y_train, y_test) if split exists, None otherwise. + """ if (self._X_train is None or self._X_test is None or self._y_train is None or self._y_test is None): return None @@ -616,13 +807,34 @@ def get_train_test_split(self): @guide def set_fitted_pipeline(self, pipeline): + """Set a fitted pipeline for this Zephyr instance. + + Args: + pipeline (MLPipeline): The fitted pipeline to use. + """ self._pipeline = pipeline @guide def fit_pipeline( self, pipeline="xgb_classifier", pipeline_hyperparameters=None, X=None, y=None, visual=False, **kwargs): - # kwargs indicate the parameters of the current pipeline + """Fit a machine learning pipeline. + + Args: + pipeline (str or dict or MLPipeline, optional): Pipeline to use. Can be: + - Name of a registered pipeline (default: "xgb_classifier") + - Path to a JSON pipeline specification + - Dictionary with pipeline specification + - MLPipeline instance + pipeline_hyperparameters (dict, optional): Hyperparameters for the pipeline. + X (pd.DataFrame, optional): Training features. If None, uses stored training set. + y (array-like, optional): Training labels. If None, uses stored training labels. + visual (bool, optional): Whether to return visualization data. Defaults to False. 
+ **kwargs: Additional arguments passed to the pipeline's fit method. + + Returns: + dict or None: If visual=True, returns visualization data dictionary. + """ self._pipeline = self._get_mlpipeline( pipeline, pipeline_hyperparameters) @@ -643,10 +855,25 @@ def fit_pipeline( @guide def get_fitted_pipeline(self): + """Get the current fitted pipeline. + + Returns: + MLPipeline: The current fitted pipeline. + """ return self._pipeline @guide def predict(self, X=None, visual=False, **kwargs): + """Make predictions using the fitted pipeline. + + Args: + X (pd.DataFrame, optional): Features to predict on. If None, uses test set. + visual (bool, optional): Whether to return visualization data. Defaults to False. + **kwargs: Additional arguments passed to the pipeline's predict method. + + Returns: + array-like or tuple: Predictions, and if visual=True, also returns visualization data. + """ if X is None: X = self._X_test if visual: @@ -663,14 +890,22 @@ def predict(self, X=None, visual=False, **kwargs): @guide def evaluate( - self, - X=None, - y=None, - metrics=None, - global_args=None, - local_args=None, - global_mapping=None, - local_mapping=None): + self, X=None, y=None, metrics=None, global_args=None, + local_args=None, global_mapping=None, local_mapping=None): + """Evaluate the fitted pipeline's performance. + + Args: + X (pd.DataFrame, optional): Features to evaluate on. If None, uses test set. + y (array-like, optional): True labels. If None, uses test labels. + metrics (list, optional): Metrics to compute. If None, uses DEFAULT_METRICS. + global_args (dict, optional): Arguments passed to all metrics. + local_args (dict, optional): Arguments passed to specific metrics. + global_mapping (dict, optional): Mapping applied to all metric inputs. + local_mapping (dict, optional): Mapping applied to specific metric inputs. + + Returns: + dict: A dictionary mapping metric names to their computed values. 
+ """ if X is None: X = self._X_test if y is None: From ee2efdaacf68e2611cb47be6560119bd93190d2e Mon Sep 17 00:00:00 2001 From: Raymond Pan Date: Wed, 14 May 2025 16:49:21 -0400 Subject: [PATCH 26/28] update guide handler w/ new inconsistency and stale definitions --- zephyr_ml/core.py | 92 ++++++++++++++++++++++++++++------------------- 1 file changed, 55 insertions(+), 37 deletions(-) diff --git a/zephyr_ml/core.py b/zephyr_ml/core.py index 7dda038..ed60de2 100644 --- a/zephyr_ml/core.py +++ b/zephyr_ml/core.py @@ -40,10 +40,8 @@ def __init__(self, producers_and_getters, set_methods): self.getter_to_step_map = {} self.terms = [] - self.skipped = [] for idx, (producers, getters) in enumerate(self.producers_and_getters): self.terms.append(-1) - self.skipped.append(False) for prod in producers: self.producer_to_step_map[prod.__name__] = idx @@ -96,25 +94,13 @@ def perform_producer_step(self, zephyr, method, *method_args, **method_kwargs): def try_log_skipping_steps_warning(self, name, next_step): steps_skipped = self.get_steps_in_between(self.current_step, next_step) if len(steps_skipped) > 0: - for step in range(self.current_step + 1, next_step): - self.skipped[step] = True necc_steps = self.join_steps(steps_skipped) LOGGER.warning( f"Performing {name}. You are skipping the following steps:\n{necc_steps}") - def try_log_using_stale_warning(self, name, next_step): - latest_up_to_date = self.get_last_up_to_date(next_step) - steps_needed = self.get_steps_in_between( - latest_up_to_date - 1, next_step) - if len(steps_needed) > 0: - LOGGER.warning(f"Performing {name}. You are in a stale state and \ - using potentially stale data to perform this step. \ - Re-run the following steps to return to a present state:\n: \ - {steps_needed}") - def try_log_making_stale_warning(self, name, next_step): next_next_step = next_step + 1 - prod_steps = f"{next_next_step}. 
\ + prod_steps = f"step {next_next_step}: \ {' or '.join(self.producers_and_getters[next_next_step][0])}" # add later set methods get_steps = self.join_steps( @@ -124,17 +110,7 @@ def try_log_making_stale_warning(self, name, next_step): LOGGER.warning(f"Performing {name}. You are beginning a new iteration.\ Any data returned by the following get methods will be \ considered stale:\n{get_steps}. To continue with this \ - iteration, please perform:\n{prod_steps}") - - # stale must be before b/c user must have regressed with progress that contains skips - # return set method, and next possible up to date key method - def try_log_inconsistent_warning(self, name, next_step): - set_method_str = self.producers_and_getters[next_step][0][1].__name__ - latest_up_to_date = self.get_last_up_to_date(next_step) - LOGGER.warning(f"Unable to perform {name} because some steps have been\ - skipped. You can call the corresponding set method: \ - {set_method_str} or re run steps starting at or before \ - {latest_up_to_date}") + iteration, please perform \n{prod_steps}") def log_get_inconsistent_warning(self, name, next_step): prod_steps = f"{next_step}. 
\ @@ -169,6 +145,7 @@ def try_perform_backward_producer_step(self, zephyr, method, *method_args, **met next_step = self.producer_to_step_map[name] self.try_log_making_stale_warning(next_step) self.cur_term += 1 + # mark everything prior to next step as current term for i in range(0, next_step): if self.terms[i] != -1: self.terms[i] = self.cur_term @@ -190,17 +167,51 @@ def try_perform_producer_step(self, zephyr, method, *method_args, **method_kwarg # dont update current step or terms - def try_perform_stale_or_inconsistent_producer_step( + def try_perform_inconsistent_producer_step( # add using stale and overwriting self, zephyr, method, *method_args, **method_kwargs): name = method.__name__ next_step = self.producer_to_step_map[name] - if self.terms[next_step - 1] == -1: # inconsistent - self.try_log_inconsistent_warning(name, next_step) - else: - self.try_log_using_stale_warning(name, next_step) - res = self.perform_producer_step( - zephyr, method, *method_args, **method_kwargs) - return res + # inconsistent forward step: performing key method but previous step is not up to date + if next_step >= self.current_step and self.terms[next_step-1] != self.cur_term: + corr_set_method = self.producers_and_getters[next_step][0][1].__name__ + prev_step = next_step-1 + prev_set_method = self.producers_and_getters[prev_step][0][1].__name__ + prev_key_method = self.producers_and_getters[prev_step][0][0].__name__ + LOGGER.warning(f"Unable to perform {name} because you are performing a key method at\ + step {next_step} but the result of the previous step, \ + step {prev_step}, is not up to date.\ + If you already have the data for step {next_step}, \ + you can use the corresponding set method: {corr_set_method}.\ + Otherwise, please perform step {prev_step} \ + with {prev_key_method} or {prev_set_method}.") + # inconsistent backward step: performing set method at nonzero step + elif next_step < self.current_step and name in self.set_method: + first_set_method = 
self.producers_and_getters[0][0][1].__name__ + corr_key_method = self.producers_and_getters[next_step][0][0].__name__ + LOGGER.warning(f"Unable to perform {name} because you are going backwards \ + and performing step {next_step} with a set method.\ + You can only perform a backwards step with a set \ + method at step 0: {first_set_method}.\ + If you would like to perform step {next_step}, \ + please use the corresponding key method: {corr_key_method}.") + # inconsistent backward step: performing key method but previous step is not up to date + elif next_step < self.current_step and self.terms[next_step-1] != self.cur_term: + prev_step = next_step-1 + prev_key_method = self.producers_and_getters[prev_step][0][0].__name__ + corr_set_method = self.producers_and_getters[next_step][0][1].__name__ + LOGGER.warning(f"Unable to perform {name} because you are going \ + backwards and starting a new iteration by\ + performing a key method at step {next_step} \ + but the result of the previous step,\ + step {prev_step}, is not up to date.\ + Please perform step {prev_step} with {prev_key_method} first.\ + If you already have the data for \ + step {next_step} from the previous iteration,\ + re-performing {prev_key_method} with the same \ + arguments should generate the same result.\ + Otherwise, if the data is unrelated, \ + please create a new Zephyr instance\ + and use its {corr_set_method} method.") def try_perform_getter_step(self, zephyr, method, *method_args, **method_kwargs): name = method.__name__ @@ -222,13 +233,20 @@ def guide_step(self, zephyr, method, *method_args, **method_kwargs): if method_name in self.producer_to_step_map: # up-todate next_step = self.producer_to_step_map[method_name] - if (method_name in self.set_methods or next_step == 0 or - self.terms[next_step - 1] == self.cur_term): + if (next_step == 0 or # 0 step always valid, starting new iteration + # forward step only valid if set method or key method w/ no skips + (next_step >= self.current_step and 
+ (method_name in self.set_methods or + self.terms[next_step - 1] == self.cur_term)) or + # backward step only valid if key method w/ previous step up to date + (next_step < self.current_step and + (method_name not in self.set_methods and + self.terms[next_step - 1] == self.cur_term))): res = self.try_perform_producer_step( zephyr, method, *method_args, **method_kwargs) return res else: # stale or inconsistent - res = self.try_perform_stale_or_inconsistent_producer_step( + res = self.try_perform_inconsistent_producer_step( zephyr, method, *method_args, **method_kwargs) return res elif method_name in self.getter_to_step_map: From 5d9bf1c538a2d6c76461d059e3d7215009cbb2ba Mon Sep 17 00:00:00 2001 From: Raymond Pan Date: Thu, 15 May 2025 11:42:19 -0400 Subject: [PATCH 27/28] changes to guidehandler w/ no rules for set methods --- zephyr_ml/core.py | 69 ++++++++++++++++++++++++++--------------------- 1 file changed, 38 insertions(+), 31 deletions(-) diff --git a/zephyr_ml/core.py b/zephyr_ml/core.py index ed60de2..e893762 100644 --- a/zephyr_ml/core.py +++ b/zephyr_ml/core.py @@ -33,6 +33,7 @@ class GuideHandler: def __init__(self, producers_and_getters, set_methods): self.cur_term = 0 self.current_step = -1 + self.start_point = 0 self.producers_and_getters = producers_and_getters self.set_methods = set_methods @@ -131,8 +132,10 @@ def log_get_stale_warning(self, name, next_step): def try_perform_forward_producer_step(self, zephyr, method, *method_args, **method_kwargs): name = method.__name__ next_step = self.producer_to_step_map[name] - if name in self.set_methods: + if name in self.set_methods: # set method will update start point and start new iteration self.try_log_skipping_steps_warning(name, next_step) + self.start_point = next_step + self.cur_term += 1 # next_step == 0, set method (already warned), or previous step is up to term res = self.perform_producer_step( zephyr, method, *method_args, **method_kwargs) @@ -143,12 +146,17 @@ def 
try_perform_forward_producer_step(self, zephyr, method, *method_args, **meth def try_perform_backward_producer_step(self, zephyr, method, *method_args, **method_kwargs): name = method.__name__ next_step = self.producer_to_step_map[name] - self.try_log_making_stale_warning(next_step) + # starting new iteration self.cur_term += 1 - # mark everything prior to next step as current term - for i in range(0, next_step): - if self.terms[i] != -1: - self.terms[i] = self.cur_term + if next_step == 0 or name in self.set_methods: + self.start_point = next_step + else: # key method + # mark everything from start point to next step as current term + for i in range(self.start_point, next_step): + if self.terms[i] != -1: + self.terms[i] = self.cur_term + + self.try_log_making_stale_warning(next_step) res = self.perform_producer_step( zephyr, method, *method_args, **method_kwargs) return res @@ -185,33 +193,35 @@ def try_perform_inconsistent_producer_step( # add using stale and overwriting Otherwise, please perform step {prev_step} \ with {prev_key_method} or {prev_set_method}.") # inconsistent backward step: performing set method at nonzero step - elif next_step < self.current_step and name in self.set_method: - first_set_method = self.producers_and_getters[0][0][1].__name__ - corr_key_method = self.producers_and_getters[next_step][0][0].__name__ - LOGGER.warning(f"Unable to perform {name} because you are going backwards \ - and performing step {next_step} with a set method.\ - You can only perform a backwards step with a set \ - method at step 0: {first_set_method}.\ - If you would like to perform step {next_step}, \ - please use the corresponding key method: {corr_key_method}.") + # elif next_step < self.current_step and name in self.set_method: + # first_set_method = self.producers_and_getters[0][0][1].__name__ + # corr_key_method = self.producers_and_getters[next_step][0][0].__name__ + # LOGGER.warning(f"Unable to perform {name} because you are going backwards \ + # and 
performing step {next_step} with a set method.\ + # You can only perform a backwards step with a set \ + # method at step 0: {first_set_method}.\ + # If you would like to perform step {next_step}, \ + # please use the corresponding key method: {corr_key_method}.") # inconsistent backward step: performing key method but previous step is not up to date elif next_step < self.current_step and self.terms[next_step-1] != self.cur_term: prev_step = next_step-1 prev_key_method = self.producers_and_getters[prev_step][0][0].__name__ corr_set_method = self.producers_and_getters[next_step][0][1].__name__ + prev_get_method = self.producers_and_getters[prev_step][1][0].__name__ + prev_set_method = self.producers_and_getters[prev_step][0][1].__name__ LOGGER.warning(f"Unable to perform {name} because you are going \ backwards and starting a new iteration by\ performing a key method at step {next_step} \ but the result of the previous step,\ - step {prev_step}, is not up to date.\ - Please perform step {prev_step} with {prev_key_method} first.\ - If you already have the data for \ - step {next_step} from the previous iteration,\ - re-performing {prev_key_method} with the same \ - arguments should generate the same result.\ - Otherwise, if the data is unrelated, \ - please create a new Zephyr instance\ - and use its {corr_set_method} method.") + step {prev_step}, is STALE.\ + If you want to use the STALE result of the PREVIOUS step, \ + you can call {prev_get_method} to get the data, then\ + {prev_set_method} to set the data, and then recall this method.\ + If you want to regenerate the data of the PREVIOUS step, \ + please call {prev_key_method}, and then recall this method.\ + If you already have the data for THIS step, you can \ + call {corr_set_method} to set the data.\ + ") def try_perform_getter_step(self, zephyr, method, *method_args, **method_kwargs): name = method.__name__ @@ -234,14 +244,11 @@ def guide_step(self, zephyr, method, *method_args, **method_kwargs): # up-todate 
next_step = self.producer_to_step_map[method_name] if (next_step == 0 or # 0 step always valid, starting new iteration + # set method always valid, but will update start point and start new iteration + method_name in self.set_methods or + # key method valid if previous step is up to date + self.terms[next_step-1] == self.cur_term): # forward step only valid if set method or key method w/ no skips - (next_step >= self.current_step and - (method_name in self.set_methods or - self.terms[next_step - 1] == self.cur_term)) or - # backward step only valid if key method w/ previous step up to date - (next_step < self.current_step and - (method_name not in self.set_methods and - self.terms[next_step - 1] == self.cur_term))): res = self.try_perform_producer_step( zephyr, method, *method_args, **method_kwargs) return res From 154e28054f6174655cc91fc21a503eeaa165b462 Mon Sep 17 00:00:00 2001 From: Raymond Pan Date: Thu, 15 May 2025 14:23:34 -0400 Subject: [PATCH 28/28] start point initallly -1 --- zephyr_ml/core.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/zephyr_ml/core.py b/zephyr_ml/core.py index e893762..309e08b 100644 --- a/zephyr_ml/core.py +++ b/zephyr_ml/core.py @@ -33,7 +33,7 @@ class GuideHandler: def __init__(self, producers_and_getters, set_methods): self.cur_term = 0 self.current_step = -1 - self.start_point = 0 + self.start_point = -1 self.producers_and_getters = producers_and_getters self.set_methods = set_methods @@ -187,7 +187,7 @@ def try_perform_inconsistent_producer_step( # add using stale and overwriting prev_key_method = self.producers_and_getters[prev_step][0][0].__name__ LOGGER.warning(f"Unable to perform {name} because you are performing a key method at\ step {next_step} but the result of the previous step, \ - step {prev_step}, is not up to date.\ + step {prev_step}, is STALE.\ If you already have the data for step {next_step}, \ you can use the corresponding set method: {corr_set_method}.\ Otherwise, please perform step 
{prev_step} \