From 2024748dd36c5f76c296f3ec46a5456076c57dda Mon Sep 17 00:00:00 2001 From: "Kamil A. Kaczmarek" Date: Fri, 5 Oct 2018 17:01:53 +0200 Subject: [PATCH] Dev s12 (#112) * removed suffixes * removed global _ALL_STEPS_NAMES, check upstream names by default - if two names are the same raise an error, simplified error logs --- docs/conf.py | 2 +- setup.py | 4 +-- steppy/base.py | 82 +++++++++++++++++++++----------------------------- 3 files changed, 37 insertions(+), 51 deletions(-) diff --git a/docs/conf.py b/docs/conf.py index 706be9b..56efcad 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -26,7 +26,7 @@ # The short X.Y version version = '0.1' # The full version, including alpha/beta/rc tags -release = '0.1.11' +release = '0.1.12' # -- General configuration --------------------------------------------------- diff --git a/setup.py b/setup.py index 3e73a73..a1802cd 100644 --- a/setup.py +++ b/setup.py @@ -13,11 +13,11 @@ setup(name='steppy', packages=['steppy'], - version='0.1.11', + version='0.1.12', description='A lightweight, open-source, Python library for fast and reproducible experimentation', long_description=long_description, url='https://github.com/minerva-ml/steppy', - download_url='https://github.com/minerva-ml/steppy/archive/0.1.11.tar.gz', + download_url='https://github.com/minerva-ml/steppy/archive/0.1.12.tar.gz', author='Kamil A. Kaczmarek, Jakub Czakon', author_email='kamil.kaczmarek@neptune.ml, jakub.czakon@neptune.ml', keywords=['machine-learning', 'reproducibility', 'pipeline', 'data-science'], diff --git a/steppy/base.py b/steppy/base.py index 8227f9f..7739a45 100644 --- a/steppy/base.py +++ b/steppy/base.py @@ -18,8 +18,6 @@ 'load_persisted_output': False } -_ALL_STEPS_NAMES = list() - class Step: """Step is a building block of steppy pipelines. @@ -180,41 +178,42 @@ def __init__(self, cache_output=False, load_persisted_output=False): - name = self._format_step_name(name, transformer) + self.name = self._format_step_name(name, transformer) if experiment_directory is not None: assert isinstance(experiment_directory, str),\ 'Step {} error, experiment_directory must ' \ - 'be str, got {} instead.'.format(name, type(experiment_directory)) + 'be str, got {} instead.'.format(self.name, type(experiment_directory)) else: experiment_directory = os.path.join(os.path.expanduser("~"), '.steppy') logger.info('Using default experiment directory: {}'.format(experiment_directory)) if output_directory is not None: assert isinstance(output_directory, str),\ - 'Step {}, output_directory must be str, got {} instead'.format(name, type(output_directory)) + 'Step {}, output_directory must be str, got {} instead'.format(self.name, type(output_directory)) if input_data is not None: assert isinstance(input_data, list), 'Step {} error, input_data must be list, ' \ - 'got {} instead.'.format(name, type(input_data)) + 'got {} instead.'.format(self.name, type(input_data)) if input_steps is not None: assert isinstance(input_steps, list), 'Step {} error, input_steps must be list, ' \ - 'got {} instead.'.format(name, type(input_steps)) + 'got {} instead.'.format(self.name, type(input_steps)) if adapter is not None: assert isinstance(adapter, Adapter), 'Step {} error, adapter must be an instance ' \ - 'of {}'.format(name, str(Adapter)) + 'of {}'.format(self.name, str(Adapter)) assert isinstance(cache_output, bool), 'Step {} error, cache_output must be bool, ' \ - 'got {} instead.'.format(name, type(cache_output)) + 'got {} instead.'.format(self.name, type(cache_output)) assert isinstance(persist_output, bool), 'Step {} error, persist_output must be bool, ' \ - 'got {} instead.'.format(name, type(persist_output)) + 'got {} instead.'.format(self.name, type(persist_output)) assert isinstance(load_persisted_output, bool),\ 'Step {} error, load_persisted_output ' \ - 'must be bool, got {} instead.'.format(name, type(load_persisted_output)) + 'must be bool, got {} instead.'.format(self.name, type(load_persisted_output)) assert isinstance(force_fitting, bool), 'Step {} error, force_fitting must be bool, ' \ - 'got {} instead.'.format(name, type(force_fitting)) + 'got {} instead.'.format(self.name, type(force_fitting)) - logger.info('Initializing Step {}'.format(name)) + self._validate_upstream_names() + logger.info('Initializing Step {}'.format(self.name)) self.transformer = transformer self.output_directory = output_directory @@ -228,11 +227,7 @@ def __init__(self, self.force_fitting = force_fitting self.output = None - self.name = self._apply_suffix(name) - _ALL_STEPS_NAMES.append(self.name) - self.experiment_directory = os.path.join(experiment_directory) - self._prepare_experiment_directories() self._mode = 'train' @@ -492,7 +487,7 @@ def get_step_by_name(self, name): return self.all_upstream_steps[name] except KeyError as e: msg = 'No Step with name "{}" found. ' \ - 'You have following Steps: {}'.format(name, _ALL_STEPS_NAMES) + 'You have following Steps: {}'.format(name, list(self.all_upstream_steps.keys())) raise StepError(msg) from e def persist_upstream_structure(self): @@ -525,9 +520,8 @@ def _fit_transform_operation(self, step_inputs): try: step_output_data = self.transformer.transform(**step_inputs) except Exception as e: - msg = 'Step {}, Transformer "{}" error during "transform()" operation. ' \ - 'Check "Step.transformer" implementation"'.format(self.name, - self.transformer.__class__.__name__) + msg = 'Step {}, Transformer "{}" error ' \ + 'during "transform()" operation.'.format(self.name, self.transformer.__class__.__name__) raise StepError(msg) from e logger.info('Step {}, transforming completed'.format(self.name)) @@ -537,9 +531,8 @@ def _fit_transform_operation(self, step_inputs): try: step_output_data = self.transformer.fit_transform(**step_inputs) except Exception as e: - msg = 'Step {}, Transformer "{}" error during "fit_transform()" operation. ' \ - 'Check "Step.transformer" implementation"'.format(self.name, - self.transformer.__class__.__name__) + msg = 'Step {}, Transformer "{}" error ' \ + 'during "fit_transform()" operation.'.format(self.name, self.transformer.__class__.__name__) raise StepError(msg) from e logger.info('Step {}, fitting and transforming completed'.format(self.name)) @@ -552,10 +545,8 @@ def _fit_transform_operation(self, step_inputs): try: step_output_data = self.transformer.transform(**step_inputs) except Exception as e: - msg = 'Step {}, Transformer "{}" error during "transform()" operation. ' \ - 'This Transformer is not fittable. ' \ - 'Check "Step.transformer" implementation"'.format(self.name, - self.transformer.__class__.__name__) + msg = 'Step {}, Transformer "{}" error ' \ + 'during "transform()" operation.'.format(self.name, self.transformer.__class__.__name__) raise StepError(msg) from e logger.info('Step {}, transforming completed'.format(self.name)) @@ -579,9 +570,8 @@ def _transform_operation(self, step_inputs): try: step_output_data = self.transformer.transform(**step_inputs) except Exception as e: - msg = 'Step {}, Transformer "{}" error during "transform()" operation. ' \ - 'Check "Step.transformer" implementation"'.format(self.name, - self.transformer.__class__.__name__) + msg = 'Step {}, Transformer "{}" error ' \ + 'during "transform()" operation.'.format(self.name, self.transformer.__class__.__name__) raise StepError(msg) from e logger.info('Step {}, transforming completed'.format(self.name)) @@ -595,10 +585,8 @@ def _transform_operation(self, step_inputs): try: step_output_data = self.transformer.transform(**step_inputs) except Exception as e: - msg = 'Step {}, Transformer "{}" error during "transform()" operation. ' \ - 'This Transformer is not fittable. ' \ - 'Check "Step.transformer" implementation"'.format(self.name, - self.transformer.__class__.__name__) + msg = 'Step {}, Transformer "{}" error ' \ + 'during "transform()" operation.'.format(self.name, self.transformer.__class__.__name__) raise StepError(msg) from e logger.info('Step {}, transforming completed'.format(self.name)) @@ -652,6 +640,7 @@ def _prepare_experiment_directories(self): os.makedirs(os.path.join(self.experiment_directory, dir_name), exist_ok=True) def _get_steps(self, all_steps): + self._check_name_uniqueness(all_steps=all_steps) for input_step in self.input_steps: all_steps = input_step._get_steps(all_steps) all_steps[self.name] = self @@ -670,19 +659,16 @@ def _validate_step_name(self, name): assert isinstance(name, str) or isinstance(name, float) or isinstance(name, int),\ 'Step name must be str, float or int. Got {} instead.'.format(type(name)) - def _apply_suffix(self, name): - """returns suffix '_k' - Where 'k' is int that denotes highest increment of step with the same name. - """ - highest_id = 0 - for x in _ALL_STEPS_NAMES: - if not x == name: - key_id = x.split('_')[-1] - key_stripped = x[:-len(key_id) - 1] - if key_stripped == name: - if int(key_id) >= highest_id: - highest_id += 1 - return '{}_{}'.format(name, highest_id) + def _check_name_uniqueness(self, all_steps): + if self.name in all_steps.keys(): + raise ValueError('Step with name "{}", already exist. Assign unique Step name.'.format(self.name)) + + def _validate_upstream_names(self): + try: + _ = self.all_upstream_steps.keys() + except ValueError as e: + msg = 'Incorrect Step names' + raise StepError(msg) from e def _build_structure_dict(self, structure_dict): for input_step in self.input_steps: