From 99275eaed0763cff8f58c8c8f15dc9d52c09db46 Mon Sep 17 00:00:00 2001 From: "Kamil A. Kaczmarek" Date: Fri, 5 Oct 2018 16:59:01 +0200 Subject: [PATCH] Revert "removed suffixes (#110)" (#111) This reverts commit 520a17ad9abb49506774dcd29435d2cc14931847. --- docs/conf.py | 2 +- setup.py | 4 +-- steppy/base.py | 82 +++++++++++++++++++++++++++++--------------------- 3 files changed, 51 insertions(+), 37 deletions(-) diff --git a/docs/conf.py b/docs/conf.py index 56efcad..706be9b 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -26,7 +26,7 @@ # The short X.Y version version = '0.1' # The full version, including alpha/beta/rc tags -release = '0.1.12' +release = '0.1.11' # -- General configuration --------------------------------------------------- diff --git a/setup.py b/setup.py index a1802cd..3e73a73 100644 --- a/setup.py +++ b/setup.py @@ -13,11 +13,11 @@ setup(name='steppy', packages=['steppy'], - version='0.1.12', + version='0.1.11', description='A lightweight, open-source, Python library for fast and reproducible experimentation', long_description=long_description, url='https://github.com/minerva-ml/steppy', - download_url='https://github.com/minerva-ml/steppy/archive/0.1.12.tar.gz', + download_url='https://github.com/minerva-ml/steppy/archive/0.1.11.tar.gz', author='Kamil A. Kaczmarek, Jakub Czakon', author_email='kamil.kaczmarek@neptune.ml, jakub.czakon@neptune.ml', keywords=['machine-learning', 'reproducibility', 'pipeline', 'data-science'], diff --git a/steppy/base.py b/steppy/base.py index 7739a45..8227f9f 100644 --- a/steppy/base.py +++ b/steppy/base.py @@ -18,6 +18,8 @@ 'load_persisted_output': False } +_ALL_STEPS_NAMES = list() + class Step: """Step is a building block of steppy pipelines. @@ -178,42 +180,41 @@ def __init__(self, cache_output=False, load_persisted_output=False): - self.name = self._format_step_name(name, transformer) + name = self._format_step_name(name, transformer) if experiment_directory is not None: assert isinstance(experiment_directory, str),\ 'Step {} error, experiment_directory must ' \ - 'be str, got {} instead.'.format(self.name, type(experiment_directory)) + 'be str, got {} instead.'.format(name, type(experiment_directory)) else: experiment_directory = os.path.join(os.path.expanduser("~"), '.steppy') logger.info('Using default experiment directory: {}'.format(experiment_directory)) if output_directory is not None: assert isinstance(output_directory, str),\ - 'Step {}, output_directory must be str, got {} instead'.format(self.name, type(output_directory)) + 'Step {}, output_directory must be str, got {} instead'.format(name, type(output_directory)) if input_data is not None: assert isinstance(input_data, list), 'Step {} error, input_data must be list, ' \ - 'got {} instead.'.format(self.name, type(input_data)) + 'got {} instead.'.format(name, type(input_data)) if input_steps is not None: assert isinstance(input_steps, list), 'Step {} error, input_steps must be list, ' \ - 'got {} instead.'.format(self.name, type(input_steps)) + 'got {} instead.'.format(name, type(input_steps)) if adapter is not None: assert isinstance(adapter, Adapter), 'Step {} error, adapter must be an instance ' \ - 'of {}'.format(self.name, str(Adapter)) + 'of {}'.format(name, str(Adapter)) assert isinstance(cache_output, bool), 'Step {} error, cache_output must be bool, ' \ - 'got {} instead.'.format(self.name, type(cache_output)) + 'got {} instead.'.format(name, type(cache_output)) assert isinstance(persist_output, bool), 'Step {} error, persist_output must be bool, ' \ - 'got {} instead.'.format(self.name, type(persist_output)) + 'got {} instead.'.format(name, type(persist_output)) assert isinstance(load_persisted_output, bool),\ 'Step {} error, load_persisted_output ' \ - 'must be bool, got {} instead.'.format(self.name, type(load_persisted_output)) + 'must be bool, got {} instead.'.format(name, type(load_persisted_output)) assert isinstance(force_fitting, bool), 'Step {} error, force_fitting must be bool, ' \ - 'got {} instead.'.format(self.name, type(force_fitting)) + 'got {} instead.'.format(name, type(force_fitting)) - self._validate_upstream_names() - logger.info('Initializing Step {}'.format(self.name)) + logger.info('Initializing Step {}'.format(name)) self.transformer = transformer self.output_directory = output_directory @@ -227,7 +228,11 @@ def __init__(self, self.force_fitting = force_fitting self.output = None + self.name = self._apply_suffix(name) + _ALL_STEPS_NAMES.append(self.name) + self.experiment_directory = os.path.join(experiment_directory) + self._prepare_experiment_directories() self._mode = 'train' @@ -487,7 +492,7 @@ def get_step_by_name(self, name): return self.all_upstream_steps[name] except KeyError as e: msg = 'No Step with name "{}" found. ' \ - 'You have following Steps: {}'.format(name, list(self.all_upstream_steps.keys())) + 'You have following Steps: {}'.format(name, _ALL_STEPS_NAMES) raise StepError(msg) from e def persist_upstream_structure(self): @@ -520,8 +525,9 @@ def _fit_transform_operation(self, step_inputs): try: step_output_data = self.transformer.transform(**step_inputs) except Exception as e: - msg = 'Step {}, Transformer "{}" error ' \ - 'during "transform()" operation.'.format(self.name, self.transformer.__class__.__name__) + msg = 'Step {}, Transformer "{}" error during "transform()" operation. ' \ + 'Check "Step.transformer" implementation"'.format(self.name, + self.transformer.__class__.__name__) raise StepError(msg) from e logger.info('Step {}, transforming completed'.format(self.name)) @@ -531,8 +537,9 @@ def _fit_transform_operation(self, step_inputs): try: step_output_data = self.transformer.fit_transform(**step_inputs) except Exception as e: - msg = 'Step {}, Transformer "{}" error ' \ - 'during "fit_transform()" operation.'.format(self.name, self.transformer.__class__.__name__) + msg = 'Step {}, Transformer "{}" error during "fit_transform()" operation. ' \ + 'Check "Step.transformer" implementation"'.format(self.name, + self.transformer.__class__.__name__) raise StepError(msg) from e logger.info('Step {}, fitting and transforming completed'.format(self.name)) @@ -545,8 +552,10 @@ def _fit_transform_operation(self, step_inputs): try: step_output_data = self.transformer.transform(**step_inputs) except Exception as e: - msg = 'Step {}, Transformer "{}" error ' \ - 'during "transform()" operation.'.format(self.name, self.transformer.__class__.__name__) + msg = 'Step {}, Transformer "{}" error during "transform()" operation. ' \ + 'This Transformer is not fittable. ' \ + 'Check "Step.transformer" implementation"'.format(self.name, + self.transformer.__class__.__name__) raise StepError(msg) from e logger.info('Step {}, transforming completed'.format(self.name)) @@ -570,8 +579,9 @@ def _transform_operation(self, step_inputs): try: step_output_data = self.transformer.transform(**step_inputs) except Exception as e: - msg = 'Step {}, Transformer "{}" error ' \ - 'during "transform()" operation.'.format(self.name, self.transformer.__class__.__name__) + msg = 'Step {}, Transformer "{}" error during "transform()" operation. ' \ + 'Check "Step.transformer" implementation"'.format(self.name, + self.transformer.__class__.__name__) raise StepError(msg) from e logger.info('Step {}, transforming completed'.format(self.name)) @@ -585,8 +595,10 @@ def _transform_operation(self, step_inputs): try: step_output_data = self.transformer.transform(**step_inputs) except Exception as e: - msg = 'Step {}, Transformer "{}" error ' \ - 'during "transform()" operation.'.format(self.name, self.transformer.__class__.__name__) + msg = 'Step {}, Transformer "{}" error during "transform()" operation. ' \ + 'This Transformer is not fittable. ' \ + 'Check "Step.transformer" implementation"'.format(self.name, + self.transformer.__class__.__name__) raise StepError(msg) from e logger.info('Step {}, transforming completed'.format(self.name)) @@ -640,7 +652,6 @@ def _prepare_experiment_directories(self): os.makedirs(os.path.join(self.experiment_directory, dir_name), exist_ok=True) def _get_steps(self, all_steps): - self._check_name_uniqueness(all_steps=all_steps) for input_step in self.input_steps: all_steps = input_step._get_steps(all_steps) all_steps[self.name] = self @@ -659,16 +670,19 @@ def _validate_step_name(self, name): assert isinstance(name, str) or isinstance(name, float) or isinstance(name, int),\ 'Step name must be str, float or int. Got {} instead.'.format(type(name)) - def _check_name_uniqueness(self, all_steps): - if self.name in all_steps.keys(): - raise ValueError('Step with name "{}", already exist. Assign unique Step name.'.format(self.name)) - - def _validate_upstream_names(self): - try: - _ = self.all_upstream_steps.keys() - except ValueError as e: - msg = 'Incorrect Step names' - raise StepError(msg) from e + def _apply_suffix(self, name): + """returns suffix '_k' + Where 'k' is int that denotes highest increment of step with the same name. + """ + highest_id = 0 + for x in _ALL_STEPS_NAMES: + if not x == name: + key_id = x.split('_')[-1] + key_stripped = x[:-len(key_id) - 1] + if key_stripped == name: + if int(key_id) >= highest_id: + highest_id += 1 + return '{}_{}'.format(name, highest_id) def _build_structure_dict(self, structure_dict): for input_step in self.input_steps: