Skip to content
This repository has been archived by the owner on Jun 22, 2022. It is now read-only.

Commit

Permalink
Dev s12 (#112)
Browse files Browse the repository at this point in the history
* removed suffixes

* removed global _ALL_STEPS_NAMES, check upstream names by default - if two names are the same raise an error, simplified error logs
  • Loading branch information
Kamil A. Kaczmarek committed Oct 5, 2018
1 parent 99275ea commit 2024748
Show file tree
Hide file tree
Showing 3 changed files with 37 additions and 51 deletions.
2 changes: 1 addition & 1 deletion docs/conf.py
Expand Up @@ -26,7 +26,7 @@
# The short X.Y version
version = '0.1'
# The full version, including alpha/beta/rc tags
release = '0.1.11'
release = '0.1.12'


# -- General configuration ---------------------------------------------------
Expand Down
4 changes: 2 additions & 2 deletions setup.py
Expand Up @@ -13,11 +13,11 @@

setup(name='steppy',
packages=['steppy'],
version='0.1.11',
version='0.1.12',
description='A lightweight, open-source, Python library for fast and reproducible experimentation',
long_description=long_description,
url='https://github.com/minerva-ml/steppy',
download_url='https://github.com/minerva-ml/steppy/archive/0.1.11.tar.gz',
download_url='https://github.com/minerva-ml/steppy/archive/0.1.12.tar.gz',
author='Kamil A. Kaczmarek, Jakub Czakon',
author_email='kamil.kaczmarek@neptune.ml, jakub.czakon@neptune.ml',
keywords=['machine-learning', 'reproducibility', 'pipeline', 'data-science'],
Expand Down
82 changes: 34 additions & 48 deletions steppy/base.py
Expand Up @@ -18,8 +18,6 @@
'load_persisted_output': False
}

_ALL_STEPS_NAMES = list()


class Step:
"""Step is a building block of steppy pipelines.
Expand Down Expand Up @@ -180,41 +178,42 @@ def __init__(self,
cache_output=False,
load_persisted_output=False):

name = self._format_step_name(name, transformer)
self.name = self._format_step_name(name, transformer)

if experiment_directory is not None:
assert isinstance(experiment_directory, str),\
'Step {} error, experiment_directory must ' \
'be str, got {} instead.'.format(name, type(experiment_directory))
'be str, got {} instead.'.format(self.name, type(experiment_directory))
else:
experiment_directory = os.path.join(os.path.expanduser("~"), '.steppy')
logger.info('Using default experiment directory: {}'.format(experiment_directory))

if output_directory is not None:
assert isinstance(output_directory, str),\
'Step {}, output_directory must be str, got {} instead'.format(name, type(output_directory))
'Step {}, output_directory must be str, got {} instead'.format(self.name, type(output_directory))

if input_data is not None:
assert isinstance(input_data, list), 'Step {} error, input_data must be list, ' \
'got {} instead.'.format(name, type(input_data))
'got {} instead.'.format(self.name, type(input_data))
if input_steps is not None:
assert isinstance(input_steps, list), 'Step {} error, input_steps must be list, ' \
'got {} instead.'.format(name, type(input_steps))
'got {} instead.'.format(self.name, type(input_steps))
if adapter is not None:
assert isinstance(adapter, Adapter), 'Step {} error, adapter must be an instance ' \
'of {}'.format(name, str(Adapter))
'of {}'.format(self.name, str(Adapter))

assert isinstance(cache_output, bool), 'Step {} error, cache_output must be bool, ' \
'got {} instead.'.format(name, type(cache_output))
'got {} instead.'.format(self.name, type(cache_output))
assert isinstance(persist_output, bool), 'Step {} error, persist_output must be bool, ' \
'got {} instead.'.format(name, type(persist_output))
'got {} instead.'.format(self.name, type(persist_output))
assert isinstance(load_persisted_output, bool),\
'Step {} error, load_persisted_output ' \
'must be bool, got {} instead.'.format(name, type(load_persisted_output))
'must be bool, got {} instead.'.format(self.name, type(load_persisted_output))
assert isinstance(force_fitting, bool), 'Step {} error, force_fitting must be bool, ' \
'got {} instead.'.format(name, type(force_fitting))
'got {} instead.'.format(self.name, type(force_fitting))

logger.info('Initializing Step {}'.format(name))
self._validate_upstream_names()
logger.info('Initializing Step {}'.format(self.name))

self.transformer = transformer
self.output_directory = output_directory
Expand All @@ -228,11 +227,7 @@ def __init__(self,
self.force_fitting = force_fitting

self.output = None
self.name = self._apply_suffix(name)
_ALL_STEPS_NAMES.append(self.name)

self.experiment_directory = os.path.join(experiment_directory)

self._prepare_experiment_directories()
self._mode = 'train'

Expand Down Expand Up @@ -492,7 +487,7 @@ def get_step_by_name(self, name):
return self.all_upstream_steps[name]
except KeyError as e:
msg = 'No Step with name "{}" found. ' \
'You have following Steps: {}'.format(name, _ALL_STEPS_NAMES)
'You have following Steps: {}'.format(name, list(self.all_upstream_steps.keys()))
raise StepError(msg) from e

def persist_upstream_structure(self):
Expand Down Expand Up @@ -525,9 +520,8 @@ def _fit_transform_operation(self, step_inputs):
try:
step_output_data = self.transformer.transform(**step_inputs)
except Exception as e:
msg = 'Step {}, Transformer "{}" error during "transform()" operation. ' \
'Check "Step.transformer" implementation"'.format(self.name,
self.transformer.__class__.__name__)
msg = 'Step {}, Transformer "{}" error ' \
'during "transform()" operation.'.format(self.name, self.transformer.__class__.__name__)
raise StepError(msg) from e

logger.info('Step {}, transforming completed'.format(self.name))
Expand All @@ -537,9 +531,8 @@ def _fit_transform_operation(self, step_inputs):
try:
step_output_data = self.transformer.fit_transform(**step_inputs)
except Exception as e:
msg = 'Step {}, Transformer "{}" error during "fit_transform()" operation. ' \
'Check "Step.transformer" implementation"'.format(self.name,
self.transformer.__class__.__name__)
msg = 'Step {}, Transformer "{}" error ' \
'during "fit_transform()" operation.'.format(self.name, self.transformer.__class__.__name__)
raise StepError(msg) from e

logger.info('Step {}, fitting and transforming completed'.format(self.name))
Expand All @@ -552,10 +545,8 @@ def _fit_transform_operation(self, step_inputs):
try:
step_output_data = self.transformer.transform(**step_inputs)
except Exception as e:
msg = 'Step {}, Transformer "{}" error during "transform()" operation. ' \
'This Transformer is not fittable. ' \
'Check "Step.transformer" implementation"'.format(self.name,
self.transformer.__class__.__name__)
msg = 'Step {}, Transformer "{}" error ' \
'during "transform()" operation.'.format(self.name, self.transformer.__class__.__name__)
raise StepError(msg) from e

logger.info('Step {}, transforming completed'.format(self.name))
Expand All @@ -579,9 +570,8 @@ def _transform_operation(self, step_inputs):
try:
step_output_data = self.transformer.transform(**step_inputs)
except Exception as e:
msg = 'Step {}, Transformer "{}" error during "transform()" operation. ' \
'Check "Step.transformer" implementation"'.format(self.name,
self.transformer.__class__.__name__)
msg = 'Step {}, Transformer "{}" error ' \
'during "transform()" operation.'.format(self.name, self.transformer.__class__.__name__)
raise StepError(msg) from e

logger.info('Step {}, transforming completed'.format(self.name))
Expand All @@ -595,10 +585,8 @@ def _transform_operation(self, step_inputs):
try:
step_output_data = self.transformer.transform(**step_inputs)
except Exception as e:
msg = 'Step {}, Transformer "{}" error during "transform()" operation. ' \
'This Transformer is not fittable. ' \
'Check "Step.transformer" implementation"'.format(self.name,
self.transformer.__class__.__name__)
msg = 'Step {}, Transformer "{}" error ' \
'during "transform()" operation.'.format(self.name, self.transformer.__class__.__name__)
raise StepError(msg) from e

logger.info('Step {}, transforming completed'.format(self.name))
Expand Down Expand Up @@ -652,6 +640,7 @@ def _prepare_experiment_directories(self):
os.makedirs(os.path.join(self.experiment_directory, dir_name), exist_ok=True)

def _get_steps(self, all_steps):
self._check_name_uniqueness(all_steps=all_steps)
for input_step in self.input_steps:
all_steps = input_step._get_steps(all_steps)
all_steps[self.name] = self
Expand All @@ -670,19 +659,16 @@ def _validate_step_name(self, name):
assert isinstance(name, str) or isinstance(name, float) or isinstance(name, int),\
'Step name must be str, float or int. Got {} instead.'.format(type(name))

def _apply_suffix(self, name):
    """Return ``name`` with a numeric ``_k`` suffix appended.

    Every entry of the module-level ``_ALL_STEPS_NAMES`` that differs from
    ``name`` is split at its last underscore. When the part before the
    underscore equals ``name`` and the part after it, read as an int, is at
    least the current counter, the counter is bumped by one. The counter
    starts at 0 and its final value becomes the suffix.

    Args:
        name: base Step name to be suffixed.

    Returns:
        str: ``'<name>_<counter>'``.

    Raises:
        ValueError: if a matching entry ends in a non-integer suffix
            (raised by ``int()``).
    """
    counter = 0
    for registered in _ALL_STEPS_NAMES:
        if registered == name:
            continue
        # rpartition yields ('', '', registered) when there is no underscore,
        # i.e. an empty stem and the whole entry as the suffix.
        stem, _, tail = registered.rpartition('_')
        if stem != name:
            continue
        if int(tail) >= counter:
            counter += 1
    return '{}_{}'.format(name, counter)
def _check_name_uniqueness(self, all_steps):
if self.name in all_steps.keys():
raise ValueError('Step with name "{}", already exist. Assign unique Step name.'.format(self.name))

def _validate_upstream_names(self):
    """Resolve the upstream Steps and report name problems as ``StepError``.

    Accessing ``self.all_upstream_steps`` is done purely for its side effect:
    any ``ValueError`` raised while it is evaluated is re-raised as
    ``StepError`` with the original exception chained as the cause.
    NOTE(review): presumably the property walks the graph via ``_get_steps``,
    which is where duplicate names are detected; confirm against the
    property's definition.

    Raises:
        StepError: if evaluating ``all_upstream_steps`` raises ``ValueError``.
    """
    try:
        # The resulting view is discarded; only the evaluation matters.
        self.all_upstream_steps.keys()
    except ValueError as err:
        raise StepError('Incorrect Step names') from err

def _build_structure_dict(self, structure_dict):
for input_step in self.input_steps:
Expand Down

0 comments on commit 2024748

Please sign in to comment.