Skip to content

Commit

Permalink
feat - add support to specify the arguments for various steps in pipe…
Browse files Browse the repository at this point in the history
…line / requires changes to the syntax in sklearnPipeline section
  • Loading branch information
ksachdeva committed Oct 21, 2019
1 parent d1e41a2 commit be76859
Show file tree
Hide file tree
Showing 10 changed files with 115 additions and 23 deletions.
8 changes: 6 additions & 2 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -158,8 +158,12 @@ so if you have a typo in the names and/or they are not available in your PYTHONP
sklearnPipeline:
name: normalizer_svc
steps:
normalizer: sklearn.preprocessing.Normalizer
svc: sklearn.svm.SVC
normalizer:
type: sklearn.preprocessing.Normalizer
classArgs:
norm: l2
svc:
type: sklearn.svm.SVC
In the above example, there are 2 steps. The first step is to normalize the data and the second step is to train a classifier using Support
Vector Machine.
Expand Down
6 changes: 4 additions & 2 deletions docs/usage.rst
Original file line number Diff line number Diff line change
Expand Up @@ -80,8 +80,10 @@ typo in the names and/or they are not available in your PYTHONPATH you will get
sklearnPipeline:
name: normalizer_svc
steps:
normalizer: sklearn.preprocessing.Normalizer
svc: sklearn.svm.SVC
normalizer:
type: sklearn.preprocessing.Normalizer
svc:
type: sklearn.svm.SVC
In the above example, there are 2 steps. The first step is to normalize the data and the second step is to train a classifier using Support
Vector Machine.
Expand Down
6 changes: 4 additions & 2 deletions examples/basic_svc.nni.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,10 @@ dataSource:
sklearnPipeline:
name: normalizer_svc
steps:
normalizer: sklearn.preprocessing.Normalizer
svc: sklearn.svm.SVC
normalizer:
type: sklearn.preprocessing.Normalizer
svc:
type: sklearn.svm.SVC

# This section is more or less compliant with the NNI's
# way of specifying the hyper parameters except that you
Expand Down
9 changes: 6 additions & 3 deletions examples/pca_logistic.nni.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,12 @@ dataSource:
sklearnPipeline:
name: normalizer_svc
steps:
normalizer: sklearn.preprocessing.Normalizer
pca: sklearn.decomposition.PCA
logistic_regression: sklearn.linear_model.LogisticRegression
normalizer:
type: sklearn.preprocessing.Normalizer
pca:
type: sklearn.decomposition.PCA
logistic_regression:
type: sklearn.linear_model.LogisticRegression

# This section is more or less compliant with the NNI's
# way of specifying the hyper parameters except that you
Expand Down
9 changes: 6 additions & 3 deletions examples/pca_svc.nni.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,12 @@ dataSource:
sklearnPipeline:
name: normalizer_svc
steps:
normalizer: sklearn.preprocessing.Normalizer
pca: sklearn.decomposition.PCA
svc: sklearn.svm.SVC
normalizer:
type: sklearn.preprocessing.Normalizer
pca:
type: sklearn.decomposition.PCA
svc:
type: sklearn.svm.SVC

# This section is more or less compliant with the NNI's
# way of specifying the hyper parameters except that you
Expand Down
2 changes: 1 addition & 1 deletion setup.cfg
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
[bumpversion]
current_version = 0.1.1
current_version = 0.2.0
commit = True
tag = True

Expand Down
18 changes: 13 additions & 5 deletions setup.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-

"""The setup script."""

from setuptools import setup, find_packages
Expand All @@ -11,7 +10,14 @@
with open('HISTORY.rst') as history_file:
history = history_file.read()

requirements = ['Click>=7.0', 'scikit-learn', 'nni', 'pymongo', 'absl-py', 'pyyaml',]
requirements = [
'Click>=7.0',
'scikit-learn',
'nni',
'pymongo',
'absl-py',
'pyyaml',
]

setup_requirements = []

Expand All @@ -20,7 +26,8 @@
setup(
author="Kapil Sachdeva",
author_email='not@anemail.com',
python_requires='>=3.6, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*',
python_requires=
'>=3.6, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*',
classifiers=[
'Development Status :: 2 - Pre-Alpha',
'Intended Audience :: Developers',
Expand All @@ -29,7 +36,8 @@
'Programming Language :: Python :: 3.6',
'Programming Language :: Python :: 3.7',
],
description="Hyper parameters search for scikit-learn components using Microsoft NNI",
description=
"Hyper parameters search for scikit-learn components using Microsoft NNI",
entry_points={
'console_scripts': [
'sknni=sknni.cli:cli',
Expand All @@ -46,6 +54,6 @@
test_suite='tests',
tests_require=test_requirements,
url='https://github.com/ksachdeva/scikit-nni',
version='0.1.1',
version='0.2.0',
zip_safe=False,
)
3 changes: 1 addition & 2 deletions sknni/__init__.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
# -*- coding: utf-8 -*-

"""Top-level package for scikit-nni."""

__author__ = """Kapil Sachdeva"""
__email__ = 'notanemail@email.com'
__version__ = '0.1.1'
__version__ = '0.2.0'
12 changes: 9 additions & 3 deletions sknni/internals/_pipeline_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,12 @@

from ._utils import get_class


class PipelineBuilder(object):
def __init__(self, experiment_spec):
self.steps = experiment_spec['sklearnPipeline']['steps']
self.params_info = self._param_info_from_search_space(experiment_spec['nniConfigSearchSpace'])
self.params_info = self._param_info_from_search_space(
experiment_spec['nniConfigSearchSpace'])

def _param_info_from_search_space(self, search_space):
steps_with_params = {}
Expand All @@ -28,7 +30,12 @@ def __call__(self, nni_hparams):
# a step in the pipeline
sklearn_steps = []
for k, v in self.steps.items():
estimator_cls = get_class(v)
estimator_cls = get_class(v['type'])

# find the default params if any
kwargs = {}
if 'classArgs' in v:
kwargs.update(v['classArgs'])

# find the arguments for this estimator and set their values
# using nni_hparams
Expand All @@ -37,7 +44,6 @@ def __call__(self, nni_hparams):
sklearn_steps.append((k, estimator_cls()))
continue

kwargs = {}
for p in self.params_info[k]:
kwargs[p] = nni_hparams[f"{k}_{p}"]

Expand Down
65 changes: 65 additions & 0 deletions tests/test_pipeline_builder.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
#!/usr/bin/env python

import unittest
import yaml

from sknni.internals import PipelineBuilder
from sklearn.preprocessing import Normalizer

from lightgbm import LGBMClassifier


class TestPipelineBuilder(unittest.TestCase):
    """Tests for the `sknni` PipelineBuilder."""

    def setUp(self):
        """Set up test fixtures, if any."""
        pass

    def tearDown(self):
        """Tear down test fixtures, if any."""
        pass

    def test_estimator_arguments(self):
        """Arguments supplied via ``classArgs`` (i.e. not part of the NNI
        search space) must be forwarded to the estimator constructor,
        while search-space parameters are taken from the ``nni_hparams``
        dict passed when the pipeline is built.
        """
        # safe_load: yaml.load without an explicit Loader is deprecated
        # (PyYAML >= 5.1) and unsafe on untrusted input; safe_load is the
        # right call for plain config data like this.
        simple_config = yaml.safe_load("""
        sklearnPipeline:
          name: normalizer_lightgbm
          steps:
            normalizer:
              type: sklearn.preprocessing.Normalizer
            lightgbm:
              type: lightgbm.LGBMClassifier
              classArgs:
                objective: multiclass
        nniConfigSearchSpace:
          - lightgbm:
              num_leaves:
                _type: choice
                _value: [31, 41, 51]
              boosting_type:
                _type: choice
                _value: [gbdt, goss, dart]
        """)

        # Simulate the hyper-parameters NNI would hand us for one trial.
        pipeline = PipelineBuilder(simple_config)({
            'lightgbm_num_leaves': 31,
            'lightgbm_boosting_type': 'goss',
        })

        # unittest assertion methods (rather than bare asserts) give
        # informative failure messages.
        self.assertEqual(len(pipeline.named_steps), 2)
        self.assertIsInstance(pipeline.named_steps['normalizer'], Normalizer)
        self.assertIsInstance(pipeline.named_steps['lightgbm'], LGBMClassifier)

        # Value that came from classArgs (not in the search space).
        self.assertEqual(
            pipeline.named_steps['lightgbm'].objective, 'multiclass')
        # Values that came from the simulated NNI hyper-parameters.
        self.assertEqual(pipeline.named_steps['lightgbm'].num_leaves, 31)
        self.assertEqual(
            pipeline.named_steps['lightgbm'].boosting_type, 'goss')

    def test_command_line_interface(self):
        """Placeholder for CLI tests — not implemented yet."""
        pass

0 comments on commit be76859

Please sign in to comment.