Skip to content

Commit

Permalink
feat - add support to specify the arguments for various steps in pipe…
Browse files Browse the repository at this point in the history
…line / requires changes to the syntax in sklearnPipeline section
  • Loading branch information
ksachdeva committed Oct 21, 2019
1 parent d1e41a2 commit be76859
Show file tree
Hide file tree
Showing 10 changed files with 115 additions and 23 deletions.
8 changes: 6 additions & 2 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -158,8 +158,12 @@ so if you have a typo in the names and/or they are not available in your PYTHONP
sklearnPipeline:
name: normalizer_svc
steps:
normalizer: sklearn.preprocessing.Normalizer
svc: sklearn.svm.SVC
normalizer:
type: sklearn.preprocessing.Normalizer
classArgs:
norm: l2
svc:
type: sklearn.svm.SVC
In the above example, there are 2 steps. The first step is to normalize the data and the second step is to train a classifier using Support
Vector Machine.
Expand Down
6 changes: 4 additions & 2 deletions docs/usage.rst
Original file line number Diff line number Diff line change
Expand Up @@ -80,8 +80,10 @@ typo in the names and/or they are not available in your PYTHONPATH you will get
sklearnPipeline:
name: normalizer_svc
steps:
normalizer: sklearn.preprocessing.Normalizer
svc: sklearn.svm.SVC
normalizer:
type: sklearn.preprocessing.Normalizer
svc:
type: sklearn.svm.SVC
In the above example, there are 2 steps. The first step is to normalize the data and the second step is to train a classifier using Support
Vector Machine.
Expand Down
6 changes: 4 additions & 2 deletions examples/basic_svc.nni.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,10 @@ dataSource:
sklearnPipeline:
name: normalizer_svc
steps:
normalizer: sklearn.preprocessing.Normalizer
svc: sklearn.svm.SVC
normalizer:
type: sklearn.preprocessing.Normalizer
svc:
type: sklearn.svm.SVC

# This section is more or less compliant with the NNI's
# way of specifying the hyper parameters except that you
Expand Down
9 changes: 6 additions & 3 deletions examples/pca_logistic.nni.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,12 @@ dataSource:
sklearnPipeline:
name: normalizer_svc
steps:
normalizer: sklearn.preprocessing.Normalizer
pca: sklearn.decomposition.PCA
logistic_regression: sklearn.linear_model.LogisticRegression
normalizer:
type: sklearn.preprocessing.Normalizer
pca:
type: sklearn.decomposition.PCA
logistic_regression:
type: sklearn.linear_model.LogisticRegression

# This section is more or less compliant with the NNI's
# way of specifying the hyper parameters except that you
Expand Down
9 changes: 6 additions & 3 deletions examples/pca_svc.nni.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,12 @@ dataSource:
sklearnPipeline:
name: normalizer_svc
steps:
normalizer: sklearn.preprocessing.Normalizer
pca: sklearn.decomposition.PCA
svc: sklearn.svm.SVC
normalizer:
type: sklearn.preprocessing.Normalizer
pca:
type: sklearn.decomposition.PCA
svc:
type: sklearn.svm.SVC

# This section is more or less compliant with the NNI's
# way of specifying the hyper parameters except that you
Expand Down
2 changes: 1 addition & 1 deletion setup.cfg
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
[bumpversion]
current_version = 0.1.1
current_version = 0.2.0
commit = True
tag = True

Expand Down
18 changes: 13 additions & 5 deletions setup.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-

"""The setup script."""

from setuptools import setup, find_packages
Expand All @@ -11,7 +10,14 @@
with open('HISTORY.rst') as history_file:
history = history_file.read()

requirements = ['Click>=7.0', 'scikit-learn', 'nni', 'pymongo', 'absl-py', 'pyyaml',]
requirements = [
'Click>=7.0',
'scikit-learn',
'nni',
'pymongo',
'absl-py',
'pyyaml',
]

setup_requirements = []

Expand All @@ -20,7 +26,8 @@
setup(
author="Kapil Sachdeva",
author_email='not@anemail.com',
python_requires='>=3.6, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*',
python_requires=
'>=3.6, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*',
classifiers=[
'Development Status :: 2 - Pre-Alpha',
'Intended Audience :: Developers',
Expand All @@ -29,7 +36,8 @@
'Programming Language :: Python :: 3.6',
'Programming Language :: Python :: 3.7',
],
description="Hyper parameters search for scikit-learn components using Microsoft NNI",
description=
"Hyper parameters search for scikit-learn components using Microsoft NNI",
entry_points={
'console_scripts': [
'sknni=sknni.cli:cli',
Expand All @@ -46,6 +54,6 @@
test_suite='tests',
tests_require=test_requirements,
url='https://github.com/ksachdeva/scikit-nni',
version='0.1.1',
version='0.2.0',
zip_safe=False,
)
3 changes: 1 addition & 2 deletions sknni/__init__.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
# -*- coding: utf-8 -*-

"""Top-level package for scikit-nni."""

__author__ = """Kapil Sachdeva"""
__email__ = 'notanemail@email.com'
__version__ = '0.1.1'
__version__ = '0.2.0'
12 changes: 9 additions & 3 deletions sknni/internals/_pipeline_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,12 @@

from ._utils import get_class


class PipelineBuilder(object):
def __init__(self, experiment_spec):
self.steps = experiment_spec['sklearnPipeline']['steps']
self.params_info = self._param_info_from_search_space(experiment_spec['nniConfigSearchSpace'])
self.params_info = self._param_info_from_search_space(
experiment_spec['nniConfigSearchSpace'])

def _param_info_from_search_space(self, search_space):
steps_with_params = {}
Expand All @@ -28,7 +30,12 @@ def __call__(self, nni_hparams):
# a step in the pipeline
sklearn_steps = []
for k, v in self.steps.items():
estimator_cls = get_class(v)
estimator_cls = get_class(v['type'])

# find the default params if any
kwargs = {}
if 'classArgs' in v:
kwargs.update(v['classArgs'])

# find the arguments for this estimator and set their values
# using nni_hparams
Expand All @@ -37,7 +44,6 @@ def __call__(self, nni_hparams):
sklearn_steps.append((k, estimator_cls()))
continue

kwargs = {}
for p in self.params_info[k]:
kwargs[p] = nni_hparams[f"{k}_{p}"]

Expand Down
65 changes: 65 additions & 0 deletions tests/test_pipeline_builder.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
#!/usr/bin/env python

import unittest
import yaml

from sknni.internals import PipelineBuilder
from sklearn.preprocessing import Normalizer

from lightgbm import LGBMClassifier


class TestPipelineBuilder(unittest.TestCase):
    """Tests for the `sknni` PipelineBuilder."""

    def setUp(self):
        """Set up test fixtures, if any."""
        pass

    def tearDown(self):
        """Tear down test fixtures, if any."""
        pass

    def test_estimator_arguments(self):
        """Arguments supplied via ``classArgs`` (i.e. not part of the NNI
        search space) must be forwarded to the estimator constructor,
        while search-space parameters are taken from the ``nni_hparams``
        dict passed when the pipeline is built.
        """
        # safe_load: yaml.load without an explicit Loader is deprecated
        # (PyYAML >= 5.1) and unsafe on untrusted input; safe_load is the
        # right call for plain config data like this.
        simple_config = yaml.safe_load("""
        sklearnPipeline:
          name: normalizer_lightgbm
          steps:
            normalizer:
              type: sklearn.preprocessing.Normalizer
            lightgbm:
              type: lightgbm.LGBMClassifier
              classArgs:
                objective: multiclass
        nniConfigSearchSpace:
          - lightgbm:
              num_leaves:
                _type: choice
                _value: [31, 41, 51]
              boosting_type:
                _type: choice
                _value: [gbdt, goss, dart]
        """)

        # Simulate the hyper-parameters NNI would hand us for one trial.
        pipeline = PipelineBuilder(simple_config)({
            'lightgbm_num_leaves': 31,
            'lightgbm_boosting_type': 'goss',
        })

        # unittest assertion methods (rather than bare asserts) give
        # informative failure messages.
        self.assertEqual(len(pipeline.named_steps), 2)
        self.assertIsInstance(pipeline.named_steps['normalizer'], Normalizer)
        self.assertIsInstance(pipeline.named_steps['lightgbm'], LGBMClassifier)

        # Value that came from classArgs (not in the search space).
        self.assertEqual(
            pipeline.named_steps['lightgbm'].objective, 'multiclass')
        # Values that came from the simulated NNI hyper-parameters.
        self.assertEqual(pipeline.named_steps['lightgbm'].num_leaves, 31)
        self.assertEqual(
            pipeline.named_steps['lightgbm'].boosting_type, 'goss')

    def test_command_line_interface(self):
        """Placeholder for CLI tests — not implemented yet."""
        pass

0 comments on commit be76859

Please sign in to comment.