Merge pull request #127 from ottogroup/develop

Develop

alattner committed May 7, 2020
2 parents 20e369b + 99bf061 commit 3e7bd7d
Showing 33 changed files with 736 additions and 139 deletions.
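The bulk of this diff is a mechanical rename across the documentation
and examples: as of v1.2.3, palladium configuration files accept `!`
as a shorthand for `__factory__` (see CHANGES.txt below). A minimal
sketch of the two equivalent spellings, using a component that appears
in the docs changed here:

.. code-block:: python

    # Pre-1.2.3 spelling:
    {
        'dataset_loader_train': {
            '__factory__': 'palladium.dataset.CSV',
            'path': 'iris.data',
        },
    }

    # Equivalent shorthand as of 1.2.3:
    {
        'dataset_loader_train': {
            '!': 'palladium.dataset.CSV',
            'path': 'iris.data',
        },
    }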
2 changes: 1 addition & 1 deletion .travis.yml
@@ -8,7 +8,7 @@ matrix:
     dist: xenial
     sudo: true
     env:
-    - TRAVIS=yes
+    - TRAVIS=yes AWS_ACCESS_KEY_ID=test AWS_SECRET_ACCESS_KEY=test
 before_install:
 - pip install -U pip && pip --version
 - wget https://repo.anaconda.com/miniconda/Miniconda3-4.6.14-Linux-x86_64.sh -O miniconda.sh
17 changes: 17 additions & 0 deletions CHANGES.txt
@@ -1,3 +1,20 @@
+v1.2.3 - 2020-05-07
+===================
+
+- Updated requirements in order to use newer versions of dependencies
+  (also fixing potential security vulnerabilities in dependencies)
+
+- Added support for handling model attachments
+
+- Exclamation mark `!` can now be used instead of `__factory__` in
+  configuration files
+
+v1.2.2.1 - 2019-09-30
+=====================
+
+- Added AWS S3 persister
+
+
 v1.2.2 - 2019-08-15
 ===================
 
2 changes: 1 addition & 1 deletion VERSION
@@ -1 +1 @@
-1.2.2
+1.2.3
14 changes: 7 additions & 7 deletions docs/user/R.rst
@@ -42,7 +42,7 @@ above:
 .. code-block:: python
     'dataset_loader_train': {
-        '__factory__': 'palladium.R.DatasetLoader',
+        '!': 'palladium.R.DatasetLoader',
         'scriptname': 'iris.R',
         'funcname': 'dataset',
         },
@@ -54,7 +54,7 @@ R classification models are configured very similarly, using
 .. code-block:: python
     'model': {
-        '__factory__': 'palladium.R.ClassificationModel',
+        '!': 'palladium.R.ClassificationModel',
         'scriptname': 'iris.R',
         'funcname': 'train.randomForest',
         'encode_labels': True,
@@ -99,7 +99,7 @@ Here's how this would look like:
 .. code-block:: python
     'model': {
-        '__factory__': 'palladium.R.RegressionModel',
+        '!': 'palladium.R.RegressionModel',
         'scriptname': 'tooth.R',
         'funcname': 'train.randomForest',
         },
@@ -110,7 +110,7 @@ classification above:
 .. code-block:: python
     'dataset_loader_train': {
-        '__factory__': 'palladium.R.DatasetLoader',
+        '!': 'palladium.R.DatasetLoader',
         'scriptname': 'tooth.R',
         'funcname': 'dataset',
         },
@@ -129,13 +129,13 @@ including :class:`~palladium.R.Rpy2Transform` in a
 .. code-block:: python
     'model': {
-        '__factory__': 'sklearn.pipeline.Pipeline',
+        '!': 'sklearn.pipeline.Pipeline',
         'steps': [
             ['rpy2', {
-                '__factory__': 'palladium.R.Rpy2Transform',
+                '!': 'palladium.R.Rpy2Transform',
                 }],
             ['regressor', {
-                '__factory__': 'palladium.R.RegressionModel',
+                '!': 'palladium.R.RegressionModel',
                 'scriptname': 'tooth.R',
                 'funcname': 'train.randomForest',
                 }],
10 changes: 5 additions & 5 deletions docs/user/configuration.rst
@@ -31,7 +31,7 @@ you to pass in things like database credentials from the environment:
 .. code-block:: python
     'dataset_loader_train': {
-        '__factory__': 'palladium.dataset.SQL',
+        '!': 'palladium.dataset.SQL',
         'url': 'mysql://{}:{}@localhost/test?encoding=utf8'.format(
             environ['DB_USER'], environ['DB_PASS'],
             ),
@@ -46,7 +46,7 @@ folder as the configuration:
 .. code-block:: python
     'dataset_loader_train': {
-        '__factory__': 'palladium.dataset.CSV',
+        '!': 'palladium.dataset.CSV',
         'path': '{}/data.csv'.format(here),
         }
@@ -80,15 +80,15 @@ file:
 .. code-block:: python
     'dataset_loader_train': {
-        '__factory__': 'palladium.dataset.CSV',
+        '!': 'palladium.dataset.CSV',
         'path': '{}/train.csv'.format(here),
         'many': '...',
         'more': {'...'},
         'entries': ['...'],
         }
     'dataset_loader_test': {
-        '__factory__': 'palladium.dataset.CSV',
+        '!': 'palladium.dataset.CSV',
         'path': '{}/test.csv'.format(here),
         'many': '...',
         'more': {'...'},
@@ -100,7 +100,7 @@ With ``__copy__``, you can reduce this down to:
 .. code-block:: python
     'dataset_loader_train': {
-        '__factory__': 'palladium.dataset.CSV',
+        '!': 'palladium.dataset.CSV',
         'path': '{}/train.csv'.format(here),
         'many': '...',
         'more': {'...'},
2 changes: 1 addition & 1 deletion docs/user/deployment.rst
@@ -421,7 +421,7 @@ startup:
 .. code-block:: python
     'oauth_init_app': {
-        '__factory__': 'myoauth.oauth.init_app',
+        '!': 'myoauth.oauth.init_app',
         'app': 'palladium.server.app',
         },
37 changes: 18 additions & 19 deletions docs/user/faq.rst
@@ -156,7 +156,7 @@ passed at runtime.
         'C': [0.1, 0.3, 1.0],
         },
     'cv': {
-        '__factory__': 'palladium.util.Partial',
+        '!': 'palladium.util.Partial',
         'func': 'sklearn.cross_validation.StratifiedKFold',
         'random_state': 0,
         },
@@ -177,16 +177,16 @@ classifier:
 .. code-block:: python
     'grid_search': {
-        '__factory__': 'skopt.BayesSearchCV',
+        '!': 'skopt.BayesSearchCV',
         'estimator': {'__copy__': 'model'},
         'n_iter': 16,
         'search_spaces': {
             'C': {
-                '__factory__': 'skopt.space.Real',
+                '!': 'skopt.space.Real',
                 'low': 1e-6, 'high': 1e+1, 'prior': 'log-uniform',
                 },
             'degree': {
-                '__factory__': 'skopt.space.Integer',
+                '!': 'skopt.space.Integer',
                 'low': 1, 'high': 20,
                 },
             },
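For context, the search spaces configured in that hunk mirror skopt's
Python API directly. A sketch of the equivalent direct call, assuming
scikit-optimize is installed (the SVC estimator is an assumption for
illustration, standing in for whatever the `model` entry defines):

.. code-block:: python

    # Sketch of the skopt search configured above; SVC stands in for
    # the estimator that the 'model' config entry would provide.
    from skopt import BayesSearchCV
    from skopt.space import Integer, Real
    from sklearn.svm import SVC

    search = BayesSearchCV(
        estimator=SVC(kernel='poly'),
        n_iter=16,
        search_spaces={
            'C': Real(1e-6, 1e+1, prior='log-uniform'),
            'degree': Integer(1, 20),
        },
    )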
@@ -208,35 +208,34 @@ grid search:

 .. code-block:: python
     {
     'grid_search': {
-        '__factory__': 'palladium.fit.with_parallel_backend',
+        '!': 'palladium.fit.with_parallel_backend',
         'estimator': {
-            '__factory__': 'sklearn.model_selection.GridSearchCV',
+            '!': 'sklearn.model_selection.GridSearchCV',
             'estimator': {'__copy__': 'model'},
-            'param_grid': {
-                'C': [0.1, 0.3, 1.0],
-                },
-            'n_jobs': -1,
+            'param_grid': {'__copy__': 'grid_search.param_grid'},
+            'scoring': {'__copy__': 'scoring'},
             },
-        'backend': 'dask.distributed',
-        'scheduler_host': '127.0.0.1:8786',
+        'backend': 'dask',
         },

-    '_init_distributed': {
-        '__factory__': 'palladium.util.resolve_dotted_name',
-        'dotted_name': 'distributed.joblib.joblib',
+    '_init_client': {
+        '!': 'dask.distributed.Client',
+        'address': '127.0.0.1:8786',
         },
     }

-To start up the Dask scheduler and workers you can follow the
-dask.distributed documentation. Here's an example that runs three
-workers locally:
+For details on how to set up Dask workers and a scheduler, please
+consult the `Dask docs <https://docs.dask.org>`_. But here's how you
+would start up a scheduler and three workers locally:

 .. code-block:: bash
     $ dask-scheduler
-    Scheduler started at 127.0.0.1:8786
-    $ dask-worker 127.0.0.1:8786
+    $ dask-worker 127.0.0.1:8786  # start each in a new terminal
     $ dask-worker 127.0.0.1:8786
     $ dask-worker 127.0.0.1:8786
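For orientation, a minimal sketch of what the new `_init_client` entry
amounts to at runtime, assuming `dask.distributed` is installed and a
scheduler is listening on 127.0.0.1:8786:

.. code-block:: python

    # Sketch of the `_init_client` entry above: connect a client (it
    # registers itself as the default) and route joblib work to Dask.
    from dask.distributed import Client
    from joblib import parallel_backend

    client = Client('127.0.0.1:8786')

    with parallel_backend('dask'):
        # joblib-parallel scikit-learn code run here (e.g. a
        # GridSearchCV fit) is farmed out to the Dask workers
        pass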
2 changes: 1 addition & 1 deletion docs/user/julia.rst
@@ -44,7 +44,7 @@ configuration in that example defines the model to be of type
 .. code-block:: python
     'model': {
-        '__factory__': 'palladium.julia.ClassificationModel',
+        '!': 'palladium.julia.ClassificationModel',
         'fit_func': 'SVM.svm',
         'predict_func': 'SVM.predict',
         }
32 changes: 16 additions & 16 deletions docs/user/tutorial.rst
@@ -149,7 +149,7 @@ defines the type of dataset loader we want to use. That is
 .. code-block:: python
     'dataset_loader_train': {
-        '__factory__': 'palladium.dataset.CSV',
+        '!': 'palladium.dataset.CSV',

 The rest of what is inside the ``dataset_loader_train`` are the
 keyword arguments that are used to initialize the
@@ -159,7 +159,7 @@ keyword arguments that are used to initialize the
 .. code-block:: python
     'dataset_loader_train': {
-        '__factory__': 'palladium.dataset.CSV',
+        '!': 'palladium.dataset.CSV',
         'path': 'iris.data',
         'names': [
             'sepal length',
@@ -232,7 +232,7 @@ scikit-learn:
 .. code-block:: python
     'model': {
-        '__factory__': 'sklearn.linear_model.LogisticRegression',
+        '!': 'sklearn.linear_model.LogisticRegression',
         'C': 0.3,
         },
@@ -367,10 +367,10 @@ part of the configuration:
 .. code-block:: python
     'model_persister': {
-        '__factory__': 'palladium.persistence.CachedUpdatePersister',
+        '!': 'palladium.persistence.CachedUpdatePersister',
         'update_cache_rrule': {'freq': 'HOURLY'},
         'impl': {
-            '__factory__': 'palladium.persistence.Database',
+            '!': 'palladium.persistence.Database',
             'url': 'sqlite:///iris-model.db',
             },
         },
@@ -407,9 +407,9 @@ model's version:
 .. code-block:: python
     'model_persister': {
-        '__factory__': 'palladium.persistence.CachedUpdatePersister',
+        '!': 'palladium.persistence.CachedUpdatePersister',
         'impl': {
-            '__factory__': 'palladium.persistence.File',
+            '!': 'palladium.persistence.File',
             'path': 'model-{version}.pickle',
             },
         },
@@ -420,9 +420,9 @@ models, you can use the RestPersister:
 .. code-block:: python
     'model_persister': {
-        '__factory__': 'palladium.persistence.CachedUpdatePersister',
+        '!': 'palladium.persistence.CachedUpdatePersister',
         'impl': {
-            '__factory__': 'palladium.persistence.Rest',
+            '!': 'palladium.persistence.Rest',
             'url': 'http://localhost:8081/artifactory/modelz/{version}',
             'auth': ('username', 'passw0rd'),
             },
@@ -440,7 +440,7 @@ endpoint. Let us take a look at how it is configured:
 .. code-block:: python
     'predict_service': {
-        '__factory__': 'palladium.server.PredictService',
+        '!': 'palladium.server.PredictService',
         'mapping': [
             ('sepal length', 'float'),
             ('sepal width', 'float'),
@@ -450,7 +450,7 @@ endpoint. Let us take a look at how it is configured:
     }

 Again, the specific implementation of the ``predict_service`` that we
-use is specified through the ``__factory__`` setting.
+use is specified through the ``!`` setting.

 The ``mapping`` defines which request parameters are to be expected.
 In this example, we expect a ``float`` number for each of ``sepal
@@ -522,7 +522,7 @@ different entry points:
 .. code-block:: python
     'predict_service1': {
-        '__factory__': 'mypackage.server.PredictService',
+        '!': 'mypackage.server.PredictService',
         'mapping': [
             ('sepal length', 'float'),
             ('sepal width', 'float'),
@@ -533,7 +533,7 @@ different entry points:
         'decorator_list_name': 'predict_decorators',
         }
     'predict_service2': {
-        '__factory__': 'mypackage.server.PredictServiceID',
+        '!': 'mypackage.server.PredictServiceID',
         'mapping': [
             ('id', 'int'),
             ],
@@ -590,7 +590,7 @@ entry in ``config.py`` to look like this:
 .. code-block:: python
     'model': {
-        '__factory__': 'iris.model',
+        '!': 'iris.model',
         'clf__C': 0.3,
         },
@@ -609,7 +609,7 @@ configuration file, e.g.:
 .. code-block:: python
     'model': {
-        '__factory__': 'sklearn.pipeline.Pipeline',
-        'steps': [['clf', {'__factory__': 'sklearn.linear_model.LinearRegression'}],
+        '!': 'sklearn.pipeline.Pipeline',
+        'steps': [['clf', {'!': 'sklearn.linear_model.LinearRegression'}],
             ],
         },
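For reference, the configuration in that last hunk corresponds roughly
to building the pipeline directly in Python (a sketch, assuming
scikit-learn is installed); parameters like ``clf__C`` in the earlier
hunk use scikit-learn's double-underscore convention:

.. code-block:: python

    # Rough Python equivalent of the pipeline configured above.
    from sklearn.linear_model import LinearRegression
    from sklearn.pipeline import Pipeline

    model = Pipeline(steps=[('clf', LinearRegression())])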
8 changes: 4 additions & 4 deletions docs/user/web-service.rst
@@ -31,7 +31,7 @@ configuration from the :ref:`tutorial`:
 .. code-block:: python
     'predict_service': {
-        '__factory__': 'palladium.server.PredictService',
+        '!': 'palladium.server.PredictService',
         'mapping': [
             ('sepal length', 'float'),
             ('sepal width', 'float'),
@@ -106,7 +106,7 @@ there's a list of predictions that's returned:

 Should a different output format be desired than the one implemented
 by :class:`~palladium.interfaces.PredictService`, it is possible to use a
-different class altogether by setting an appropriate ``__factory__``
+different class altogether by setting an appropriate ``!``
 (though that class will likely derive from
 :class:`~palladium.interfaces.PredictService` for reasons of convenience).

@@ -271,13 +271,13 @@ endpoints is this:
     'flask_add_url_rules': [
         {
-            '__factory__': 'palladium.server.add_url_rule',
+            '!': 'palladium.server.add_url_rule',
             'rule': '/fit',
             'view_func': 'palladium.server.fit',
             'methods': ['POST'],
             },
         {
-            '__factory__': 'palladium.server.add_url_rule',
+            '!': 'palladium.server.add_url_rule',
             'rule': '/update-model-cache',
             'view_func': 'palladium.server.update_model_cache',
             'methods': ['POST'],
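A quick sketch of exercising the two endpoints registered above,
assuming a palladium devserver listening on localhost:5000 (host and
port are assumptions, not part of this diff):

.. code-block:: python

    # Hypothetical calls to the /fit and /update-model-cache endpoints
    # registered above; localhost:5000 is an assumed devserver address.
    from urllib.request import Request, urlopen

    for path in ('/fit', '/update-model-cache'):
        response = urlopen(Request('http://localhost:5000' + path, method='POST'))
        print(path, response.status)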
2 changes: 1 addition & 1 deletion examples/R/config-iris-dataset-from-python.py
@@ -4,7 +4,7 @@
 {
     'dataset_loader_train': {
-        '__factory__': 'palladium.dataset.CSV',
+        '!': 'palladium.dataset.CSV',
         'path': 'iris.data',
         'names': [
             'sepal length',
