diff --git a/.travis.yml b/.travis.yml index a6f4d3c..eff8101 100644 --- a/.travis.yml +++ b/.travis.yml @@ -8,7 +8,7 @@ matrix: dist: xenial sudo: true env: - - TRAVIS=yes + - TRAVIS=yes AWS_ACCESS_KEY_ID=test AWS_SECRET_ACCESS_KEY=test before_install: - pip install -U pip && pip --version - wget https://repo.anaconda.com/miniconda/Miniconda3-4.6.14-Linux-x86_64.sh -O miniconda.sh diff --git a/CHANGES.txt b/CHANGES.txt index a626264..534fb56 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -1,3 +1,20 @@ +v1.2.3 - 2020-05-07 +=================== + +- Updated requirements in order to use newer versions of dependencies + (also fixing potential security vulnerabilities in dependencies) + +- Added support for handling model attachments + +- Exclamation mark `!` can now be used instead of `__factory__` in + configuration files + +v1.2.2.1 - 2019-09-30 +===================== + +- Added AWS S3 persister + + v1.2.2 - 2019-08-15 =================== diff --git a/VERSION b/VERSION index 23aa839..0495c4a 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -1.2.2 +1.2.3 diff --git a/docs/user/R.rst b/docs/user/R.rst index 4c720e1..00ccf60 100644 --- a/docs/user/R.rst +++ b/docs/user/R.rst @@ -42,7 +42,7 @@ above: .. code-block:: python 'dataset_loader_train': { - '__factory__': 'palladium.R.DatasetLoader', + '!': 'palladium.R.DatasetLoader', 'scriptname': 'iris.R', 'funcname': 'dataset', }, @@ -54,7 +54,7 @@ R classification models are configured very similarly, using .. code-block:: python 'model': { - '__factory__': 'palladium.R.ClassificationModel', + '!': 'palladium.R.ClassificationModel', 'scriptname': 'iris.R', 'funcname': 'train.randomForest', 'encode_labels': True, @@ -99,7 +99,7 @@ Here's how this would look like: .. code-block:: python 'model': { - '__factory__': 'palladium.R.RegressionModel', + '!': 'palladium.R.RegressionModel', 'scriptname': 'tooth.R', 'funcname': 'train.randomForest', }, @@ -110,7 +110,7 @@ classification above: .. code-block:: python 'dataset_loader_train': { - '__factory__': 'palladium.R.DatasetLoader', + '!': 'palladium.R.DatasetLoader', 'scriptname': 'tooth.R', 'funcname': 'dataset', }, @@ -129,13 +129,13 @@ including :class:`~palladium.R.Rpy2Transform` in a .. code-block:: python 'model': { - '__factory__': 'sklearn.pipeline.Pipeline', + '!': 'sklearn.pipeline.Pipeline', 'steps': [ ['rpy2', { - '__factory__': 'palladium.R.Rpy2Transform', + '!': 'palladium.R.Rpy2Transform', }], ['regressor', { - '__factory__': 'palladium.R.RegressionModel', + '!': 'palladium.R.RegressionModel', 'scriptname': 'tooth.R', 'funcname': 'train.randomForest', }], diff --git a/docs/user/configuration.rst b/docs/user/configuration.rst index 5004919..defdfd3 100644 --- a/docs/user/configuration.rst +++ b/docs/user/configuration.rst @@ -31,7 +31,7 @@ you to pass in things like database credentials from the environment: .. code-block:: python 'dataset_loader_train': { - '__factory__': 'palladium.dataset.SQL', + '!': 'palladium.dataset.SQL', 'url': 'mysql://{}:{}@localhost/test?encoding=utf8'.format( environ['DB_USER'], environ['DB_PASS'], ), @@ -46,7 +46,7 @@ folder as the configuration: .. code-block:: python 'dataset_loader_train': { - '__factory__': 'palladium.dataset.CSV', + '!': 'palladium.dataset.CSV', 'path': '{}/data.csv'.format(here), } @@ -80,7 +80,7 @@ file: .. 
code-block:: python 'dataset_loader_train': { - '__factory__': 'palladium.dataset.CSV', + '!': 'palladium.dataset.CSV', 'path': '{}/train.csv'.format(here), 'many': '...', 'more': {'...'}, @@ -88,7 +88,7 @@ file: } 'dataset_loader_test': { - '__factory__': 'palladium.dataset.CSV', + '!': 'palladium.dataset.CSV', 'path': '{}/test.csv'.format(here), 'many': '...', 'more': {'...'}, @@ -100,7 +100,7 @@ With ``__copy__``, you can reduce this down to: .. code-block:: python 'dataset_loader_train': { - '__factory__': 'palladium.dataset.CSV', + '!': 'palladium.dataset.CSV', 'path': '{}/train.csv'.format(here), 'many': '...', 'more': {'...'}, diff --git a/docs/user/deployment.rst b/docs/user/deployment.rst index ca971c4..2b45871 100644 --- a/docs/user/deployment.rst +++ b/docs/user/deployment.rst @@ -421,7 +421,7 @@ startup: .. code-block:: python 'oauth_init_app': { - '__factory__': 'myoauth.oauth.init_app', + '!': 'myoauth.oauth.init_app', 'app': 'palladium.server.app', }, diff --git a/docs/user/faq.rst b/docs/user/faq.rst index 89655b3..c5ae981 100644 --- a/docs/user/faq.rst +++ b/docs/user/faq.rst @@ -156,7 +156,7 @@ passed at runtime. 'C': [0.1, 0.3, 1.0], }, 'cv': { - '__factory__': 'palladium.util.Partial', + '!': 'palladium.util.Partial', 'func': 'sklearn.cross_validation.StratifiedKFold', 'random_state': 0, }, @@ -177,16 +177,16 @@ classifier: .. code-block:: python 'grid_search': { - '__factory__': 'skopt.BayesSearchCV', + '!': 'skopt.BayesSearchCV', 'estimator': {'__copy__': 'model'}, 'n_iter': 16, 'search_spaces': { 'C': { - '__factory__': 'skopt.space.Real', + '!': 'skopt.space.Real', 'low': 1e-6, 'high': 1e+1, 'prior': 'log-uniform', }, 'degree': { - '__factory__': 'skopt.space.Integer', + '!': 'skopt.space.Integer', 'low': 1, 'high': 20, }, }, @@ -208,35 +208,34 @@ grid search: .. code-block:: python +{ 'grid_search': { - '__factory__': 'palladium.fit.with_parallel_backend', + '!': 'palladium.fit.with_parallel_backend', 'estimator': { - '__factory__': 'sklearn.model_selection.GridSearchCV', + '!': 'sklearn.model_selection.GridSearchCV', 'estimator': {'__copy__': 'model'}, - 'param_grid': { - 'C': [0.1, 0.3, 1.0], - }, - 'n_jobs': -1, + 'param_grid': {'__copy__': 'grid_search.param_grid'}, + 'scoring': {'__copy__': 'scoring'}, }, - 'backend': 'dask.distributed', - 'scheduler_host': '127.0.0.1:8786', + 'backend': 'dask', }, - '_init_distributed': { - '__factory__': 'palladium.util.resolve_dotted_name', - 'dotted_name': 'distributed.joblib.joblib', + '_init_client': { + '!': 'dask.distributed.Client', + 'address': '127.0.0.1:8786', }, +} -To start up the Dask scheduler and workers you can follow the -dask.distributed documentation. Here's an example that runs three -workers locally: +For details on how to set up Dask workers and a scheduler, please +consult the `Dask docs `_. But here's how you +would start up a scheduler and three workers locally: .. code-block:: bash $ dask-scheduler Scheduler started at 127.0.0.1:8786 - $ dask-worker 127.0.0.1:8786 + $ dask-worker 127.0.0.1:8786 # start each in a new terminal $ dask-worker 127.0.0.1:8786 $ dask-worker 127.0.0.1:8786 diff --git a/docs/user/julia.rst b/docs/user/julia.rst index 5154b14..b2af4f7 100644 --- a/docs/user/julia.rst +++ b/docs/user/julia.rst @@ -44,7 +44,7 @@ configuration in that example defines the model to be of type .. 
code-block:: python 'model': { - '__factory__': 'palladium.julia.ClassificationModel', + '!': 'palladium.julia.ClassificationModel', 'fit_func': 'SVM.svm', 'predict_func': 'SVM.predict', } diff --git a/docs/user/tutorial.rst b/docs/user/tutorial.rst index 3d06de4..62b8890 100644 --- a/docs/user/tutorial.rst +++ b/docs/user/tutorial.rst @@ -149,7 +149,7 @@ defines the type of dataset loader we want to use. That is .. code-block:: python 'dataset_loader_train': { - '__factory__': 'palladium.dataset.CSV', + '!': 'palladium.dataset.CSV', The rest of what is inside the ``dataset_loader_train`` are the keyword arguments that are used to initialize the @@ -159,7 +159,7 @@ keyword arguments that are used to initialize the .. code-block:: python 'dataset_loader_train': { - '__factory__': 'palladium.dataset.CSV', + '!': 'palladium.dataset.CSV', 'path': 'iris.data', 'names': [ 'sepal length', @@ -232,7 +232,7 @@ scikit-learn: .. code-block:: python 'model': { - '__factory__': 'sklearn.linear_model.LogisticRegression', + '!': 'sklearn.linear_model.LogisticRegression', 'C': 0.3, }, @@ -367,10 +367,10 @@ part of the configuration: .. code-block:: python 'model_persister': { - '__factory__': 'palladium.persistence.CachedUpdatePersister', + '!': 'palladium.persistence.CachedUpdatePersister', 'update_cache_rrule': {'freq': 'HOURLY'}, 'impl': { - '__factory__': 'palladium.persistence.Database', + '!': 'palladium.persistence.Database', 'url': 'sqlite:///iris-model.db', }, }, @@ -407,9 +407,9 @@ model's version: .. code-block:: python 'model_persister': { - '__factory__': 'palladium.persistence.CachedUpdatePersister', + '!': 'palladium.persistence.CachedUpdatePersister', 'impl': { - '__factory__': 'palladium.persistence.File', + '!': 'palladium.persistence.File', 'path': 'model-{version}.pickle', }, }, @@ -420,9 +420,9 @@ models, you can use the RestPersister: .. code-block:: python 'model_persister': { - '__factory__': 'palladium.persistence.CachedUpdatePersister', + '!': 'palladium.persistence.CachedUpdatePersister', 'impl': { - '__factory__': 'palladium.persistence.Rest', + '!': 'palladium.persistence.Rest', 'url': 'http://localhost:8081/artifactory/modelz/{version}', 'auth': ('username', 'passw0rd'), }, @@ -440,7 +440,7 @@ endpoint. Let us take a look at how it is configured: .. code-block:: python 'predict_service': { - '__factory__': 'palladium.server.PredictService', + '!': 'palladium.server.PredictService', 'mapping': [ ('sepal length', 'float'), ('sepal width', 'float'), @@ -450,7 +450,7 @@ endpoint. Let us take a look at how it is configured: } Again, the specific implementation of the ``predict_service`` that we -use is specified through the ``__factory__`` setting. +use is specified through the ``!`` setting. The ``mapping`` defines which request parameters are to be expected. In this example, we expect a ``float`` number for each of ``sepal @@ -522,7 +522,7 @@ different entry points: .. code-block:: python 'predict_service1': { - '__factory__': 'mypackage.server.PredictService', + '!': 'mypackage.server.PredictService', 'mapping': [ ('sepal length', 'float'), ('sepal width', 'float'), @@ -533,7 +533,7 @@ different entry points: 'decorator_list_name': 'predict_decorators', } 'predict_service2': { - '__factory__': 'mypackage.server.PredictServiceID', + '!': 'mypackage.server.PredictServiceID', 'mapping': [ ('id', 'int'), ], @@ -590,7 +590,7 @@ entry in ``config.py`` to look like this: .. 
code-block:: python 'model': { - '__factory__': 'iris.model', + '!': 'iris.model', 'clf__C': 0.3, }, @@ -609,7 +609,7 @@ configuration file, e.g.: .. code-block:: python 'model': { - '__factory__': 'sklearn.pipeline.Pipeline', - 'steps': [['clf', {'__factory__': 'sklearn.linear_model.LinearRegression'}], + '!': 'sklearn.pipeline.Pipeline', + 'steps': [['clf', {'!': 'sklearn.linear_model.LinearRegression'}], ], }, diff --git a/docs/user/web-service.rst b/docs/user/web-service.rst index 647faba..b1caa5b 100644 --- a/docs/user/web-service.rst +++ b/docs/user/web-service.rst @@ -31,7 +31,7 @@ configuration from the :ref:`tutorial`: .. code-block:: python 'predict_service': { - '__factory__': 'palladium.server.PredictService', + '!': 'palladium.server.PredictService', 'mapping': [ ('sepal length', 'float'), ('sepal width', 'float'), @@ -106,7 +106,7 @@ there's a list of predictions that's returned: Should a different output format be desired than the one implemented by :class:`~palladium.interfaces.PredictService`, it is possible to use a -different class altogether by setting an appropriate ``__factory__`` +different class altogether by setting an appropriate ``!`` (though that class will likely derive from :class:`~palladium.interfaces.PredictService` for reasons of convenience). @@ -271,13 +271,13 @@ endpoints is this: 'flask_add_url_rules': [ { - '__factory__': 'palladium.server.add_url_rule', + '!': 'palladium.server.add_url_rule', 'rule': '/fit', 'view_func': 'palladium.server.fit', 'methods': ['POST'], }, { - '__factory__': 'palladium.server.add_url_rule', + '!': 'palladium.server.add_url_rule', 'rule': '/update-model-cache', 'view_func': 'palladium.server.update_model_cache', 'methods': ['POST'], diff --git a/examples/R/config-iris-dataset-from-python.py b/examples/R/config-iris-dataset-from-python.py index f4271a4..7f31a6c 100644 --- a/examples/R/config-iris-dataset-from-python.py +++ b/examples/R/config-iris-dataset-from-python.py @@ -4,7 +4,7 @@ { 'dataset_loader_train': { - '__factory__': 'palladium.dataset.CSV', + '!': 'palladium.dataset.CSV', 'path': 'iris.data', 'names': [ 'sepal length', diff --git a/examples/R/config-iris.py b/examples/R/config-iris.py index 40c4b06..c1338b2 100644 --- a/examples/R/config-iris.py +++ b/examples/R/config-iris.py @@ -5,34 +5,34 @@ }, 'dataset_loader_train': { - '__factory__': 'palladium.R.DatasetLoader', + '!': 'palladium.R.DatasetLoader', 'scriptname': 'iris.R', 'funcname': 'dataset', }, 'dataset_loader_test': { - '__factory__': 'palladium.R.DatasetLoader', + '!': 'palladium.R.DatasetLoader', 'scriptname': 'iris.R', 'funcname': 'dataset', }, 'model': { - '__factory__': 'palladium.R.ClassificationModel', + '!': 'palladium.R.ClassificationModel', 'scriptname': 'iris.R', 'funcname': 'train.randomForest', 'encode_labels': True, }, 'model_persister': { - '__factory__': 'palladium.persistence.CachedUpdatePersister', + '!': 'palladium.persistence.CachedUpdatePersister', 'impl': { - '__factory__': 'palladium.persistence.Database', + '!': 'palladium.persistence.Database', 'url': 'sqlite:///iris-model.db', }, }, 'predict_service': { - '__factory__': 'palladium.server.PredictService', + '!': 'palladium.server.PredictService', 'mapping': [ ('sepal length', 'float'), ('sepal width', 'float'), diff --git a/examples/R/config-tooth.py b/examples/R/config-tooth.py index c403e6a..b2dc98e 100644 --- a/examples/R/config-tooth.py +++ b/examples/R/config-tooth.py @@ -5,25 +5,25 @@ }, 'dataset_loader_train': { - '__factory__': 'palladium.R.DatasetLoader', + '!': 
'palladium.R.DatasetLoader', 'scriptname': 'tooth.R', 'funcname': 'dataset', }, 'dataset_loader_test': { - '__factory__': 'palladium.R.DatasetLoader', + '!': 'palladium.R.DatasetLoader', 'scriptname': 'tooth.R', 'funcname': 'dataset', }, 'model': { - '__factory__': 'sklearn.pipeline.Pipeline', + '!': 'sklearn.pipeline.Pipeline', 'steps': [ ['rpy2', { - '__factory__': 'palladium.R.Rpy2Transform', + '!': 'palladium.R.Rpy2Transform', }], ['regressor', { - '__factory__': 'palladium.R.RegressionModel', + '!': 'palladium.R.RegressionModel', 'scriptname': 'tooth.R', 'funcname': 'train.randomForest', }], @@ -31,15 +31,15 @@ }, 'model_persister': { - '__factory__': 'palladium.persistence.CachedUpdatePersister', + '!': 'palladium.persistence.CachedUpdatePersister', 'impl': { - '__factory__': 'palladium.persistence.Database', + '!': 'palladium.persistence.Database', 'url': 'sqlite:///tooth-model.db', }, }, 'predict_service': { - '__factory__': 'palladium.server.PredictService', + '!': 'palladium.server.PredictService', 'mapping': [ ('supp', 'str'), ('dose', 'float'), diff --git a/examples/iris/config.py b/examples/iris/config.py index e58ab22..16accb2 100644 --- a/examples/iris/config.py +++ b/examples/iris/config.py @@ -5,7 +5,7 @@ }, 'dataset_loader_train': { - '__factory__': 'palladium.dataset.CSV', + '!': 'palladium.dataset.CSV', 'path': 'iris.data', 'names': [ 'sepal length', @@ -19,7 +19,7 @@ }, 'dataset_loader_test': { - '__factory__': 'palladium.dataset.CSV', + '!': 'palladium.dataset.CSV', 'path': 'iris.data', 'names': [ 'sepal length', @@ -33,7 +33,7 @@ }, 'model': { - '__factory__': 'sklearn.linear_model.LogisticRegression', + '!': 'sklearn.linear_model.LogisticRegression', 'C': 0.3, 'solver': 'lbfgs', 'multi_class': 'auto', @@ -49,16 +49,16 @@ }, 'model_persister': { - '__factory__': 'palladium.persistence.CachedUpdatePersister', + '!': 'palladium.persistence.CachedUpdatePersister', 'update_cache_rrule': {'freq': 'HOURLY'}, 'impl': { - '__factory__': 'palladium.persistence.Database', + '!': 'palladium.persistence.Database', 'url': 'sqlite:///iris-model.db', }, }, 'predict_service': { - '__factory__': 'palladium.server.PredictService', + '!': 'palladium.server.PredictService', 'mapping': [ ('sepal length', 'float'), ('sepal width', 'float'), diff --git a/examples/julia/config.py b/examples/julia/config.py index f91f7aa..640978c 100644 --- a/examples/julia/config.py +++ b/examples/julia/config.py @@ -1,6 +1,6 @@ { 'dataset_loader_train': { - '__factory__': 'palladium.dataset.CSV', + '!': 'palladium.dataset.CSV', 'path': 'iris.data', 'names': [ 'sepal length', @@ -15,7 +15,7 @@ }, 'dataset_loader_test': { - '__factory__': 'palladium.dataset.CSV', + '!': 'palladium.dataset.CSV', 'path': 'iris.data', 'names': [ 'sepal length', @@ -30,18 +30,18 @@ }, 'model': { - '__factory__': 'palladium.julia.ClassificationModel', + '!': 'palladium.julia.ClassificationModel', 'fit_func': 'SVM.svm', 'predict_func': 'SVM.predict', }, 'model_persister': { - '__factory__': 'palladium.persistence.Database', + '!': 'palladium.persistence.Database', 'url': 'sqlite:///iris-model.db', }, 'predict_service': { - '__factory__': 'palladium.server.PredictService', + '!': 'palladium.server.PredictService', 'mapping': [ ('sepal length', 'float'), ('sepal width', 'float'), diff --git a/examples/keras/config.py b/examples/keras/config.py new file mode 100644 index 0000000..436a8bf --- /dev/null +++ b/examples/keras/config.py @@ -0,0 +1,51 @@ +{ + 'dataset_loader_train': { + '__factory__': 'palladium.dataset.Table', + 'path': 
'iris.data', + 'names': [ + 'sepal length', + 'sepal width', + 'petal length', + 'petal width', + 'species', + ], + 'target_column': 'species', + 'sep': ',', + 'nrows': 100, + }, + + 'dataset_loader_test': { + '__copy__': 'dataset_loader_train', + 'nrows': None, + 'skiprows': 100, + }, + + 'model': { + '__factory__': 'model.make_pipeline', + 'epochs': 100, + }, + + 'model_persister': { + '__factory__': 'palladium.persistence.Database', + 'url': 'sqlite:///iris-model.db', + }, + + 'predict_service': { + '__factory__': 'palladium.server.PredictService', + 'mapping': [ + ('sepal length', 'float'), + ('sepal width', 'float'), + ('petal length', 'float'), + ('petal width', 'float'), + ], + }, + + 'service_metadata': { + 'service_name': 'iris', + 'service_version': '0.1', + }, + + 'alive': { + 'process_store_required': ('model',), + }, +} diff --git a/examples/keras/iris.data b/examples/keras/iris.data new file mode 100644 index 0000000..f0d63d9 --- /dev/null +++ b/examples/keras/iris.data @@ -0,0 +1,150 @@ +5.2,3.5,1.5,0.2,Iris-setosa +4.3,3.0,1.1,0.1,Iris-setosa +5.6,3.0,4.5,1.5,Iris-versicolor +6.3,3.3,6.0,2.5,Iris-virginica +5.1,3.8,1.5,0.3,Iris-setosa +6.9,3.1,5.1,2.3,Iris-virginica +7.7,3.0,6.1,2.3,Iris-virginica +6.3,2.3,4.4,1.3,Iris-versicolor +4.4,2.9,1.4,0.2,Iris-setosa +7.2,3.2,6.0,1.8,Iris-virginica +7.4,2.8,6.1,1.9,Iris-virginica +7.7,2.8,6.7,2.0,Iris-virginica +6.1,2.8,4.0,1.3,Iris-versicolor +6.9,3.1,5.4,2.1,Iris-virginica +5.6,2.9,3.6,1.3,Iris-versicolor +6.5,3.2,5.1,2.0,Iris-virginica +5.8,2.7,5.1,1.9,Iris-virginica +7.1,3.0,5.9,2.1,Iris-virginica +5.0,3.6,1.4,0.2,Iris-setosa +5.0,2.3,3.3,1.0,Iris-versicolor +6.5,3.0,5.2,2.0,Iris-virginica +4.6,3.1,1.5,0.2,Iris-setosa +6.1,2.9,4.7,1.4,Iris-versicolor +6.9,3.1,4.9,1.5,Iris-versicolor +4.9,2.5,4.5,1.7,Iris-virginica +6.2,2.9,4.3,1.3,Iris-versicolor +4.8,3.1,1.6,0.2,Iris-setosa +5.7,2.8,4.1,1.3,Iris-versicolor +5.8,2.7,4.1,1.0,Iris-versicolor +6.5,3.0,5.8,2.2,Iris-virginica +6.4,2.8,5.6,2.2,Iris-virginica +5.5,3.5,1.3,0.2,Iris-setosa +4.9,2.4,3.3,1.0,Iris-versicolor +6.5,2.8,4.6,1.5,Iris-versicolor +6.0,2.2,5.0,1.5,Iris-virginica +5.1,3.8,1.6,0.2,Iris-setosa +6.7,3.1,5.6,2.4,Iris-virginica +5.0,3.4,1.6,0.4,Iris-setosa +5.2,2.7,3.9,1.4,Iris-versicolor +5.4,3.7,1.5,0.2,Iris-setosa +5.3,3.7,1.5,0.2,Iris-setosa +7.7,2.6,6.9,2.3,Iris-virginica +6.8,2.8,4.8,1.4,Iris-versicolor +5.2,4.1,1.5,0.1,Iris-setosa +6.0,3.4,4.5,1.6,Iris-versicolor +4.7,3.2,1.3,0.2,Iris-setosa +7.2,3.6,6.1,2.5,Iris-virginica +6.8,3.2,5.9,2.3,Iris-virginica +6.3,2.8,5.1,1.5,Iris-virginica +5.9,3.0,4.2,1.5,Iris-versicolor +5.1,3.8,1.9,0.4,Iris-setosa +5.0,3.3,1.4,0.2,Iris-setosa +5.0,2.0,3.5,1.0,Iris-versicolor +4.4,3.2,1.3,0.2,Iris-setosa +6.3,2.5,5.0,1.9,Iris-virginica +5.9,3.2,4.8,1.8,Iris-versicolor +5.0,3.5,1.6,0.6,Iris-setosa +5.4,3.0,4.5,1.5,Iris-versicolor +6.7,3.1,4.7,1.5,Iris-versicolor +5.7,2.5,5.0,2.0,Iris-virginica +5.0,3.4,1.5,0.2,Iris-setosa +5.8,2.7,3.9,1.2,Iris-versicolor +6.1,2.8,4.7,1.2,Iris-versicolor +4.5,2.3,1.3,0.3,Iris-setosa +5.0,3.2,1.2,0.2,Iris-setosa +5.4,3.9,1.7,0.4,Iris-setosa +6.7,3.0,5.2,2.3,Iris-virginica +6.6,2.9,4.6,1.3,Iris-versicolor +6.0,2.2,4.0,1.0,Iris-versicolor +6.7,3.0,5.0,1.7,Iris-versicolor +7.0,3.2,4.7,1.4,Iris-versicolor +5.6,2.5,3.9,1.1,Iris-versicolor +4.9,3.0,1.4,0.2,Iris-setosa +6.7,3.3,5.7,2.1,Iris-virginica +5.4,3.4,1.5,0.4,Iris-setosa +7.2,3.0,5.8,1.6,Iris-virginica +5.5,2.4,3.8,1.1,Iris-versicolor +5.7,3.8,1.7,0.3,Iris-setosa +6.9,3.2,5.7,2.3,Iris-virginica +6.2,3.4,5.4,2.3,Iris-virginica 
+4.6,3.4,1.4,0.3,Iris-setosa +6.4,3.2,5.3,2.3,Iris-virginica +4.8,3.4,1.6,0.2,Iris-setosa +6.7,3.3,5.7,2.5,Iris-virginica +5.6,2.8,4.9,2.0,Iris-virginica +4.4,3.0,1.3,0.2,Iris-setosa +6.7,2.5,5.8,1.8,Iris-virginica +6.4,3.2,4.5,1.5,Iris-versicolor +5.8,2.7,5.1,1.9,Iris-virginica +6.0,2.9,4.5,1.5,Iris-versicolor +6.6,3.0,4.4,1.4,Iris-versicolor +5.7,2.8,4.5,1.3,Iris-versicolor +5.5,4.2,1.4,0.2,Iris-setosa +5.8,2.8,5.1,2.4,Iris-virginica +6.4,2.7,5.3,1.9,Iris-virginica +5.7,2.9,4.2,1.3,Iris-versicolor +6.4,2.9,4.3,1.3,Iris-versicolor +5.0,3.5,1.3,0.3,Iris-setosa +5.1,3.7,1.5,0.4,Iris-setosa +5.7,3.0,4.2,1.2,Iris-versicolor +6.5,3.0,5.5,1.8,Iris-virginica +4.9,3.1,1.5,0.1,Iris-setosa +6.3,3.4,5.6,2.4,Iris-virginica +6.7,3.1,4.4,1.4,Iris-versicolor +5.4,3.9,1.3,0.4,Iris-setosa +5.5,2.6,4.4,1.2,Iris-versicolor +4.8,3.0,1.4,0.3,Iris-setosa +7.7,3.8,6.7,2.2,Iris-virginica +6.3,3.3,4.7,1.6,Iris-versicolor +6.0,3.0,4.8,1.8,Iris-virginica +6.1,3.0,4.6,1.4,Iris-versicolor +4.7,3.2,1.6,0.2,Iris-setosa +7.3,2.9,6.3,1.8,Iris-virginica +4.8,3.0,1.4,0.1,Iris-setosa +6.1,3.0,4.9,1.8,Iris-virginica +5.7,2.6,3.5,1.0,Iris-versicolor +5.6,3.0,4.1,1.3,Iris-versicolor +6.4,2.8,5.6,2.1,Iris-virginica +6.3,2.9,5.6,1.8,Iris-virginica +5.7,4.4,1.5,0.4,Iris-setosa +6.1,2.6,5.6,1.4,Iris-virginica +5.1,3.5,1.4,0.2,Iris-setosa +5.9,3.0,5.1,1.8,Iris-virginica +6.3,2.7,4.9,1.8,Iris-virginica +5.5,2.5,4.0,1.3,Iris-versicolor +5.5,2.3,4.0,1.3,Iris-versicolor +4.6,3.6,1.0,0.2,Iris-setosa +5.8,4.0,1.2,0.2,Iris-setosa +5.1,2.5,3.0,1.1,Iris-versicolor +5.2,3.4,1.4,0.2,Iris-setosa +4.9,3.1,1.5,0.1,Iris-setosa +5.0,3.0,1.6,0.2,Iris-setosa +6.0,2.7,5.1,1.6,Iris-versicolor +4.9,3.1,1.5,0.1,Iris-setosa +5.8,2.6,4.0,1.2,Iris-versicolor +6.8,3.0,5.5,2.1,Iris-virginica +5.6,2.7,4.2,1.3,Iris-versicolor +4.8,3.4,1.9,0.2,Iris-setosa +6.2,2.8,4.8,1.8,Iris-virginica +4.6,3.2,1.4,0.2,Iris-setosa +6.2,2.2,4.5,1.5,Iris-versicolor +5.1,3.3,1.7,0.5,Iris-setosa +5.4,3.4,1.7,0.2,Iris-setosa +5.1,3.4,1.5,0.2,Iris-setosa +5.1,3.5,1.4,0.3,Iris-setosa +7.6,3.0,6.6,2.1,Iris-virginica +5.5,2.4,3.7,1.0,Iris-versicolor +6.4,3.1,5.5,1.8,Iris-virginica +7.9,3.8,6.4,2.0,Iris-virginica +6.3,2.5,4.9,1.5,Iris-versicolor diff --git a/examples/keras/model.py b/examples/keras/model.py new file mode 100644 index 0000000..3905fac --- /dev/null +++ b/examples/keras/model.py @@ -0,0 +1,33 @@ +from dstoolbox.pipeline import PipelineY +from keras.models import Sequential +from keras.layers import Dense +from keras.wrappers.scikit_learn import KerasClassifier +import numpy as np +from sklearn.preprocessing import LabelBinarizer + + +np.random.seed(0) + + +def keras_model(): + model = Sequential() + model.add(Dense(8, input_dim=4, activation='relu')) + model.add(Dense(3, activation='softmax')) + model.compile( + loss='categorical_crossentropy', + optimizer='adam', + metrics=['accuracy'], + ) + return model + + +def make_pipeline(**kw): + # In the case of this Iris dataset, our targets are string labels, + # and KerasClassifier doesn't like that. So we transform the + # targets into a one-hot encoding instead using PipeLineY. 
+ return PipelineY([ + ('clf', KerasClassifier(build_fn=keras_model, **kw)), + ], + y_transformer=LabelBinarizer(), + predict_use_inverse=False, + ) diff --git a/examples/keras/requirements.txt b/examples/keras/requirements.txt new file mode 100644 index 0000000..cdf759b --- /dev/null +++ b/examples/keras/requirements.txt @@ -0,0 +1,3 @@ +dstoolbox +Keras +tensorflow diff --git a/examples/xgboost/config.py b/examples/xgboost/config.py new file mode 100644 index 0000000..39307ab --- /dev/null +++ b/examples/xgboost/config.py @@ -0,0 +1,60 @@ +{ + 'dataset_loader_train': { + '__factory__': 'palladium.dataset.Table', + 'path': 'iris.data', + 'names': [ + 'sepal length', + 'sepal width', + 'petal length', + 'petal width', + 'species', + ], + 'target_column': 'species', + 'sep': ',', + 'nrows': 100, + }, + + 'dataset_loader_test': { + '__copy__': 'dataset_loader_train', + 'nrows': None, + 'skiprows': 100, + }, + + 'model': { + '__factory__': 'xgboost.XGBClassifier', + }, + + 'grid_search': { + 'param_grid': { + 'max_depth': [2, 3, 4], + 'n_estimators': [3, 30, 300], + }, + 'cv': 8, + 'verbose': 4, + 'n_jobs': -1, + }, + + 'model_persister': { + '__factory__': 'palladium.persistence.Database', + 'url': 'sqlite:///iris-model.db', + }, + + 'predict_service': { + '__factory__': 'palladium.server.PredictService', + 'mapping': [ + ('sepal length', 'float'), + ('sepal width', 'float'), + ('petal length', 'float'), + ('petal width', 'float'), + ], + }, + + 'service_metadata': { + 'service_name': 'iris', + 'service_version': '0.1', + }, + + 'alive': { + 'process_store_required': ('model',), + }, +} diff --git a/examples/xgboost/iris.data b/examples/xgboost/iris.data new file mode 100644 index 0000000..f0d63d9 --- /dev/null +++ b/examples/xgboost/iris.data @@ -0,0 +1,150 @@ +5.2,3.5,1.5,0.2,Iris-setosa +4.3,3.0,1.1,0.1,Iris-setosa +5.6,3.0,4.5,1.5,Iris-versicolor +6.3,3.3,6.0,2.5,Iris-virginica +5.1,3.8,1.5,0.3,Iris-setosa +6.9,3.1,5.1,2.3,Iris-virginica +7.7,3.0,6.1,2.3,Iris-virginica +6.3,2.3,4.4,1.3,Iris-versicolor +4.4,2.9,1.4,0.2,Iris-setosa +7.2,3.2,6.0,1.8,Iris-virginica +7.4,2.8,6.1,1.9,Iris-virginica +7.7,2.8,6.7,2.0,Iris-virginica +6.1,2.8,4.0,1.3,Iris-versicolor +6.9,3.1,5.4,2.1,Iris-virginica +5.6,2.9,3.6,1.3,Iris-versicolor +6.5,3.2,5.1,2.0,Iris-virginica +5.8,2.7,5.1,1.9,Iris-virginica +7.1,3.0,5.9,2.1,Iris-virginica +5.0,3.6,1.4,0.2,Iris-setosa +5.0,2.3,3.3,1.0,Iris-versicolor +6.5,3.0,5.2,2.0,Iris-virginica +4.6,3.1,1.5,0.2,Iris-setosa +6.1,2.9,4.7,1.4,Iris-versicolor +6.9,3.1,4.9,1.5,Iris-versicolor +4.9,2.5,4.5,1.7,Iris-virginica +6.2,2.9,4.3,1.3,Iris-versicolor +4.8,3.1,1.6,0.2,Iris-setosa +5.7,2.8,4.1,1.3,Iris-versicolor +5.8,2.7,4.1,1.0,Iris-versicolor +6.5,3.0,5.8,2.2,Iris-virginica +6.4,2.8,5.6,2.2,Iris-virginica +5.5,3.5,1.3,0.2,Iris-setosa +4.9,2.4,3.3,1.0,Iris-versicolor +6.5,2.8,4.6,1.5,Iris-versicolor +6.0,2.2,5.0,1.5,Iris-virginica +5.1,3.8,1.6,0.2,Iris-setosa +6.7,3.1,5.6,2.4,Iris-virginica +5.0,3.4,1.6,0.4,Iris-setosa +5.2,2.7,3.9,1.4,Iris-versicolor +5.4,3.7,1.5,0.2,Iris-setosa +5.3,3.7,1.5,0.2,Iris-setosa +7.7,2.6,6.9,2.3,Iris-virginica +6.8,2.8,4.8,1.4,Iris-versicolor +5.2,4.1,1.5,0.1,Iris-setosa +6.0,3.4,4.5,1.6,Iris-versicolor +4.7,3.2,1.3,0.2,Iris-setosa +7.2,3.6,6.1,2.5,Iris-virginica +6.8,3.2,5.9,2.3,Iris-virginica +6.3,2.8,5.1,1.5,Iris-virginica +5.9,3.0,4.2,1.5,Iris-versicolor +5.1,3.8,1.9,0.4,Iris-setosa +5.0,3.3,1.4,0.2,Iris-setosa +5.0,2.0,3.5,1.0,Iris-versicolor +4.4,3.2,1.3,0.2,Iris-setosa +6.3,2.5,5.0,1.9,Iris-virginica +5.9,3.2,4.8,1.8,Iris-versicolor 
+5.0,3.5,1.6,0.6,Iris-setosa +5.4,3.0,4.5,1.5,Iris-versicolor +6.7,3.1,4.7,1.5,Iris-versicolor +5.7,2.5,5.0,2.0,Iris-virginica +5.0,3.4,1.5,0.2,Iris-setosa +5.8,2.7,3.9,1.2,Iris-versicolor +6.1,2.8,4.7,1.2,Iris-versicolor +4.5,2.3,1.3,0.3,Iris-setosa +5.0,3.2,1.2,0.2,Iris-setosa +5.4,3.9,1.7,0.4,Iris-setosa +6.7,3.0,5.2,2.3,Iris-virginica +6.6,2.9,4.6,1.3,Iris-versicolor +6.0,2.2,4.0,1.0,Iris-versicolor +6.7,3.0,5.0,1.7,Iris-versicolor +7.0,3.2,4.7,1.4,Iris-versicolor +5.6,2.5,3.9,1.1,Iris-versicolor +4.9,3.0,1.4,0.2,Iris-setosa +6.7,3.3,5.7,2.1,Iris-virginica +5.4,3.4,1.5,0.4,Iris-setosa +7.2,3.0,5.8,1.6,Iris-virginica +5.5,2.4,3.8,1.1,Iris-versicolor +5.7,3.8,1.7,0.3,Iris-setosa +6.9,3.2,5.7,2.3,Iris-virginica +6.2,3.4,5.4,2.3,Iris-virginica +4.6,3.4,1.4,0.3,Iris-setosa +6.4,3.2,5.3,2.3,Iris-virginica +4.8,3.4,1.6,0.2,Iris-setosa +6.7,3.3,5.7,2.5,Iris-virginica +5.6,2.8,4.9,2.0,Iris-virginica +4.4,3.0,1.3,0.2,Iris-setosa +6.7,2.5,5.8,1.8,Iris-virginica +6.4,3.2,4.5,1.5,Iris-versicolor +5.8,2.7,5.1,1.9,Iris-virginica +6.0,2.9,4.5,1.5,Iris-versicolor +6.6,3.0,4.4,1.4,Iris-versicolor +5.7,2.8,4.5,1.3,Iris-versicolor +5.5,4.2,1.4,0.2,Iris-setosa +5.8,2.8,5.1,2.4,Iris-virginica +6.4,2.7,5.3,1.9,Iris-virginica +5.7,2.9,4.2,1.3,Iris-versicolor +6.4,2.9,4.3,1.3,Iris-versicolor +5.0,3.5,1.3,0.3,Iris-setosa +5.1,3.7,1.5,0.4,Iris-setosa +5.7,3.0,4.2,1.2,Iris-versicolor +6.5,3.0,5.5,1.8,Iris-virginica +4.9,3.1,1.5,0.1,Iris-setosa +6.3,3.4,5.6,2.4,Iris-virginica +6.7,3.1,4.4,1.4,Iris-versicolor +5.4,3.9,1.3,0.4,Iris-setosa +5.5,2.6,4.4,1.2,Iris-versicolor +4.8,3.0,1.4,0.3,Iris-setosa +7.7,3.8,6.7,2.2,Iris-virginica +6.3,3.3,4.7,1.6,Iris-versicolor +6.0,3.0,4.8,1.8,Iris-virginica +6.1,3.0,4.6,1.4,Iris-versicolor +4.7,3.2,1.6,0.2,Iris-setosa +7.3,2.9,6.3,1.8,Iris-virginica +4.8,3.0,1.4,0.1,Iris-setosa +6.1,3.0,4.9,1.8,Iris-virginica +5.7,2.6,3.5,1.0,Iris-versicolor +5.6,3.0,4.1,1.3,Iris-versicolor +6.4,2.8,5.6,2.1,Iris-virginica +6.3,2.9,5.6,1.8,Iris-virginica +5.7,4.4,1.5,0.4,Iris-setosa +6.1,2.6,5.6,1.4,Iris-virginica +5.1,3.5,1.4,0.2,Iris-setosa +5.9,3.0,5.1,1.8,Iris-virginica +6.3,2.7,4.9,1.8,Iris-virginica +5.5,2.5,4.0,1.3,Iris-versicolor +5.5,2.3,4.0,1.3,Iris-versicolor +4.6,3.6,1.0,0.2,Iris-setosa +5.8,4.0,1.2,0.2,Iris-setosa +5.1,2.5,3.0,1.1,Iris-versicolor +5.2,3.4,1.4,0.2,Iris-setosa +4.9,3.1,1.5,0.1,Iris-setosa +5.0,3.0,1.6,0.2,Iris-setosa +6.0,2.7,5.1,1.6,Iris-versicolor +4.9,3.1,1.5,0.1,Iris-setosa +5.8,2.6,4.0,1.2,Iris-versicolor +6.8,3.0,5.5,2.1,Iris-virginica +5.6,2.7,4.2,1.3,Iris-versicolor +4.8,3.4,1.9,0.2,Iris-setosa +6.2,2.8,4.8,1.8,Iris-virginica +4.6,3.2,1.4,0.2,Iris-setosa +6.2,2.2,4.5,1.5,Iris-versicolor +5.1,3.3,1.7,0.5,Iris-setosa +5.4,3.4,1.7,0.2,Iris-setosa +5.1,3.4,1.5,0.2,Iris-setosa +5.1,3.5,1.4,0.3,Iris-setosa +7.6,3.0,6.6,2.1,Iris-virginica +5.5,2.4,3.7,1.0,Iris-versicolor +6.4,3.1,5.5,1.8,Iris-virginica +7.9,3.8,6.4,2.0,Iris-virginica +6.3,2.5,4.9,1.5,Iris-versicolor diff --git a/examples/xgboost/requirements.txt b/examples/xgboost/requirements.txt new file mode 100644 index 0000000..10ddd5b --- /dev/null +++ b/examples/xgboost/requirements.txt @@ -0,0 +1 @@ +xgboost diff --git a/palladium/config.py b/palladium/config.py index 295a538..2829515 100644 --- a/palladium/config.py +++ b/palladium/config.py @@ -38,7 +38,7 @@ def __getitem__(self, name): class ComponentHandler: - key = '__factory__' + key = '!' 
def __init__(self, config): self.config = config @@ -63,6 +63,10 @@ def finish(self): component.initialize_component(self.config) +class ComponentHandler2(ComponentHandler): + key = '__factory__' + + class CopyHandler: key = '__copy__' @@ -131,9 +135,24 @@ def __call__(self, name, props): return props +def rewrite_handler(key_from, key_to): + class RewriteHandler: + key = key_from + target = key_to + + def __init__(self, config): + pass + + def __call__(self, name, props): + props[self.target] = props.pop(self.key) + return props + return RewriteHandler + + def _handlers_phase0(configs): return { Handler.key: Handler(configs) for Handler in [ + rewrite_handler('__factory__', '!'), CopyHandler, ] } diff --git a/palladium/persistence.py b/palladium/persistence.py index a106313..53820b4 100644 --- a/palladium/persistence.py +++ b/palladium/persistence.py @@ -183,7 +183,7 @@ def read(self, version=None): if version is None: props = self.list_properties() if 'active-model' not in props: - raise LookupError("No active model available") + raise LookupError("No active model available: " + self.path) version = props['active-model'] fname = self.path.format(version=version) + '.pkl.gz' @@ -196,7 +196,17 @@ def read(self, version=None): with self.io.open(fname, 'rb') as fh: with gzip.open(fh, 'rb') as f: - return pickle.load(f) + model = pickle.load(f) + + attachments = annotate(model).get('__attachments__', []) + for key in attachments: + fname_attach = self.attach_filename(version=version, key=key) + if self.io.exists(fname_attach): + with open(fname_attach, 'rb') as f: + data_attach = base64.b64encode(f.read()) + annotate(model, {key: data_attach}) + + return model def write(self, model): last_version = 0 @@ -207,11 +217,29 @@ def write(self, model): version = last_version + 1 li.append(annotate(model, {'version': version})) + annotations = annotate(model) + attachments = { + key: data + for key, data in annotations.items() + if key.startswith('attachments/') + } + if attachments: + for key in attachments: + del annotations[key] + annotations['__attachments__'] = tuple(attachments.keys()) + annotate(model, annotations) + fname = self.path.format(version=version) + '.pkl.gz' with self.io.open(fname, 'wb') as fh: with gzip.open(fh, 'wb') as f: pickle.dump(model, f) + if attachments: + for key, data in attachments.items(): + fname_attach = self.attach_filename(version=version, key=key) + with self.io.open(fname_attach, 'wb') as f: + f.write(base64.b64decode(data)) + self._update_md({'models': li}) return version @@ -230,19 +258,33 @@ def activate(self, version): self._update_md({'properties': md['properties']}) def delete(self, version): - md = self._read_md() - versions = [m['version'] for m in md['models']] version = int(version) - if version not in versions: + md = self._read_md() + try: + model_md = [m for m in md['models'] if m['version'] == version][0] + except IndexError: raise LookupError("No such version: {}".format(version)) + self._update_md({ 'models': [m for m in md['models'] if m['version'] != version]}) self.io.remove(self.path.format(version=version) + '.pkl.gz') + attachments = model_md.get('__attachments__', []) + for key in attachments: + fname_attach = self.attach_filename(version=version, key=key) + if self.io.exists(fname_attach): + self.io.remove(fname_attach) + @property def _md_filename(self): return self.path.format(version='metadata') + '.json' + def attach_filename(self, version, key): + return ( + self.path.format(version=version) + + 
'-{}'.format(key[len('attachments/'):]) + ) + def _read_md(self): if self.io.exists(self._md_filename): with self.io.open(self._md_filename, 'r') as f: @@ -670,6 +712,8 @@ def __init__(self, **kwargs): self.fs = s3fs.S3FileSystem(anon=False) def open(self, path, mode='r'): + # this is needed to avoid reading stale metadata JSONs + self.fs.invalidate_cache() return self.fs.open(path, mode=mode) def exists(self, path): @@ -688,9 +732,9 @@ class S3(FileLike): persister. For example, if you used the ``File`` persister before, change your config as follows: - - '__factory__': 'palladium.persistence.File', + - '!': 'palladium.persistence.File', - 'path': 'models/mymodel-{version}', - + '__factory__': 'palladium.persistence.S3', + + '!': 'palladium.persistence.S3', + 'path': 'your-s3-bucket/models/mymodel-{version}', Note that the first part of the path denotes the s3 bucket. diff --git a/palladium/server.py b/palladium/server.py index 1257b6c..2ce446a 100644 --- a/palladium/server.py +++ b/palladium/server.py @@ -35,7 +35,7 @@ def make_ujson_response(obj, status_code=200): :return: A Flask response. """ - json_encoded = ujson.encode(obj, ensure_ascii=False, double_precision=-1) + json_encoded = ujson.encode(obj, ensure_ascii=False) resp = make_response(json_encoded) resp.mimetype = 'application/json' resp.content_type = 'application/json; charset=utf-8' diff --git a/palladium/tests/test_config.py b/palladium/tests/test_config.py index b8b1f25..f8efe4e 100644 --- a/palladium/tests/test_config.py +++ b/palladium/tests/test_config.py @@ -86,7 +86,7 @@ def config1_fname(self, tmpdir): 'env': environ['ENV1'], 'here': here, 'blocking': { - '__factory__': 'palladium.tests.test_config.BlockingDummy', + '!': 'palladium.tests.test_config.BlockingDummy', } }""") return str(path) @@ -102,7 +102,7 @@ def config3_fname(self, tmpdir): path = tmpdir.join('config3.py') path.write("""{ 'bad': { - '__factory__': 'palladium.tests.test_config.BadDummy' + '!': 'palladium.tests.test_config.BadDummy' } }""") return str(path) @@ -169,14 +169,14 @@ def config1(self): dummy = 'palladium.tests.test_config.MyDummyComponent' return { 'mycomponent': { - '__factory__': dummy, + '!': dummy, 'arg1': 3, 'arg2': {'no': 'factory'}, 'subcomponent': { - '__factory__': dummy, + '!': dummy, 'arg1': { 'subsubcomponent': { - '__factory__': + '!': dummy, 'arg1': 'wobwob', 'arg2': 9, @@ -186,16 +186,16 @@ def config1(self): }, }, 'mylistofcomponents': [{ - '__factory__': dummy, + '!': dummy, 'arg1': 'wobwob', }, 'somethingelse', ], 'mynestedlistofcomponents': [[{ - '__factory__': dummy, + '!': dummy, 'arg1': 'feep', 'arg2': { - '__factory__': dummy, + '__factory__': dummy, # alternative to '!' 
'arg1': 6, }, }]], @@ -214,7 +214,7 @@ def config1(self): }, '__python__': """ -C['mynestedlistofcomponents'][0][0]['arg2']['__factory__'] = 'builtins:dict' +C['mynestedlistofcomponents'][0][0]['arg2']['!'] = 'builtins:dict' C['myotherconstant'] = 13 """, } diff --git a/palladium/tests/test_fit.py b/palladium/tests/test_fit.py index 5890e35..e7f0b43 100644 --- a/palladium/tests/test_fit.py +++ b/palladium/tests/test_fit.py @@ -438,7 +438,6 @@ def estimator(self): LogisticRegression(solver='liblinear'), param_grid={'C': [0.001, 0.01]}, cv=3, - iid=False, ) @pytest.mark.parametrize('backend', ['threading', 'sequential']) diff --git a/palladium/tests/test_persistence.py b/palladium/tests/test_persistence.py index e50f9b3..6672449 100644 --- a/palladium/tests/test_persistence.py +++ b/palladium/tests/test_persistence.py @@ -13,6 +13,8 @@ import requests_mock import pytest +from palladium.interfaces import annotate + class Dummy: def __init__(self, **kwargs): @@ -74,12 +76,14 @@ def test_read(self, File): patch('palladium.persistence.File.list_properties') as lp,\ patch('palladium.persistence.os.path.exists') as exists,\ patch('palladium.persistence.open') as open,\ + patch('palladium.persistence.annotate') as annotate,\ patch('palladium.persistence.gzip.open') as gzopen,\ patch('palladium.persistence.pickle.load') as load: lm.return_value = [{'version': 99}] lp.return_value = {'active-model': '99'} exists.side_effect = lambda fn: fn == '/models/model-99.pkl.gz' open.return_value = MagicMock() + annotate.return_value = {} result = File('/models/model-{version}').read() open.assert_called_with('/models/model-99.pkl.gz', 'rb') assert result == load.return_value @@ -89,11 +93,13 @@ def test_read_with_version(self, File): with patch('palladium.persistence.File.list_models') as lm,\ patch('palladium.persistence.os.path.exists') as exists,\ patch('palladium.persistence.open') as open,\ + patch('palladium.persistence.annotate') as annotate,\ patch('palladium.persistence.gzip.open') as gzopen,\ patch('palladium.persistence.pickle.load') as load: lm.return_value = [{'version': 99}] exists.side_effect = lambda fn: fn == '/models/model-432.pkl.gz' open.return_value = MagicMock() + annotate.return_value = {} result = File('/models/model-{version}').read(432) open.assert_called_with('/models/model-432.pkl.gz', 'rb') assert result == load.return_value @@ -104,20 +110,22 @@ def test_read_no_model(self, File): patch('palladium.persistence.File.list_properties') as lp: lp.return_value = {} lm.return_value = [] - f = File('/models/model-{version}') + filename = '/models/model-{version}' + f = File(filename) with pytest.raises(LookupError) as exc: f.read() - assert exc.value.args[0] == 'No active model available' + assert exc.value.args[0] == 'No active model available: {}'.format(filename) def test_read_no_active_model(self, File): with patch('palladium.persistence.File.list_models') as lm,\ patch('palladium.persistence.File.list_properties') as lp: lp.return_value = {} lm.return_value = [{'version': 99}] - f = File('/models/model-{version}') + filename = '/models/model-{version}' + f = File(filename) with pytest.raises(LookupError) as exc: f.read() - assert exc.value.args[0] == 'No active model available' + assert exc.value.args[0] == 'No active model available: {}'.format(filename) def test_read_no_model_with_given_version(self, File): with patch('palladium.persistence.os.path.exists') as exists: @@ -385,6 +393,69 @@ def test_upgrade_1_0_no_metadata(self, File): dump.assert_called_with(new_md, open_rv, 
indent=4) +class TestFileAttachments: + @pytest.fixture + def persister(self, tmpdir): + from palladium.persistence import File + model1 = Dummy() + annotate(model1, {'attachments/myatt.txt': 'aGV5', + 'attachments/my2ndatt.txt': 'aG8='}) + model2 = Dummy() + annotate(model2, {'attachments/myatt.txt': 'aG8='}) + persister = File(str(tmpdir) + '/model-{version}') + persister.write(model1) + persister.write(model2) + return persister + + def test_filenames(self, persister, tmpdir): + # Attachment files are namespaced by the model: + assert sorted(os.listdir(tmpdir)) == [ + 'model-1-my2ndatt.txt', 'model-1-myatt.txt', 'model-1.pkl.gz', + 'model-2-myatt.txt', 'model-2.pkl.gz', + 'model-metadata.json', + ] + + def test_attachment_file_contents(self, persister, tmpdir): + # Attachment data is written to files system decoded: + with open(tmpdir + '/model-1-myatt.txt', 'rb') as f: + assert f.read() == b'hey' + with open(tmpdir + '/model-1-my2ndatt.txt', 'rb') as f: + assert f.read() == b'ho' + with open(tmpdir + '/model-2-myatt.txt', 'rb') as f: + assert f.read() == b'ho' + + def test_attachment_not_in_metadata_file(self, persister, tmpdir): + # Attachment data is not written to the metadata file: + with open(tmpdir + '/model-metadata.json') as f: + md = json.loads(f.read()) + assert len(md['models']) == 2 + for model_md in md['models']: + assert 'attachments/myatt.txt' not in model_md + + def test_attachment_not_in_pickle(self, persister, tmpdir): + # Attachment data is not pickled as part of the model: + with open(tmpdir + '/model-1.pkl.gz', 'rb') as fh: + with gzip.open(fh, 'rb') as f: + model1 = pickle.load(f) + assert 'attachments/myatt.txt' not in annotate(model1) + + def test_loaded_back_on_read(self, persister, tmpdir): + # Attachment is read back from the file into metadata + # dictionary on read: + model1 = persister.read(version=1) + assert annotate(model1)['attachments/myatt.txt'] == b'aGV5' + assert annotate(model1)['attachments/my2ndatt.txt'] == b'aG8=' + + def test_deleted_on_delete(self, persister, tmpdir): + # Attachment files are removed from the file system when a + # model is deleted: + persister.delete(1) + assert sorted(os.listdir(tmpdir)) == [ + 'model-2-myatt.txt', 'model-2.pkl.gz', + 'model-metadata.json', + ] + + class TestDatabase: @pytest.fixture def Database(self): @@ -665,7 +736,7 @@ def handle_put_md(request, context): def test_download(self, mocked_requests, persister): """ test download and activation of a model """ - expected = Dummy(name='mymodel') + expected = Dummy(name='mymodel', __metadata__={}) zipped_model = gzip.compress(pickle.dumps(expected)) get_md_url = "%s/mymodel-metadata.json" % (self.base_url,) diff --git a/palladium/tests/test_server.py b/palladium/tests/test_server.py index 2062b3c..86f5d7d 100644 --- a/palladium/tests/test_server.py +++ b/palladium/tests/test_server.py @@ -320,7 +320,7 @@ def test_entry_point_not_set( config['model_persister'] = Mock() config['predict_service'] = { - '__factory__': 'palladium.server.PredictService', + '!': 'palladium.server.PredictService', 'mapping': [ ('param', 'str'), ], @@ -342,14 +342,14 @@ def test_entry_point_multiple( config['model_persister'] = Mock() config['my_predict_service'] = { - '__factory__': 'palladium.server.PredictService', + '!': 'palladium.server.PredictService', 'mapping': [ ('param', 'str'), ], 'entry_point': '/predict1', } config['my_predict_service2'] = { - '__factory__': 'palladium.server.PredictService', + '!': 'palladium.server.PredictService', 'mapping': [ ('param', 'str'), ], @@ 
-381,14 +381,14 @@ def test_entry_point_multiple_conflict( config['model_persister'] = Mock() config['my_predict_service'] = { - '__factory__': 'palladium.server.PredictService', + '!': 'palladium.server.PredictService', 'mapping': [ ('param', 'str'), ], 'entry_point': '/predict1', # <-- } config['my_predict_service2'] = { - '__factory__': 'palladium.server.PredictService', + '!': 'palladium.server.PredictService', 'mapping': [ ('param', 'str'), ], @@ -665,7 +665,7 @@ def test_it(self, fit, config, jobs, flask_app): config['model_persister'] = model_persister with flask_app.test_request_context(method='POST'): resp = fit() - sleep(0.02) + sleep(0.05) resp_json = json.loads(resp.get_data(as_text=True)) job = jobs[resp_json['job_id']] assert job['status'] == 'finished' diff --git a/palladium/tests/test_util.py b/palladium/tests/test_util.py index a584c3f..c6fb99c 100644 --- a/palladium/tests/test_util.py +++ b/palladium/tests/test_util.py @@ -424,12 +424,12 @@ def myfunc(tts): num_threads_before = len(threading.enumerate()) for i in range(3): - run_job(myfunc, tts=i/100) + run_job(myfunc, tts=5*i/100) job1, job2, job3 = sorted(jobs.values(), key=lambda x: x['started']) samples = [] - for i in range(10): + for i in range(50): samples.append(( job1['status'], job2['status'], diff --git a/requirements-dev.txt b/requirements-dev.txt index 13b14ba..a74eca6 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -1,6 +1,6 @@ cov-core==1.15.0 -coverage==4.5.4 -pluggy==0.12.0 -pytest==5.0.1 -pytest-cov==2.7.1 -requests-mock==1.6.0 +coverage==5.1 +pluggy==0.13.1 +pytest==5.4.1 +pytest-cov==2.8.1 +requests-mock==1.8.0 diff --git a/requirements.txt b/requirements.txt index 651e5c0..6cc5941 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,24 +1,24 @@ -certifi==2019.6.16 +certifi==2020.4.5.1 chardet==3.0.4 -Click==7.0 +Click==7.1.2 docopt==0.6.2 -Flask==1.1.1 -idna==2.8 +Flask==1.1.2 +idna==2.9 itsdangerous==1.1.0 -Jinja2==2.10.1 -joblib==0.13.2 +Jinja2==2.11.2 +joblib==0.14.1 MarkupSafe==1.1.1 -numpy==1.17.0 -pandas==0.24.2 -psutil==5.6.3 -python-dateutil==2.8.0 -pytz==2019.2 -requests==2.22.0 -scikit-learn==0.21.3 -scipy==1.3.0 -six==1.12.0 -SQLAlchemy==1.3.6 -ujson==1.35 -urllib3==1.25.3 -Werkzeug==0.15.5 +numpy==1.18.4 +pandas==1.0.3 +psutil==5.7.0 +python-dateutil==2.8.1 +pytz==2020.1 +requests==2.23.0 +scikit-learn==0.22.2.post1 +scipy==1.4.1 +six==1.14.0 +SQLAlchemy==1.3.16 +ujson==2.0.3 +urllib3==1.25.9 +Werkzeug==1.0.1 # julia==0.4.5 diff --git a/setup.py b/setup.py index a96ed66..65a17e2 100644 --- a/setup.py +++ b/setup.py @@ -23,8 +23,8 @@ ] docs_require = [ - 'julia', - 'rpy2', + # 'julia', + # 'rpy2', 'Sphinx', 'sphinx_rtd_theme', ]
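
The two user-facing changes above, the ``!`` shorthand for ``__factory__`` and model attachments, can be tried out with short sketches like the ones below. They are pieced together from the updated docs and the new ``TestFileAttachments`` tests in this diff; the concrete paths, file names, and the stand-in ``Model`` class are illustrative only, not part of the change itself.

Both spellings of the factory key are accepted: a phase-0 ``rewrite_handler('__factory__', '!')`` rewrites the old key to the new one before components are instantiated, so existing configurations keep working. A minimal configuration in the style of the iris example:

.. code-block:: python

    # config.py: '!' is the new spelling of '__factory__'; both keys work.
    {
        'model': {
            '!': 'sklearn.linear_model.LogisticRegression',
            'C': 0.3,
        },
        'model_persister': {
            '__factory__': 'palladium.persistence.File',  # old key, still accepted
            'path': 'model-{version}.pickle',
        },
    }

The attachments support in ``palladium.persistence.File`` takes base64-encoded metadata entries stored under ``attachments/<name>`` keys, writes them as separate files next to the pickle on ``write()``, and loads them back into the model's metadata on ``read()``. A sketch based on the ``TestFileAttachments`` tests (it writes ``model-1.pkl.gz``, ``model-1-report.txt``, and ``model-metadata.json`` into the current directory):

.. code-block:: python

    import base64

    from palladium.interfaces import annotate
    from palladium.persistence import File


    class Model:  # stand-in for a fitted estimator
        pass


    persister = File('model-{version}')  # illustrative path template
    model = Model()

    # Attachment values are base64-encoded and live under 'attachments/<name>'.
    annotate(model, {'attachments/report.txt': base64.b64encode(b'hello')})

    version = persister.write(model)  # decoded bytes end up in 'model-1-report.txt'
    loaded = persister.read(version=version)

    # On read, attachments are loaded back into the metadata, base64-encoded:
    assert annotate(loaded)['attachments/report.txt'] == base64.b64encode(b'hello')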