From 6821507cb34618254f631bf6927673ccfc2ae171 Mon Sep 17 00:00:00 2001 From: Patrick Date: Tue, 30 Jul 2019 21:53:33 +0200 Subject: [PATCH] update tf tests --- docker-compose-dev.yml | 2 +- .../backends/tensorflow/tfestimatormodel.py | 33 ++++-- omegaml/restapi/tests/test_api.py | 2 +- omegaml/store/base.py | 2 +- omegaml/tests/test_tfestimator.py | 102 ++++++++++-------- 5 files changed, 87 insertions(+), 54 deletions(-) diff --git a/docker-compose-dev.yml b/docker-compose-dev.yml index fcf38482..41cd26aa 100644 --- a/docker-compose-dev.yml +++ b/docker-compose-dev.yml @@ -5,7 +5,7 @@ services: ports: - "27019:27017" - "27020:28017" - command: ["--auth", "--replSet", "rs0", "--oplogSize", "100"] + command: ["--auth", "--oplogSize", "100"] rabbitmq: image: rabbitmq ports: diff --git a/omegaml/backends/tensorflow/tfestimatormodel.py b/omegaml/backends/tensorflow/tfestimatormodel.py index 2d4739a2..7ba45c06 100644 --- a/omegaml/backends/tensorflow/tfestimatormodel.py +++ b/omegaml/backends/tensorflow/tfestimatormodel.py @@ -1,3 +1,4 @@ +# import glob import glob import os import tempfile @@ -24,10 +25,23 @@ class TFEstimatorModel(object): The estimator_fn returns a tf.estimator.Estimator or subclass. """ - def __init__(self, estimator_fn, model_dir=None): + def __init__(self, estimator_fn, input_fn=None, model_dir=None): + """ + + Args: + estimator_fn (func): the function to return a valid tf.estimator.Estimator instance. Called as + fn(model_dir=) + input_fn (func|dict): the function to create the input_fn as fn(mode, X, Y, batch_size=n), where mode + is either 'fit', 'evaluate', or 'predict'. If not provide defaults to an input_fn + that tries to infer the correct input_fn from the method and input arguments. If + provided as a dict, must contain the 'fit', 'evaluate' and 'predict' keys where + each value is a valid input_fn as fn(X, Y, batch_size=n). + model_dir (str): the model directory to use. Defaults to whatever estimator_fn/Estimator instance sets + """ self.estimator_fn = estimator_fn - self._model_dir = None + self._model_dir = model_dir self._estimator = None + self._input_fn = input_fn @property def model_dir(self): @@ -44,11 +58,12 @@ def restore(self, model_dir): self._model_dir = model_dir return self - def make_input_fn(self, X, Y, batch_size=1): + def make_input_fn(self, mode, X, Y=None, batch_size=1): """ Return a tf.data.Dataset from the input provided Args: + mode (str): calling mode, either 'fit', 'predict' or 'evaluate' X (NDArray|Tensor|Dataset): features, or Dataset of (features, labels) Y (NDArray|Tensor|Dataset): labels, optional @@ -67,6 +82,12 @@ def make_input_fn(self, X, Y, batch_size=1): import pandas as pd import numpy as np + if self._input_fn is not None: + if isinstance(self._input_fn, dict): + return self._input_fn[mode](X, Y=Y, batch_size=batch_size) + else: + return self._input_fn(mode, X, Y=Y, batch_size=batch_size) + def input_fn(): # if we have a dataset, use that if isinstance(X, tf.data.Dataset): @@ -97,7 +118,7 @@ def fit(self, X=None, Y=None, input_fn=None, batch_size=100, **kwargs): """ assert (ok(X, object) or ok(input_fn, object)), "specify either X, Y or input_fn, not both" if input_fn is None: - input_fn = self.make_input_fn(X, Y, batch_size=batch_size) + input_fn = self.make_input_fn('fit', X, Y, batch_size=batch_size) return self.estimator.train(input_fn=input_fn) def score(self, X=None, Y=None, input_fn=None, batch_size=100, **kwargs): @@ -108,7 +129,7 @@ def score(self, X=None, Y=None, input_fn=None, batch_size=100, **kwargs): """ assert (ok(X, object) or ok(input_fn, object)), "specify either X, Y or input_fn, not both" if input_fn is None: - input_fn = self.make_input_fn(X, Y, batch_size=batch_size) + input_fn = self.make_input_fn('score', X, Y, batch_size=batch_size) return self.estimator.evaluate(input_fn=input_fn) def predict(self, X=None, Y=None, input_fn=None, batch_size=1, **kwargs): @@ -121,7 +142,7 @@ def predict(self, X=None, Y=None, input_fn=None, batch_size=1, **kwargs): options2 = (X is not None) and (input_fn is None) assert options1 or options2, "specify either X, Y or input_fn, not both" if input_fn is None: - input_fn = self.make_input_fn(X, Y, batch_size=batch_size) + input_fn = self.make_input_fn('predict', X, Y, batch_size=batch_size) return self.estimator.predict(input_fn=input_fn) diff --git a/omegaml/restapi/tests/test_api.py b/omegaml/restapi/tests/test_api.py index f727d1e9..371424e5 100644 --- a/omegaml/restapi/tests/test_api.py +++ b/omegaml/restapi/tests/test_api.py @@ -30,7 +30,7 @@ def test_predict(self): self.assertEqual(resp.status_code, 200) data = resp.get_json() self.assertEqual(data.get('model'), 'regression') - self.assertEqual(data.get('result'), [[10.]]) + self.assertEqual(data.get('result'), [10.]) def test_dataset_query(self): om = self.om diff --git a/omegaml/store/base.py b/omegaml/store/base.py index 1ff8c246..fb965fc9 100644 --- a/omegaml/store/base.py +++ b/omegaml/store/base.py @@ -386,7 +386,7 @@ def put(self, obj, name, attributes=None, **kwargs): **kwargs) elif isinstance(obj, (dict, list, tuple)): if kwargs.pop('as_hdf', False): - self.put_pyobj_as_hdf(obj, name, + return self.put_pyobj_as_hdf(obj, name, attributes=attributes, **kwargs) return self.put_pyobj_as_document(obj, name, attributes=attributes, diff --git a/omegaml/tests/test_tfestimator.py b/omegaml/tests/test_tfestimator.py index ad7afa3f..a209717c 100644 --- a/omegaml/tests/test_tfestimator.py +++ b/omegaml/tests/test_tfestimator.py @@ -1,3 +1,4 @@ +from inspect import isfunction from unittest import TestCase from omegaml import Omega @@ -25,13 +26,30 @@ def make_data(): return train_x, train_y, test_x, test_y -def make_estimator(model_dir=None): - import tensorflow as tf - feature_columns = [tf.feature_column.numeric_column(key=key) - for key in ['f1', 'f2', 'f3', 'f4']] - classifier = tf.estimator.LinearClassifier(feature_columns=feature_columns, - n_classes=3, model_dir=model_dir) - return classifier +def make_estimator_fn(): + # this is to ensure we get a serializable function + def make_estimator(model_dir=None): + import tensorflow as tf + feature_columns = [tf.feature_column.numeric_column(key=key) + for key in ['f1', 'f2', 'f3', 'f4']] + classifier = tf.estimator.LinearClassifier(feature_columns=feature_columns, + n_classes=3, model_dir=model_dir) + return classifier + + return make_estimator + + +def make_input_fn(): + # create classifier and save untrained + # we need to use a custom input_fn as the default won't be able to figure + # out column names from numpy inputs + def input_fn(mode, X, Y=None, batch_size=1): + import tensorflow as tf + X = { + 'f{}'.format(i + 1): X[:, i] for i in range(X.shape[1]) + } + return tf.estimator.inputs.numpy_input_fn(x=X, y=Y, num_epochs=1, shuffle=False) + return input_fn class TFEstimatorModelBackendTests(OmegaTestMixin, TestCase): @@ -45,7 +63,7 @@ def test_fit_predict(self): import tensorflow as tf om = self.om # create classifier - estmdl = TFEstimatorModel(estimator_fn=make_estimator) + estmdl = TFEstimatorModel(estimator_fn=make_estimator_fn()) train_x, train_y, test_x, test_y = make_data() classifier = estmdl.fit(train_x, train_y) self.assertIsInstance(classifier, tf.estimator.LinearClassifier) @@ -64,15 +82,13 @@ def test_fit_predict(self): def test_fit_predict_from_numpy(self): import tensorflow as tf om = self.om - # create classifier - estmdl = TFEstimatorModel(estimator_fn=make_estimator) + # note we use a custom input_fn + estmdl = TFEstimatorModel(estimator_fn=make_estimator_fn(), input_fn=make_input_fn()) train_x, train_y, test_x, test_y = make_data() # create a feature dict from a numpy array - train_x = train_x.as_matrix() # numpy - train_x = { - 'f{}'.format(i + 1): train_x[:, i] for i in range(train_x.shape[1]) - } # dict of features + train_x = train_x.as_matrix() # numpy train_y = train_y.as_matrix() + test_x = test_x.as_matrix() classifier = estmdl.fit(train_x, train_y) self.assertIsInstance(classifier, tf.estimator.LinearClassifier) # score @@ -90,7 +106,7 @@ def test_fit_predict_from_numpy(self): def test_save_load_unfitted(self): om = self.om # create classifier and save - estmdl = TFEstimatorModel(estimator_fn=make_estimator) + estmdl = TFEstimatorModel(estimator_fn=make_estimator_fn()) meta = om.models.put(estmdl, 'estimator-model') # restore and use estmdl_r = om.models.get('estimator-model') @@ -103,11 +119,27 @@ def test_save_load_unfitted(self): self.assertIn('probabilities', predict[0]) self.assertIn('classes', predict[0]) + def test_save_load_estimator_model(self): + import tensorflow as tf + om = self.om + # create classifier and save + estmdl = TFEstimatorModel(estimator_fn=make_estimator_fn()) + meta = om.models.put(estmdl, 'estimator-model') + # restore and use + estmdl_r = om.models.get('estimator-model') + # check we have a restored instance + self.assertIsNot(estmdl_r, estmdl) + self.assertNotEqual(estmdl.estimator.model_dir, estmdl_r.estimator.model_dir) + self.assertIsInstance(estmdl_r, estmdl.__class__) + self.assertIsNot(estmdl_r.estimator_fn, make_estimator_fn()) + self.assertTrue(isfunction(estmdl.estimator_fn)) + self.assertIsInstance(estmdl_r.estimator_fn(), tf.estimator.Estimator) + def test_save_load_fitted(self): import numpy as np om = self.om # create classifier and save - estmdl = TFEstimatorModel(estimator_fn=make_estimator) + estmdl = TFEstimatorModel(estimator_fn=make_estimator_fn()) train_x, train_y, test_x, test_y = make_data() estmdl.fit(train_x, train_y) predict = [v for v in estmdl.predict(test_x)] @@ -126,7 +158,7 @@ def test_save_load_fitted_inerror(self): import numpy as np om = self.om # create classifier and save untrained - estmdl = TFEstimatorModel(estimator_fn=make_estimator) + estmdl = TFEstimatorModel(estimator_fn=make_estimator_fn()) om.models.put(estmdl, 'estimator-model') # create dataasets train_x, train_y, test_x, test_y = make_data() @@ -150,8 +182,9 @@ def test_save_load_fitted_inerror(self): def test_runtime_fit(self): import pandas as pd om = self.om - # create classifier and save untrained - estmdl = TFEstimatorModel(estimator_fn=make_estimator) + # create classifier and save untrained, note we use the default input_fn + # provided by TFEstimatorModel as it deals easily with DataFrames + estmdl = TFEstimatorModel(estimator_fn=make_estimator_fn()) train_x, train_y, test_x, test_y = make_data() om.datasets.put(train_x, 'train_x', append=False) om.datasets.put(train_y, 'train_y', append=False) @@ -164,37 +197,17 @@ def test_runtime_fit(self): result = om.runtime.model('estimator-model').predict('test_x').get() self.assertIsInstance(result, pd.DataFrame) - def test_runtime_predict(self): - import pandas as pd - om = self.om - # create classifier and save untrained - estmdl = TFEstimatorModel(estimator_fn=make_estimator) - train_x, train_y, test_x, test_y = make_data() - om.datasets.put(train_x, 'train_x', append=False) - om.datasets.put(train_y, 'train_y', append=False) - om.models.put(estmdl, 'estimator-model') - meta = om.runtime.model('estimator-model').fit('train_x', 'train_y').get() - self.assertIsInstance(meta, Metadata) - # predict using fitted model in runtime - om.datasets.put(test_x, 'test_x', append=False) - om.datasets.put(test_y, 'test_y', append=False) - result = om.runtime.model('estimator-model').predict('test_x').get() - self.assertIsInstance(result, pd.Series) - self.assertAlmostEqual(result['accuracy'], 1.0) - def test_runtime_predict_from_numpy(self): import pandas as pd om = self.om - # create classifier and save untrained - estmdl = TFEstimatorModel(estimator_fn=make_estimator) + estmdl = TFEstimatorModel(estimator_fn=make_estimator_fn(), input_fn=make_input_fn()) train_x, train_y, test_x, test_y = make_data() train_x = train_x.as_matrix() # numpy - train_x = { - 'f{}'.format(i + 1): train_x[:, i].tolist() for i in range(train_x.shape[1]) - } # dict of features train_y = train_y.as_matrix() + test_x = test_x.as_matrix() om.datasets.put(train_x, 'train_x') om.datasets.put(train_y, 'train_y') + om.datasets.put(test_x, 'test_x') om.models.put(estmdl, 'estimator-model') meta = om.runtime.model('estimator-model').fit('train_x', 'train_y').get() self.assertIsInstance(meta, Metadata) @@ -202,14 +215,13 @@ def test_runtime_predict_from_numpy(self): om.datasets.put(test_x, 'test_x', append=False) om.datasets.put(test_y, 'test_y', append=False) result = om.runtime.model('estimator-model').predict('test_x').get() - self.assertIsInstance(result, pd.Series) - self.assertAlmostEqual(result['accuracy'], 1.0) + self.assertIsInstance(result, pd.DataFrame) def test_runtime_score(self): import pandas as pd om = self.om # create classifier and save untrained - estmdl = TFEstimatorModel(estimator_fn=make_estimator) + estmdl = TFEstimatorModel(estimator_fn=make_estimator_fn()) train_x, train_y, test_x, test_y = make_data() om.datasets.put(train_x, 'train_x', append=False) om.datasets.put(train_y, 'train_y', append=False)