From 6821507cb34618254f631bf6927673ccfc2ae171 Mon Sep 17 00:00:00 2001
From: Patrick <miraculixx@gmx.ch>
Date: Tue, 30 Jul 2019 21:53:33 +0200
Subject: [PATCH] update tf tests

---
 docker-compose-dev.yml                        |   2 +-
 .../backends/tensorflow/tfestimatormodel.py   |  33 ++++--
 omegaml/restapi/tests/test_api.py             |   2 +-
 omegaml/store/base.py                         |   2 +-
 omegaml/tests/test_tfestimator.py             | 102 ++++++++++--------
 5 files changed, 87 insertions(+), 54 deletions(-)

diff --git a/docker-compose-dev.yml b/docker-compose-dev.yml
index fcf38482..41cd26aa 100644
--- a/docker-compose-dev.yml
+++ b/docker-compose-dev.yml
@@ -5,7 +5,7 @@ services:
     ports:
       - "27019:27017"
       - "27020:28017"
-    command: ["--auth", "--replSet", "rs0", "--oplogSize", "100"]
+    command: ["--auth", "--oplogSize", "100"]
   rabbitmq:
     image: rabbitmq
     ports:
diff --git a/omegaml/backends/tensorflow/tfestimatormodel.py b/omegaml/backends/tensorflow/tfestimatormodel.py
index 2d4739a2..7ba45c06 100644
--- a/omegaml/backends/tensorflow/tfestimatormodel.py
+++ b/omegaml/backends/tensorflow/tfestimatormodel.py
@@ -1,3 +1,4 @@
+# import glob
 import glob
 import os
 import tempfile
@@ -24,10 +25,23 @@ class TFEstimatorModel(object):
         The estimator_fn returns a tf.estimator.Estimator or subclass.
     """
 
-    def __init__(self, estimator_fn, model_dir=None):
+    def __init__(self, estimator_fn, input_fn=None, model_dir=None):
+        """
+        Wrap an estimator factory, with an optional custom input_fn and model directory.
+        Args:
+            estimator_fn (func): the function to return a valid tf.estimator.Estimator instance. Called as
+                                 fn(model_dir=)
+            input_fn (func|dict): the function to create the input_fn as fn(mode, X, Y, batch_size=n), where mode
+                                  is either 'fit', 'evaluate', or 'predict'. If not provided, defaults to an input_fn
+                                  that tries to infer the correct input_fn from the method and input arguments. If
+                                  provided as a dict, must contain the 'fit', 'evaluate' and 'predict' keys where
+                                  each value is a valid input_fn as fn(X, Y, batch_size=n).
+            model_dir (str): the model directory to use. Defaults to whatever estimator_fn/Estimator instance sets
+        """
         self.estimator_fn = estimator_fn
-        self._model_dir = None
+        self._model_dir = model_dir
         self._estimator = None
+        self._input_fn = input_fn
 
     @property
     def model_dir(self):
@@ -44,11 +58,12 @@ def restore(self, model_dir):
         self._model_dir = model_dir
         return self
 
-    def make_input_fn(self, X, Y, batch_size=1):
+    def make_input_fn(self, mode, X, Y=None, batch_size=1):
         """
         Return a tf.data.Dataset from the input provided
 
         Args:
+            mode (str): calling mode, either 'fit', 'predict' or 'evaluate'
             X (NDArray|Tensor|Dataset): features, or Dataset of (features, labels)
             Y (NDArray|Tensor|Dataset): labels, optional
 
@@ -67,6 +82,12 @@ def make_input_fn(self, X, Y, batch_size=1):
         import pandas as pd
         import numpy as np
 
+        if self._input_fn is not None:
+            if isinstance(self._input_fn, dict):
+                return self._input_fn[mode](X, Y=Y, batch_size=batch_size)
+            else:
+                return self._input_fn(mode, X, Y=Y, batch_size=batch_size)
+
         def input_fn():
             # if we have a dataset, use that
             if isinstance(X, tf.data.Dataset):
@@ -97,7 +118,7 @@ def fit(self, X=None, Y=None, input_fn=None, batch_size=100, **kwargs):
         """
         assert (ok(X, object) or ok(input_fn, object)), "specify either X, Y or input_fn, not both"
         if input_fn is None:
-            input_fn = self.make_input_fn(X, Y, batch_size=batch_size)
+            input_fn = self.make_input_fn('fit', X, Y, batch_size=batch_size)
         return self.estimator.train(input_fn=input_fn)
 
     def score(self, X=None, Y=None, input_fn=None, batch_size=100, **kwargs):
@@ -108,7 +129,7 @@ def score(self, X=None, Y=None, input_fn=None, batch_size=100, **kwargs):
         """
         assert (ok(X, object) or ok(input_fn, object)), "specify either X, Y or input_fn, not both"
         if input_fn is None:
-            input_fn = self.make_input_fn(X, Y, batch_size=batch_size)
+            input_fn = self.make_input_fn('evaluate', X, Y, batch_size=batch_size)  # documented mode key
         return self.estimator.evaluate(input_fn=input_fn)
 
     def predict(self, X=None, Y=None, input_fn=None, batch_size=1, **kwargs):
@@ -121,7 +142,7 @@ def predict(self, X=None, Y=None, input_fn=None, batch_size=1, **kwargs):
         options2 = (X is not None) and (input_fn is None)
         assert options1 or options2, "specify either X, Y or input_fn, not both"
         if input_fn is None:
-            input_fn = self.make_input_fn(X, Y, batch_size=batch_size)
+            input_fn = self.make_input_fn('predict', X, Y, batch_size=batch_size)
         return self.estimator.predict(input_fn=input_fn)
 
 
diff --git a/omegaml/restapi/tests/test_api.py b/omegaml/restapi/tests/test_api.py
index f727d1e9..371424e5 100644
--- a/omegaml/restapi/tests/test_api.py
+++ b/omegaml/restapi/tests/test_api.py
@@ -30,7 +30,7 @@ def test_predict(self):
         self.assertEqual(resp.status_code, 200)
         data = resp.get_json()
         self.assertEqual(data.get('model'), 'regression')
-        self.assertEqual(data.get('result'), [[10.]])
+        self.assertEqual(data.get('result'), [10.])
 
     def test_dataset_query(self):
         om = self.om
diff --git a/omegaml/store/base.py b/omegaml/store/base.py
index 1ff8c246..fb965fc9 100644
--- a/omegaml/store/base.py
+++ b/omegaml/store/base.py
@@ -386,7 +386,7 @@ def put(self, obj, name, attributes=None, **kwargs):
                                            **kwargs)
         elif isinstance(obj, (dict, list, tuple)):
             if kwargs.pop('as_hdf', False):
-                self.put_pyobj_as_hdf(obj, name,
+                return self.put_pyobj_as_hdf(obj, name,
                                       attributes=attributes, **kwargs)
             return self.put_pyobj_as_document(obj, name,
                                               attributes=attributes,
diff --git a/omegaml/tests/test_tfestimator.py b/omegaml/tests/test_tfestimator.py
index ad7afa3f..a209717c 100644
--- a/omegaml/tests/test_tfestimator.py
+++ b/omegaml/tests/test_tfestimator.py
@@ -1,3 +1,4 @@
+from inspect import isfunction
 from unittest import TestCase
 
 from omegaml import Omega
@@ -25,13 +26,30 @@ def make_data():
     return train_x, train_y, test_x, test_y
 
 
-def make_estimator(model_dir=None):
-    import tensorflow as tf
-    feature_columns = [tf.feature_column.numeric_column(key=key)
-                       for key in ['f1', 'f2', 'f3', 'f4']]
-    classifier = tf.estimator.LinearClassifier(feature_columns=feature_columns,
-                                               n_classes=3, model_dir=model_dir)
-    return classifier
+def make_estimator_fn():
+    # factory: each call defines a fresh nested function, to ensure it is serializable
+    def make_estimator(model_dir=None):
+        import tensorflow as tf
+        feature_columns = [tf.feature_column.numeric_column(key=key)
+                           for key in ['f1', 'f2', 'f3', 'f4']]
+        classifier = tf.estimator.LinearClassifier(feature_columns=feature_columns,
+                                                   n_classes=3, model_dir=model_dir)
+        return classifier
+
+    return make_estimator
+
+
+def make_input_fn():
+    # return a custom input_fn for numpy inputs: the default input_fn won't be
+    # able to figure out column names from numpy inputs, so columns are named
+    # f1..fn here; mode and batch_size are accepted but not used
+    def input_fn(mode, X, Y=None, batch_size=1):
+        import tensorflow as tf
+        X = {
+            'f{}'.format(i + 1): X[:, i] for i in range(X.shape[1])
+        }
+        return tf.estimator.inputs.numpy_input_fn(x=X, y=Y, num_epochs=1, shuffle=False)
+    return input_fn
 
 
 class TFEstimatorModelBackendTests(OmegaTestMixin, TestCase):
@@ -45,7 +63,7 @@ def test_fit_predict(self):
         import tensorflow as tf
         om = self.om
         # create classifier
-        estmdl = TFEstimatorModel(estimator_fn=make_estimator)
+        estmdl = TFEstimatorModel(estimator_fn=make_estimator_fn())
         train_x, train_y, test_x, test_y = make_data()
         classifier = estmdl.fit(train_x, train_y)
         self.assertIsInstance(classifier, tf.estimator.LinearClassifier)
@@ -64,15 +82,13 @@ def test_fit_predict(self):
     def test_fit_predict_from_numpy(self):
         import tensorflow as tf
         om = self.om
-        # create classifier
-        estmdl = TFEstimatorModel(estimator_fn=make_estimator)
+        # note we use a custom input_fn
+        estmdl = TFEstimatorModel(estimator_fn=make_estimator_fn(), input_fn=make_input_fn())
         train_x, train_y, test_x, test_y = make_data()
         # create a feature dict from a numpy array
-        train_x = train_x.as_matrix() # numpy
-        train_x = {
-            'f{}'.format(i + 1): train_x[:, i] for i in range(train_x.shape[1])
-        } # dict of features
+        train_x = train_x.as_matrix()  # numpy
         train_y = train_y.as_matrix()
+        test_x = test_x.as_matrix()
         classifier = estmdl.fit(train_x, train_y)
         self.assertIsInstance(classifier, tf.estimator.LinearClassifier)
         # score
@@ -90,7 +106,7 @@ def test_fit_predict_from_numpy(self):
     def test_save_load_unfitted(self):
         om = self.om
         # create classifier and save
-        estmdl = TFEstimatorModel(estimator_fn=make_estimator)
+        estmdl = TFEstimatorModel(estimator_fn=make_estimator_fn())
         meta = om.models.put(estmdl, 'estimator-model')
         # restore and use
         estmdl_r = om.models.get('estimator-model')
@@ -103,11 +119,27 @@ def test_save_load_unfitted(self):
         self.assertIn('probabilities', predict[0])
         self.assertIn('classes', predict[0])
 
+    def test_save_load_estimator_model(self):
+        """A stored model is restored as a distinct instance with a usable estimator_fn."""
+        import tensorflow as tf
+        # create classifier and save
+        estmdl = TFEstimatorModel(estimator_fn=make_estimator_fn())
+        self.om.models.put(estmdl, 'estimator-model')
+        # restore and use
+        estmdl_r = self.om.models.get('estimator-model')
+        # compare the restored objects against the originals, not against a fresh closure
+        self.assertIsNot(estmdl_r, estmdl)
+        self.assertNotEqual(estmdl.estimator.model_dir, estmdl_r.estimator.model_dir)
+        self.assertIsInstance(estmdl_r, estmdl.__class__)
+        self.assertIsNot(estmdl_r.estimator_fn, estmdl.estimator_fn)
+        self.assertTrue(isfunction(estmdl_r.estimator_fn))
+        self.assertIsInstance(estmdl_r.estimator_fn(), tf.estimator.Estimator)
+
     def test_save_load_fitted(self):
         import numpy as np
         om = self.om
         # create classifier and save
-        estmdl = TFEstimatorModel(estimator_fn=make_estimator)
+        estmdl = TFEstimatorModel(estimator_fn=make_estimator_fn())
         train_x, train_y, test_x, test_y = make_data()
         estmdl.fit(train_x, train_y)
         predict = [v for v in estmdl.predict(test_x)]
@@ -126,7 +158,7 @@ def test_save_load_fitted_inerror(self):
         import numpy as np
         om = self.om
         # create classifier and save untrained
-        estmdl = TFEstimatorModel(estimator_fn=make_estimator)
+        estmdl = TFEstimatorModel(estimator_fn=make_estimator_fn())
         om.models.put(estmdl, 'estimator-model')
         # create dataasets
         train_x, train_y, test_x, test_y = make_data()
@@ -150,8 +182,9 @@ def test_save_load_fitted_inerror(self):
     def test_runtime_fit(self):
         import pandas as pd
         om = self.om
-        # create classifier and save untrained
-        estmdl = TFEstimatorModel(estimator_fn=make_estimator)
+        # create classifier and save untrained, note we use the default input_fn
+        # provided by TFEstimatorModel as it deals easily with DataFrames
+        estmdl = TFEstimatorModel(estimator_fn=make_estimator_fn())
         train_x, train_y, test_x, test_y = make_data()
         om.datasets.put(train_x, 'train_x', append=False)
         om.datasets.put(train_y, 'train_y', append=False)
@@ -164,37 +197,17 @@ def test_runtime_fit(self):
         result = om.runtime.model('estimator-model').predict('test_x').get()
         self.assertIsInstance(result, pd.DataFrame)
 
-    def test_runtime_predict(self):
-        import pandas as pd
-        om = self.om
-        # create classifier and save untrained
-        estmdl = TFEstimatorModel(estimator_fn=make_estimator)
-        train_x, train_y, test_x, test_y = make_data()
-        om.datasets.put(train_x, 'train_x', append=False)
-        om.datasets.put(train_y, 'train_y', append=False)
-        om.models.put(estmdl, 'estimator-model')
-        meta = om.runtime.model('estimator-model').fit('train_x', 'train_y').get()
-        self.assertIsInstance(meta, Metadata)
-        # predict using fitted model in runtime
-        om.datasets.put(test_x, 'test_x', append=False)
-        om.datasets.put(test_y, 'test_y', append=False)
-        result = om.runtime.model('estimator-model').predict('test_x').get()
-        self.assertIsInstance(result, pd.Series)
-        self.assertAlmostEqual(result['accuracy'], 1.0)
-
     def test_runtime_predict_from_numpy(self):
         import pandas as pd
         om = self.om
-        # create classifier and save untrained
-        estmdl = TFEstimatorModel(estimator_fn=make_estimator)
+        estmdl = TFEstimatorModel(estimator_fn=make_estimator_fn(), input_fn=make_input_fn())
         train_x, train_y, test_x, test_y = make_data()
         train_x = train_x.as_matrix()  # numpy
-        train_x = {
-            'f{}'.format(i + 1): train_x[:, i].tolist() for i in range(train_x.shape[1])
-        }  # dict of features
         train_y = train_y.as_matrix()
+        test_x = test_x.as_matrix()
         om.datasets.put(train_x, 'train_x')
         om.datasets.put(train_y, 'train_y')
+        om.datasets.put(test_x, 'test_x')
         om.models.put(estmdl, 'estimator-model')
         meta = om.runtime.model('estimator-model').fit('train_x', 'train_y').get()
         self.assertIsInstance(meta, Metadata)
@@ -202,14 +215,13 @@ def test_runtime_predict_from_numpy(self):
         om.datasets.put(test_x, 'test_x', append=False)
         om.datasets.put(test_y, 'test_y', append=False)
         result = om.runtime.model('estimator-model').predict('test_x').get()
-        self.assertIsInstance(result, pd.Series)
-        self.assertAlmostEqual(result['accuracy'], 1.0)
+        self.assertIsInstance(result, pd.DataFrame)
 
     def test_runtime_score(self):
         import pandas as pd
         om = self.om
         # create classifier and save untrained
-        estmdl = TFEstimatorModel(estimator_fn=make_estimator)
+        estmdl = TFEstimatorModel(estimator_fn=make_estimator_fn())
         train_x, train_y, test_x, test_y = make_data()
         om.datasets.put(train_x, 'train_x', append=False)
         om.datasets.put(train_y, 'train_y', append=False)