Skip to content

Commit

Permalink
update tf tests
Browse files Browse the repository at this point in the history
  • Loading branch information
miraculixx committed Jul 30, 2019
1 parent d8b7b98 commit 6821507
Show file tree
Hide file tree
Showing 5 changed files with 87 additions and 54 deletions.
2 changes: 1 addition & 1 deletion docker-compose-dev.yml
Expand Up @@ -5,7 +5,7 @@ services:
ports:
- "27019:27017"
- "27020:28017"
command: ["--auth", "--replSet", "rs0", "--oplogSize", "100"]
command: ["--auth", "--oplogSize", "100"]
rabbitmq:
image: rabbitmq
ports:
Expand Down
33 changes: 27 additions & 6 deletions omegaml/backends/tensorflow/tfestimatormodel.py
@@ -1,3 +1,4 @@
# import glob
import glob
import os
import tempfile
Expand All @@ -24,10 +25,23 @@ class TFEstimatorModel(object):
The estimator_fn returns a tf.estimator.Estimator or subclass.
"""

def __init__(self, estimator_fn, model_dir=None):
def __init__(self, estimator_fn, input_fn=None, model_dir=None):
"""
Args:
estimator_fn (func): the function to return a valid tf.estimator.Estimator instance. Called as
fn(model_dir=)
input_fn (func|dict): the function to create the input_fn as fn(mode, X, Y, batch_size=n), where mode
is either 'fit', 'evaluate', or 'predict'. If not provided, defaults to an input_fn
that tries to infer the correct input_fn from the method and input arguments. If
provided as a dict, must contain the 'fit', 'evaluate' and 'predict' keys where
each value is a valid input_fn as fn(X, Y, batch_size=n).
model_dir (str): the model directory to use. Defaults to whatever estimator_fn/Estimator instance sets
"""
self.estimator_fn = estimator_fn
self._model_dir = None
self._model_dir = model_dir
self._estimator = None
self._input_fn = input_fn

@property
def model_dir(self):
Expand All @@ -44,11 +58,12 @@ def restore(self, model_dir):
self._model_dir = model_dir
return self

def make_input_fn(self, X, Y, batch_size=1):
def make_input_fn(self, mode, X, Y=None, batch_size=1):
"""
Return a tf.data.Dataset from the input provided
Args:
mode (str): calling mode, either 'fit', 'predict' or 'evaluate'
X (NDArray|Tensor|Dataset): features, or Dataset of (features, labels)
Y (NDArray|Tensor|Dataset): labels, optional
Expand All @@ -67,6 +82,12 @@ def make_input_fn(self, X, Y, batch_size=1):
import pandas as pd
import numpy as np

if self._input_fn is not None:
if isinstance(self._input_fn, dict):
return self._input_fn[mode](X, Y=Y, batch_size=batch_size)
else:
return self._input_fn(mode, X, Y=Y, batch_size=batch_size)

def input_fn():
# if we have a dataset, use that
if isinstance(X, tf.data.Dataset):
Expand Down Expand Up @@ -97,7 +118,7 @@ def fit(self, X=None, Y=None, input_fn=None, batch_size=100, **kwargs):
"""
assert (ok(X, object) or ok(input_fn, object)), "specify either X, Y or input_fn, not both"
if input_fn is None:
input_fn = self.make_input_fn(X, Y, batch_size=batch_size)
input_fn = self.make_input_fn('fit', X, Y, batch_size=batch_size)
return self.estimator.train(input_fn=input_fn)

def score(self, X=None, Y=None, input_fn=None, batch_size=100, **kwargs):
Expand All @@ -108,7 +129,7 @@ def score(self, X=None, Y=None, input_fn=None, batch_size=100, **kwargs):
"""
assert (ok(X, object) or ok(input_fn, object)), "specify either X, Y or input_fn, not both"
if input_fn is None:
input_fn = self.make_input_fn(X, Y, batch_size=batch_size)
input_fn = self.make_input_fn('score', X, Y, batch_size=batch_size)
return self.estimator.evaluate(input_fn=input_fn)

def predict(self, X=None, Y=None, input_fn=None, batch_size=1, **kwargs):
Expand All @@ -121,7 +142,7 @@ def predict(self, X=None, Y=None, input_fn=None, batch_size=1, **kwargs):
options2 = (X is not None) and (input_fn is None)
assert options1 or options2, "specify either X, Y or input_fn, not both"
if input_fn is None:
input_fn = self.make_input_fn(X, Y, batch_size=batch_size)
input_fn = self.make_input_fn('predict', X, Y, batch_size=batch_size)
return self.estimator.predict(input_fn=input_fn)


Expand Down
2 changes: 1 addition & 1 deletion omegaml/restapi/tests/test_api.py
Expand Up @@ -30,7 +30,7 @@ def test_predict(self):
self.assertEqual(resp.status_code, 200)
data = resp.get_json()
self.assertEqual(data.get('model'), 'regression')
self.assertEqual(data.get('result'), [[10.]])
self.assertEqual(data.get('result'), [10.])

def test_dataset_query(self):
om = self.om
Expand Down
2 changes: 1 addition & 1 deletion omegaml/store/base.py
Expand Up @@ -386,7 +386,7 @@ def put(self, obj, name, attributes=None, **kwargs):
**kwargs)
elif isinstance(obj, (dict, list, tuple)):
if kwargs.pop('as_hdf', False):
self.put_pyobj_as_hdf(obj, name,
return self.put_pyobj_as_hdf(obj, name,
attributes=attributes, **kwargs)
return self.put_pyobj_as_document(obj, name,
attributes=attributes,
Expand Down
102 changes: 57 additions & 45 deletions omegaml/tests/test_tfestimator.py
@@ -1,3 +1,4 @@
from inspect import isfunction
from unittest import TestCase

from omegaml import Omega
Expand Down Expand Up @@ -25,13 +26,30 @@ def make_data():
return train_x, train_y, test_x, test_y


def make_estimator(model_dir=None):
import tensorflow as tf
feature_columns = [tf.feature_column.numeric_column(key=key)
for key in ['f1', 'f2', 'f3', 'f4']]
classifier = tf.estimator.LinearClassifier(feature_columns=feature_columns,
n_classes=3, model_dir=model_dir)
return classifier
def make_estimator_fn():
    """Return a newly defined estimator factory function.

    The factory is created as a nested function on every call; this is to
    ensure we get a serializable function.

    Returns:
        func: ``make_estimator(model_dir=None)``, which builds a
        ``tf.estimator.LinearClassifier`` with 3 classes over the numeric
        feature columns ``f1`` .. ``f4``.
    """
    def make_estimator(model_dir=None):
        # tensorflow is imported lazily, inside the factory itself
        import tensorflow as tf
        columns = []
        for name in ['f1', 'f2', 'f3', 'f4']:
            columns.append(tf.feature_column.numeric_column(key=name))
        return tf.estimator.LinearClassifier(feature_columns=columns,
                                             n_classes=3,
                                             model_dir=model_dir)

    return make_estimator


def make_input_fn():
    """Return a custom input_fn factory for plain numpy feature matrices.

    We need to use a custom input_fn as the default won't be able to figure
    out column names from numpy inputs.

    Returns:
        func: ``input_fn(mode, X, Y=None, batch_size=1)``, which maps the
        columns of ``X`` to features named ``f1``, ``f2``, ... by position
        and returns the result of ``tf.estimator.inputs.numpy_input_fn``
        (one epoch, no shuffling).
    """
    def input_fn(mode, X, Y=None, batch_size=1):
        # mode is accepted for interface compatibility; the same input
        # pipeline is built regardless of its value
        import tensorflow as tf
        features = {
            'f{}'.format(i + 1): X[:, i] for i in range(X.shape[1])
        }
        # forward batch_size explicitly, otherwise the caller's value is
        # silently replaced by numpy_input_fn's own default
        return tf.estimator.inputs.numpy_input_fn(x=features, y=Y,
                                                  batch_size=batch_size,
                                                  num_epochs=1,
                                                  shuffle=False)

    return input_fn


class TFEstimatorModelBackendTests(OmegaTestMixin, TestCase):
Expand All @@ -45,7 +63,7 @@ def test_fit_predict(self):
import tensorflow as tf
om = self.om
# create classifier
estmdl = TFEstimatorModel(estimator_fn=make_estimator)
estmdl = TFEstimatorModel(estimator_fn=make_estimator_fn())
train_x, train_y, test_x, test_y = make_data()
classifier = estmdl.fit(train_x, train_y)
self.assertIsInstance(classifier, tf.estimator.LinearClassifier)
Expand All @@ -64,15 +82,13 @@ def test_fit_predict(self):
def test_fit_predict_from_numpy(self):
import tensorflow as tf
om = self.om
# create classifier
estmdl = TFEstimatorModel(estimator_fn=make_estimator)
# note we use a custom input_fn
estmdl = TFEstimatorModel(estimator_fn=make_estimator_fn(), input_fn=make_input_fn())
train_x, train_y, test_x, test_y = make_data()
# create a feature dict from a numpy array
train_x = train_x.as_matrix() # numpy
train_x = {
'f{}'.format(i + 1): train_x[:, i] for i in range(train_x.shape[1])
} # dict of features
train_x = train_x.as_matrix() # numpy
train_y = train_y.as_matrix()
test_x = test_x.as_matrix()
classifier = estmdl.fit(train_x, train_y)
self.assertIsInstance(classifier, tf.estimator.LinearClassifier)
# score
Expand All @@ -90,7 +106,7 @@ def test_fit_predict_from_numpy(self):
def test_save_load_unfitted(self):
om = self.om
# create classifier and save
estmdl = TFEstimatorModel(estimator_fn=make_estimator)
estmdl = TFEstimatorModel(estimator_fn=make_estimator_fn())
meta = om.models.put(estmdl, 'estimator-model')
# restore and use
estmdl_r = om.models.get('estimator-model')
Expand All @@ -103,11 +119,27 @@ def test_save_load_unfitted(self):
self.assertIn('probabilities', predict[0])
self.assertIn('classes', predict[0])

def test_save_load_estimator_model(self):
    """Store an unfitted TFEstimatorModel and check the restored copy.

    Verifies that the object returned by ``om.models.get`` is a distinct
    instance of the same class, uses a different model_dir, and carries
    an estimator_fn that still produces a tf.estimator.Estimator.
    """
    import tensorflow as tf
    om = self.om
    # create classifier and save
    estmdl = TFEstimatorModel(estimator_fn=make_estimator_fn())
    om.models.put(estmdl, 'estimator-model')
    # restore and use
    estmdl_r = om.models.get('estimator-model')
    # check we have a restored instance
    self.assertIsNot(estmdl_r, estmdl)
    self.assertNotEqual(estmdl.estimator.model_dir, estmdl_r.estimator.model_dir)
    self.assertIsInstance(estmdl_r, estmdl.__class__)
    # compare against the function that was actually stored; comparing
    # against a fresh make_estimator_fn() result can never fail because
    # every call returns a new function object
    self.assertIsNot(estmdl_r.estimator_fn, estmdl.estimator_fn)
    # NOTE(review): this checks the original model's estimator_fn, not the
    # restored one -- confirm whether estmdl_r was intended here
    self.assertTrue(isfunction(estmdl.estimator_fn))
    self.assertIsInstance(estmdl_r.estimator_fn(), tf.estimator.Estimator)

def test_save_load_fitted(self):
import numpy as np
om = self.om
# create classifier and save
estmdl = TFEstimatorModel(estimator_fn=make_estimator)
estmdl = TFEstimatorModel(estimator_fn=make_estimator_fn())
train_x, train_y, test_x, test_y = make_data()
estmdl.fit(train_x, train_y)
predict = [v for v in estmdl.predict(test_x)]
Expand All @@ -126,7 +158,7 @@ def test_save_load_fitted_inerror(self):
import numpy as np
om = self.om
# create classifier and save untrained
estmdl = TFEstimatorModel(estimator_fn=make_estimator)
estmdl = TFEstimatorModel(estimator_fn=make_estimator_fn())
om.models.put(estmdl, 'estimator-model')
# create datasets
train_x, train_y, test_x, test_y = make_data()
Expand All @@ -150,8 +182,9 @@ def test_save_load_fitted_inerror(self):
def test_runtime_fit(self):
import pandas as pd
om = self.om
# create classifier and save untrained
estmdl = TFEstimatorModel(estimator_fn=make_estimator)
# create classifier and save untrained, note we use the default input_fn
# provided by TFEstimatorModel as it deals easily with DataFrames
estmdl = TFEstimatorModel(estimator_fn=make_estimator_fn())
train_x, train_y, test_x, test_y = make_data()
om.datasets.put(train_x, 'train_x', append=False)
om.datasets.put(train_y, 'train_y', append=False)
Expand All @@ -164,52 +197,31 @@ def test_runtime_fit(self):
result = om.runtime.model('estimator-model').predict('test_x').get()
self.assertIsInstance(result, pd.DataFrame)

def test_runtime_predict(self):
import pandas as pd
om = self.om
# create classifier and save untrained
estmdl = TFEstimatorModel(estimator_fn=make_estimator)
train_x, train_y, test_x, test_y = make_data()
om.datasets.put(train_x, 'train_x', append=False)
om.datasets.put(train_y, 'train_y', append=False)
om.models.put(estmdl, 'estimator-model')
meta = om.runtime.model('estimator-model').fit('train_x', 'train_y').get()
self.assertIsInstance(meta, Metadata)
# predict using fitted model in runtime
om.datasets.put(test_x, 'test_x', append=False)
om.datasets.put(test_y, 'test_y', append=False)
result = om.runtime.model('estimator-model').predict('test_x').get()
self.assertIsInstance(result, pd.Series)
self.assertAlmostEqual(result['accuracy'], 1.0)

def test_runtime_predict_from_numpy(self):
import pandas as pd
om = self.om
# create classifier and save untrained
estmdl = TFEstimatorModel(estimator_fn=make_estimator)
estmdl = TFEstimatorModel(estimator_fn=make_estimator_fn(), input_fn=make_input_fn())
train_x, train_y, test_x, test_y = make_data()
train_x = train_x.as_matrix() # numpy
train_x = {
'f{}'.format(i + 1): train_x[:, i].tolist() for i in range(train_x.shape[1])
} # dict of features
train_y = train_y.as_matrix()
test_x = test_x.as_matrix()
om.datasets.put(train_x, 'train_x')
om.datasets.put(train_y, 'train_y')
om.datasets.put(test_x, 'test_x')
om.models.put(estmdl, 'estimator-model')
meta = om.runtime.model('estimator-model').fit('train_x', 'train_y').get()
self.assertIsInstance(meta, Metadata)
# predict using fitted model in runtime
om.datasets.put(test_x, 'test_x', append=False)
om.datasets.put(test_y, 'test_y', append=False)
result = om.runtime.model('estimator-model').predict('test_x').get()
self.assertIsInstance(result, pd.Series)
self.assertAlmostEqual(result['accuracy'], 1.0)
self.assertIsInstance(result, pd.DataFrame)

def test_runtime_score(self):
import pandas as pd
om = self.om
# create classifier and save untrained
estmdl = TFEstimatorModel(estimator_fn=make_estimator)
estmdl = TFEstimatorModel(estimator_fn=make_estimator_fn())
train_x, train_y, test_x, test_y = make_data()
om.datasets.put(train_x, 'train_x', append=False)
om.datasets.put(train_y, 'train_y', append=False)
Expand Down

0 comments on commit 6821507

Please sign in to comment.