
Commit

Set log_model_signatures=False by default for `mlflow.tensorflow.autolog()` (#5652)

* Impl and test

Signed-off-by: dbczumar <corey.zumar@databricks.com>

* Format

Signed-off-by: dbczumar <corey.zumar@databricks.com>

* Test fixes

Signed-off-by: dbczumar <corey.zumar@databricks.com>

* Format

Signed-off-by: dbczumar <corey.zumar@databricks.com>

* Reenable

Signed-off-by: dbczumar <corey.zumar@databricks.com>

* Fix numpy incompat, fix tests

Signed-off-by: dbczumar <corey.zumar@databricks.com>

* Fix

Signed-off-by: dbczumar <corey.zumar@databricks.com>

* error code

Signed-off-by: dbczumar <corey.zumar@databricks.com>

* Simplify

Signed-off-by: dbczumar <corey.zumar@databricks.com>

* format

Signed-off-by: dbczumar <corey.zumar@databricks.com>

* Remove harmful fixture

Signed-off-by: dbczumar <corey.zumar@databricks.com>

* Docstring

Signed-off-by: dbczumar <corey.zumar@databricks.com>

* Format

Signed-off-by: dbczumar <corey.zumar@databricks.com>
dbczumar authored and BenWilson2 committed Apr 10, 2022
1 parent eaba6f9 commit 6822c55
Showing 4 changed files with 101 additions and 89 deletions.
81 changes: 50 additions & 31 deletions mlflow/tensorflow/__init__.py
@@ -30,6 +30,7 @@
from mlflow.models.model import MLMODEL_FILE_NAME, _LOG_MODEL_METADATA_WARNING_TEMPLATE
from mlflow.models.signature import ModelSignature
from mlflow.models.utils import ModelInputExample, _save_example
from mlflow.protos.databricks_pb2 import INVALID_PARAMETER_VALUE
from mlflow.tracking import MlflowClient
from mlflow.tracking.artifact_utils import _download_artifact_from_uri, get_artifact_uri
from mlflow.utils.annotations import keyword_only
@@ -61,6 +62,7 @@
log_fn_args_as_params,
batch_metrics_logger,
get_autologging_config,
AUTOLOGGING_CONF_KEY_IS_GLOBALLY_CONFIGURED,
)
from mlflow.entities import Metric
from mlflow.tracking._model_registry import DEFAULT_AWAIT_MAX_SLEEP_SECONDS
@@ -654,7 +656,7 @@ def autolog(
silent=False,
registered_model_name=None,
log_input_examples=False,
log_model_signatures=True,
log_model_signatures=False,
): # pylint: disable=unused-argument
# pylint: disable=E0611
"""
@@ -731,7 +733,13 @@ def autolog(
:py:class:`ModelSignatures <mlflow.models.ModelSignature>`
describing model inputs and outputs are collected and logged along
with tf/keras model artifacts during training. If ``False``,
signatures are not logged.
signatures are not logged. ``False`` by default because
logging TensorFlow models with signatures changes their pyfunc
inference behavior when Pandas DataFrames are passed to
``predict()``: when a signature is present, an ``np.ndarray``
(for single-output models) or a mapping from
``str`` -> ``np.ndarray`` (for multi-output models) is returned;
when a signature is not present, a Pandas DataFrame is returned.
"""
import tensorflow
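
The behavioral difference called out in the docstring above can be illustrated with a small sketch (not part of this commit; the run IDs below are placeholders, and the output types follow the docstring's description):

import mlflow
import numpy as np
import pandas as pd

df = pd.DataFrame(np.random.rand(5, 4).astype(np.float32))

# Model autologged WITHOUT a signature (the new default): pyfunc predict()
# on a DataFrame returns a pandas DataFrame.
model = mlflow.pyfunc.load_model("runs:/<run_id_without_signature>/model")
print(type(model.predict(df)))  # pandas.core.frame.DataFrame

# Model autologged WITH a signature (log_model_signatures=True): pyfunc
# predict() on a DataFrame returns an np.ndarray for single-output models,
# or a dict of str -> np.ndarray for multi-output models.
model = mlflow.pyfunc.load_model("runs:/<run_id_with_signature>/model")
print(type(model.predict(df)))  # numpy.ndarray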

@@ -935,7 +943,13 @@ def _get_input_data_slice():
input_example_slice = None
if isinstance(input_training_data, np.ndarray):
input_example_slice = input_training_data[:INPUT_EXAMPLE_SAMPLE_ROWS]
elif isinstance(input_training_data, tensorflow.data.Dataset):
elif (
isinstance(input_training_data, tensorflow.data.Dataset)
and
# TensorFlow < 2.1.0 does not include methods for converting
# a tf.data.Dataset to a numpy array, such as `as_numpy_iterator()`
Version(tensorflow.__version__) >= Version("2.1.0")
):
steps = 1
if history.params is not None and "steps" in history.params:
steps = history.params["steps"]
@@ -971,44 +985,49 @@ def _extract_n_steps(input_example_n_steps):
]

else:
warnings.warn(
"Tensorflow keras autologging only "
"supports input types of: numpy.ndarray, "
"dict(<key> -> numpy.ndarray), tensorflow.data.Dataset, "
"or tensorflow.keras.utils.Sequence"
raise MlflowException(
"Cannot log input example or model signature for input with type"
f" {type(input_training_data)}. TensorFlow Keras autologging can"
" only log input examples and model signatures for the following"
" input types: numpy.ndarray, dict[string -> numpy.ndarray],"
" tensorflow.keras.utils.Sequence, and"
" tensorflow.data.Dataset (TensorFlow >= 2.1.0 required)",
INVALID_PARAMETER_VALUE,
)

return input_example_slice

def _infer_model_signature(input_data_slice):
try:
original_stop_training = history.model.stop_training
model_output = history.model.predict(input_data_slice)

if (
Version(tensorflow.__version__) <= Version("2.1.4")
and original_stop_training
):
# For these versions, `stop_training` flag on Model is set to False
# This flag is used by the callback
# (inside ``_log_early_stop_callback_metrics``)
# for logging of early stop metrics. In order for
# that to work, need to force that flag to be True again since doing
# predict on that model sets `stop_training` to false for
# those TF versions
history.model.stop_training = True

model_signature = infer_signature(input_data_slice, model_output)
except TypeError as te:
warnings.warn(str(te))
model_signature = None
return model_signature
# In certain TensorFlow versions, calling `predict()` on the model may modify
# the `stop_training` attribute, so we save and restore it accordingly
original_stop_training = history.model.stop_training
model_output = history.model.predict(input_data_slice)
history.model.stop_training = original_stop_training
return infer_signature(input_data_slice, model_output)

input_example, signature = resolve_input_example_and_signature(
_get_input_data_slice,
_infer_model_signature,
log_input_examples,
log_model_signatures,
(
log_model_signatures
and
# `log_model_signatures` is `False` by default for
# `mlflow.tensorflow.autolog()` in order to preserve
# backwards-compatible inference behavior with older versions of MLflow
# that did not support signature autologging for TensorFlow (
# unfortunately, adding a signature to a TensorFlow model has the
# unintended consequence of changing the output type produced by
# inference with pyfunc `predict()` for Pandas DataFrame inputs).
# However, `log_model_signatures` is `True` by default for
# `mlflow.autolog()`. To ensure that we maintain backwards compatibility
# when TensorFlow autologging is enabled via `mlflow.autolog()`,
# we only enable signature logging if `mlflow.tensorflow.autolog()` is
# called explicitly with `log_model_signatures=True`
not get_autologging_config(
FLAVOR_NAME, AUTOLOGGING_CONF_KEY_IS_GLOBALLY_CONFIGURED, False
)
),
_logger,
)

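Per the comment above, the net effect of this gating is that model signatures are only logged when the TensorFlow integration is configured directly. A usage sketch of the assumed behavior (not an excerpt from this commit):

import mlflow

# Explicit opt-in via the integration-specific API: signatures ARE logged.
mlflow.tensorflow.autolog(log_model_signatures=True)

# Configuration via the global API: the integration is marked as globally
# configured, so signature logging stays off for TensorFlow even though
# log_model_signatures defaults to True for mlflow.autolog(). This preserves
# the pre-existing pyfunc inference behavior for DataFrame inputs.
mlflow.autolog()
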
15 changes: 8 additions & 7 deletions mlflow/tracking/fluent.py
@@ -30,6 +30,7 @@
autologging_integration,
AUTOLOGGING_INTEGRATIONS,
autologging_is_disabled,
AUTOLOGGING_CONF_KEY_IS_GLOBALLY_CONFIGURED,
)
from mlflow.utils.import_hooks import register_post_import_hook
from mlflow.utils.mlflow_tags import (
@@ -1604,8 +1605,6 @@ def print_auto_logged_info(r):
"pytorch_lightning": pytorch.autolog,
}

CONF_KEY_IS_GLOBALLY_CONFIGURED = "globally_configured"

def get_autologging_params(autolog_fn):
try:
needed_params = list(inspect.signature(autolog_fn).parameters.keys())
@@ -1622,20 +1621,22 @@ def setup_autologging(module):
# Logic is as follows:
# - if a previous_config exists, that means either `mlflow.autolog` or
# `mlflow.integration.autolog` was called.
# - if the config contains `CONF_KEY_IS_GLOBALLY_CONFIGURED`, the configuration
# was set by `mlflow.autolog`, and so we can safely call `autolog_fn` with
# `autologging_params`.
# - if the config contains `AUTOLOGGING_CONF_KEY_IS_GLOBALLY_CONFIGURED`, the
# configuration was set by `mlflow.autolog`, and so we can safely call `autolog_fn`
# with `autologging_params`.
# - if the config doesn't contain this key, the configuration was set by an
# `mlflow.integration.autolog` call, so we should not call `autolog_fn` with
# new configs.
prev_config = AUTOLOGGING_INTEGRATIONS.get(autolog_fn.integration_name)
if prev_config and not prev_config.get(CONF_KEY_IS_GLOBALLY_CONFIGURED, False):
if prev_config and not prev_config.get(
AUTOLOGGING_CONF_KEY_IS_GLOBALLY_CONFIGURED, False
):
return

autologging_params = get_autologging_params(autolog_fn)
autolog_fn(**autologging_params)
AUTOLOGGING_INTEGRATIONS[autolog_fn.integration_name][
CONF_KEY_IS_GLOBALLY_CONFIGURED
AUTOLOGGING_CONF_KEY_IS_GLOBALLY_CONFIGURED
] = True
if not autologging_is_disabled(
autolog_fn.integration_name
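A short sketch of the precedence rule described in the comments above (assumed behavior, derived from this hunk; not an excerpt from the commit):

import mlflow

# The integration-specific call runs first, so its config is stored without
# the globally-configured marker...
mlflow.tensorflow.autolog(log_models=False)

# ...and a subsequent mlflow.autolog() call leaves that explicit configuration
# untouched: setup_autologging() returns early for the tensorflow integration.
mlflow.autolog(log_models=True)
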
6 changes: 6 additions & 0 deletions mlflow/utils/autologging_utils/__init__.py
@@ -47,6 +47,12 @@
# Flag indicating whether autologging is globally disabled for all integrations.
_AUTOLOGGING_GLOBALLY_DISABLED = False

# Autologging config key indicating whether or not a particular autologging integration
# was configured (i.e. its various `log_models`, `disable`, etc. configuration options
# were set) via a call to `mlflow.autolog()`, rather than via a call to the integration-specific
# autologging method (e.g., `mlflow.tensorflow.autolog()`, ...)
AUTOLOGGING_CONF_KEY_IS_GLOBALLY_CONFIGURED = "globally_configured"

# Dict mapping integration name to its config.
AUTOLOGGING_INTEGRATIONS = {}

