
Add support for transformers pyfunc #8181

Merged: 17 commits, Apr 14, 2023
4 changes: 4 additions & 0 deletions .github/workflows/cross-version-tests.yml
@@ -172,6 +172,10 @@ jobs:
pip install --upgrade pip wheel
pip install -e .[extras]
pip install -r requirements/test-requirements.txt
if [ "${{ matrix.package }}" = "transformers" ]
then
pip install accelerate datasets
fi
Member: Do we need this? The requirements field for transformers contains both accelerate and datasets.

Member Author: Ah, good point. Removed!

- name: Install ${{ matrix.package }} ${{ matrix.version }}
env:
CACHE_DIR: /home/runner/.cache/wheels
3 changes: 3 additions & 0 deletions .gitignore
@@ -106,3 +106,6 @@ a.py

# Log file created by pre-commit hook for black
.black.log

# Pytest-monitor load testing DB file
*.pymon
5 changes: 4 additions & 1 deletion mlflow/deployments/__init__.py
@@ -42,7 +42,10 @@ def get_predictions(self, predictions_format="dataframe", dtype=None):
import pandas as pd

if predictions_format == "dataframe":
return pd.DataFrame(data=self["predictions"])
predictions = self["predictions"]
if isinstance(predictions, str):
return pd.DataFrame(data=[predictions])
return pd.DataFrame(data=predictions)
elif predictions_format == "ndarray":
return np.array(self["predictions"], dtype)
else:
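The string-wrapping branch above can be sketched in isolation. `get_predictions_frame` below is a hypothetical stand-in for the `PredictionsResponse.get_predictions` method, operating on a plain dict for illustration:

```python
import pandas as pd


def get_predictions_frame(response: dict) -> pd.DataFrame:
    """Sketch of the patched branch: wrap a bare string prediction in a
    one-element list so the DataFrame constructor receives a single row."""
    predictions = response["predictions"]
    if isinstance(predictions, str):
        # pd.DataFrame(data="text") raises ValueError; a one-element list
        # yields a one-row, one-column frame instead.
        return pd.DataFrame(data=[predictions])
    return pd.DataFrame(data=predictions)
```

With this guard, a scalar string prediction such as a single translation still round-trips to a one-row DataFrame, while list outputs behave as before.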
3 changes: 2 additions & 1 deletion mlflow/ml-package-versions.yml
@@ -464,6 +464,7 @@ transformers:
minimum: "4.25.1"
maximum: "4.27.4"
requirements:
">= 0.0.0": ["datasets", "huggingface_hub", "torch", "torchvision", "tensorflow"]
">= 0.0.0":
["datasets", "huggingface_hub", "torch", "torchvision", "tensorflow", "accelerate"]
run: |
pytest tests/transformers/test_transformers_model_export.py
2 changes: 1 addition & 1 deletion mlflow/models/signature.py
@@ -52,7 +52,7 @@ def to_dict(self) -> Dict[str, Any]:
Serialize into a 'jsonable' dictionary.

Input and output schema are represented as json strings. This is so that the
representation is compact when embedded in a MLmofel yaml file.
representation is compact when embedded in an MLmodel yaml file.

:return: dictionary representation with input and output schema represented as json strings.
"""
33 changes: 22 additions & 11 deletions mlflow/models/utils.py
@@ -26,18 +26,12 @@
except ImportError:
HAS_SCIPY = False

ModelInputExample = Union[pd.DataFrame, np.ndarray, dict, list, "csr_matrix", "csc_matrix"]
ModelInputExample = Union[pd.DataFrame, np.ndarray, dict, list, "csr_matrix", "csc_matrix", str]

PyFuncInput = Union[
pd.DataFrame,
pd.Series,
np.ndarray,
"csc_matrix",
"csr_matrix",
List[Any],
Dict[str, Any],
pd.DataFrame, pd.Series, np.ndarray, "csc_matrix", "csr_matrix", List[Any], Dict[str, Any], str
]
PyFuncOutput = Union[pd.DataFrame, pd.Series, np.ndarray, list]
PyFuncOutput = Union[pd.DataFrame, pd.Series, np.ndarray, list, str]


class _Example:
@@ -127,6 +121,13 @@ def _handle_dataframe_input(input_ex):
if isinstance(input_ex, dict):
if all(_is_scalar(x) for x in input_ex.values()):
input_ex = pd.DataFrame([input_ex])
elif all(isinstance(x, (str, list)) for x in input_ex.values()):
for value in input_ex.values():
if isinstance(value, list) and not all(_is_scalar(x) for x in value):
raise TypeError(
"List values within dictionaries must be of scalar type."
)
input_ex = pd.DataFrame(input_ex)
else:
raise TypeError(
"Data in the dictionary must be scalar or of type numpy.ndarray"
@@ -141,6 +142,8 @@ def _handle_dataframe_input(input_ex):
input_ex = pd.DataFrame([input_ex], columns=range(len(input_ex)))
else:
input_ex = pd.DataFrame(input_ex)
elif isinstance(input_ex, str):
input_ex = pd.DataFrame([input_ex])
elif not isinstance(input_ex, pd.DataFrame):
try:
import pyspark.sql.dataframe
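The new dictionary branch in `_handle_dataframe_input` accepts values that are strings or flat lists of scalars; pandas then builds one column per key, broadcasting any scalar string against the list-valued columns. A minimal sketch of that conversion, with mlflow's internal `_is_scalar` helper approximated by a plain isinstance check (an assumption for illustration):

```python
import pandas as pd


def dict_example_to_frame(input_ex: dict) -> pd.DataFrame:
    """Sketch of the added branch: every value must be a string or a list
    of scalars before the dict is handed to the DataFrame constructor."""
    for value in input_ex.values():
        if isinstance(value, list) and not all(
            isinstance(x, (str, int, float, bool)) for x in value
        ):
            raise TypeError("List values within dictionaries must be of scalar type.")
    return pd.DataFrame(input_ex)


# A scalar string broadcasts against the two-element list column.
frame = dict_example_to_frame({"context": ["c1", "c2"], "question": "shared"})
```

This is what lets a transformers-style input example such as a question paired with multiple contexts land in a well-formed two-row frame.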
@@ -609,9 +612,16 @@ def _enforce_schema(pf_input: PyFuncInput, input_schema: Schema):
if isinstance(pf_input, pd.Series):
pf_input = pd.DataFrame(pf_input)
if not input_schema.is_tensor_spec():
if isinstance(pf_input, (list, np.ndarray, dict, pd.Series)):
if isinstance(pf_input, (list, np.ndarray, dict, pd.Series, str)):
try:
pf_input = pd.DataFrame(pf_input)
if isinstance(pf_input, dict) and all(
not isinstance(value, (dict, list)) for value in pf_input.values()
):
pf_input = pd.DataFrame(pf_input, index=[0])
Comment on lines +617 to +620
Collaborator: I think this is a behavior change for dataframes with numpy array columns, scalar columns, etc. Can you provide more context here?

Member Author: I've adjusted this logic to be very explicit about matching on dict where the keys and values are all strings. Without this logic, we can't cast a scalar string to a DataFrame for serving or do signature validation. By supplying the index, we're able to create the DataFrame without an Exception being thrown.

elif isinstance(pf_input, str):
pf_input = pd.DataFrame({"inputs": pf_input}, index=[0])
else:
pf_input = pd.DataFrame(pf_input)
except Exception as e:
raise MlflowException(
"This model contains a column-based signature, which suggests a DataFrame"
Expand Down Expand Up @@ -678,6 +688,7 @@ def validate_schema(data: PyFuncInput, expected_schema: Schema) -> None:
- scipy.sparse.csr_matrix
- List[Any]
- Dict[str, Any]
- str
:param expected_schema: Expected :py:class:`Schema <mlflow.types.Schema>` of the input data.
:raises: A :py:class:`mlflow.exceptions.MlflowException`. when the input data does
not match the schema.
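Per the `_enforce_schema` hunk above, a newly supported bare `str` input is wrapped into a one-row frame under a default column before the column-based schema checks run. A hedged sketch of that coercion step (the `inputs` column name is taken from the diff; the function name is hypothetical):

```python
import pandas as pd


def coerce_for_column_schema(pf_input) -> pd.DataFrame:
    """Sketch of the string branch added to _enforce_schema: wrap a bare
    string as a one-row frame so column-based schema checks can proceed."""
    if isinstance(pf_input, str):
        return pd.DataFrame({"inputs": pf_input}, index=[0])
    if isinstance(pf_input, pd.DataFrame):
        return pf_input
    return pd.DataFrame(pf_input)
```

This mirrors how a single prompt handed to a transformers pyfunc model can still be validated against a column-based signature.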