Google docstring conversion batch 7 #10888

Merged
2 changes: 1 addition & 1 deletion mlflow/langchain/__init__.py
@@ -319,7 +319,7 @@ def log_model(
Args:
lc_model: A LangChain model, which could be a
`Chain <https://python.langchain.com/docs/modules/chains/>`_,
`Agent <https://python.langchain.com/docs/modules/agents/>`, or
`Agent <https://python.langchain.com/docs/modules/agents/>`_, or
`retriever <https://python.langchain.com/docs/modules/data_connection/retrievers/>`_.
artifact_path: Run-relative artifact path.
conda_env: {{ conda_env }}
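For context on the API touched above, a minimal sketch of logging a LangChain chain with ``mlflow.langchain.log_model`` follows; the toy chain, the ``FakeListLLM`` stand-in, and the import paths are illustrative assumptions that vary across LangChain versions.

```python
# Minimal sketch (not part of this PR): logging a LangChain chain with
# mlflow.langchain.log_model. The chain below is a toy; FakeListLLM and the
# import paths are illustrative and differ across LangChain versions.
import mlflow
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate
from langchain_community.llms import FakeListLLM

llm = FakeListLLM(responses=["MLflow is an open source MLOps platform."])
prompt = PromptTemplate(input_variables=["topic"], template="Tell me about {topic}.")
chain = LLMChain(llm=llm, prompt=prompt)

with mlflow.start_run():
    # artifact_path is the run-relative artifact path described in the docstring.
    model_info = mlflow.langchain.log_model(lc_model=chain, artifact_path="langchain_model")
```

The returned ``model_info.model_uri`` can then be passed to ``mlflow.langchain.load_model`` or ``mlflow.pyfunc.load_model``.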
20 changes: 13 additions & 7 deletions mlflow/langchain/retriever_chain.py
@@ -30,7 +30,8 @@ class _RetrieverChain(Chain):

See :ref:`log-retriever-chain` for how to log the ``_RetrieverChain``.

:param retriever: The retriever to wrap.
Args:
retriever: The retriever to wrap.
"""

input_key: str = "query"
@@ -65,10 +66,12 @@ def _call(
"""Run _get_docs on input query.
Returns the retrieved documents under the key 'source_documents'.
Example:

.. code-block:: python
chain = _RetrieverChain(retriever=...)
res = chain({'query': 'This is my query'})
docs = res['source_documents']

chain = _RetrieverChain(retriever=...)
res = chain({"query": "This is my query"})
docs = res["source_documents"]
"""
question = inputs[self.input_key]
docs = self._get_docs(question)
@@ -86,11 +89,14 @@ async def _acall(
) -> Dict[str, Any]:
"""Run _get_docs on input query.
Returns the retrieved documents under the key 'source_documents'.

Example:

.. code-block:: python
chain = _RetrieverChain(retriever=...)
res = chain({'query': 'This is my query'})
docs = res['source_documents']

chain = _RetrieverChain(retriever=...)
res = chain({"query": "This is my query"})
docs = res["source_documents"]
"""
question = inputs[self.input_key]
docs = await self._aget_docs(question)
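To make the ``retriever=...`` placeholder in the examples above concrete, here is a hedged sketch using an in-memory FAISS retriever; ``FakeEmbeddings``, the sample text, and the import paths are illustrative assumptions.

```python
# Minimal sketch (not part of this PR): the docstring's `retriever=...` filled in
# with an in-memory FAISS retriever. FakeEmbeddings, the sample text, and the
# import paths are illustrative and vary across LangChain versions.
from langchain_community.embeddings import FakeEmbeddings
from langchain_community.vectorstores import FAISS

from mlflow.langchain.retriever_chain import _RetrieverChain

db = FAISS.from_texts(["MLflow tracks experiments and models."], FakeEmbeddings(size=16))
chain = _RetrieverChain(retriever=db.as_retriever())
res = chain({"query": "What does MLflow track?"})
docs = res["source_documents"]  # the retrieved documents, as described above
```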
2 changes: 1 addition & 1 deletion mlflow/langchain/runnables.py
@@ -297,7 +297,7 @@ def _save_runnable_with_steps(steps, file_path: Union[Path, str], loader_fn=None

def _save_runnable_branch(model, file_path, loader_fn, persist_dir):
"""
save runnable branch in to path.
Save runnable branch into the given path.
"""
save_path = Path(file_path) if isinstance(file_path, str) else file_path
save_path.mkdir(parents=True, exist_ok=True)
234 changes: 125 additions & 109 deletions mlflow/lightgbm/__init__.py

Large diffs are not rendered by default.

300 changes: 155 additions & 145 deletions mlflow/onnx/__init__.py

Large diffs are not rendered by default.

2 changes: 2 additions & 0 deletions mlflow/pmdarima/__init__.py
@@ -415,11 +415,13 @@ def load_model(model_uri, dst_path=None):

Args:
model_uri: The location, in URI format, of the MLflow model. For example:

- ``/Users/me/path/to/local/model``
- ``relative/path/to/local/model``
- ``s3://my_bucket/path/to/model``
- ``runs:/<mlflow_run_id>/run-relative/path/to/model``
- ``mlflow-artifacts:/path/to/model``

For more information about supported URI schemes, see
`Referencing Artifacts <https://www.mlflow.org/docs/latest/tracking.html#
artifact-locations>`_.
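A minimal usage sketch of the ``load_model`` API documented above; the run ID stays a placeholder and the 30-period horizon is arbitrary.

```python
# Minimal sketch (not part of this PR): loading a logged pmdarima model by URI
# and forecasting with pmdarima's native predict API. The run ID is a placeholder.
import mlflow.pmdarima

model = mlflow.pmdarima.load_model("runs:/<mlflow_run_id>/model")
forecast = model.predict(n_periods=30)  # next 30 periods beyond the training data
```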
2 changes: 2 additions & 0 deletions mlflow/prophet/__init__.py
@@ -321,10 +321,12 @@ def load_model(model_uri, dst_path=None):

Args:
model_uri: The location, in URI format, of the MLflow model. For example:

- ``/Users/me/path/to/local/model``
- ``relative/path/to/local/model``
- ``s3://my_bucket/path/to/model``
- ``runs:/<mlflow_run_id>/run-relative/path/to/model``

For more information about supported URI schemes, see
`Referencing Artifacts <https://www.mlflow.org/docs/latest/tracking.html#
artifact-locations>`_.
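Likewise for Prophet, a minimal sketch of loading the model by URI and forecasting with Prophet's own API; the run ID remains a placeholder.

```python
# Minimal sketch (not part of this PR): loading a logged Prophet model by URI and
# forecasting with Prophet's native API. The run ID is a placeholder.
import mlflow.prophet

model = mlflow.prophet.load_model("runs:/<mlflow_run_id>/model")
future = model.make_future_dataframe(periods=30)
forecast = model.predict(future)
```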
105 changes: 54 additions & 51 deletions mlflow/pyspark/ml/__init__.py
@@ -184,8 +184,9 @@ def _should_log_model(spark_model):

def _get_estimator_info_tags(estimator):
"""
:return: A dictionary of MLflow run tag keys and values
describing the specified estimator.
Returns:
A dictionary of MLflow run tag keys and values
describing the specified estimator.
"""
return {
"estimator_name": estimator.__class__.__name__,
@@ -870,55 +871,57 @@ def autolog(
.. _TrainValidationSplit:
https://spark.apache.org/docs/latest/api/python/reference/api/pyspark.ml.tuning.TrainValidationSplit.html#pyspark.ml.tuning.TrainValidationSplit

:param log_models: If ``True``, if trained models are in allowlist, they are logged as MLflow
model artifacts. If ``False``, trained models are not logged.
Note: the built-in allowlist excludes some models (e.g. ALS models) which
can be large. To specify a custom allowlist, create a file containing a
newline-delimited list of fully-qualified estimator classnames, and set
the "spark.mlflow.pysparkml.autolog.logModelAllowlistFile" Spark config
to the path of your allowlist file.
:param log_datasets: If ``True``, dataset information is logged to MLflow Tracking.
If ``False``, dataset information is not logged.
:param disable: If ``True``, disables the scikit-learn autologging integration. If ``False``,
enables the pyspark ML autologging integration.
:param exclusive: If ``True``, autologged content is not logged to user-created fluent runs.
If ``False``, autologged content is logged to the active fluent run,
which may be user-created.
:param disable_for_unsupported_versions: If ``True``, disable autologging for versions of
pyspark that have not been tested against this version of the MLflow
client or are incompatible.
:param silent: If ``True``, suppress all event logs and warnings from MLflow during pyspark ML
autologging. If ``False``, show all events and warnings during pyspark ML
autologging.
:param log_post_training_metrics: If ``True``, post training metrics are logged. Defaults to
``True``. See the `post training metrics`_ section for more
details.
:param registered_model_name: If given, each time a model is trained, it is registered as a
new model version of the registered model with this name.
The registered model is created if it does not already exist.
:param log_input_examples: If ``True``, input examples from training datasets are collected and
logged along with pyspark ml model artifacts during training. If
``False``, input examples are not logged.
:param log_model_signatures: If ``True``,
:py:class:`ModelSignatures <mlflow.models.ModelSignature>`
describing model inputs and outputs are collected and logged along
with spark ml pipeline/estimator artifacts during training.
If ``False`` signatures are not logged.

.. warning::

Currently, only scalar Spark data types are supported. If
model inputs/outputs contain non-scalar Spark data types such
as ``pyspark.ml.linalg.Vector``, signatures are not logged.
:param log_model_allowlist: If given, it overrides the default log model allowlist in mlflow.
This takes precedence over the spark configuration of
"spark.mlflow.pysparkml.autolog.logModelAllowlistFile".

**The default log model allowlist in mlflow**
.. literalinclude:: ../../../mlflow/pyspark/ml/log_model_allowlist.txt
:language: text

:param extra_tags: A dictionary of extra tags to set on each managed run created by autologging.
Args:
log_models: If ``True``, trained models that are in the allowlist are logged as MLflow
model artifacts. If ``False``, trained models are not logged.
Note: the built-in allowlist excludes some models (e.g. ALS models) which
can be large. To specify a custom allowlist, create a file containing a
newline-delimited list of fully-qualified estimator classnames, and set
the "spark.mlflow.pysparkml.autolog.logModelAllowlistFile" Spark config
to the path of your allowlist file.
log_datasets: If ``True``, dataset information is logged to MLflow Tracking.
If ``False``, dataset information is not logged.
disable: If ``True``, disables the pyspark ML autologging integration. If ``False``,
enables the pyspark ML autologging integration.
exclusive: If ``True``, autologged content is not logged to user-created fluent runs.
If ``False``, autologged content is logged to the active fluent run,
which may be user-created.
disable_for_unsupported_versions: If ``True``, disable autologging for versions of
pyspark that have not been tested against this version of the MLflow
client or are incompatible.
silent: If ``True``, suppress all event logs and warnings from MLflow during pyspark ML
autologging. If ``False``, show all events and warnings during pyspark ML
autologging.
log_post_training_metrics: If ``True``, post training metrics are logged. Defaults to
``True``. See the `post training metrics`_ section for more
details.
registered_model_name: If given, each time a model is trained, it is registered as a
new model version of the registered model with this name.
The registered model is created if it does not already exist.
log_input_examples: If ``True``, input examples from training datasets are collected and
logged along with pyspark ml model artifacts during training. If
``False``, input examples are not logged.
log_model_signatures: If ``True``,
:py:class:`ModelSignatures <mlflow.models.ModelSignature>`
describing model inputs and outputs are collected and logged along
with spark ml pipeline/estimator artifacts during training.
If ``False`` signatures are not logged.

.. warning::

Currently, only scalar Spark data types are supported. If
model inputs/outputs contain non-scalar Spark data types such
as ``pyspark.ml.linalg.Vector``, signatures are not logged.

log_model_allowlist: If given, it overrides the default log model allowlist in mlflow.
This takes precedence over the spark configuration of
"spark.mlflow.pysparkml.autolog.logModelAllowlistFile".

**The default log model allowlist in mlflow**
.. literalinclude:: ../../../mlflow/pyspark/ml/log_model_allowlist.txt
:language: text

extra_tags: A dictionary of extra tags to set on each managed run created by autologging.
"""
from pyspark.ml.base import Estimator, Model
from pyspark.ml.evaluation import Evaluator
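A minimal sketch of enabling pyspark ML autologging with a few of the parameters documented above; the values shown are illustrative, not recommendations.

```python
# Minimal sketch (not part of this PR): turning on pyspark ML autologging with a
# few of the parameters documented above. Values are illustrative.
import mlflow.pyspark.ml

mlflow.pyspark.ml.autolog(
    log_models=True,
    log_datasets=True,
    log_input_examples=False,
    log_model_signatures=True,
    log_post_training_metrics=True,
)
# After this call, fitting an allowlisted estimator (e.g. pipeline.fit(train_df))
# automatically logs params, metrics, and the model to the active MLflow run.
```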
37 changes: 24 additions & 13 deletions mlflow/pyspark/ml/_autolog.py
@@ -20,8 +20,11 @@ def cast_spark_df_with_vector_to_array(input_spark_df):
Finds columns of vector type in a spark dataframe and
casts them to array<double> type.

:param input_spark_df:
:return: a spark dataframe with vector columns transformed to array<double> type
Args:
input_spark_df:

Returns:
A spark dataframe with vector columns transformed to array<double> type
"""
vector_type_columns = [
_field.name for _field in input_spark_df.schema if isinstance(_field.dataType, VectorUDT)
@@ -37,20 +40,25 @@ def _do_pipeline_transform(df: DataFrame, transformer: Union[Transformer, Pipeli
"""
A util method that runs transform on a pipeline model/transformer

:param df:a spark dataframe
:return: output transformed dataframe using pipeline model/transformer
Args:
df: a spark dataframe

Returns:
output transformed dataframe using pipeline model/transformer
"""
return transformer.transform(df)


def _get_struct_type_by_cols(input_fields: Set[str], df_schema: t.StructType) -> t.StructType:
"""
Args:
input_fields: A set of input columns to be
intersected with the input dataset's columns.
df_schema: A Spark dataframe schema to compare input_fields

:param input_fields: A set of input columns to be
intersected with the input dataset's columns.
:param df_schema: A Spark dataframe schema to compare input_fields
:return:A StructType from the intersection of given columns and
the columns present in the training dataset
Returns:
A StructType from the intersection of given columns and
the columns present in the training dataset
"""
if len(input_fields) > 0:
return t.StructType([_field for _field in df_schema.fields if _field.name in input_fields])
@@ -67,10 +75,13 @@ def get_feature_cols(
if `input_fields` is set to include non-feature columns those
will be included in the return set of column names.

:param df: An input spark dataframe.
:param transformer: A pipeline/transformer to get the required feature columns
:return: A set of all the feature columns that are required
for the pipeline/transformer plus any initial columns passed in.
Args:
df: An input spark dataframe.
transformer: A pipeline/transformer to get the required feature columns

Returns:
A set of all the feature columns that are required
for the pipeline/transformer plus any initial columns passed in.
"""
feature_cols = set()
df_subset = df.limit(1).cache()
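As background for the vector-to-array helper documented at the top of this file, here is a hedged sketch of the equivalent cast written directly with ``pyspark.ml.functions.vector_to_array`` (Spark 3.0+); the Spark session, data, and column names are illustrative.

```python
# Minimal sketch (not part of this PR): the cast that
# cast_spark_df_with_vector_to_array performs, written directly with
# pyspark.ml.functions.vector_to_array (Spark 3.0+). Data and names are illustrative.
from pyspark.ml.functions import vector_to_array
from pyspark.ml.linalg import Vectors
from pyspark.sql import SparkSession
from pyspark.sql.functions import col

spark = SparkSession.builder.getOrCreate()
df = spark.createDataFrame([(Vectors.dense([1.0, 2.0]), 0.0)], ["features", "label"])

# Cast the vector-typed column to array<double>; non-vector columns are untouched.
df_cast = df.withColumn("features", vector_to_array(col("features")))
df_cast.printSchema()  # features: array<double>, label: double
```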