mlflow · harupy · Dec 5, 2023 · Nov 16, 2023 · Nov 16, 2023 · Nov 16, 2023
diff --git a/.github/ISSUE_TEMPLATE/bug_report_template.yaml b/.github/ISSUE_TEMPLATE/bug_report_template.yaml
@@ -201,12 +201,12 @@ body:
           required: false
         - label: "`area/build`: Build and test infrastructure for MLflow"
           required: false
+        - label: "`area/deployments`: MLflow Deployments client APIs, server, and third-party Deployments integrations"
+          required: false
         - label: "`area/docs`: MLflow documentation pages"
           required: false
         - label: "`area/examples`: Example code"
           required: false
-        - label: "`area/gateway`: AI Gateway service, Gateway client APIs, third-party Gateway integrations"
-          required: false
         - label: "`area/model-registry`: Model Registry service, APIs, and the fluent client calls for Model Registry"
           required: false
         - label: "`area/models`: MLmodel format, model serialization/deserialization, flavors"

diff --git a/.github/ISSUE_TEMPLATE/feature_request_template.yaml b/.github/ISSUE_TEMPLATE/feature_request_template.yaml
@@ -62,12 +62,12 @@ body:
           required: false
         - label: "`area/build`: Build and test infrastructure for MLflow"
           required: false
+        - label: "`area/deployments`: MLflow Deployments client APIs, server, and third-party Deployments integrations"
+          required: false
         - label: "`area/docs`: MLflow documentation pages"
           required: false
         - label: "`area/examples`: Example code"
           required: false
-        - label: "`area/gateway`: AI Gateway service, Gateway client APIs, third-party Gateway integrations"
-          required: false
         - label: "`area/model-registry`: Model Registry service, APIs, and the fluent client calls for Model Registry"
           required: false
         - label: "`area/models`: MLmodel format, model serialization/deserialization, flavors"

diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md
@@ -38,9 +38,9 @@ Components
 
 - [ ] `area/artifacts`: Artifact stores and artifact logging
 - [ ] `area/build`: Build and test infrastructure for MLflow
+- [ ] `area/deployments`: MLflow Deployments client APIs, server, and third-party Deployments integrations
 - [ ] `area/docs`: MLflow documentation pages
 - [ ] `area/examples`: Example code
-- [ ] `area/gateway`: AI Gateway service, Gateway client APIs, third-party Gateway integrations
 - [ ] `area/model-registry`: Model Registry service, APIs, and the fluent client calls for Model Registry
 - [ ] `area/models`: MLmodel format, model serialization/deserialization, flavors
 - [ ] `area/recipes`: Recipes, Recipe APIs, Recipe configs, Recipe Templates

diff --git a/.github/workflows/deployments.yml b/.github/workflows/deployments.yml
@@ -0,0 +1,36 @@
+# CI workflow that installs MLflow with the `gateway` extra and runs the deployments tests.
+name: Deployments
+
+on:
+  pull_request:
+  push:
+    branches:
+      - master
+      # Branches named `branch-<digits>.<digits>`
+      - branch-[0-9]+.[0-9]+
+
+permissions:
+  contents: read
+
+# One run per workflow / event type / ref; a newer run cancels the one still in progress.
+concurrency:
+  group: ${{ github.workflow }}-${{ github.event_name }}-${{ github.ref }}
+  cancel-in-progress: true
+
+defaults:
+  run:
+    # -e: stop on the first failing command, -x: echo commands, -o pipefail: fail on any
+    # failing stage of a pipeline. Profile and rc files are skipped.
+    shell: bash --noprofile --norc -exo pipefail {0}
+
+jobs:
+  deployments:
+    # Always run on push; on pull requests, run only when the PR is not a draft.
+    if: github.event_name != 'pull_request' || github.event.pull_request.draft == false
+    runs-on: ubuntu-latest
+    timeout-minutes: 30
+    steps:
+      - uses: actions/checkout@v3
+      # Repository-local composite actions (defined under .github/actions).
+      - uses: ./.github/actions/untracked
+      - uses: ./.github/actions/setup-python
+      - name: Install dependencies
+        run: |
+          # Install the checked-out package with its `gateway` extra, plus the test tooling.
+          pip install .[gateway] \
+            pytest pytest-timeout pytest-asyncio httpx psutil
+      - name: Run tests
+        run: |
+          pytest tests/deployments/databricks tests/deployments/mlflow
diff --git a/examples/deployments/databricks.py b/examples/deployments/databricks.py
@@ -0,0 +1,100 @@
+"""
+Usage
+-----
+databricks secrets create-scope <scope>
+databricks secrets put-secret <scope> openai-api-key --string-value $OPENAI_API_KEY
+python examples/deployments/databricks.py --secret secrets/<scope>/openai-api-key
+-----
+"""
+import argparse
+import uuid
+
+from mlflow.deployments import get_deploy_client
+
+
+def parse_args():
+    """
+    Parse the command-line arguments of this example.
+
+    :return: An ``argparse.Namespace`` whose ``secret`` attribute holds the secret reference
+             that ``main`` wraps in ``{{...}}`` and sends as the OpenAI API key.
+    """
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        "--secret",
+        type=str,
+        # Without a value `main` would fail later on `"{{" + None`; make argparse report
+        # the missing option up front instead.
+        required=True,
+        help="Secret (e.g. secrets/scope/key)",
+    )
+    return parser.parse_args()
+
+
+def main():
+    """
+    Walk through the endpoint lifecycle on Databricks.
+
+    Creates a uniquely named endpoint, then updates it, lists endpoints, fetches it and
+    queries it. The endpoint is deleted afterwards even if one of those steps fails.
+    """
+    args = parse_args()
+    client = get_deploy_client("databricks")
+    name = f"test-endpoint-{uuid.uuid4()}"
+
+    def served_entities():
+        # Build a fresh structure per request so the two payloads share no mutable state.
+        return [
+            {
+                "name": "test",
+                "external_model": {
+                    "name": "gpt-4",
+                    "provider": "openai",
+                    "task": "llm/v1/chat",
+                    "openai_config": {
+                        "openai_api_key": "{{" + args.secret + "}}",
+                    },
+                },
+            }
+        ]
+
+    def rate_limits(calls):
+        # Per-user limit of `calls` requests per minute.
+        return [
+            {
+                "key": "user",
+                "renewal_period": "minute",
+                "calls": calls,
+            }
+        ]
+
+    initial_config = {
+        "served_entities": served_entities(),
+        "tags": [
+            {
+                "key": "foo",
+                "value": "bar",
+            }
+        ],
+        "rate_limits": rate_limits(5),
+    }
+    client.create_endpoint(name=name, config=initial_config)
+    try:
+        updated_config = {
+            "served_entities": served_entities(),
+            "rate_limits": rate_limits(10),
+        }
+        client.update_endpoint(endpoint=name, config=updated_config)
+        print(client.list_endpoints()[:3])
+        print(client.get_endpoint(endpoint=name))
+        chat_request = {
+            "messages": [
+                {"role": "user", "content": "Hello!"},
+            ],
+            "max_tokens": 128,
+        }
+        print(client.predict(endpoint=name, inputs=chat_request))
+    finally:
+        # Clean up the test endpoint regardless of how the steps above went.
+        client.delete_endpoint(endpoint=name)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/mlflow/deployments/__init__.py b/mlflow/deployments/__init__.py
@@ -16,6 +16,7 @@
 import json
 
 from mlflow.deployments.base import BaseDeploymentClient
+from mlflow.deployments.databricks import DatabricksDeploymentClient  # noqa: F401
 from mlflow.deployments.interface import get_deploy_client, run_local
 from mlflow.exceptions import MlflowException
 from mlflow.protos.databricks_pb2 import INVALID_PARAMETER_VALUE
@@ -87,4 +88,10 @@ def from_json(cls, json_str):
         return PredictionsResponse(parsed_response)
 
 
-__all__ = ["get_deploy_client", "run_local", "BaseDeploymentClient", "PredictionsResponse"]
+__all__ = [
+    "get_deploy_client",
+    "run_local",
+    "BaseDeploymentClient",
+    "DatabricksDeploymentClient",
+    "PredictionsResponse",
+]
diff --git a/mlflow/deployments/constants.py b/mlflow/deployments/constants.py
@@ -0,0 +1,21 @@
+from mlflow.environment_variables import _EnvironmentVariable
+
+# TODO: Move this to mlflow.environment_variables before merging to master
+# Specifies the timeout for deployment client APIs to declare a request has timed out
+# NOTE(review): the arguments look like (variable name, value type, default), i.e. an
+# integer defaulting to 120, presumably in seconds -- confirm against `_EnvironmentVariable`.
+MLFLOW_DEPLOYMENT_PREDICT_TIMEOUT = _EnvironmentVariable(
+    "MLFLOW_DEPLOYMENT_PREDICT_TIMEOUT", int, 120
+)
+
+# HTTP status codes that deployments clients treat as retryable.
+# This is a reduced version of the standard MLflow Tracking server retry codes used by
+# MLflowClient: timeouts are deliberately left out. When a proxied provider keeps timing out
+# across retries, the usual cause is the query itself, or a served model that cannot answer
+# it because of its parameter configuration, so retrying would not help.
+MLFLOW_DEPLOYMENT_CLIENT_REQUEST_RETRY_CODES = frozenset(
+    (
+        429,  # Too many requests
+        500,  # Server Error
+        502,  # Bad Gateway
+        503,  # Service Unavailable
+    )
+)
diff --git a/mlflow/deployments/databricks/__init__.py b/mlflow/deployments/databricks/__init__.py
@@ -0,0 +1,147 @@
+import posixpath
+from typing import Any, Dict, Optional
+
+from mlflow.deployments import BaseDeploymentClient
+from mlflow.deployments.constants import (
+    MLFLOW_DEPLOYMENT_CLIENT_REQUEST_RETRY_CODES,
+    MLFLOW_DEPLOYMENT_PREDICT_TIMEOUT,
+)
+from mlflow.environment_variables import MLFLOW_HTTP_REQUEST_TIMEOUT
+from mlflow.utils import AttrDict
+from mlflow.utils.databricks_utils import get_databricks_host_creds
+from mlflow.utils.rest_utils import augmented_raise_for_status, http_request
+
+
+class DatabricksEndpoint(AttrDict):
+    """
+    Decoded JSON response of a Databricks serving-endpoints request.
+
+    Adds nothing to ``AttrDict``; it only gives the values returned by
+    ``DatabricksDeploymentClient`` a dedicated type.
+    """
+
+
+class DatabricksDeploymentClient(BaseDeploymentClient):
+    """
+    Client for managing and querying Databricks serving endpoints.
+
+    Every request goes through ``_call_endpoint``, which calls the ``serving-endpoints`` REST
+    resource using the host credentials resolved from ``self.target_uri``. The deployment-level
+    APIs (``create_deployment``, ``update_deployment``, ``delete_deployment``,
+    ``list_deployments``, ``get_deployment``) are not implemented; use the endpoint-level APIs.
+    """
+
+    def create_deployment(self, name, model_uri, flavor=None, config=None, endpoint=None):
+        """
+        .. warning::
+
+            This method is not implemented for `DatabricksDeploymentClient`.
+        """
+        raise NotImplementedError
+
+    def update_deployment(self, name, model_uri=None, flavor=None, config=None, endpoint=None):
+        """
+        .. warning::
+
+            This method is not implemented for `DatabricksDeploymentClient`.
+        """
+        raise NotImplementedError
+
+    def delete_deployment(self, name, config=None, endpoint=None):
+        """
+        .. warning::
+
+            This method is not implemented for `DatabricksDeploymentClient`.
+        """
+        raise NotImplementedError
+
+    def list_deployments(self, endpoint=None):
+        """
+        .. warning::
+
+            This method is not implemented for `DatabricksDeploymentClient`.
+        """
+        raise NotImplementedError
+
+    def get_deployment(self, name, endpoint=None):
+        """
+        .. warning::
+
+            This method is not implemented for `DatabricksDeploymentClient`.
+        """
+        raise NotImplementedError
+
+    def _call_endpoint(
+        self,
+        *,
+        method: str,
+        prefix: str = "/api/2.0",
+        route: Optional[str] = None,
+        json_body: Optional[Dict[str, Any]] = None,
+        timeout: Optional[int] = None,
+    ):
+        """
+        Send a request to ``<prefix>/serving-endpoints/<route>`` and wrap the JSON response.
+
+        :param method: HTTP method name, e.g. ``"GET"`` or ``"POST"``.
+        :param prefix: Path prefix placed in front of ``serving-endpoints``.
+        :param route: Path below ``serving-endpoints``. ``None`` targets the collection itself.
+        :param json_body: Request payload. Sent as query parameters for ``GET`` requests and as
+                          a JSON body for every other method.
+        :param timeout: Timeout handed to ``http_request``. ``None`` (the default) means the
+                        value of ``MLFLOW_HTTP_REQUEST_TIMEOUT`` at the time of the call.
+        :return: A ``DatabricksEndpoint`` built from the decoded JSON response.
+        """
+        if timeout is None:
+            # Resolve the default per call. A `.get()` in the signature runs only once, when
+            # the class body is executed, so that value would be used for every later call.
+            timeout = MLFLOW_HTTP_REQUEST_TIMEOUT.get()
+
+        # GET requests carry the payload in the query string; other methods send it as JSON.
+        body_kwarg = "params" if method.lower() == "get" else "json"
+
+        response = http_request(
+            host_creds=get_databricks_host_creds(self.target_uri),
+            endpoint=posixpath.join(prefix, "serving-endpoints", route or ""),
+            method=method,
+            timeout=timeout,
+            # Status handling is left to `augmented_raise_for_status` below.
+            raise_on_status=False,
+            retry_codes=MLFLOW_DEPLOYMENT_CLIENT_REQUEST_RETRY_CODES,
+            **{body_kwarg: json_body},
+        )
+        augmented_raise_for_status(response)
+        return DatabricksEndpoint(response.json())
+
+    def predict(self, deployment_name=None, inputs=None, endpoint=None):
+        """
+        Query a serving endpoint.
+
+        POSTs ``inputs`` to ``/serving-endpoints/<endpoint>/invocations`` using the
+        ``MLFLOW_DEPLOYMENT_PREDICT_TIMEOUT`` timeout.
+
+        :param deployment_name: Unused by this client.
+        :param inputs: JSON-serializable request payload.
+        :param endpoint: Name of the endpoint to query. Required.
+        :return: A ``DatabricksEndpoint`` holding the endpoint's response.
+        :raises TypeError: If ``endpoint`` is ``None``.
+        """
+        if endpoint is None:
+            # `posixpath.join(None, ...)` would raise a TypeError that does not name the
+            # argument; raise the same exception type with an actionable message.
+            raise TypeError("`endpoint` must be specified to call `predict`")
+        return self._call_endpoint(
+            method="POST",
+            prefix="/",
+            route=posixpath.join(endpoint, "invocations"),
+            json_body=inputs,
+            timeout=MLFLOW_DEPLOYMENT_PREDICT_TIMEOUT.get(),
+        )
+
+    def create_endpoint(self, name, config=None):
+        """
+        Create a serving endpoint.
+
+        ``tags`` and ``rate_limits`` entries of ``config`` are moved to the top level of the
+        request payload, next to ``name`` and ``config``, when they are non-empty; empty ones
+        are dropped. The caller's ``config`` dict itself is not modified.
+
+        :param name: Name of the endpoint to create.
+        :param config: Endpoint configuration dict, optionally containing ``tags`` and
+                       ``rate_limits``.
+        :return: A ``DatabricksEndpoint`` holding the response of the create request.
+        """
+        config = config.copy() if config else {}  # shallow copy: keys are popped below
+        extras = {}
+        for key in ("tags", "rate_limits"):
+            if value := config.pop(key, None):
+                extras[key] = value
+        payload = {"name": name, "config": config, **extras}
+        return self._call_endpoint(method="POST", json_body=payload)
+
+    def update_endpoint(self, endpoint, config=None):
+        """
+        Update a serving endpoint by PUTting ``config`` to ``<endpoint>/config``.
+
+        :param endpoint: Name of the endpoint to update.
+        :param config: New endpoint configuration, sent unchanged as the JSON body.
+        :return: A ``DatabricksEndpoint`` holding the response of the update request.
+        """
+        return self._call_endpoint(
+            method="PUT", route=posixpath.join(endpoint, "config"), json_body=config
+        )
+
+    def delete_endpoint(self, endpoint):
+        """
+        Delete a serving endpoint.
+
+        :param endpoint: Name of the endpoint to delete.
+        :return: A ``DatabricksEndpoint`` holding the response of the delete request.
+        """
+        return self._call_endpoint(method="DELETE", route=endpoint)
+
+    def list_endpoints(self):
+        """
+        List serving endpoints.
+
+        :return: The ``endpoints`` entry of the list response.
+        """
+        return self._call_endpoint(method="GET").endpoints
+
+    def get_endpoint(self, endpoint):
+        """
+        Fetch a single serving endpoint.
+
+        :param endpoint: Name of the endpoint to fetch.
+        :return: A ``DatabricksEndpoint`` describing the endpoint.
+        """
+        return self._call_endpoint(method="GET", route=endpoint)
+
+
+def run_local(name, model_uri, flavor=None, config=None):
+    """
+    Placeholder: does nothing with its arguments and returns ``None``.
+
+    NOTE(review): presumably present because deployment target modules are expected to
+    expose a module-level ``run_local`` -- confirm against the plugin interface.
+    """
+    return None
+
+
+def target_help():
+    """
+    Placeholder: returns ``None`` instead of a help text.
+
+    NOTE(review): presumably present because deployment target modules are expected to
+    expose a module-level ``target_help`` -- confirm against the plugin interface.
+    """
+    return None