Migrate AI gateway (#10420)
Signed-off-by: harupy <17039389+harupy@users.noreply.github.com>
Signed-off-by: Harutaka Kawamura <hkawamura0130@gmail.com>
Signed-off-by: dbczumar <corey.zumar@databricks.com>
Signed-off-by: Corey Zumar <39497902+dbczumar@users.noreply.github.com>
Signed-off-by: mlflow-automation <mlflow-automation@users.noreply.github.com>
Signed-off-by: B-Step62 <yuki.watanabe@databricks.com>
Signed-off-by: Sunish Sheth <sunishsheth2009@gmail.com>
Signed-off-by: Daniel Lok <daniel.lok@databricks.com>
Signed-off-by: Prithvi Kannan <prithvi.kannan@databricks.com>
Co-authored-by: Corey Zumar <39497902+dbczumar@users.noreply.github.com>
Co-authored-by: mlflow-automation <mlflow-automation@users.noreply.github.com>
Co-authored-by: Yuki Watanabe <31463517+B-Step62@users.noreply.github.com>
Co-authored-by: Sunish Sheth <sunishsheth2009@gmail.com>
Co-authored-by: Prithvi Kannan <46332835+prithvikannan@users.noreply.github.com>
Co-authored-by: Ben Wilson <39283302+BenWilson2@users.noreply.github.com>
Co-authored-by: Daniel Lok <daniel.lok@databricks.com>
Co-authored-by: Prithvi Kannan <prithvi.kannan@databricks.com>
9 people committed Dec 5, 2023
1 parent 66616de commit 2ef3a13
Showing 149 changed files with 7,570 additions and 2,225 deletions.
4 changes: 2 additions & 2 deletions .github/ISSUE_TEMPLATE/bug_report_template.yaml
@@ -201,12 +201,12 @@ body:
         required: false
+      - label: "`area/deployments`: MLflow Deployments client APIs, server, and third-party Deployments integrations"
+        required: false
       - label: "`area/docs`: MLflow documentation pages"
         required: false
       - label: "`area/examples`: Example code"
         required: false
-      - label: "`area/gateway`: AI Gateway service, Gateway client APIs, third-party Gateway integrations"
-        required: false
       - label: "`area/model-registry`: Model Registry service, APIs, and the fluent client calls for Model Registry"
         required: false
       - label: "`area/models`: MLmodel format, model serialization/deserialization, flavors"
4 changes: 2 additions & 2 deletions .github/ISSUE_TEMPLATE/feature_request_template.yaml
@@ -62,12 +62,12 @@ body:
         required: false
+      - label: "`area/deployments`: MLflow Deployments client APIs, server, and third-party Deployments integrations"
+        required: false
       - label: "`area/docs`: MLflow documentation pages"
         required: false
       - label: "`area/examples`: Example code"
         required: false
-      - label: "`area/gateway`: AI Gateway service, Gateway client APIs, third-party Gateway integrations"
-        required: false
       - label: "`area/model-registry`: Model Registry service, APIs, and the fluent client calls for Model Registry"
         required: false
       - label: "`area/models`: MLmodel format, model serialization/deserialization, flavors"
2 changes: 1 addition & 1 deletion .github/pull_request_template.md
@@ -38,9 +38,9 @@ Components
 
 - [ ] `area/artifacts`: Artifact stores and artifact logging
 - [ ] `area/build`: Build and test infrastructure for MLflow
+- [ ] `area/deployments`: MLflow Deployments client APIs, server, and third-party Deployments integrations
 - [ ] `area/docs`: MLflow documentation pages
 - [ ] `area/examples`: Example code
-- [ ] `area/gateway`: AI Gateway service, Gateway client APIs, third-party Gateway integrations
 - [ ] `area/model-registry`: Model Registry service, APIs, and the fluent client calls for Model Registry
 - [ ] `area/models`: MLmodel format, model serialization/deserialization, flavors
 - [ ] `area/recipes`: Recipes, Recipe APIs, Recipe configs, Recipe Templates
37 changes: 37 additions & 0 deletions .github/workflows/deployments.yml
@@ -0,0 +1,37 @@
name: Deployments

on:
  pull_request:
  push:
    branches:
      - master
      - branch-[0-9]+.[0-9]+

permissions:
  contents: read

concurrency:
  group: ${{ github.workflow }}-${{ github.event_name }}-${{ github.ref }}
  cancel-in-progress: true

defaults:
  run:
    shell: bash --noprofile --norc -exo pipefail {0}

jobs:
  deployments:
    if: github.event_name != 'pull_request' || github.event.pull_request.draft == false
    runs-on: ubuntu-latest
    timeout-minutes: 30
    steps:
      - uses: actions/checkout@v3
      - uses: ./.github/actions/untracked
      - uses: ./.github/actions/setup-python
      - name: Install dependencies
        run: |
          pip install --no-dependencies tests/resources/mlflow-test-plugin
          pip install .[gateway] \
            pytest pytest-timeout pytest-asyncio httpx psutil sentence-transformers transformers
      - name: Run tests
        run: |
          pytest tests/deployments
4 changes: 2 additions & 2 deletions .github/workflows/master.yml
@@ -86,7 +86,7 @@ jobs:
           source dev/setup-ssh.sh
           pytest --splits=${{ matrix.splits }} --group=${{ matrix.group }} --quiet --requires-ssh \
             --ignore-flavors --ignore=tests/examples --ignore=tests/recipes --ignore=tests/evaluate \
-            tests
+            --ignore tests/deployments/server tests
   database:
     if: github.event_name != 'pull_request' || github.event.pull_request.draft == false
@@ -344,7 +344,7 @@ jobs:
           export PATH=$PATH:$HADOOP_HOME/bin
           # Run Windows tests
           pytest --splits=${{ matrix.splits }} --group=${{ matrix.group }} \
-            --ignore-flavors --ignore=tests/projects --ignore=tests/examples --ignore=tests/recipes --ignore=tests/evaluate \
+            --ignore-flavors --ignore=tests/projects --ignore=tests/examples --ignore=tests/recipes --ignore=tests/evaluate --ignore tests/deployments/server \
             tests
       # MLeap is incompatible on Windows with PySpark3.4 release.
       # Reinstate tests when MLeap has released a fix. [ML-30491]
2 changes: 1 addition & 1 deletion dev/mlflow-typo.sh
@@ -1,6 +1,6 @@
 #!/usr/bin/env bash
 
-if grep -nP '\bM(lf|LF|lF)low\b' "$@"; then
+if grep -nP '(?<!import\s)\bM(lf|LF|lF)low\b(?!\()' "$@"; then
   exit 1
 else
   exit 0
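
The tightened pattern adds a lookbehind and a lookahead so legitimate identifiers no longer trip the typo check: occurrences preceded by `import ` or followed by `(` are skipped, while the capitalization typos (`Mlflow`, `MLFlow`, `MlFlow`) still fail. A quick sanity check of this behavior, as a sketch with hypothetical sample lines (not taken from the repository; Python's re engine accepts the same lookaround syntax as grep -P):

    import re

    # Same pattern as the updated grep -P call above.
    PATTERN = re.compile(r"(?<!import\s)\bM(lf|LF|lF)low\b(?!\()")

    samples = {
        "Mlflow is an ML platform": True,    # flagged: capitalization typo in prose
        "import Mlflow": False,              # allowed: lookbehind skips imports
        "Mlflow(tracking_uri)": False,       # allowed: lookahead skips call syntax
        "MLflow is an ML platform": False,   # allowed: correct capitalization
    }

    for line, should_flag in samples.items():
        assert bool(PATTERN.search(line)) == should_flag, line
    print("pattern behaves as expected")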
6 changes: 3 additions & 3 deletions dev/server.py
@@ -22,8 +22,8 @@ def main():
             sys.executable,
             "-m",
             "mlflow",
-            "gateway",
-            "start",
+            "deployments",
+            "start-server",
             "--config-path",
             "examples/gateway/openai/config.yaml",
             "--host",
@@ -41,7 +41,7 @@
         ],
         env={
             **os.environ,
-            "MLFLOW_GATEWAY_URI": f"http://{gateway_host}:{gateway_port}",
+            "MLFLOW_DEPLOYMENTS_TARGET": f"http://{gateway_host}:{gateway_port}",
         },
     ) as server, subprocess.Popen(
         [
4 changes: 2 additions & 2 deletions docs/gateway_api_docs.py
@@ -21,7 +21,7 @@
       rel="shortcut icon"
       href="../_static/favicon.ico"
     />
-    <title>MLflow Gateway API - Swagger UI</title>
+    <title>MLflow Deployments Server - Swagger UI</title>
   </head>
   <body>
     <div id="swagger-ui"></div>
@@ -80,7 +80,7 @@ def main():
     config_path.write_text(config)
 
     app = create_app_from_path(config_path)
-    docs_build = Path("build/html/llms/gateway")
+    docs_build = Path("build/html/llms/deployments")
     docs_build.mkdir(parents=True, exist_ok=True)
     with docs_build.joinpath("openapi.json").open("w") as f:
         json.dump(app.openapi(), f)
Binary file modified docs/source/_static/images/prompt_modal_1.png
1 change: 1 addition & 0 deletions docs/source/conf.py
@@ -343,6 +343,7 @@
     ("py:class", "plotly.graph_objects.Figure"),
     ("py:class", "PIL.Image.Image"),
     ("py:class", "mlflow.deployments.base.BaseDeploymentClient"),
+    ("py:class", "mlflow.deployments.server.config.Endpoint"),
     ("py:class", "mlflow.types.schema.DataType"),
     ("py:class", "mlflow.types.schema.ColSpec"),
     ("py:class", "mlflow.types.schema.TensorSpec"),
8 changes: 4 additions & 4 deletions docs/source/index.rst
@@ -73,7 +73,7 @@ Getting Started Guides and Quickstarts
 `LLMs <llms/index.html>`_
 -------------------------
 
-Explore the comprehensive LLM-focused native support in MLflow. From **MLflow AI Gateway** to the **Prompt Engineering UI** and native LLM-focused MLflow flavors like
+Explore the comprehensive LLM-focused native support in MLflow. From **MLflow Deployments for LLMs** to the **Prompt Engineering UI** and native LLM-focused MLflow flavors like
 **open-ai**, **transformers**, and **sentence-transformers**, the tutorials and guides here will help to get you started in leveraging the
 benefits of these powerful natural language deep learning models.
 You'll learn how MLflow simplifies both using LLMs and developing solutions that leverage LLMs. Important tasks such as prompt development, evaluation of prompts, comparison of
@@ -100,12 +100,12 @@ LLM Guides and Tutorials
             </a>
         </div>
         <div class="simple-card">
-            <a href="llms/gateway/index.html" >
+            <a href="llms/deployments/index.html" >
                 <div class="header">
-                    Guide for the MLflow AI Gateway
+                    Guide for the MLflow Deployments for LLMs
                 </div>
                 <p>
-                    Learn how to configure, setup, deploy, and use the MLflow AI Gateway for testing and production use cases of both
+                    Learn how to configure, setup, deploy, and use the MLflow Deployments for testing and production use cases of both
                     SaaS and custom open-source LLMs.
                 </p>
             </a>
4 changes: 2 additions & 2 deletions docs/source/introduction/index.rst
@@ -27,7 +27,7 @@ foundational components:
 
 * :ref:`Model Registry <registry>`: A systematic approach to model management, the Model Registry assists in handling different versions of models, discerning their current state, and ensuring smooth productionization. It offers a centralized model store, APIs, and UI to collaboratively manage an MLflow Model's full lifecycle, including model lineage, versioning, aliasing, tagging, and annotations.
 
-* :ref:`AI Gateway <gateway>`: This server, equipped with a set of standardized APIs, streamlines access to both SaaS and OSS LLM models. It serves as a unified interface, bolstering security through authenticated access, and offers a common set of APIs for prominent LLMs.
+* :ref:`MLflow Deployments for LLMs <deployments>`: This server, equipped with a set of standardized APIs, streamlines access to both SaaS and OSS LLM models. It serves as a unified interface, bolstering security through authenticated access, and offers a common set of APIs for prominent LLMs.
 
 * :ref:`Evaluate <model-evaluation>`: Designed for in-depth model analysis, this set of tools facilitates objective model comparison, be it traditional ML algorithms or cutting-edge LLMs.
 
@@ -59,7 +59,7 @@ MLflow addresses these challenges by offering a unified platform tailored for th
 
 - **Traceability**: With tools like the Tracking Server, every experiment is logged, ensuring that teams can trace back and understand the evolution of models.
 
-- **Consistency**: Be it accessing models through the AI Gateway or structuring projects with MLflow Recipes, MLflow promotes a consistent approach, reducing both the learning curve and potential errors.
+- **Consistency**: Be it accessing models through the MLflow Deployments for LLMs or structuring projects with MLflow Recipes, MLflow promotes a consistent approach, reducing both the learning curve and potential errors.
 
 - **Flexibility**: MLflow's library-agnostic design ensures compatibility with a wide range of machine learning libraries. It offers comprehensive support across different programming languages, backed by a robust :ref:`rest-api`, :ref:`CLI<cli>`, and APIs for :ref:`python-api`, :ref:`R-api`, and :ref:`java_api`.
26 changes: 26 additions & 0 deletions docs/source/llms/deployments/guides/index.rst
@@ -0,0 +1,26 @@
Getting Started with MLflow Deployments for LLMs
================================================

MLflow provides a robust framework for deploying and managing machine learning models. In this tutorial, we will explore how to set up an
MLflow Deployments Server tailored for OpenAI's models, allowing seamless integration and querying of OpenAI's powerful language models.

What's in this tutorial?

This guide will cover:

- **Installation**: Setting up the necessary dependencies and tools to get your MLflow Deployments Server up and running.

- **Configuration**: How to expose your OpenAI token, configure the deployments server, and define routes for various OpenAI models.

- **Starting the deployments server**: Launching the deployments server and ensuring it's operational.

- **Querying the deployments server**: Interacting with the deployments server using fluent APIs to query various OpenAI models, including completions, chat, and embeddings.

By the end of this tutorial, you'll have a fully functional MLflow Deployments Server tailored for OpenAI, ready to handle and process requests.
You'll also gain insights into querying different types of routes, providers, and models through the deployments server.

.. toctree::
    :maxdepth: 1

    Setting Up the MLflow Deployments Server <step1-create-deployments>
    Querying the MLflow Deployments Server <step2-query-deployments>
99 changes: 99 additions & 0 deletions docs/source/llms/deployments/guides/step1-create-deployments.rst
@@ -0,0 +1,99 @@
Configuring and Starting the Deployments Server
===============================================

Step 1: Install
---------------
First, install MLflow along with the ``genai`` extras to get access to a range of serving-related
dependencies, including ``uvicorn`` and ``fastapi``. Note that direct dependencies on OpenAI are
unnecessary, as all supported providers are abstracted away from the developer.

.. code-section::

    .. code-block:: bash
        :name: install-genai

        pip install 'mlflow[genai]'

Step 2: Set the OpenAI Token as an Environment Variable
-------------------------------------------------------
Next, set the OpenAI API key as an environment variable in your CLI.

This approach allows the MLflow Deployments Server to read the sensitive API key safely, reducing the risk
of leaking the token in code. The Deployments Server, when started, will read the value set by this environment
variable without any additional action required.

.. code-section::

    .. code-block:: bash
        :name: token

        export OPENAI_API_KEY=your_api_key_here

Step 3: Configure the Deployments Server
----------------------------------------
Third, set up several routes for the Deployments Server to host. The Deployments Server is configured by
editing a YAML file that is read by the server initialization command (covered in Step 4).

Notably, the Deployments Server supports real-time configuration updates: a service restart is not required
for changes to take effect. Simply edit the YAML configuration file supplied at server start, and new routes
are created dynamically without any downtime of the service.

.. code-section::

    .. code-block:: yaml
        :name: server-config

        endpoints:
          - name: completions
            endpoint_type: llm/v1/completions
            model:
              provider: openai
              name: gpt-3.5-turbo
              config:
                openai_api_key: $OPENAI_API_KEY
          - name: chat
            endpoint_type: llm/v1/chat
            model:
              provider: openai
              name: gpt-4
              config:
                openai_api_key: $OPENAI_API_KEY
          - name: chat_3.5
            endpoint_type: llm/v1/chat
            model:
              provider: openai
              name: gpt-3.5-turbo
              config:
                openai_api_key: $OPENAI_API_KEY
          - name: embeddings
            endpoint_type: llm/v1/embeddings
            model:
              provider: openai
              name: text-embedding-ada-002
              config:
                openai_api_key: $OPENAI_API_KEY

Step 4: Start the Server
-------------------------
Fourth, let's test the deployments server!

To launch the deployments server using a YAML config file, use the deployments CLI command.

The deployments server will automatically start on ``localhost`` at port ``5000``, accessible via
the URL: ``http://localhost:5000``. To modify these default settings, use the
``mlflow deployments start-server --help`` command to view additional configuration options.

.. code-section::

    .. code-block:: bash
        :name: start-server

        mlflow deployments start-server --config-path config.yaml

.. note::
    The MLflow Deployments Server automatically creates API docs, which you can use to validate that your
    deployments server is running: go to `http://{host}:{port}` in your web browser.
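
As a preview of the querying step covered next in this tutorial, below is a minimal sketch of calling the
server with the MLflow deployments client. It assumes the server configured above is running locally on the
default port and that the ``completions`` endpoint from Step 3 is defined:

.. code-section::

    .. code-block:: python
        :name: query-sketch

        # A minimal sketch, assuming the deployments server from this guide is
        # running on localhost:5000 with the "completions" endpoint configured.
        from mlflow.deployments import get_deploy_client

        client = get_deploy_client("http://localhost:5000")
        response = client.predict(
            endpoint="completions",
            inputs={"prompt": "What is MLflow?", "max_tokens": 64},
        )
        print(response)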
