Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view

Large diffs are not rendered by default.

12 changes: 12 additions & 0 deletions serving/mlflow-kserve-inference-protocols/v1-InferenceService.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# KServe InferenceService using the v1 (legacy) inference protocol.
# Serves an sklearn-format model fetched from the MLflow model registry.
apiVersion: "serving.kserve.io/v1beta1"
kind: "InferenceService"
metadata:
  name: "v1-mobile-price-classification-inference"
  # Replace with your own Kubeflow namespace / workspace.
  namespace: "<workspace-name>"
spec:
  predictor:
    model:
      modelFormat:
        # Built-in sklearn runtime; no custom container image required.
        name: sklearn
      # v1 protocol: POST /v1/models/<name>:predict with an "instances" body.
      protocolVersion: v1
      # mlflow:// scheme fetches the artifact from the MLflow registry;
      # the trailing /1 selects version 1 of the registered model.
      storageUri: "mlflow://models/mobile-price-svm-<username>/1"
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
{
"instances": [
[
0.362057448229793, 1.0, 0.52, 1.0, 0.7368421052631579, 0.0,
0.04838709677419355, 0.0, 0.9416666666666668, 0.2857142857142857, 0.8,
0.1153061224489796, 0.6088117489986649, 0.860502405130946, 0.5,
0.38888888888888884, 0.0, 0.0, 1.0, 0.0
],
[
0.22712090848363398, 1.0, 0.0, 1.0, 0.21052631578947367, 1.0,
0.9516129032258065, 0.7777777777777778, 0.9249999999999999,
0.5714285714285714, 0.6000000000000001, 0.3806122448979592,
0.23831775700934577, 0.9724746125066808, 0.07142857142857145, 0.0,
0.27777777777777773, 1.0, 0.0, 0.0
],
[
0.8724114896459587, 1.0, 0.9199999999999999, 0.0, 0.05263157894736842,
0.0, 0.4032258064516129, 0.8888888888888888, 0.8833333333333334,
0.2857142857142857, 0.2, 0.6479591836734695, 0.5781041388518023,
0.5718866916087653, 0.8571428571428571, 0.5555555555555556,
0.4444444444444445, 0.0, 1.0, 1.0
],
[
0.6980627922511691, 0.0, 0.0, 1.0, 0.9473684210526315, 1.0,
0.3709677419354839, 0.4444444444444445, 0.13333333333333341, 1.0, 1.0,
0.15051020408163265, 0.835781041388518, 0.9719401389631213,
0.3571428571428571, 0.0, 0.27777777777777773, 1.0, 1.0, 0.0
],
[
0.623246492985972, 0.0, 0.35999999999999993, 0.0, 0.5789473684210527, 1.0,
0.7580645161290323, 0.4444444444444445, 0.2333333333333334,
0.7142857142857142, 0.9, 0.3821428571428572, 0.20694259012016014,
0.4053981827899519, 0.7142857142857143, 0.4444444444444444,
0.27777777777777773, 1.0, 0.0, 1.0
]
]
}
12 changes: 12 additions & 0 deletions serving/mlflow-kserve-inference-protocols/v2-InferenceService.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# KServe InferenceService using the v2 (open) inference protocol.
# Serves an MLflow-format model fetched from the MLflow model registry.
apiVersion: "serving.kserve.io/v1beta1"
kind: "InferenceService"
metadata:
  name: "v2-mobile-price-classification-inference"
  # Replace with your own Kubeflow namespace / workspace.
  namespace: "<workspace-name>"
spec:
  predictor:
    model:
      modelFormat:
        # Built-in MLflow runtime; no custom container image required.
        name: mlflow
      # v2 protocol: POST /v2/models/<name>/infer with an "inputs" body.
      protocolVersion: v2
      # mlflow:// scheme fetches the artifact from the MLflow registry;
      # the trailing /1 selects version 1 of the registered model.
      storageUri: "mlflow://models/mobile-price-svm-<username>/1"
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
{
"inputs": [
{
"name": "battery_power",
"shape": [5],
"datatype": "FP64",
"data": [0.362057448229793, 0.22712090848363398, 0.8724114896459587, 0.6980627922511691, 0.623246492985972]
},
{
"name": "blue",
"shape": [5],
"datatype": "FP64",
"data": [1.0, 1.0, 1.0, 0.0, 0.0]
},
{
"name": "clock_speed",
"shape": [5],
"datatype": "FP64",
"data": [0.52, 0.0, 0.9199999999999999, 0.0, 0.35999999999999993]
},
{
"name": "dual_sim",
"shape": [5],
"datatype": "FP64",
"data": [1.0, 1.0, 0.0, 1.0, 0.0]
},
{
"name": "fc",
"shape": [5],
"datatype": "FP64",
"data": [0.7368421052631579, 0.21052631578947367, 0.05263157894736842, 0.9473684210526315, 0.5789473684210527]
},
{
"name": "four_g",
"shape": [5],
"datatype": "FP64",
"data": [0.0, 1.0, 0.0, 1.0, 1.0]
},
{
"name": "int_memory",
"shape": [5],
"datatype": "FP64",
"data": [0.04838709677419355, 0.9516129032258065, 0.4032258064516129, 0.3709677419354839, 0.7580645161290323]
},
{
"name": "m_dep",
"shape": [5],
"datatype": "FP64",
"data": [0.0, 0.7777777777777778, 0.8888888888888888, 0.4444444444444445, 0.4444444444444445]
},
{
"name": "mobile_wt",
"shape": [5],
"datatype": "FP64",
"data": [0.9416666666666668, 0.9249999999999999, 0.8833333333333334, 0.13333333333333341, 0.2333333333333334]
},
{
"name": "n_cores",
"shape": [5],
"datatype": "FP64",
"data": [0.2857142857142857, 0.5714285714285714, 0.2857142857142857, 1.0, 0.7142857142857142]
},
{
"name": "pc",
"shape": [5],
"datatype": "FP64",
"data": [0.8, 0.6000000000000001, 0.2, 1.0, 0.9]
},
{
"name": "px_height",
"shape": [5],
"datatype": "FP64",
"data": [0.1153061224489796, 0.3806122448979592, 0.6479591836734695, 0.15051020408163265, 0.3821428571428572]
},
{
"name": "px_width",
"shape": [5],
"datatype": "FP64",
"data": [0.6088117489986649, 0.23831775700934577, 0.5781041388518023, 0.835781041388518, 0.20694259012016014]
},
{
"name": "ram",
"shape": [5],
"datatype": "FP64",
"data": [0.860502405130946, 0.9724746125066808, 0.5718866916087653, 0.9719401389631213, 0.4053981827899519]
},
{
"name": "sc_h",
"shape": [5],
"datatype": "FP64",
"data": [0.5, 0.07142857142857145, 0.8571428571428571, 0.3571428571428571, 0.7142857142857143]
},
{
"name": "sc_w",
"shape": [5],
"datatype": "FP64",
"data": [0.38888888888888884, 0.0, 0.5555555555555556, 0.0, 0.4444444444444444]
},
{
"name": "talk_time",
"shape": [5],
"datatype": "FP64",
"data": [0.0, 0.27777777777777773, 0.4444444444444445, 0.27777777777777773, 0.27777777777777773]
},
{
"name": "three_g",
"shape": [5],
"datatype": "FP64",
"data": [0.0, 1.0, 0.0, 1.0, 1.0]
},
{
"name": "touch_screen",
"shape": [5],
"datatype": "FP64",
"data": [1.0, 0.0, 1.0, 1.0, 0.0]
},
{
"name": "wifi",
"shape": [5],
"datatype": "FP64",
"data": [0.0, 0.0, 1.0, 0.0, 1.0]
}
]
}
12 changes: 12 additions & 0 deletions serving/mlflow-kserve-minimal/InferenceService.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# Minimal KServe InferenceService for an MLflow-registered model (v2 protocol).
# Replace every <placeholder> below before applying; see the README for details.
apiVersion: "serving.kserve.io/v1beta1"
kind: "InferenceService"
metadata:
  # A name of your choosing, e.g. "mobile-price-svm".
  name: "<inference-name>"
  # Your Kubeflow namespace / workspace.
  namespace: "<workspace-name>"
spec:
  predictor:
    model:
      modelFormat:
        # Built-in MLflow runtime; no custom container image required.
        name: mlflow
      protocolVersion: v2
      # mlflow:// scheme fetches the artifact from the MLflow registry;
      # the trailing /1 selects version 1 of the registered model.
      storageUri: "mlflow://models/mobile-price-svm-<your-user>/1"
81 changes: 81 additions & 0 deletions serving/mlflow-kserve-minimal/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
# Minimal MLflow Model Inference

This directory shows how to deploy an MLflow-tracked model as a KServe
InferenceService using the v2 inference protocol. It uses the built-in
`mlflow` model format in KServe, so no custom container image is required.

## Prerequisites

- A model trained and registered in MLflow. See the
[mobile price classification MLflow example](../../mlflow/mobile-price-classification/)
for how to train and register the SVM model used here.
- `kubectl` access to your prokube namespace (`kubectl` is already installed in a pk-notebook)
- Python with the `requests` package installed (for testing, already installed in a pk-notebook)

## Deploy the InferenceService

1. Open `InferenceService.yaml` and replace the placeholder values:

| Placeholder | Description |
|---|---|
| `<inference-name>` | A name for your InferenceService (e.g. `mobile-price-svm`) |
| `<workspace-name>` | Your Kubeflow namespace / workspace |
| `<your-user>` | Your username, matching the model name registered in MLflow |

You can also adjust the model version number at the end of the `storageUri`
(e.g. `/2` refers to version 2 of the registered model).

The `storageUri` uses the `mlflow://` scheme, which tells KServe to fetch
the model artifact directly from the MLflow model registry.

> [!WARNING]
> You need an MLflow ClusterStorageContainer in order to use the
> `mlflow://` scheme (available on prokube platform versions >= 1.7.0).

2. Apply the manifest:
```sh
kubectl apply -f InferenceService.yaml -n <your-namespace>
```

3. Wait for the InferenceService to become ready. You can check the status in
the Kubeflow Endpoints UI or via:
```sh
kubectl get inferenceservice -n <your-namespace>
```

## Test the Deployment

A test script and sample request body are provided to verify the deployment.

1. Set the environment variables (optional — the script prompts for any that are missing):
```sh
export API_KEY=<your-api-key>
export INFERENCE_SERVICE_URI=<your-inference-service-url>
export PROTOCOL_VERSION=v2
```
You can find the inference service URL in the Kubeflow Endpoints UI. If you
don't know your API key, reach out to your prokube admin.

2. Run the test script:
```sh
python test_inference_service.py \
--model <inference-name> \
--json v2-mlflow-inference-body.json
```

The script sends the sample request to the deployed model and prints the
response.

If `API_KEY` or `INFERENCE_SERVICE_URI` are not set as environment
variables, the script will prompt you for them interactively.

## Request Body Format

The provided `v2-mlflow-inference-body.json` follows the
[v2 inference protocol](https://kserve.github.io/website/latest/modelserving/data_plane/v2_protocol/).
Each feature is specified as a separate input with a name, shape, datatype, and
data array. The sample contains 5 data points across 20 features from the
mobile price classification dataset.

The test script also supports the v1 protocol. To use it, set
`PROTOCOL_VERSION=v1` and provide a v1-formatted request body.
50 changes: 50 additions & 0 deletions serving/mlflow-kserve-minimal/test_inference_service.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
# Test the deployed InferenceService.
# The deployed service is protected by an API Key.
import argparse
import json as jsonlib
import os
from getpass import getpass

import requests

parser = argparse.ArgumentParser(description="Test the deployed InferenceService.")
parser.add_argument(
"--json",
"-j",
required=True,
help="Path to the JSON file containing the request body.",
)
parser.add_argument(
"--model",
"-m",
required=True,
help="Model name to target.",
)
args = parser.parse_args()

INFERENCE_SERVICE_API_KEY = os.getenv("API_KEY")
INFERENCE_SERVICE_URI = os.getenv("INFERENCE_SERVICE_URI")
PROTOCOL_VERSION = os.getenv("PROTOCOL_VERSION", "v2")
INFERENCE_SERVICE_NAME = args.model
JSON_FILE_PATH = args.json

if not INFERENCE_SERVICE_API_KEY:
INFERENCE_SERVICE_API_KEY = getpass(prompt="Please enter your API key: ")
if not INFERENCE_SERVICE_URI:
INFERENCE_SERVICE_URI = input("Please enter the external inference URI: ")

# Read the JSON body from the provided file path
with open(JSON_FILE_PATH, "r") as f:
request_body = jsonlib.load(f)

if PROTOCOL_VERSION == "v2":
url = f"{INFERENCE_SERVICE_URI}/{PROTOCOL_VERSION}/models/{INFERENCE_SERVICE_NAME}/infer"
else:
url = f"{INFERENCE_SERVICE_URI}/{PROTOCOL_VERSION}/models/{INFERENCE_SERVICE_NAME}:predict"

response = requests.post(
url,
headers={"X-Api-Key": INFERENCE_SERVICE_API_KEY},
json=request_body,
)
print(response.json())
Loading