Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view

Large diffs are not rendered by default.

12 changes: 12 additions & 0 deletions serving/mlflow-kserve-inference-protocols/v1-InferenceService.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# KServe InferenceService using the v1 (legacy) inference protocol.
# Serves an sklearn-format model fetched from the MLflow model registry.
apiVersion: "serving.kserve.io/v1beta1"
kind: "InferenceService"
metadata:
  name: "v1-mobile-price-classification-inference"
  # Replace with your own Kubeflow namespace / workspace.
  namespace: "<workspace-name>"
spec:
  predictor:
    model:
      modelFormat:
        # Built-in sklearn runtime; no custom container image required.
        name: sklearn
      # v1 protocol: POST /v1/models/<name>:predict with an "instances" body.
      protocolVersion: v1
      # mlflow:// scheme fetches the artifact from the MLflow registry;
      # the trailing /1 selects version 1 of the registered model.
      storageUri: "mlflow://models/mobile-price-svm-<username>/1"
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
{
"instances": [
[
0.362057448229793, 1.0, 0.52, 1.0, 0.7368421052631579, 0.0,
0.04838709677419355, 0.0, 0.9416666666666668, 0.2857142857142857, 0.8,
0.1153061224489796, 0.6088117489986649, 0.860502405130946, 0.5,
0.38888888888888884, 0.0, 0.0, 1.0, 0.0
],
[
0.22712090848363398, 1.0, 0.0, 1.0, 0.21052631578947367, 1.0,
0.9516129032258065, 0.7777777777777778, 0.9249999999999999,
0.5714285714285714, 0.6000000000000001, 0.3806122448979592,
0.23831775700934577, 0.9724746125066808, 0.07142857142857145, 0.0,
0.27777777777777773, 1.0, 0.0, 0.0
],
[
0.8724114896459587, 1.0, 0.9199999999999999, 0.0, 0.05263157894736842,
0.0, 0.4032258064516129, 0.8888888888888888, 0.8833333333333334,
0.2857142857142857, 0.2, 0.6479591836734695, 0.5781041388518023,
0.5718866916087653, 0.8571428571428571, 0.5555555555555556,
0.4444444444444445, 0.0, 1.0, 1.0
],
[
0.6980627922511691, 0.0, 0.0, 1.0, 0.9473684210526315, 1.0,
0.3709677419354839, 0.4444444444444445, 0.13333333333333341, 1.0, 1.0,
0.15051020408163265, 0.835781041388518, 0.9719401389631213,
0.3571428571428571, 0.0, 0.27777777777777773, 1.0, 1.0, 0.0
],
[
0.623246492985972, 0.0, 0.35999999999999993, 0.0, 0.5789473684210527, 1.0,
0.7580645161290323, 0.4444444444444445, 0.2333333333333334,
0.7142857142857142, 0.9, 0.3821428571428572, 0.20694259012016014,
0.4053981827899519, 0.7142857142857143, 0.4444444444444444,
0.27777777777777773, 1.0, 0.0, 1.0
]
]
}
12 changes: 12 additions & 0 deletions serving/mlflow-kserve-inference-protocols/v2-InferenceService.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# KServe InferenceService using the v2 (open) inference protocol.
# Serves an MLflow-format model fetched from the MLflow model registry.
apiVersion: "serving.kserve.io/v1beta1"
kind: "InferenceService"
metadata:
  name: "v2-mobile-price-classification-inference"
  # Replace with your own Kubeflow namespace / workspace.
  namespace: "<workspace-name>"
spec:
  predictor:
    model:
      modelFormat:
        # Built-in MLflow runtime; no custom container image required.
        name: mlflow
      # v2 protocol: POST /v2/models/<name>/infer with an "inputs" body.
      protocolVersion: v2
      # mlflow:// scheme fetches the artifact from the MLflow registry;
      # the trailing /1 selects version 1 of the registered model.
      storageUri: "mlflow://models/mobile-price-svm-<username>/1"
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
{
"inputs": [
{
"name": "battery_power",
"shape": [5],
"datatype": "FP64",
"data": [0.362057448229793, 0.22712090848363398, 0.8724114896459587, 0.6980627922511691, 0.623246492985972]
},
{
"name": "blue",
"shape": [5],
"datatype": "FP64",
"data": [1.0, 1.0, 1.0, 0.0, 0.0]
},
{
"name": "clock_speed",
"shape": [5],
"datatype": "FP64",
"data": [0.52, 0.0, 0.9199999999999999, 0.0, 0.35999999999999993]
},
{
"name": "dual_sim",
"shape": [5],
"datatype": "FP64",
"data": [1.0, 1.0, 0.0, 1.0, 0.0]
},
{
"name": "fc",
"shape": [5],
"datatype": "FP64",
"data": [0.7368421052631579, 0.21052631578947367, 0.05263157894736842, 0.9473684210526315, 0.5789473684210527]
},
{
"name": "four_g",
"shape": [5],
"datatype": "FP64",
"data": [0.0, 1.0, 0.0, 1.0, 1.0]
},
{
"name": "int_memory",
"shape": [5],
"datatype": "FP64",
"data": [0.04838709677419355, 0.9516129032258065, 0.4032258064516129, 0.3709677419354839, 0.7580645161290323]
},
{
"name": "m_dep",
"shape": [5],
"datatype": "FP64",
"data": [0.0, 0.7777777777777778, 0.8888888888888888, 0.4444444444444445, 0.4444444444444445]
},
{
"name": "mobile_wt",
"shape": [5],
"datatype": "FP64",
"data": [0.9416666666666668, 0.9249999999999999, 0.8833333333333334, 0.13333333333333341, 0.2333333333333334]
},
{
"name": "n_cores",
"shape": [5],
"datatype": "FP64",
"data": [0.2857142857142857, 0.5714285714285714, 0.2857142857142857, 1.0, 0.7142857142857142]
},
{
"name": "pc",
"shape": [5],
"datatype": "FP64",
"data": [0.8, 0.6000000000000001, 0.2, 1.0, 0.9]
},
{
"name": "px_height",
"shape": [5],
"datatype": "FP64",
"data": [0.1153061224489796, 0.3806122448979592, 0.6479591836734695, 0.15051020408163265, 0.3821428571428572]
},
{
"name": "px_width",
"shape": [5],
"datatype": "FP64",
"data": [0.6088117489986649, 0.23831775700934577, 0.5781041388518023, 0.835781041388518, 0.20694259012016014]
},
{
"name": "ram",
"shape": [5],
"datatype": "FP64",
"data": [0.860502405130946, 0.9724746125066808, 0.5718866916087653, 0.9719401389631213, 0.4053981827899519]
},
{
"name": "sc_h",
"shape": [5],
"datatype": "FP64",
"data": [0.5, 0.07142857142857145, 0.8571428571428571, 0.3571428571428571, 0.7142857142857143]
},
{
"name": "sc_w",
"shape": [5],
"datatype": "FP64",
"data": [0.38888888888888884, 0.0, 0.5555555555555556, 0.0, 0.4444444444444444]
},
{
"name": "talk_time",
"shape": [5],
"datatype": "FP64",
"data": [0.0, 0.27777777777777773, 0.4444444444444445, 0.27777777777777773, 0.27777777777777773]
},
{
"name": "three_g",
"shape": [5],
"datatype": "FP64",
"data": [0.0, 1.0, 0.0, 1.0, 1.0]
},
{
"name": "touch_screen",
"shape": [5],
"datatype": "FP64",
"data": [1.0, 0.0, 1.0, 1.0, 0.0]
},
{
"name": "wifi",
"shape": [5],
"datatype": "FP64",
"data": [0.0, 0.0, 1.0, 0.0, 1.0]
}
]
}
12 changes: 12 additions & 0 deletions serving/mlflow-kserve-minimal/InferenceService.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# Minimal KServe InferenceService for an MLflow-registered model (v2 protocol).
# Replace every <placeholder> below before applying; see the README for details.
apiVersion: "serving.kserve.io/v1beta1"
kind: "InferenceService"
metadata:
  # A name of your choosing, e.g. "mobile-price-svm".
  name: "<inference-name>"
  # Your Kubeflow namespace / workspace.
  namespace: "<workspace-name>"
spec:
  predictor:
    model:
      modelFormat:
        # Built-in MLflow runtime; no custom container image required.
        name: mlflow
      protocolVersion: v2
      # mlflow:// scheme fetches the artifact from the MLflow registry;
      # the trailing /1 selects version 1 of the registered model.
      storageUri: "mlflow://models/mobile-price-svm-<your-user>/1"
81 changes: 81 additions & 0 deletions serving/mlflow-kserve-minimal/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
# Minimal MLflow Model Inference

This directory shows how to deploy an MLflow-tracked model as a KServe
InferenceService using the v2 inference protocol. It uses the built-in
`mlflow` model format in KServe, so no custom container image is required.

## Prerequisites

- A model trained and registered in MLflow. See the
[mobile price classification MLflow example](../../mlflow/mobile-price-classification/)
for how to train and register the SVM model used here.
- `kubectl` access to your prokube namespace (`kubectl` is already installed in a pk-notebook)
- Python with the `requests` package installed (for testing, already installed in a pk-notebook)

## Deploy the InferenceService

1. Open `InferenceService.yaml` and replace the placeholder values:

| Placeholder | Description |
|---|---|
| `<inference-name>` | A name for your InferenceService (e.g. `mobile-price-svm`) |
| `<workspace-name>` | Your Kubeflow namespace / workspace |
| `<your-user>` | Your username, matching the model name registered in MLflow |

You can also adjust the model version number at the end of the `storageUri`
(e.g. `/2` refers to version 2 of the registered model).

The `storageUri` uses the `mlflow://` scheme, which tells KServe to fetch
the model artifact directly from the MLflow model registry.

> [!WARNING]
> You need an MLflow ClusterStorageContainer in order to use the
> `mlflow://` scheme (available on prokube platform versions >= 1.7.0).

2. Apply the manifest:
```sh
kubectl apply -f InferenceService.yaml -n <your-namespace>
```

3. Wait for the InferenceService to become ready. You can check the status in
the Kubeflow Endpoints UI or via:
```sh
kubectl get inferenceservice -n <your-namespace>
```

## Test the Deployment

A test script and sample request body are provided to verify the deployment.

1. Set the environment variables (optional — the script prompts for any that are missing):
```sh
export API_KEY=<your-api-key>
export INFERENCE_SERVICE_URI=<your-inference-service-url>
export PROTOCOL_VERSION=v2
```
You can find the inference service URL in the Kubeflow Endpoints UI. If you
don't know your API key, reach out to your prokube admin.

2. Run the test script:
```sh
python test_inference_service.py \
--model <inference-name> \
--json v2-mlflow-inference-body.json
```

The script sends the sample request to the deployed model and prints the
response.

If `API_KEY` or `INFERENCE_SERVICE_URI` are not set as environment
variables, the script will prompt you for them interactively.

## Request Body Format

The provided `v2-mlflow-inference-body.json` follows the
[v2 inference protocol](https://kserve.github.io/website/latest/modelserving/data_plane/v2_protocol/).
Each feature is specified as a separate input with a name, shape, datatype, and
data array. The sample contains 5 data points across 20 features from the
mobile price classification dataset.

The test script also supports the v1 protocol. To use it, set
`PROTOCOL_VERSION=v1` and provide a v1-formatted request body.
50 changes: 50 additions & 0 deletions serving/mlflow-kserve-minimal/test_inference_service.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
# Test the deployed InferenceService.
# The deployed service is protected by an API Key.
import argparse
import json as jsonlib
import os
from getpass import getpass

import requests

parser = argparse.ArgumentParser(description="Test the deployed InferenceService.")
parser.add_argument(
"--json",
"-j",
required=True,
help="Path to the JSON file containing the request body.",
)
parser.add_argument(
"--model",
"-m",
required=True,
help="Model name to target.",
)
args = parser.parse_args()

INFERENCE_SERVICE_API_KEY = os.getenv("API_KEY")
INFERENCE_SERVICE_URI = os.getenv("INFERENCE_SERVICE_URI")
PROTOCOL_VERSION = os.getenv("PROTOCOL_VERSION", "v2")
INFERENCE_SERVICE_NAME = args.model
JSON_FILE_PATH = args.json

if not INFERENCE_SERVICE_API_KEY:
INFERENCE_SERVICE_API_KEY = getpass(prompt="Please enter your API key: ")
if not INFERENCE_SERVICE_URI:
INFERENCE_SERVICE_URI = input("Please enter the external inference URI: ")

# Read the JSON body from the provided file path
with open(JSON_FILE_PATH, "r") as f:
request_body = jsonlib.load(f)

if PROTOCOL_VERSION == "v2":
url = f"{INFERENCE_SERVICE_URI}/{PROTOCOL_VERSION}/models/{INFERENCE_SERVICE_NAME}/infer"
else:
url = f"{INFERENCE_SERVICE_URI}/{PROTOCOL_VERSION}/models/{INFERENCE_SERVICE_NAME}:predict"

response = requests.post(
url,
headers={"X-Api-Key": INFERENCE_SERVICE_API_KEY},
json=request_body,
)
print(response.json())
Loading