In [1]:
%pip install mlflow scikit-learn==1.5.0

Collecting mlflow
  Downloading mlflow-2.15.1-py3-none-any.whl (26.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m26.3/26.3 MB[0m [31m66.6 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hCollecting scikit-learn==1.5.0
  Downloading scikit_learn-1.5.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (13.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.3/13.3 MB[0m [31m122.8 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
Collecting mlflow-skinny==2.15.1 (from mlflow)
  Downloading mlflow_skinny-2.15.1-py3-none-any.whl (5.5 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m5.5/5.5 MB[0m [31m119.2 MB/s[0m eta [36m0:00:00[0m00:01[0m
[?25hCollecting Flask<4 (from mlflow)
  Downloading flask-3.0.3-py3-none-any.whl (101 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m101.7/101.7 kB[0m [31m15.7 MB/s[0m eta [36m0:00:00[0m
Collecting docker<8,>=4.0.0 (from mlflow)
  Downloading docke

In [1]:
import digitalhub as dh
import pandas as pd
import os

In [2]:
# Name of the project that the entities created in this notebook belong to.
PROJECT = "demo-ml"
# Obtain the project handle used by every later cell (fetched or created,
# as the helper's name indicates).
project = dh.get_or_create_project(PROJECT)

In [3]:
%%writefile train-model.py

from digitalhub_runtime_python import handler

from digitalhub_ml.entities.utils import from_mlflow_run
import mlflow

from sklearn import datasets, svm
from sklearn.model_selection import GridSearchCV

@handler()
def train(project):
    """Train an SVC on the Iris dataset and log the result as an MLflow model.

    MLflow sklearn autologging (with dataset logging) is enabled, a grid search
    over the SVC ``kernel`` and ``C`` values is fitted on the full Iris data,
    and the artifacts of the resulting MLflow run are registered in the project
    as the model ``model-mlflow`` of kind ``mlflow``.

    Args:
        project: Project handle; its ``log_model`` method is used to register
            the trained model.

    Raises:
        RuntimeError: If MLflow reports no active run after fitting, in which
            case there is nothing to log.
    """
    mlflow.sklearn.autolog(log_datasets=True)

    iris = datasets.load_iris()
    parameters = {"kernel": ("linear", "rbf"), "C": [1, 10]}
    svc = svm.SVC()
    clf = GridSearchCV(svc, parameters)

    clf.fit(iris.data, iris.target)

    # last_active_run() returns None when no run was recorded; fail with a
    # clear message instead of an AttributeError on `.info`.
    run = mlflow.last_active_run()
    if run is None:
        raise RuntimeError(
            "MLflow autologging did not record a run; cannot log the model."
        )
    run_id = run.info.run_id

    # utility to map mlflow run artifacts to model metadata
    model_params = from_mlflow_run(run_id)

    project.log_model(
        name="model-mlflow",
        kind="mlflow",
        **model_params
    )

Overwriting train-model.py


In [4]:
# Register the training function: the source points at the `train` handler in
# train-model.py and the requirements pin scikit-learn and mlflow.
train_fn = project.new_function(
    name="train",
    kind="python",
    python_version="PYTHON3_9",
    source={"source": "train-model.py", "handler": "train"},
    requirements=["scikit-learn==1.5.0", "mlflow==2.15.1"],
)

In [5]:
# Launch the training function as a "job" run with local execution requested;
# the run's log is printed below the cell.
train_run = train_fn.run(action="job", local_execution=True)

2024-08-26 13:47:18,747 - INFO - Validating task.
2024-08-26 13:47:18,748 - INFO - Validating run.
2024-08-26 13:47:18,749 - INFO - Starting task.
2024-08-26 13:47:18,749 - INFO - Configuring execution.
2024-08-26 13:47:19,108 - INFO - Composing function arguments.
2024-08-26 13:47:19,188 - INFO - Executing run.
2024/08/26 13:47:19 INFO mlflow.utils.autologging_utils: Created MLflow autologging run with ID '57727e0ea7f147dcada593a7f8c3644c', which will track hyperparameters, performance metrics, model artifacts, and lineage information for the current sklearn workflow
2024/08/26 13:47:23 INFO mlflow.sklearn.utils: Logging the 5 best runs, no runs will be omitted.
2024-08-26 13:47:23,761 - INFO - Task completed, returning run status.


In [6]:
# Look up the model registered by the training handler (logged under the name
# "model-mlflow") and display the path recorded in its spec.
model = project.get_model("model-mlflow")
model.spec.path

's3://datalake/demo-ml/model/model-mlflow/01462dc6-ea66-4aa1-8e07-941ddca04b26/'

In [7]:
# Create a serving function for the logged MLflow model and start it.
# The separator is normalised before appending the "model" sub-directory, so
# the path is well-formed whether or not `model.spec.path` ends with "/".
func = project.new_function(
    name="serve_mlflowmodel",
    kind="mlflowserve",
    model_name="testmodel",
    path=model.spec.path.rstrip("/") + "/model",
)
serve_run = func.run(action="serve")

In [12]:
import requests
from sklearn import datasets

# Refresh the serving run once and read the service URL from its status.
serve_status = serve_run.refresh().status.to_dict()
SERVICE_URL = serve_status["service"]["url"]
# Must match the `model_name` given when the serving function was created.
MODEL_NAME = "testmodel"
SERVICE_URL

's-mlflowserveserve-02ef2504-92db-489c-924f-6b7812580958.digitalhub-tenant2.svc.cluster.local:8080'

In [13]:
iris = datasets.load_iris()
test_input = iris.data[0:2].tolist()

# Request body for the `/v2/models/<name>/infer` endpoint. The shape is derived
# from the data so it stays correct if the slice above is changed.
payload = {
    "inputs": [
        {
            "name": "input-0",
            "shape": [len(test_input), len(test_input[0])],
            "datatype": "FP64",
            "data": test_input,
        }
    ]
}

# The timeout keeps the cell from hanging indefinitely if the service is not
# reachable.
with requests.post(
    f"http://{SERVICE_URL}/v2/models/{MODEL_NAME}/infer",
    json=payload,
    timeout=30,
) as r:
    # Surface HTTP errors explicitly instead of failing later on JSON decoding.
    r.raise_for_status()
    res = r.json()
    print(res)

{'model_name': 'testmodel', 'id': 'd4c95b80-313f-4604-a374-17d06f2413d5', 'parameters': {'content_type': 'np'}, 'outputs': [{'name': 'output-1', 'shape': [2, 1], 'datatype': 'INT64', 'parameters': {'content_type': 'np'}, 'data': [0, 0]}]}
