In [1]:
%pip install mlflow scikit-learn

Note: you may need to restart the kernel to use updated packages.


In [7]:
import digitalhub as dh

# Obtain the project used throughout this notebook; judging by the API name,
# it is created when it does not exist yet.
project = dh.get_or_create_project("project-mlflow-model-ci")

In [8]:
import os

# Directory that holds the source files written by the %%writefile cells.
directory = "src"
# exist_ok=True keeps the cell idempotent on re-run and avoids the
# check-then-create race of a separate os.path.exists() test.
os.makedirs(directory, exist_ok=True)

In [9]:
%%writefile "src/train-model.py"

from digitalhub_runtime_python import handler
from digitalhub import from_mlflow_run
import mlflow

from sklearn import datasets, svm
from sklearn.model_selection import GridSearchCV

@handler(outputs=["model"])
def train(project):
    """Fit an SVC grid search on the iris dataset and log it as an MLflow model.

    MLflow scikit-learn autologging (with dataset logging) is enabled before
    fitting. The id of the last active MLflow run is then passed to
    ``from_mlflow_run``, whose result is forwarded as keyword arguments to
    ``project.log_model``.

    Args:
        project: Object exposing ``log_model`` (presumably the digitalhub
            project injected by the runtime -- confirm against the caller).

    Returns:
        The value returned by ``project.log_model`` for "model-mlflow".
    """
    mlflow.sklearn.autolog(log_datasets=True)

    dataset = datasets.load_iris()
    param_grid = {"kernel": ("linear", "rbf"), "C": [1, 10]}
    search = GridSearchCV(svm.SVC(), param_grid)
    search.fit(dataset.data, dataset.target)

    # from_mlflow_run maps the artifacts of the given MLflow run to model metadata.
    last_run_id = mlflow.last_active_run().info.run_id
    mlflow_model_spec = from_mlflow_run(last_run_id)

    return project.log_model(name="model-mlflow", kind="mlflow", **mlflow_model_spec)

Writing src/train-model.py


In [10]:
# Register src/train-model.py as a "python" function whose entry point is the
# `train` handler, declaring the packages the script imports as requirements.
train_fn = project.new_function(
    name="train",
    kind="python",
    python_version="PYTHON3_10",
    code_src="src/train-model.py",
    handler="train",
    requirements=["scikit-learn", "mlflow"],
)

In [11]:
# Launch the training function as a "job" run; local_execution=False requests
# that it is not executed in this kernel (presumably it runs on the platform).
train_model = train_fn.run(action="job", local_execution=False)

In [18]:
# Refresh the run and display its outputs (the logged "model" entity).
train_model.refresh().outputs()

{'model': {'kind': 'mlflow', 'metadata': {'project': 'project-mlflow-model-ci', 'name': 'model-mlflow', 'version': '2c1e14ec-d061-4cca-b1ed-50714b9c4324', 'created': '2024-10-28T12:59:19.732Z', 'updated': '2024-10-28T12:59:20.243Z', 'created_by': 'khurshid@fbk.eu', 'updated_by': 'khurshid@fbk.eu', 'embedded': False}, 'spec': {'path': 's3://datalake/project-mlflow-model-ci/model/model-mlflow/2c1e14ec-d061-4cca-b1ed-50714b9c4324/', 'framework': 'sklearn', 'parameters': {'n_jobs': 'None', 'refit': 'True', 'param_grid': "{'kernel': ('linear', 'rbf'), 'C': [1, 10]}", 'verbose': '0', 'pre_dispatch': '2*n_jobs', 'estimator': 'SVC()', 'return_train_score': 'False', 'cv': 'None', 'scoring': 'None', 'best_C': '1', 'best_kernel': 'linear', 'error_score': 'nan'}, 'metrics': {'training_score': 0.9933333333333333, 'training_f1_score': 0.9933326665999933, 'training_precision_score': 0.9934640522875816, 'best_cv_score': 0.9800000000000001, 'training_recall_score': 0.9933333333333333, 'training_accurac

In [22]:
# Storage path of the logged model, read from the spec of the run's "model" output.
model_path=train_model.output('model').spec.path
model_path

's3://datalake/project-mlflow-model-ci/model/model-mlflow/2c1e14ec-d061-4cca-b1ed-50714b9c4324/'

In [23]:
# Define an "mlflowserve" function that serves the trained model under the
# name "mlflow_model"; the served path is the logged model path plus "model/".
func = project.new_function(
    name="serve_mlflowmodel",
    kind="mlflowserve",
    model_name="mlflow_model",
    path=model_path + "model/",
)

In [25]:
# Start the serving run; with wait=True the call logs progress until the
# service reports as deployed.
serve_run = func.run(action="serve", wait=True)

2024-10-28 13:08:42,104 - INFO - Waiting for run 879aa69e-a95e-4fe1-8d03-77429385de51 to deploy service.
2024-10-28 13:08:47,151 - INFO - Waiting for run 879aa69e-a95e-4fe1-8d03-77429385de51 to deploy service.
2024-10-28 13:08:52,204 - INFO - Waiting for run 879aa69e-a95e-4fe1-8d03-77429385de51 to deploy service.
2024-10-28 13:08:57,258 - INFO - Waiting for run 879aa69e-a95e-4fe1-8d03-77429385de51 to deploy service.
2024-10-28 13:09:02,332 - INFO - Waiting for run 879aa69e-a95e-4fe1-8d03-77429385de51 to deploy service.
2024-10-28 13:09:07,379 - INFO - Waiting for run 879aa69e-a95e-4fe1-8d03-77429385de51 to deploy service.
2024-10-28 13:09:12,649 - INFO - Waiting for run 879aa69e-a95e-4fe1-8d03-77429385de51 to deploy service.
2024-10-28 13:09:17,709 - INFO - Waiting for run 879aa69e-a95e-4fe1-8d03-77429385de51 to deploy service.
2024-10-28 13:09:17,774 - INFO - Run 879aa69e-a95e-4fe1-8d03-77429385de51 service deployed.


Wait until the serving service has loaded completely before sending requests.

In [26]:
from sklearn import datasets

# Build the inference request from the first two iris samples.
# NOTE: the names `iris`, `data` and `json` are read by later cells, so they
# are kept as-is (`json` would shadow the stdlib module if it were imported).
iris = datasets.load_iris()
data = iris.data[:2].tolist()
json = {
    "inputs": [
        {"name": "input-0", "shape": [-1, 4], "datatype": "FP64", "data": data},
    ]
}

In [27]:
# Display the request payload built in the previous cell.
json

{'inputs': [{'name': 'input-0',
   'shape': [-1, 4],
   'datatype': 'FP64',
   'data': [[5.1, 3.5, 1.4, 0.2], [4.9, 3.0, 1.4, 0.2]]}]}

In [28]:
# serve_run.invoke(model_name="mlflow_model", json=json).json()

In [29]:
# Read the deployed service address from the refreshed run status and build
# the inference endpoint URL for the model served as "mlflow_model".
service_url = serve_run.refresh().status.to_dict()["service"]["url"]
url = f"http://{service_url}/v2/models/mlflow_model/infer"
url

'http://s-mlflowserveserve-879aa69e-a95e-4fe1-8d03-77429385de51.digitalhub-tenant1:8080/v2/models/mlflow_model/infer'

<!-- /v2/models/model/infer -->

In [30]:
from sklearn import datasets

# NOTE(review): this rebuilds the same request payload as the earlier cell
# (first two iris samples); consider reusing the existing `json` instead.
iris = datasets.load_iris()
data = iris.data[:2].tolist()
json = {
    "inputs": [
        {"name": "input-0", "shape": [-1, 4], "datatype": "FP64", "data": data},
    ]
}

json

{'inputs': [{'name': 'input-0',
   'shape': [-1, 4],
   'datatype': 'FP64',
   'data': [[5.1, 3.5, 1.4, 0.2], [4.9, 3.0, 1.4, 0.2]]}]}

In [31]:
# url = 's-mlflowserveserve-66f056e7-99b9-481e-858d-d9888df35f91.digitalhub-tenant2:8080'

In [32]:
# Send the payload to the inference endpoint and show the response as parsed
# by its .json() method.
serve_run.invoke(url=url, json=json).json()
# serve_run.invoke(model_name="mlflow_model", url=url, json=json).json()

{'model_name': 'mlflow_model',
 'id': '6d90360a-91e1-48d3-8cde-f088ae87db3c',
 'parameters': {'content_type': 'np'},
 'outputs': [{'name': 'output-1',
   'shape': [2, 1],
   'datatype': 'INT64',
   'parameters': {'content_type': 'np'},
   'data': [0, 0]}]}

In [39]:
%%writefile "src/mlflow_pipeline.py"

from digitalhub_runtime_kfp.dsl import pipeline_context

def myhandler():
    """Pipeline definition: one "train" step running the `train` function as a job."""
    with pipeline_context() as ctx:
        train_step = ctx.step(name="train", function="train", action="job")
        # serve = ctx.step(name="serve", function="serve_mlflowmodel", action="serve")

Writing src/mlflow_pipeline.py


In [40]:
# Register the pipeline script as a "kfp" workflow with `myhandler` as its entry point.
workflow = project.new_workflow(
    name="pipeline_mlflow",
    kind="kfp",
    code_src="src/mlflow_pipeline.py",
    handler="myhandler",
)

In [41]:
# Submit a run of the workflow; the returned run object is displayed as the cell output.
workflow.run()

{'kind': 'kfp+run', 'metadata': {'project': 'project-mlflow-model-ci', 'name': 'fb69e2c5-490a-4b54-8212-06eeace2eed1', 'created': '2024-10-28T13:20:15.896Z', 'updated': '2024-10-28T13:20:15.908Z', 'created_by': 'khurshid@fbk.eu', 'updated_by': 'khurshid@fbk.eu'}, 'spec': {'task': 'kfp+pipeline://project-mlflow-model-ci/pipeline_mlflow:0a2a73a4-9ffe-48db-a503-04366492294a', 'local_execution': False, 'function': 'kfp://project-mlflow-model-ci/pipeline_mlflow:0a2a73a4-9ffe-48db-a503-04366492294a', 'source': {'source': 'src/mlflow_pipeline.py', 'handler': 'myhandler', 'base64': 'CmZyb20gZGlnaXRhbGh1Yl9ydW50aW1lX2tmcC5kc2wgaW1wb3J0IHBpcGVsaW5lX2NvbnRleHQKCmRlZiBteWhhbmRsZXIoKTogCiAgICB3aXRoIHBpcGVsaW5lX2NvbnRleHQoKSBhcyBwYzoKICAgICAgICB0cmFpbiA9IHBjLnN0ZXAobmFtZT0idHJhaW4iLCBmdW5jdGlvbj0idHJhaW4iLCBhY3Rpb249ImpvYiIpCiAgICAgICAgI3NlcnZlID0gcGMuc3RlcChuYW1lPSJzZXJ2ZSIsIGZ1bmN0aW9uPSJzZXJ2ZV9tbGZsb3dtb2RlbCIsIGFjdGlvbj0ic2VydmUiKQo=', 'lang': 'python'}, 'inputs': {}, 'outputs': {}, 'parameters