In [1]:
# %pip install mlflow scikit-learn

In [2]:
import digitalhub as dh

# Project handle used by every later cell in this notebook.
PROJECT_NAME = "project-mlflow-model-ci"
project = dh.get_or_create_project(PROJECT_NAME)

In [3]:
import os
# Folder that receives the scripts written by the %%writefile cells.
directory = "src"
# exist_ok=True keeps the cell re-runnable and avoids the race between
# checking for the folder and creating it.
os.makedirs(directory, exist_ok=True)

In [4]:
%%writefile "src/train-model.py"

from digitalhub_runtime_python import handler
from digitalhub import from_mlflow_run
import mlflow

from sklearn import datasets, svm
from sklearn.model_selection import GridSearchCV

@handler(outputs=["model"])
def train(project):
    """Grid-search an SVC on the iris dataset and log the result as an MLflow model.

    Args:
        project: project object; only its ``log_model`` method is used here.

    Returns:
        Whatever ``project.log_model`` returns for the model logged as
        "model-mlflow"; the handler exposes it as the "model" output.
    """
    # Autologging has to be switched on before fitting so the fit is tracked.
    mlflow.sklearn.autolog(log_datasets=True)

    dataset = datasets.load_iris()
    search_space = {"kernel": ("linear", "rbf"), "C": [1, 10]}
    grid_search = GridSearchCV(svm.SVC(), search_space)
    grid_search.fit(dataset.data, dataset.target)

    # The fit above is the most recent MLflow run; its id locates the artifacts.
    last_run = mlflow.last_active_run()

    # Utility that maps the MLflow run artifacts to model metadata.
    model_metadata = from_mlflow_run(last_run.info.run_id)

    logged_model = project.log_model(
        name="model-mlflow",
        kind="mlflow",
        **model_metadata,
    )
    return logged_model

Writing src/train-model.py


In [5]:
# Register src/train-model.py as the project's "train" function; the
# requirements list names the packages that script imports.
train_fn = project.new_function(
    name="train",
    kind="python",
    python_version="PYTHON3_10",
    code_src="src/train-model.py",
    handler="train",
    requirements=["scikit-learn", "mlflow"],
)

In [6]:
# Submit the "train" function as a job; completion is checked in the next cell.
train_model = train_fn.run(
    action="job",
    local_execution=False,
)

Wait until the training job has completed. You can check the state of the run from the console, or through the digitalhub SDK with:

In [7]:
# Show the current state of the training run; re-run this cell until the job has finished.
train_model.get_state()

In [11]:
# Refresh the run, then read the storage path of its 'model' output.
trained_model = train_model.refresh().output('model')
model_path = trained_model.spec.path
model_path

's3://datalake/project-mlflow-model-ci/model/model-mlflow/a20577a6-58e0-45d2-b793-ac08b57c87db/'

In [12]:
# Serve from the 'model/' subfolder of the logged model path.
# NOTE: plain concatenation relies on model_path ending with '/'.
served_model_path = model_path + 'model/'
func = project.new_function(
    name="serve_mlflowmodel",
    kind="mlflowserve",
    model_name="mlflow_model",
    path=served_model_path,
)

In [13]:
# Start the serving run; wait=True is passed so the call waits for the service to deploy.
serve_run = func.run(
    action="serve",
    wait=True,
)

2024-10-29 12:13:06,528 - INFO - Waiting for run 0724f9ad-d6d5-4632-89cf-405b9093f608 to deploy service.
2024-10-29 12:13:11,634 - INFO - Waiting for run 0724f9ad-d6d5-4632-89cf-405b9093f608 to deploy service.
2024-10-29 12:13:11,721 - INFO - Run 0724f9ad-d6d5-4632-89cf-405b9093f608 service deployed.


Wait until the 'serve' run has loaded completely before sending requests.

In [14]:
from sklearn import datasets

# Build the inference request from the first two iris samples.
iris = datasets.load_iris()
data = iris.data[:2].tolist()
input_tensor = {
    "name": "input-0",
    "shape": [-1, 4],
    "datatype": "FP64",
    "data": data,
}
# NOTE: `json` shadows the stdlib module name; it is kept because later
# cells pass this variable as `json=json`.
json = {"inputs": [input_tensor]}

In [15]:
# Display the request payload held in `json`.
json

{'inputs': [{'name': 'input-0',
   'shape': [-1, 4],
   'datatype': 'FP64',
   'data': [[5.1, 3.5, 1.4, 0.2], [4.9, 3.0, 1.4, 0.2]]}]}

In [16]:
# serve_run.invoke(model_name="mlflow_model", json=json).json()

In [17]:
# Read the service address from the refreshed run status and build the
# inference endpoint for the "mlflow_model" model.
run_status = serve_run.refresh().status.to_dict()
service_url = run_status["service"]["url"]
url = f"http://{service_url}/v2/models/mlflow_model/infer"
url

'http://s-mlflowserveserve-0724f9ad-d6d5-4632-89cf-405b9093f608.digitalhub-tenant1:8080/v2/models/mlflow_model/infer'

<!-- /v2/models/model/infer -->

In [18]:
from sklearn import datasets

# Rebuild the two-sample request payload (same content as the earlier
# payload cell) and display it.
iris = datasets.load_iris()
first_two_rows = iris.data[0:2]
data = first_two_rows.tolist()
json = {
    "inputs": [
        dict(name="input-0", shape=[-1, 4], datatype="FP64", data=data),
    ]
}

json

{'inputs': [{'name': 'input-0',
   'shape': [-1, 4],
   'datatype': 'FP64',
   'data': [[5.1, 3.5, 1.4, 0.2], [4.9, 3.0, 1.4, 0.2]]}]}

In [19]:
# url = 's-mlflowserveserve-66f056e7-99b9-481e-858d-d9888df35f91.digitalhub-tenant2:8080'

In [21]:
# Send the payload to the inference endpoint and display the JSON-decoded reply.
response = serve_run.invoke(url=url, json=json)
response.json()
# serve_run.invoke(model_name="mlflow_model", url=url, json=json).json()

{'model_name': 'mlflow_model',
 'id': 'fbfe5593-aa84-4c51-9c09-ab93b204344e',
 'parameters': {'content_type': 'np'},
 'outputs': [{'name': 'output-1',
   'shape': [2, 1],
   'datatype': 'INT64',
   'parameters': {'content_type': 'np'},
   'data': [0, 0]}]}

In [22]:
%%writefile "src/mlflow_pipeline.py"

from digitalhub_runtime_kfp.dsl import pipeline_context

def myhandler():
    """Declare the pipeline: one step that runs the "train" function as a job."""
    with pipeline_context() as ctx:
        train_step = ctx.step(name="train", function="train", action="job")
        # Serving step, currently disabled:
        # serve = ctx.step(name="serve", function="serve_mlflowmodel", action="serve")

Writing src/mlflow_pipeline.py


In [23]:
# Register src/mlflow_pipeline.py as the "pipeline_mlflow" KFP workflow,
# with `myhandler` as its entry point.
workflow = project.new_workflow(
    name="pipeline_mlflow",
    kind="kfp",
    code_src="src/mlflow_pipeline.py",
    handler="myhandler",
)

In [24]:
# Start a run of the "pipeline_mlflow" workflow; the returned run is shown as the cell output.
workflow.run()

{'kind': 'kfp+run', 'metadata': {'project': 'project-mlflow-model-ci', 'name': 'e72fcd46-9516-4aa0-8702-d2a6895d4a32', 'created': '2024-10-29T12:14:10.694Z', 'updated': '2024-10-29T12:14:10.709Z', 'created_by': 'khurshid@fbk.eu', 'updated_by': 'khurshid@fbk.eu'}, 'spec': {'task': 'kfp+pipeline://project-mlflow-model-ci/pipeline_mlflow:6af9304c-0a11-4827-8f25-4483d8318775', 'local_execution': False, 'function': 'kfp://project-mlflow-model-ci/pipeline_mlflow:6af9304c-0a11-4827-8f25-4483d8318775', 'source': {'source': 'src/mlflow_pipeline.py', 'handler': 'myhandler', 'base64': 'CmZyb20gZGlnaXRhbGh1Yl9ydW50aW1lX2tmcC5kc2wgaW1wb3J0IHBpcGVsaW5lX2NvbnRleHQKCmRlZiBteWhhbmRsZXIoKTogCiAgICB3aXRoIHBpcGVsaW5lX2NvbnRleHQoKSBhcyBwYzoKICAgICAgICB0cmFpbiA9IHBjLnN0ZXAobmFtZT0idHJhaW4iLCBmdW5jdGlvbj0idHJhaW4iLCBhY3Rpb249ImpvYiIpCiAgICAgICAgI3NlcnZlID0gcGMuc3RlcChuYW1lPSJzZXJ2ZSIsIGZ1bmN0aW9uPSJzZXJ2ZV9tbGZsb3dtb2RlbCIsIGFjdGlvbj0ic2VydmUiKQo=', 'lang': 'python'}, 'inputs': {}, 'outputs': {}, 'parameters