# Deploy Model with KServe

In [3]:
from kfp.components import create_component_from_func

%load_ext lab_black

# Container image used as the base image of the component built in this
# notebook. An explicit tag is used here; the ":latest" tag of the same
# repository is the unpinned alternative.
BASE_IMAGE = (
    "quay.io/ibm/kubeflow-notebook-image-ppc64le"
    ":elyra3.15.0-py3.9-tf2.12.0-pt2.0.1-v2"
)


def deploy_model_with_kserve(
    project_name: str,
    model_version: int,
    explainer_type: str = None,
    kserve_version: str = "v1beta1",
    s3_bucket: str = "projects",
    storage_uri: str = None,
) -> str:
    """
    Deploys a model using KServe with Triton as the serving backend.

    If an inference service named project_name already exists in the target
    namespace it is patched, otherwise a new one is created. When an explainer
    type is given, an Alibi explainer is co-deployed with the predictor.

            Parameters:
                    project_name: Name of the project. Must be unique for the targeted namespace and conform to Kubernetes naming conventions. Example: my-model.
                    model_version: Version of the deployed model. Relevant to match explainer version to model version. Falls back to 1 if it cannot be parsed as an integer. Example: 1.
                    explainer_type: Type of Alibi explanation. If None or empty, explanations are not provided. Example: AnchorTabular.
                    kserve_version: KServe API version. Example: v1beta1.
                    s3_bucket: Name of the s3 bucket in which model projects reside. Example: projects.
                    storage_uri: Optional full storage URI, overriding the dynamic URI created from s3_bucket and project_name. Example: gs://kfserving-examples/models/sklearn/1.0/model.
            Returns:
                    endpoint: REST endpoint where the model can be queried. Example: https://my-model-user-example-com.apps.myorg.com.
            Raises:
                    KeyError, TypeError: If the inference service does not report a URL in its status after the wait period.
    """
    # NOTE: all imports and logic are kept inside the function body because the
    # function is turned into a standalone pipeline component from its source.
    from kubernetes import client, config
    from kserve import KServeClient
    from kserve import constants
    from kserve import utils
    from kserve import V1beta1AlibiExplainerSpec
    from kserve import V1beta1ExplainerSpec
    from kserve import V1beta1InferenceService
    from kserve import V1beta1InferenceServiceSpec
    from kserve import V1beta1PredictorSpec
    from kserve import V1beta1TritonSpec
    import logging
    import sys

    logging.basicConfig(
        stream=sys.stdout,
        level=logging.INFO,
        format="%(levelname)s %(asctime)s: %(message)s",
    )

    # Pipeline arguments may arrive as strings (or be missing entirely), so
    # coerce defensively; TypeError covers None, ValueError covers bad text.
    try:
        model_version = int(model_version)
    except (TypeError, ValueError):
        logging.warning(
            "Could not parse model version. Continuing with default value 1..."
        )
        model_version = 1

    # See: https://www.kubeflow.org/docs/external-add-ons/kserve/first_isvc_kserve/
    logging.info("Initializing environment...")
    config.load_incluster_config()
    namespace = utils.get_default_target_namespace()
    api_version = constants.KSERVE_GROUP + "/" + kserve_version
    # An empty string is treated like "not provided": an empty URI can never
    # point at a model, so fall back to the project's location in the bucket.
    if not storage_uri:
        storage_uri = f"s3://{s3_bucket}/{project_name}"

    logging.info("Initializing inference service specification...")
    # The same resource envelope is shared by the predictor and the explainer.
    resources_spec = client.V1ResourceRequirements(
        requests={"cpu": "1000m", "memory": "8Gi"},
        limits={"cpu": "2000m", "memory": "16Gi"},
    )

    # See: https://kserve.github.io/website/master/sdk_docs/docs/V1beta1TritonSpec/
    triton_spec = V1beta1TritonSpec(
        args=["--strict-model-config=false"],
        runtime_version="22.03-py3",
        storage_uri=storage_uri,
        resources=resources_spec,
    )

    # See: https://kserve.github.io/website/master/sdk_docs/docs/V1beta1PredictorSpec/
    predictor_spec = V1beta1PredictorSpec(
        service_account_name="kserve-inference-sa", triton=triton_spec
    )

    # Always bound, so the service spec below needs no second conditional.
    explainer_spec = None
    if explainer_type:
        logging.info("Found an explainer, which will be co-deployed.")
        # See: https://kserve.github.io/website/master/sdk_docs/docs/V1beta1AlibiExplainerSpec/
        alibi_spec = V1beta1AlibiExplainerSpec(
            type=explainer_type,
            # The explainer artifact lives next to the model, keyed by version.
            storage_uri=f"{storage_uri}/explainer/{model_version}",
            resources=resources_spec,
        )

        # See: https://kserve.github.io/website/master/sdk_docs/docs/V1beta1ExplainerSpec/
        explainer_spec = V1beta1ExplainerSpec(
            min_replicas=1,
            alibi=alibi_spec,
        )

    # See: https://kserve.github.io/website/master/sdk_docs/docs/V1beta1InferenceServiceSpec/#properties
    inference_service_spec = V1beta1InferenceService(
        api_version=api_version,
        kind=constants.KSERVE_KIND,
        metadata=client.V1ObjectMeta(
            name=project_name,
            namespace=namespace,
            annotations={"sidecar.istio.io/inject": "false"},
        ),
        spec=V1beta1InferenceServiceSpec(
            predictor=predictor_spec,
            explainer=explainer_spec,
        ),
    )

    kserve_client = KServeClient()

    logging.info("Checking for existing inference service...")
    # Only the lookup is guarded: a failed lookup is interpreted as "service
    # does not exist yet". Errors from patch/create must surface unchanged
    # instead of triggering a second, misleading create attempt.
    try:
        inference_service = kserve_client.get(project_name, namespace=namespace)
    except Exception as err:
        logging.info("Lookup of inference service failed: %s", err)
        inference_service = None

    if inference_service is None:
        logging.info("Creating new inference service...")
        kserve_client.create(inference_service_spec)
    else:
        logging.info("Received: %s", inference_service)
        if "status" in inference_service:
            logging.info("Inference service already exists.")

            logging.info("Patching inference service with new model version...")
            kserve_client.patch(project_name, inference_service_spec)
        else:
            logging.info("Creating inference service...")
            kserve_client.create(inference_service_spec)

    logging.info("Waiting for inference service to start...")
    kserve_client.get(
        project_name, namespace=namespace, watch=True, timeout_seconds=180
    )

    logging.info("Getting inference URL...")
    inference_response = kserve_client.get(project_name, namespace=namespace)
    try:
        inference_url = inference_response["status"]["address"]["url"]
    except (KeyError, TypeError):
        # The status carries no address yet; log what was received so the
        # failure is diagnosable, then re-raise the original exception.
        logging.error(
            "Inference service %s did not report a URL. Response: %s",
            project_name,
            inference_response,
        )
        raise
    logging.info("inference URL: %s", inference_url)

    logging.info("Finished.")
    return inference_url


# Wrap the deployment function as a pipeline component that runs on
# BASE_IMAGE; "component.yaml" is passed as the component definition file.
deploy_model_with_kserve_comp = create_component_from_func(
    func=deploy_model_with_kserve,
    base_image=BASE_IMAGE,
    output_component_file="component.yaml",
)

The lab_black extension is already loaded. To reload it, use:
  %reload_ext lab_black
