# 1. Exploring the MLflow storage to deploy the models

In [None]:
import mlflow
from mlflow.tracking import MlflowClient

# Point the MLflow SDK at the in-cluster tracking server before creating the client.
mlflow.set_tracking_uri(
    "http://sunrise-mlflow-tracking.mlflow.svc.cluster.local:5080"
)
client = MlflowClient()

# Look up one registered model version; change the name/version to inspect another.
mv = client.get_model_version(name="cpu-pct-test-5", version="1")

# Print the version's `source` attribute,
# e.g. s3://mlflow/27/3e0850.../artifacts/model
print(mv.source)


### MinIO credentials for MLflow

# List the secrets in the mlflow namespace and keep the ones mentioning "minio"
# (case-insensitive).
kubectl -n mlflow get secrets | grep -i minio

# Example for Bitnami chart: read the root-user and root-password keys of the
# sunrise-minio secret and base64-decode them; the trailing `echo` only adds a newline.
kubectl -n mlflow get secret sunrise-minio -o jsonpath='{.data.root-user}' | base64 -d; echo
kubectl -n mlflow get secret sunrise-minio -o jsonpath='{.data.root-password}' | base64 -d; echo

'''

sunrise-minio                   Opaque               2      123d

admin                            user

<redacted>                       password


'''

# 2. KServe


### KServe issue #1

Root cause: the ClusterRoleBindings reference the kserve-controller-manager ServiceAccount in the wrong namespace (kubeflow). That's why every `kubectl auth can-i` check returned `no` and the controller can't sync its caches.

Let’s fix the bindings so they reference the SA in kserve.

1) Recreate the two bindings with the correct subject

(This is safer than JSON-patching indexes; it idempotently overwrites them.)


**Manager -> needs cluster-wide perms**
kubectl create clusterrolebinding kserve-manager-rolebinding \
  --clusterrole=kserve-manager-role \
  --serviceaccount=kserve:kserve-controller-manager \
  -o yaml --dry-run=client | kubectl apply -f -

**Proxy -> used by kube-rbac-proxy sidecar**
kubectl create clusterrolebinding kserve-proxy-rolebinding \
  --clusterrole=kserve-proxy-role \
  --serviceaccount=kserve:kserve-controller-manager \
  -o yaml --dry-run=client | kubectl apply -f -


kubectl -n kserve rollout restart deploy/kserve-controller-manager
kubectl -n kserve get pods -w

**then:**
kubectl -n kserve logs deploy/kserve-controller-manager -c manager --tail=200


# For each resource type below, print its name (left-aligned, padded to 45 columns)
# followed by kubectl's answer to whether the controller's ServiceAccount may list
# that resource in all namespaces.
sa="system:serviceaccount:kserve:kserve-controller-manager"
for resource in \
  services \
  deployments.apps \
  virtualservices.networking.istio.io \
  httproutes.gateway.networking.k8s.io \
  inferenceservices.serving.kserve.io \
  trainedmodels.serving.kserve.io \
  inferencegraphs.serving.kserve.io
do
  printf '%-45s ' "$resource"
  kubectl auth can-i list "$resource" --as="$sa" --all-namespaces
done
    
######


### KServe issue #2

These commands check whether the problem is present, i.e. whether the user "system:serviceaccount:kubeflow:kserve-controller-manager" cannot get the resource "serviceaccounts" in the namespace "lstm-iqu":
* kubectl -n kubeflow logs deploy/kserve-controller-manager -c manager | grep -i credential
* kubectl -n kubeflow get cm inferenceservice-config -o yaml | grep -A5 credentials

note: Because the KServe controller can’t read your ServiceAccount in lstm-iqu, it fails to inject S3 env vars into the storage-initializer, so you get NoCredentialsError

KServe cannot inject the secret credentials and the service account into the pods.

In [None]:
import torch

# Show which PyTorch version is installed in this kernel.
installed_torch_version = torch.__version__
print(installed_torch_version)

#### CODE

In [1]:
from kubernetes import client
from kserve import KServeClient
from kserve import constants
from kserve import V1beta1InferenceService, V1beta1InferenceServiceSpec, V1beta1PredictorSpec, V1beta1ModelSpec


# --- Deployment settings ---
namespace = "lstm-iqu"
name = "inference-serving-kserve-cpu"
service_account = "sa-private-mlflow"
storage_uri = "s3://mlflow/27/7b7f44e59b8f4f348ec18bb0ca7da0c4/artifacts/model"

# Important: model_format must be a dict carrying a "name" key ("mlflow" here).
model_spec = V1beta1ModelSpec(
    runtime="kserve-mlserver",
    storage_uri=storage_uri,
    model_format={"name": "mlflow"},
)

# The predictor runs under the given ServiceAccount and serves the model above.
predictor_spec = V1beta1PredictorSpec(
    model=model_spec,
    service_account_name=service_account,
)

# Assemble the InferenceService object from its metadata and spec.
isvc_metadata = client.V1ObjectMeta(namespace=namespace, name=name)
isvc_spec = V1beta1InferenceServiceSpec(predictor=predictor_spec)
isvc = V1beta1InferenceService(
    kind="InferenceService",
    api_version="serving.kserve.io/v1beta1",
    metadata=isvc_metadata,
    spec=isvc_spec,
)

# Submit the InferenceService to the cluster.
kserve_client = KServeClient()
kserve_client.create(isvc, namespace=namespace)

# Optional: wait until the service reports ready.
kserve_client.wait_isvc_ready(name, namespace=namespace)


# Component working with a specific URI

In [None]:
@dsl.component(
    base_image="docker.io/jhofydu/kpf-kserve:V1.0.0",
    packages_to_install=["kserve==0.15.0", "kubernetes"]
)
def deploying_model(
    namespace: str = "lstm-iqu",
    isvc_name: str = "inference-serving-kserve-cpu",
    service_account: str = "sa-private-mlflow",
    storage_uri: str = "s3://mlflow/27/7b7f44e59b8f4f348ec18bb0ca7da0c4/artifacts/model",
):
    """Create a KServe InferenceService for an MLflow model and wait for it.

    Every parameter defaults to the value that used to be hard-coded, so
    calling the component without arguments behaves as before.

    Args:
        namespace: Namespace in which the InferenceService is created.
        isvc_name: Name of the InferenceService.
        service_account: ServiceAccount name set on the predictor.
        storage_uri: URI of the model artifacts passed to the model spec.
    """
    # Imports live inside the function body, as in the original component.
    from kubernetes import client
    from kserve import KServeClient
    from kserve import V1beta1InferenceService, V1beta1InferenceServiceSpec, V1beta1PredictorSpec, V1beta1ModelSpec

    # Important: model_format is a dict with "name" (for MLflow)
    model_spec = V1beta1ModelSpec(
        model_format={"name": "mlflow"},
        storage_uri=storage_uri,
        runtime="kserve-mlserver"
    )

    predictor_spec = V1beta1PredictorSpec(
        service_account_name=service_account,
        model=model_spec,
    )

    isvc = V1beta1InferenceService(
        api_version="serving.kserve.io/v1beta1",
        kind="InferenceService",
        metadata=client.V1ObjectMeta(
            name=isvc_name,
            namespace=namespace
        ),
        spec=V1beta1InferenceServiceSpec(
            predictor=predictor_spec
        )
    )

    kserve_client = KServeClient()
    kserve_client.create(isvc, namespace=namespace)

    # Optionally, wait for readiness
    kserve_client.wait_isvc_ready(isvc_name, namespace=namespace)


# Getting the run ID of the registered model that is tagged as winner

In [None]:
import mlflow
from mlflow import MlflowClient
import re
from kubernetes import client
from kserve import KServeClient
from kserve import constants
from kserve import V1beta1InferenceService, V1beta1InferenceServiceSpec, V1beta1PredictorSpec, V1beta1ModelSpec

# ===== config =====
TRACKING_URI = "http://sunrise-mlflow-tracking.mlflow.svc.cluster.local:5080"
REGISTERED_MODEL_NAME = "cpu-pct-test-5"
ALIAS = "winner"

mlflow.set_tracking_uri(TRACKING_URI)
# Named `mlflow_client` rather than `client` so the `kubernetes.client` module
# imported at the top of this cell is not overwritten.
mlflow_client = MlflowClient()

try:
    # Get the model version linked to the alias
    model_version = mlflow_client.get_model_version_by_alias(REGISTERED_MODEL_NAME, ALIAS)
    version = model_version.version
    model_uri = f"models:/{REGISTERED_MODEL_NAME}/{version}"
    print(f"Model URI: {model_uri}")

    # Try to get the run ID from the model version's tags first
    run_id = model_version.tags.get("run_id")

    # If not in tags, parse it from the source URI: the 32-hex-digit path
    # segment immediately before "/artifacts"
    if not run_id and model_version.source:
        match = re.search(r"/([0-9a-f]{32})/artifacts", model_version.source)
        if match:
            run_id = match.group(1)

    if run_id:
        runs_uri = f"runs:/{run_id}/model"
        print(f"Run ID: {run_id}")
        print(f"Runs URI: {runs_uri}")
    else:
        # Report the configured alias instead of a hard-coded name
        print(f"⚠️  Could not extract run_id from alias '{ALIAS}'.")

except Exception as e:
    # Deliberately best-effort: report the failure instead of raising
    print(f"❌ Error retrieving alias '{ALIAS}' for model '{REGISTERED_MODEL_NAME}': {e}")



# Combined: get the URI of the winner and deploy it

In [6]:
import mlflow
from mlflow import MlflowClient
import re
from kubernetes import client
from kserve import KServeClient
from kserve import constants
from kserve import V1beta1InferenceService, V1beta1InferenceServiceSpec, V1beta1PredictorSpec, V1beta1ModelSpec

# ===== config =====
TRACKING_URI = "http://sunrise-mlflow-tracking.mlflow.svc.cluster.local:5080"
REGISTERED_MODEL_NAME = "cpu-pct-test-5"
ALIAS = "winner"

namespace = "lstm-iqu"
name = "inference-serving-kserve-cpu"
service_account = "sa-private-mlflow"
S3_PREFIX = "s3://mlflow"

# ===== MLflow: Get model version + run ID =====
mlflow.set_tracking_uri(TRACKING_URI)
mlflow_client = MlflowClient()

try:
    # Resolve the model version the alias points to
    model_version = mlflow_client.get_model_version_by_alias(REGISTERED_MODEL_NAME, ALIAS)
    version = model_version.version

    # Prefer a "run_id" tag; otherwise parse the 32-hex-digit path segment
    # before "/artifacts" out of the version's source URI
    run_id = model_version.tags.get("run_id")

    if not run_id and model_version.source:
        match = re.search(r"/([0-9a-f]{32})/artifacts", model_version.source)
        if match:
            run_id = match.group(1)

    if not run_id:
        raise RuntimeError("❌ Could not extract run_id from alias.")

    # Fetch run info to get experiment_id
    run = mlflow_client.get_run(run_id)
    experiment_id = run.info.experiment_id

    # Storage URI rebuilt from experiment_id and run_id.
    # NOTE(review): assumes artifacts live under
    # <S3_PREFIX>/<experiment_id>/<run_id>/artifacts/model — confirm against
    # model_version.source for this tracking server.
    storage_uri = f"{S3_PREFIX}/{experiment_id}/{run_id}/artifacts/model"
    print(f"✅ Storage URI: {storage_uri}")

except Exception as e:
    # Chain explicitly so the traceback marks the original error as the cause
    raise RuntimeError(f"❌ Failed to resolve MLflow model: {e}") from e

# ===== KServe Deployment =====
# Model spec: MLflow format (given as a dict with a "name" key), the storage URI
# resolved earlier in this cell, and the "kserve-mlserver" runtime.
model_spec = V1beta1ModelSpec(
    runtime="kserve-mlserver",
    storage_uri=storage_uri,
    model_format={"name": "mlflow"},
)

# The predictor runs under the configured ServiceAccount.
predictor_spec = V1beta1PredictorSpec(
    model=model_spec,
    service_account_name=service_account,
)

# Assemble the InferenceService from its metadata and spec.
isvc_metadata = client.V1ObjectMeta(namespace=namespace, name=name)
isvc_spec = V1beta1InferenceServiceSpec(predictor=predictor_spec)
isvc = V1beta1InferenceService(
    kind="InferenceService",
    api_version="serving.kserve.io/v1beta1",
    metadata=isvc_metadata,
    spec=isvc_spec,
)

# Submit the InferenceService to the cluster.
kserve_client = KServeClient()
kserve_client.create(isvc, namespace=namespace)

# Optional: wait until the service reports ready.
kserve_client.wait_isvc_ready(name, namespace=namespace)


✅ Storage URI: s3://mlflow/27/a4bf131840464350ad5dcfd941e7ddcf/artifacts/model
