# Prepare the environment for the notebook

In [None]:
# sklearn, joblib, s3fs already come with the notebook image
# %pip install scikit-learn joblib s3fs

In [None]:
# API Key used to call the deployed InferenceService.
# Either paste it between the quotes below or, preferably, export it as the
# INFERENCE_SERVICE_API_KEY environment variable so it is never saved inside the notebook.
# If not known, ask the cluster administrator for the API Key that is used to access the deployed InferenceServices.
import os  # imported here because this cell runs before the notebook's other import cells

INFERENCE_SERVICE_API_KEY = ""
if not INFERENCE_SERVICE_API_KEY:
    # Fall back to the environment when nothing was pasted above
    INFERENCE_SERVICE_API_KEY = os.environ.get("INFERENCE_SERVICE_API_KEY", "")
# Strip copy-paste whitespace so a blank-looking value cannot pass the check below
INFERENCE_SERVICE_API_KEY = INFERENCE_SERVICE_API_KEY.strip()
if not INFERENCE_SERVICE_API_KEY:
    raise RuntimeError("Please provide the API Key that will be used to test the deployed InferenceService")

# Create a small model to be deployed as InferenceService

In [None]:
from sklearn import svm, datasets
from joblib import dump

In [None]:
# Train a small SVM classifier on the iris dataset and persist it as model.joblib
iris = datasets.load_iris()
# fit() returns the estimator itself, so construction and training can be chained
clf = svm.SVC(gamma="scale").fit(iris.data, iris.target)
dump(clf, "model.joblib")
print("Iris model file model.joblib created!")

# Push the created model to s3 storage (MinIO)

In [None]:
import os, s3fs # for uploading the created model to minio

In [None]:
# The notebook is already set up with MinIO credentials for the bucket named <namespace>-data.
# The namespace is read from the service-account file mounted into the notebook pod.
with open("/var/run/secrets/kubernetes.io/serviceaccount/namespace", "r") as namespace_file:
    # strip(): stray whitespace or a trailing newline would end up in the bucket name,
    # the YAML manifest and the kubectl arguments that are all built from this value
    namespace = namespace_file.read().strip()
if not namespace:
    raise RuntimeError("Could not determine the Kubernetes namespace: the service-account namespace file is empty")
s3_bucket = f"{namespace}-data"
s3_model_path = f"{s3_bucket}/minimal-kserve-example"
print(f"The created model will be uploaded to s3://{s3_model_path}")

In [None]:
# Steps for uploading the created model to MinIO
# Get the required values from the environment that is set during notebook creation
aws_access_key_id = os.getenv("AWS_ACCESS_KEY_ID", "")
aws_secret_access_key = os.getenv("AWS_SECRET_ACCESS_KEY", "")
if not aws_access_key_id or not aws_secret_access_key:
    raise ValueError("AWS credentials not found in environment variables.")

# Both flags are enabled only by the exact value "1"
s3_use_https = os.getenv("S3_USE_HTTPS", "0") == "1"
s3_verify_ssl = os.getenv("S3_VERIFY_SSL", "0") == "1"

# Keep the raw environment value under its own name so `s3_endpoint` always means "full URL"
s3_endpoint_host = os.getenv("S3_ENDPOINT", "minio.minio")
if "://" in s3_endpoint_host:
    # The value already carries a scheme; prepending another would yield "http://http://..."
    s3_endpoint = s3_endpoint_host
else:
    s3_endpoint = f"http{'s' if s3_use_https else ''}://{s3_endpoint_host}"

# Upload the model to MinIO
s3 = s3fs.S3FileSystem(
    endpoint_url=s3_endpoint,
    key=aws_access_key_id,
    secret=aws_secret_access_key,
    # Transport security follows S3_USE_HTTPS (previously the verify flag was passed here by mistake)
    use_ssl=s3_use_https,
    # Certificate verification follows S3_VERIFY_SSL. Only override the client default
    # when verification must be turned off, so any configured CA bundle keeps working.
    client_kwargs={} if s3_verify_ssl else {"verify": False},
)
s3.put("model.joblib", f"{s3_model_path}/model.joblib")
# List the bucket content to see if upload was successful
s3.ls(s3_model_path)

# Create the InferenceService manifest that will use the uploaded model and deploy it to the cluster

In [None]:
# Render the kserve InferenceService manifest and save it next to the notebook
inference_service_name = "kserve-minio-test"
manifest_file_name = "inferenceservice.yaml"

# The manifest points the sklearn predictor at the model file uploaded to s3://{s3_model_path}
inference_service_manifest = f"""
apiVersion: serving.kserve.io/v1beta1
kind: InferenceService
metadata:
  name: {inference_service_name}
  namespace: {namespace}
spec:
  predictor:
    model:
      modelFormat:
        name: sklearn
      storageUri: s3://{s3_model_path}/model.joblib

"""

with open(manifest_file_name, "w") as manifest_out:
    # end="" so exactly the rendered text is written, with no extra newline appended
    print(inference_service_manifest, file=manifest_out, end="")

In [None]:
# Use kubectl to apply the created manifest
# Jupyter notebook replaces {variable} with actual python value
!kubectl apply -f {manifest_file_name} # Apply the manifest

In [None]:
!kubectl wait inferenceservice --for=condition=ready --timeout 300s --namespace {namespace} {inference_service_name} # Wait for the kserve InferenceService to be ready.
!kubectl get inferenceservice --namespace {namespace} {inference_service_name}

# Test the deployed InferenceService with a sample request

In [None]:
# Below, we use {{ and }} to escape the curly braces in the jsonpath expression so Jupyter notebook does not try to replace them with python variables
inference_service_url = !kubectl get inferenceservice --namespace {namespace} {inference_service_name} -o jsonpath='{{.status.url}}' 
inference_service_url = inference_service_url[0] # Jupyter notebook shell command executions returns an array
print(inference_service_url)

In [None]:
# Test the deployed InferenceService.
# The deployed service is protected by an API Key.
import requests
response = requests.post(
    f"{inference_service_url}/v1/models/{inference_service_name}:predict",
    headers={
        "X-Api-Key": INFERENCE_SERVICE_API_KEY
    },
    json={"instances": [[6.8, 2.8, 4.8, 1.4], [5.1, 3.5, 1.4, 0.2]]}, # an iris instance is [sepal_length, sepal_width, petal_length, petal_width]
    timeout=30, # requests has no default timeout; without one an unreachable service hangs the cell
)
if not response.ok:
    # Show the raw body first: error replies are not guaranteed to be JSON
    print(f"Request failed with HTTP {response.status_code}: {response.text}")
    response.raise_for_status()
print(response.json())