# KServe API Person Detection

This notebook demonstrates person detection using a YOLO model deployed on OpenShift AI and served through the KServe REST API.

## Prerequisites:
- ONNX model deployed on OpenShift AI
- InferenceService endpoint accessible
- Network connectivity to OpenShift cluster

In [None]:
# Put the parent directory first on sys.path so the `src` package imports below resolve.
# NOTE(review): assumes the notebook is run from a directory one level below the project root — confirm.
import sys
sys.path.insert(0, '..')

import cv2
import numpy as np
import matplotlib.pyplot as plt
from pathlib import Path
import time

# Project-local modules: the KServe client and the detection drawing/summary helpers.
from src.api.kserve_client import KServeClient
from src.detection.visualizer import draw_detections, create_detection_summary

# Render matplotlib figures inline and set the default figure size for the notebook.
%matplotlib inline
plt.rcParams['figure.figsize'] = (12, 8)

## 1. Configure KServe Endpoint

Update the endpoint URL to match your OpenShift AI deployment.

In [None]:
# --- Endpoint configuration (edit to match your deployment) ---
# Look up the URL with: oc get inferenceservice -n train-detection
ENDPOINT_URL = "https://train-detection-model-train-detection.apps.cluster-rk6mx.rk6mx.sandbox492.opentlc.com"

# Alternative for local testing through a port-forward:
#   oc port-forward svc/train-detection-model-predictor 8080:8080 -n train-detection
#   ENDPOINT_URL = "http://localhost:8080"

# MODEL_NAME must be the Triton model name, NOT the InferenceService name:
#   InferenceService name: "train-detection-model" (from oc get inferenceservice)
#   Triton model name:     "yolo11n"               (from the S3 directory structure)
MODEL_NAME = "yolo11n"

# Echo the effective settings, including the inference URL built from them.
print(
    f"Endpoint URL: {ENDPOINT_URL}",
    f"Model Name: {MODEL_NAME}",
    f"\nFull inference URL: {ENDPOINT_URL}/v2/models/{MODEL_NAME}/infer",
    sep="\n",
)

## 2. Initialize KServe Client

In [None]:
# Build the client for the configured endpoint and model.
# timeout=30 — presumably seconds; confirm against KServeClient.
client = KServeClient(endpoint_url=ENDPOINT_URL, model_name=MODEL_NAME, timeout=30)

print("KServe client initialized")

## 3. Health Check

In [None]:
# Probe the endpoint; when the check fails, list the things to verify.
is_healthy = client.health_check()

if not is_healthy:
    print(
        "✗ Endpoint health check failed",
        "Please verify:",
        "  1. InferenceService is deployed and running",
        "  2. Endpoint URL is correct",
        "  3. Network connectivity to OpenShift cluster",
        sep="\n",
    )
else:
    print("✓ Endpoint is healthy")

## 4. Get Model Metadata

In [None]:
# Fetch the model metadata from the endpoint and print its fields.
metadata = client.get_metadata()

if not metadata:
    # A falsy value means the client could not provide metadata.
    print("Could not retrieve model metadata")
else:
    print("Model Metadata:")
    print(f"  Name: {metadata.name}")
    print(f"  Platform: {metadata.platform}")
    print(f"  Inputs: {metadata.inputs}")
    print(f"  Outputs: {metadata.outputs}")

## 5. Test Inference on Sample Image

In [None]:
# Fetch the sample image used by the following cells.
# The download is skipped when a non-empty copy already exists, so the
# notebook can be re-run (Restart & Run All) without hitting the network.
# Delete the file to force a fresh download, e.g. after changing the URL.
import urllib.request

sample_image_url = "https://ultralytics.com/images/bus.jpg"
sample_image_path = "sample_image.jpg"  # kept as str: passed to cv2.imread and the client

_sample_file = Path(sample_image_path)
if _sample_file.is_file() and _sample_file.stat().st_size > 0:
    print(f"Using cached sample image: {sample_image_path}")
else:
    urllib.request.urlretrieve(sample_image_url, sample_image_path)
    print(f"Downloaded sample image to: {sample_image_path}")

In [None]:
# Send the sample image to the endpoint (confidence threshold 0.25).
result = client.predict_from_file(sample_image_path, conf_threshold=0.25)

# Report what the client returned for this request.
print(
    f"Inference Time: {result.inference_time_ms:.2f} ms",
    f"Model Name: {result.model_name}",
    f"Image Shape: {result.image_shape}",
    f"Number of persons detected: {len(result.detections)}",
    sep="\n",
)

# One line per detection: confidence and bounding box rounded to one decimal.
for i, det in enumerate(result.detections):
    print(f"Person {i+1}: confidence={det.confidence:.2f}, bbox={[round(x, 1) for x in det.bbox]}")

In [None]:
# Draw the API detections on the sample image and display it.
image = cv2.imread(sample_image_path)
if image is None:
    # cv2.imread returns None (no exception) when the file is missing or
    # cannot be decoded; fail here with a clear message instead of later
    # inside draw_detections / cvtColor.
    raise FileNotFoundError(f"Could not read image: {sample_image_path}")

annotated_image = draw_detections(image, result.detections)
# OpenCV loads images as BGR; matplotlib expects RGB.
annotated_rgb = cv2.cvtColor(annotated_image, cv2.COLOR_BGR2RGB)

plt.figure(figsize=(14, 10))
plt.imshow(annotated_rgb)
plt.title(f"API Detection: {len(result.detections)} persons - {result.inference_time_ms:.2f} ms")
plt.axis('off')
plt.show()

## 6. Compare with Local Model

Compare the API inference results with a local YOLO11 model (`yolo11n.pt`) run on the same sample image.

In [None]:
from src.detection.yolo_detector import YOLODetector
from src.utils.config import Config

# Load the local detector with the same confidence threshold (0.25) that was
# used for the API request on the sample image.
local_detector = YOLODetector(
    model_path=str(Config.get_model_path('yolo11n.pt')),
    conf_threshold=0.25
)

# Time local inference with time.perf_counter(): a monotonic, high-resolution
# clock intended for measuring intervals. time.time() follows the wall clock,
# which can be adjusted while the cell runs and is coarse on some platforms.
# Detector construction above is deliberately outside the timed region.
start_time = time.perf_counter()
local_image, local_detections = local_detector.process_image(sample_image_path)
local_time = (time.perf_counter() - start_time) * 1000  # seconds -> milliseconds

# `result` is the API result for the same sample image.
# "Time Overhead" is API time minus local time; it is negative when the API call was faster.
print("\nComparison:")
print(f"  API Detections: {len(result.detections)} in {result.inference_time_ms:.2f} ms")
print(f"  Local Detections: {len(local_detections)} in {local_time:.2f} ms")
print(f"  Difference: {abs(len(result.detections) - len(local_detections))} detections")
print(f"  Time Overhead: {(result.inference_time_ms - local_time):.2f} ms")

## 7. Benchmark API Performance

In [None]:
# Benchmark API inference: send the sample image num_runs times, one request
# after another, and summarise the reported latencies.
num_runs = 10
times = []
detection_counts = []

print(f"Running {num_runs} API inference requests...")
for i in range(num_runs):
    # Use a dedicated name so the notebook-level `result` (read by the
    # visualisation and comparison cells) is not overwritten by the benchmark.
    # The threshold is passed explicitly so detection counts are comparable
    # with the 0.25 used for the sample-image inference and the local detector.
    bench_result = client.predict_from_file(sample_image_path, conf_threshold=0.25)
    times.append(bench_result.inference_time_ms)
    detection_counts.append(len(bench_result.detections))
    print(f"  Run {i+1}: {bench_result.inference_time_ms:.2f} ms, {len(bench_result.detections)} detections")

mean_time_ms = np.mean(times)  # computed once, reused for average and throughput

print(f"\nBenchmark Results ({num_runs} runs):")
print(f"  Average Time: {mean_time_ms:.2f} ms")
print(f"  Min Time: {np.min(times):.2f} ms")
print(f"  Max Time: {np.max(times):.2f} ms")
print(f"  Std Dev: {np.std(times):.2f} ms")
# Throughput for sequential requests: 1000 ms divided by the mean latency in ms.
print(f"  Throughput: {1000/mean_time_ms:.2f} requests/sec")
# A standard deviation of 0 means every run returned the same number of detections.
print(f"  Detection Consistency: {np.std(detection_counts):.2f} std dev")

## 8. Test Multiple Images

In [None]:
# Download additional test images, run each through the API and display the
# annotated result with a short summary.
test_images = [
    "https://ultralytics.com/images/bus.jpg",
    "https://ultralytics.com/images/zidane.jpg"
]

for idx, img_url in enumerate(test_images):
    img_path = f"test_image_{idx}.jpg"
    urllib.request.urlretrieve(img_url, img_path)

    # Run inference. A loop-specific name keeps the notebook-level `result`
    # (read by the visualisation and comparison cells) intact, and the
    # threshold matches the 0.25 used for the sample-image inference.
    img_result = client.predict_from_file(img_path, conf_threshold=0.25)

    # Visualize
    image = cv2.imread(img_path)
    if image is None:
        # cv2.imread returns None instead of raising when it cannot read or
        # decode the file; stop with a clear message.
        raise FileNotFoundError(f"Could not read image: {img_path}")
    annotated = draw_detections(image, img_result.detections)
    # OpenCV loads images as BGR; matplotlib expects RGB.
    annotated_rgb = cv2.cvtColor(annotated, cv2.COLOR_BGR2RGB)

    plt.figure(figsize=(10, 7))
    plt.imshow(annotated_rgb)
    plt.title(f"Image {idx+1}: {len(img_result.detections)} persons - {img_result.inference_time_ms:.2f} ms")
    plt.axis('off')
    plt.show()

    summary = create_detection_summary(img_result.detections)
    print(f"\nImage {idx+1} Summary: {summary}")

## Summary

This notebook demonstrated:
- Connecting to KServe model endpoint on OpenShift AI
- Health checking and metadata retrieval
- Running inference via REST API
- Comparing API vs local inference
- Performance benchmarking

The API adds network overhead but provides:
- Centralized model serving
- Auto-scaling capabilities
- Version management
- Multi-client access