## Step 1: Setup Dependencies

In [None]:
# Install into the interpreter backing this kernel (sys.executable) rather than
# whichever `pip` happens to be first on PATH.
# NOTE(review): scikit-learn and kserve are pinned, but mlflow and boto3 are not —
# pin them once known-good versions are confirmed so a fresh "Run All" stays reproducible.
import sys
!{sys.executable} -m pip install -q scikit-learn==1.2.2 kserve==0.11.0 mlflow boto3

## Step 2: Import Libraries

In [None]:
import os
import time

import mlflow
import requests
from kserve import KServeClient, V1beta1InferenceService, V1beta1InferenceServiceSpec
from kserve import V1beta1PredictorSpec, V1beta1SKLearnSpec
from kubernetes.client import V1ObjectMeta, V1ResourceRequirements
from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

## Step 3: Configure MLflow from Environment

In [None]:
# These variables are provided by the MLflow PodDefault; indexing os.environ
# directly makes a missing variable fail loudly with a KeyError.
tracking_uri = os.environ['MLFLOW_TRACKING_URI']
mlflow.set_tracking_uri(tracking_uri)
print(f"MLflow Tracking URI: {tracking_uri}")

s3_endpoint = os.environ['MLFLOW_S3_ENDPOINT_URL']
print(f"S3 Endpoint: {s3_endpoint}")

## Step 4: Train and Log Model with MLflow

In [None]:
# Generate a synthetic dataset (fixed seed so the split and metrics are reproducible)
X, y = make_classification(n_samples=1000, n_features=20, n_informative=15,
                           n_redundant=5, random_state=42)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Single source of truth for the hyperparameters: the same dict configures the
# model and is logged to MLflow, so the recorded params cannot drift from the
# values the model was actually trained with.
rf_params = {'n_estimators': 100, 'max_depth': 10}

# Train model with MLflow tracking
with mlflow.start_run(run_name='fraud-detection-demo') as run:
    # Train model
    model = RandomForestClassifier(**rf_params, random_state=42)
    model.fit(X_train, y_train)

    # Evaluate on the held-out split
    y_pred = model.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)

    # Log to MLflow
    mlflow.log_params({**rf_params, 'model_type': 'RandomForest'})
    mlflow.log_metric('accuracy', accuracy)
    # Logged under the artifact path "model"; the storage-URI step appends
    # "/model" to the run's artifact URI to locate it.
    mlflow.sklearn.log_model(model, 'model')

    run_id = run.info.run_id
    print(f"\n✓ Model trained with accuracy: {accuracy:.4f}")
    print(f"✓ MLflow Run ID: {run_id}")

## Step 5: Get Model Storage URI

In [None]:
# Look the run up again through the tracking client and derive the model
# location: the run's artifact root plus the "model" artifact path.
mlflow_client = mlflow.tracking.MlflowClient()
run_info = mlflow_client.get_run(run_id)
artifact_root = run_info.info.artifact_uri
storage_uri = f"{artifact_root}/model"

print(f"Model Storage URI: {storage_uri}")

## Step 6: Deploy with KServe SDK

In [None]:
# Initialize KServe client
kserve_client = KServeClient()
namespace = 'kubeflow-user-example-com'
service_name = 'fraud-detection-demo'

# Use a typed V1ObjectMeta instead of a plain dict: the KServe client reads
# attributes such as `metadata.namespace`, which a dict does not provide.
isvc_metadata = V1ObjectMeta(
    name=service_name,
    namespace=namespace,
    annotations={
        'sidecar.istio.io/inject': 'true',
        # NOTE(review): presumably keeps traffic to the S3/MinIO endpoint on
        # port 9000 out of the Istio sidecar — confirm against the cluster setup.
        'traffic.sidecar.istio.io/excludeOutboundPorts': '9000',
    },
)

# Predictor: scikit-learn runtime loading the model from the MLflow artifact location
predictor = V1beta1PredictorSpec(
    service_account_name='kserve-sa',
    sklearn=V1beta1SKLearnSpec(
        storage_uri=storage_uri,
        resources=V1ResourceRequirements(
            requests={'cpu': '100m', 'memory': '256Mi'},
            limits={'cpu': '500m', 'memory': '512Mi'},
        ),
    ),
)

# Define InferenceService using Python SDK
isvc = V1beta1InferenceService(
    api_version='serving.kserve.io/v1beta1',
    kind='InferenceService',
    metadata=isvc_metadata,
    spec=V1beta1InferenceServiceSpec(predictor=predictor),
)

# Create deployment; the namespace is passed explicitly so the target does not
# depend on how the client infers it from the object.
# NOTE(review): create() is expected to fail if the service already exists —
# delete the old InferenceService before re-running this cell.
kserve_client.create(isvc, namespace=namespace)
print(f"✓ InferenceService '{service_name}' created")

## Step 7: Wait for Deployment

In [None]:
def wait_for_ready(name, namespace, timeout=300, interval=5, client=None):
    """Poll an InferenceService until its ``Ready`` condition is ``True``.

    Args:
        name: Name of the InferenceService to poll.
        namespace: Namespace the InferenceService lives in.
        timeout: Maximum number of seconds to wait before giving up.
        interval: Seconds to sleep between polls.
        client: Object exposing ``get(name, namespace=...)`` that returns the
            service as a dict. Defaults to the notebook-level ``kserve_client``.

    Returns:
        True as soon as a ``Ready`` condition with status ``'True'`` is seen.

    Raises:
        TimeoutError: If the service is not ready within ``timeout`` seconds.
            The message includes the last observed status or error.
    """
    if client is None:
        # Fall back to the client created in the deployment cell.
        client = kserve_client

    # Monotonic clock: the deadline is unaffected by wall-clock adjustments.
    deadline = time.monotonic() + timeout
    last_observed = 'no status reported'

    while time.monotonic() < deadline:
        try:
            isvc = client.get(name, namespace=namespace)
            conditions = isvc.get('status', {}).get('conditions', [])

            for condition in conditions:
                if condition.get('type') != 'Ready':
                    continue
                if condition.get('status') == 'True':
                    print(f"\n✓ InferenceService '{name}' is READY")
                    return True
                reason = condition.get('reason', 'Unknown')
                last_observed = f"Ready={condition.get('status')}, reason={reason}"
                print(f"  Status: {reason}", end='\r')
        except Exception as exc:
            # Best effort: the service may not exist yet right after creation.
            # Keep polling, but remember the error so a timeout can report it.
            last_observed = f"{type(exc).__name__}: {exc}"
            print("  Waiting for service creation...", end='\r')

        # Never sleep past the deadline.
        remaining = deadline - time.monotonic()
        time.sleep(max(0.0, min(interval, remaining)))

    raise TimeoutError(
        f"Service '{name}' did not become ready within {timeout}s "
        f"(last observed: {last_observed})"
    )

# Block until the service reports Ready. With the function's defaults this polls
# every 5s for up to 300s and raises TimeoutError if the service never becomes ready.
wait_for_ready(service_name, namespace)

## Step 8: Test Inference

In [None]:
# Prepare test data: the first five held-out rows as plain Python lists (JSON-serialisable)
test_input = X_test[:5].tolist()

# Inference endpoint: cluster-local predictor service, V1 predict route
inference_url = (
    f"http://{service_name}-predictor.{namespace}.svc.cluster.local:80"
    f"/v1/models/{service_name}:predict"
)

# Send prediction request. The explicit timeout keeps an unreachable or stalled
# endpoint from hanging the kernel; requests waits indefinitely without one.
response = requests.post(
    inference_url,
    json={'instances': test_input},
    headers={'Content-Type': 'application/json'},
    timeout=30,
)

if response.status_code == 200:
    predictions = response.json()['predictions']
    print(f"✓ Predictions: {predictions}")
    print(f"✓ Actual labels: {y_test[:5].tolist()}")
else:
    # Show the status and raw body so the failure can be diagnosed
    print(f"✗ Error: {response.status_code}")
    print(response.text)

## Summary

This notebook demonstrated a production-ready workflow:

1. **Environment-driven config**: Using `MLFLOW_TRACKING_URI` and `MLFLOW_S3_ENDPOINT_URL`
2. **MLflow integration**: Model storage URI built from the run's `info.artifact_uri` (fetched with `MlflowClient.get_run()`) plus the `model` artifact path
3. **KServe SDK**: Declarative deployment with `V1beta1InferenceService` objects
4. **Smart polling**: `wait_for_ready()` function checking actual status
5. **REST inference**: Clean API testing with proper error handling

The deployment is now serving predictions at the cluster endpoint!