# Deploy Model to OpenShift AI

This notebook deploys the trained model as an inference service on OpenShift AI.

**Inputs:**
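- Deployment variables (model name, model version, S3 bucket and model path) read from `vars.txt`, which is written by notebook 2
- YAML templates with `{{ variable }}` placeholders that are filled in at deploy time: `../deployment/minio.yaml`, `storage-config.yaml.template`, `serving-runtime.yaml` and `inference-service.yaml`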

**Outputs:**
- InferenceService deployed to OpenShift
- Model endpoint URL, saved together with the deployment details to `endpoint_info.json`

In [None]:
import os
import json
import yaml
from string import Template
from kubernetes import client, config
from kubernetes.client.rest import ApiException
import time

In [None]:
# Read the KEY=VALUE pairs left behind by the previous notebook.
# Every pair is kept in `deployment_vars` and also exported to the process
# environment; lines without an '=' are ignored.
vars_path = 'vars.txt'
deployment_vars = {}

if os.path.exists(vars_path):
    with open(vars_path, 'r') as vars_file:
        for raw_line in vars_file:
            if '=' not in raw_line:
                continue
            # Split on the first '=' only, so values may themselves contain '='.
            key, _, value = raw_line.strip().partition('=')
            deployment_vars[key] = value
            os.environ[key] = value

print('Deployment variables loaded:')
for key in deployment_vars:
    value = deployment_vars[key]
    print(f'  {key}={value}')

In [None]:
# Configuration
# Lookup order for MODEL_NAME, MODEL_VERSION and BUCKET: environment variable,
# then the value loaded from vars.txt, then the built-in default.
# NAMESPACE is read from the environment only; MODEL_PATH from vars.txt only.
MODEL_NAME = os.environ.get('MODEL_NAME', deployment_vars.get('model_name', 'alert-recommender'))
MODEL_VERSION = os.environ.get('MODEL_VERSION', deployment_vars.get('model_version', '1.0.0'))
NAMESPACE = os.environ.get('NAMESPACE', 'spending-transaction-monitor')
BUCKET = os.environ.get('BUCKET', deployment_vars.get('s3_bucket', 'models'))
MODEL_PATH = deployment_vars.get('s3_model_path', 'alert-recommender/')

# Echo the effective settings so the notebook output records what was deployed.
print('\nDeployment configuration:')
for label, setting in (
    ('Model', MODEL_NAME),
    ('Version', MODEL_VERSION),
    ('Namespace', NAMESPACE),
    ('Bucket', BUCKET),
    ('Model Path', MODEL_PATH),
):
    print(f'  {label}: {setting}')

In [None]:
# Initialize Kubernetes client
# Prefer the in-cluster service-account configuration and fall back to the
# local kubeconfig when the notebook is not running inside a pod.
try:
    config.load_incluster_config()
    print('‚úÖ Loaded in-cluster Kubernetes config')
except config.ConfigException:
    # Only a missing/invalid in-cluster config triggers the fallback; anything
    # else (including KeyboardInterrupt) now propagates instead of being hidden.
    config.load_kube_config()
    print('‚úÖ Loaded kubeconfig from local')

# One shared ApiClient backs every typed API wrapper created in this notebook.
api_client = client.ApiClient()
custom_api = client.CustomObjectsApi(api_client)

In [None]:
import re

def substitute_template_vars(yaml_content, variables):
    """Substitute Jinja2-style ``{{ name }}`` placeholders in YAML text.

    Args:
        yaml_content: Template text containing ``{{ name }}`` placeholders.
            Whitespace inside the braces is optional.
        variables: Mapping of placeholder name to value. Each value is
            converted with ``str()`` and inserted literally.

    Returns:
        The text with every placeholder named in ``variables`` replaced.
        Placeholders with no matching key are left untouched.
    """
    for key, value in variables.items():
        # Match {{ key }}, {{key}}, {{ key}}, {{key }}
        pattern = r'\{\{\s*' + re.escape(key) + r'\s*\}\}'
        # A callable replacement is inserted verbatim. Passing the string
        # directly would make re.sub interpret backslashes in the value
        # (e.g. "\1", "\g<0>", Windows paths) as escapes or group references.
        yaml_content = re.sub(pattern, lambda _match, text=str(value): text, yaml_content)
    return yaml_content

def load_and_substitute_yaml(file_path, variables):
    """Read a YAML template, fill in its placeholders and parse the result.

    Args:
        file_path: Path of the template file to read.
        variables: Placeholder names and values, passed to
            ``substitute_template_vars``.

    Returns:
        The object produced by ``yaml.safe_load`` on the substituted text.
    """
    with open(file_path, 'r') as template_file:
        raw_template = template_file.read()

    return yaml.safe_load(substitute_template_vars(raw_template, variables))

## Step 1: Deploy MinIO (if not already deployed)

In [None]:
# Look up the 'minio' Deployment in the target namespace and record whether it
# exists. A 404 means "not deployed yet"; any other API error is re-raised.
v1 = client.AppsV1Api(api_client)

try:
    v1.read_namespaced_deployment('minio', NAMESPACE)
except ApiException as e:
    if e.status != 404:
        raise
    print('MinIO not found, will deploy...')
    minio_exists = False
else:
    print('‚úÖ MinIO already deployed')
    minio_exists = True

In [None]:
import subprocess

if not minio_exists:
    print('\nüì¶ Deploying MinIO...')

    # Render the manifest in Python rather than with `sed` inside an IPython
    # `!` command: IPython collapses "{{"/"}}" to single braces before the
    # shell sees the line, so the original sed patterns did not match the
    # whole "{{ namespace }}" / "{{ bucket }}" placeholder.
    # NOTE(review): assumes minio.yaml uses the same {{ name }} placeholders
    # as the other templates - confirm against the file.
    variables = {'namespace': NAMESPACE, 'bucket': BUCKET}
    with open(os.path.join('..', 'deployment', 'minio.yaml'), 'r') as manifest_file:
        minio_manifest = substitute_template_vars(manifest_file.read(), variables)

    # Feed the rendered text to `kubectl apply -f -` on stdin. The raw text is
    # used (not a parsed object) so a multi-document manifest is passed intact.
    apply_result = subprocess.run(
        ['kubectl', 'apply', '-f', '-'],
        input=minio_manifest,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
        universal_newlines=True,
    )
    # Echo kubectl's output into the cell, as the `!` command used to do.
    print(apply_result.stdout, end='')
    if apply_result.returncode != 0:
        print(f'kubectl apply exited with status {apply_result.returncode}')

    print('\n‚è≥ Waiting for MinIO to be ready...')
    time.sleep(30)

    # Poll the Deployment until all desired replicas report ready.
    max_wait = 180
    start_time = time.time()

    while time.time() - start_time < max_wait:
        try:
            deployment = v1.read_namespaced_deployment('minio', NAMESPACE)
            if deployment.status.ready_replicas == deployment.spec.replicas:
                print('\n‚úÖ MinIO is ready!')
                break
        except Exception as exc:
            # Polling stays best-effort, but the failure is reported and
            # KeyboardInterrupt is no longer swallowed, so the cell can be
            # interrupted.
            print(f'  MinIO status check failed ({exc}); retrying...')
        time.sleep(5)
    else:
        print('\n‚ö†Ô∏è MinIO deployment timeout - check status manually')
else:
    print('Skipping MinIO deployment (already exists)')

## Step 2: Deploy Storage Config Secret

In [None]:
print('\nüîê Deploying storage-config secret...')

variables = {'namespace': NAMESPACE, 'bucket': BUCKET}
storage_config = load_and_substitute_yaml('./storage-config.yaml.template', variables)

core_v1 = client.CoreV1Api(api_client)

try:
    core_v1.create_namespaced_secret(NAMESPACE, storage_config)
    print('‚úÖ Storage config secret created')
except ApiException as e:
    if e.status == 409:
        core_v1.replace_namespaced_secret('storage-config', NAMESPACE, storage_config)
        print('‚úÖ Storage config secret updated')
    else:
        raise

## Step 3: Deploy ServingRuntime

In [None]:
print('\nüöÄ Deploying ServingRuntime...')

variables = {'namespace': NAMESPACE}
serving_runtime = load_and_substitute_yaml('./serving-runtime.yaml', variables)

print('ServingRuntime configuration:')
print(f"  Name: {serving_runtime['metadata']['name']}")
print(f"  Namespace: {NAMESPACE}")
print(f"  Image: {serving_runtime['spec']['containers'][0]['image']}")

try:
    custom_api.create_namespaced_custom_object(
        group='serving.kserve.io',
        version='v1alpha1',
        namespace=NAMESPACE,
        plural='servingruntimes',
        body=serving_runtime
    )
    print('‚úÖ ServingRuntime created')
except ApiException as e:
    if e.status == 409:
        custom_api.patch_namespaced_custom_object(
            group='serving.kserve.io',
            version='v1alpha1',
            namespace=NAMESPACE,
            plural='servingruntimes',
            name=serving_runtime['metadata']['name'],
            body=serving_runtime
        )
        print('‚úÖ ServingRuntime updated')
    else:
        raise

## Step 4: Deploy InferenceService

In [None]:
print('\nü§ñ Deploying InferenceService...')

variables = {
    'namespace': NAMESPACE,
    'bucket': BUCKET,
    'model_path': MODEL_PATH,
    'model_version': MODEL_VERSION
}

inference_service = load_and_substitute_yaml('./inference-service.yaml', variables)

print('InferenceService configuration:')
print(f"  Name: {inference_service['metadata']['name']}")
print(f"  Namespace: {NAMESPACE}")
print(f"  Model Path: s3://{BUCKET}/{MODEL_PATH}")

# Add model version annotation
if 'annotations' not in inference_service['metadata']:
    inference_service['metadata']['annotations'] = {}
inference_service['metadata']['annotations']['model-version'] = MODEL_VERSION

try:
    custom_api.create_namespaced_custom_object(
        group='serving.kserve.io',
        version='v1beta1',
        namespace=NAMESPACE,
        plural='inferenceservices',
        body=inference_service
    )
    print('‚úÖ InferenceService created')
except ApiException as e:
    if e.status == 409:
        custom_api.patch_namespaced_custom_object(
            group='serving.kserve.io',
            version='v1beta1',
            namespace=NAMESPACE,
            plural='inferenceservices',
            name=inference_service['metadata']['name'],
            body=inference_service
        )
        print('‚úÖ InferenceService updated')
    else:
        raise

## Step 5: Wait for Deployment

In [None]:
print('\n‚è≥ Waiting for InferenceService to be ready...')

# Poll the InferenceService until its 'Ready' condition is 'True' or the
# timeout expires. On success the endpoint details are written to
# endpoint_info.json and kept in `endpoint_info` for the test cell.
max_wait = 300  # 5 minutes
poll_interval = 10  # seconds between polls
start_time = time.time()
isvc_name = inference_service['metadata']['name']
ready = False

while time.time() - start_time < max_wait:
    try:
        isvc = custom_api.get_namespaced_custom_object(
            group='serving.kserve.io',
            version='v1beta1',
            namespace=NAMESPACE,
            plural='inferenceservices',
            name=isvc_name
        )
    except ApiException:
        # Treat a failed read like "no status yet" and keep polling.
        isvc = {}

    status = isvc.get('status', {})
    ready_condition = next(
        (c for c in status.get('conditions', []) if c.get('type') == 'Ready'),
        None,
    )

    if ready_condition is not None and ready_condition.get('status') == 'True':
        print('\n‚úÖ InferenceService is ready!')

        # Get endpoint URL
        url = status.get('url', 'N/A')
        print(f'\nüåê Inference endpoint: {url}')

        # Save endpoint info
        endpoint_info = {
            'model_name': isvc_name,
            'model_version': MODEL_VERSION,
            'endpoint_url': url,
            'namespace': NAMESPACE,
            'bucket': BUCKET,
            'model_path': MODEL_PATH,
            'deployed_at': time.strftime('%Y-%m-%d %H:%M:%S')
        }

        with open('endpoint_info.json', 'w') as f:
            json.dump(endpoint_info, f, indent=2)

        print('\nüìù Deployment info saved to endpoint_info.json')
        print('\n‚úÖ Deployment notebook completed successfully!')
        ready = True
        break

    # Print exactly one progress line per poll. Previously the reason line was
    # immediately overwritten by the generic waiting line, so the not-ready
    # reason was never visible.
    if ready_condition is None:
        elapsed = int(time.time() - start_time)
        print(f'  Waiting for InferenceService... ({elapsed}s)', end='\r')
    else:
        reason = ready_condition.get('reason', 'Unknown')
        message = ready_condition.get('message', '')
        print(f'  Status: {reason} - {message[:50]}...', end='\r')
    time.sleep(poll_interval)
else:
    print('\n‚ö†Ô∏è  Timeout waiting for InferenceService to be ready')
    print(f'   Check status: kubectl get isvc {isvc_name} -n {NAMESPACE}')

## Step 6: Test the Deployment

In [None]:
if 'endpoint_info' in locals():
    print('\nüß™ Testing inference endpoint...')
    
    endpoint_url = endpoint_info['endpoint_url']
    health_url = f"{endpoint_url.replace('http://', 'http://')}/v2/health/ready"
    
    !curl -s {health_url}
    
    print('\n\n‚úÖ Deployment and testing complete!')