# ML Inference Services Deployment

**Deploy trained models as production inference services**

## **Objectives:**
1. **Deploy Models** - Convert trained models to inference services
2. **Real-time Scoring** - Single patient risk prediction endpoints
3. **Batch Inference** - Large-scale patient cohort processing
4. **Service Monitoring** - Performance and usage tracking
5. **API Integration** - REST endpoints for external systems

## **What We'll Create:**
- **Inference Services** from trained XGBoost and other models
- **Real-time Endpoints** for individual patient scoring
- **Batch Processing** pipelines for cohort analysis
- **Monitoring Tables** for service performance tracking

## Prerequisites
- Running in Snowflake Notebooks environment
- Previous notebooks completed (01, 02, 03, 03b, 04, 05, 06, 07)
- Trained models from notebook 05 (Model Training)
- Model monitoring from notebook 07 (ML Observability)
- The `snowflake-ml-python` package added to this notebook (via the notebook's package selector)


In [None]:
# Initialize Snowflake Session for Model Serving
#
# This cell imports the Snowpark / Snowflake ML dependencies, binds the
# notebook's active session to `session`, prints the current database, schema,
# warehouse and role, and creates the `registry` object used by later cells.
print("Initializing Snowflake session for model serving...")

import json
import datetime
import time
from typing import Dict, List, Any, Optional

# Import Snowpark session and functions (available in Snowflake Notebooks)
from snowflake.snowpark.context import get_active_session
from snowflake.snowpark.functions import col, lit, when, current_timestamp
from snowflake.snowpark.types import StructType, StructField, StringType, DoubleType, IntegerType, TimestampType

# Import Snowflake ML Registry for model serving.
# Registry is required further down in this cell, so a failed import is fatal:
# print the hint and re-raise instead of continuing into a NameError at
# `Registry(session=session)` that would hide the real cause.
try:
    from snowflake.ml.registry import Registry
    print("Snowflake ML Registry imported successfully")
except ImportError as e:
    print(f"Failed to import Snowflake ML Registry: {e}")
    print("   Please ensure snowflake-ml-python is installed: pip install snowflake-ml-python")
    raise

# Get the active Snowflake session
session = get_active_session()

print("SUCCESS: Snowflake session initialized for model serving")

# Verify context: a single-row query reporting where this notebook is running.
current_context = session.sql("""
    SELECT 
        CURRENT_DATABASE() as database,
        CURRENT_SCHEMA() as schema,
        CURRENT_WAREHOUSE() as warehouse,
        CURRENT_ROLE() as role
""").collect()[0]

# The result row is read with upper-case keys (DATABASE, SCHEMA, ...).
for label in ("Database", "Schema", "Warehouse", "Role"):
    print(f"   {label}: {current_context[label.upper()]}")

# Initialize Model Registry for serving
registry = Registry(session=session)
print("Model Registry initialized for serving")
print("Environment ready for ML inference deployment")
print("Ready to create native Snowflake inference services")


In [None]:
# Healthcare Risk Model Inference - Using Model Registry API (Correct Approach)
#
# Smoke test: fetch the default version of PRIMARY_MODEL from the registry,
# score one hand-written patient row, and report the predicted risk.
# Relies on `session` and `registry` created in the initialization cell.
import warnings

# Silence noisy library warnings so the cell output stays readable.
for _category in (UserWarning, FutureWarning, DeprecationWarning, RuntimeWarning):
    warnings.filterwarnings('ignore', category=_category)

print("Testing Healthcare Risk Model Inference using Model Registry...")

# Model configuration
PRIMARY_MODEL = "HEALTHCARE_RISK_XGBOOST_REGRESSOR"

# Output column this notebook looks for first in the inference result.
PREDICTION_COLUMN = "PREDICTED_ADVERSE_EVENT_RISK"


def find_prediction_value(row_values, expected_column=PREDICTION_COLUMN):
    """Locate the prediction column in a single inference result row.

    Args:
        row_values: Mapping of output column name to value for one row
            (for example the result of ``Row.asDict()``).
        expected_column: Exact column name to look for first.

    Returns:
        ``(column_name, value)`` for the first match, or ``(None, None)``
        when no column looks like a prediction. The value itself may be
        ``None`` if the matched column holds a NULL.
    """
    if expected_column in row_values:
        return expected_column, row_values[expected_column]

    # Fall back to a name heuristic, scanning columns in result order.
    for column_name, value in row_values.items():
        lowered = column_name.lower()
        if (
            ('risk' in lowered and 'predicted' in lowered)
            or 'prediction' in lowered
            or 'output' in lowered
        ):
            return column_name, value

    return None, None


try:
    # Step 1: Get the default model version from the registry
    print(f"Getting model {PRIMARY_MODEL} from Model Registry...")
    model_version = registry.get_model(PRIMARY_MODEL).default
    print(f"Successfully retrieved model: {PRIMARY_MODEL}")
    print(f"Model version: {model_version}")

    # Step 2: Create a one-row test input with the feature columns used by
    # this notebook's model
    print("Creating test data with correct features...")

    test_data_sql = """
    SELECT 
        65 as AGE,
        5 as NUM_CONDITIONS,
        8 as NUM_MEDICATIONS,
        25 as NUM_CLAIMS,
        75.5 as MAX_MEDICATION_RISK,
        3 as HIGH_RISK_MEDICATION_COUNT,
        45.0 as WARFARIN_RISK,
        60.0 as STATIN_RISK,
        2 as BLEEDING_RISK_EVENTS,
        1 as LIVER_RISK_EVENTS,
        0 as CARDIAC_RISK_EVENTS,
        1 as HAS_CARDIOVASCULAR_DISEASE,
        0 as HAS_DIABETES,
        0 as HAS_KIDNEY_DISEASE,
        0 as HAS_LIVER_DISEASE,
        1 as HAS_HIGH_RISK_INTERACTION
    """

    test_data = session.sql(test_data_sql)
    print("Test data created with all required features")

    # Step 3: Run inference through the Model Registry API
    print("Running inference using Model Registry API...")

    try:
        predictions = model_version.run(test_data, function_name="PREDICT")
        result_df = predictions.collect()

        if not result_df:
            raise RuntimeError("No prediction results returned")

        print("Inference successful!")

        # Show all columns to help debug unexpected output schemas
        all_columns = result_df[0].asDict()
        print(f"   All output columns: {list(all_columns.keys())}")

        column_name, risk_score = find_prediction_value(all_columns)

        if column_name == PREDICTION_COLUMN:
            print(f"   Found prediction: PREDICTED_ADVERSE_EVENT_RISK = {risk_score}")
        elif column_name is not None:
            print(f"   Found prediction column: {column_name} = {risk_score}")
        else:
            # No recognizable column: dump the row and fall back to the last
            # column as a best guess.
            print("   Could not find prediction column, showing all values:")
            for col_name, value in all_columns.items():
                print(f"      {col_name}: {value}")
            risk_score = list(all_columns.values())[-1]
            print(f"   Using last column as prediction: {risk_score}")

        result = [(risk_score,)]  # Format for consistency with old code

    except Exception as predict_error:
        print(f"Model Registry prediction failed: {predict_error}")
        result = None

    # A failed call and a NULL prediction are both treated as "no result".
    if result is None or result[0][0] is None:
        raise RuntimeError("Model prediction returned no result")

    risk_score = result[0][0]
    print(f"Model inference working! Sample risk score: {risk_score}")
    print("Using official Snowflake pattern: model!PREDICT() with CTE")

    # "\n" (not "\\n") so a blank line is printed rather than a literal backslash-n.
    print("\nHEALTHCARE RISK MODEL INFERENCE READY!")
    print(f"   Model: {PRIMARY_MODEL}")
    print(f"   Usage Pattern: WITH input AS (...) SELECT {PRIMARY_MODEL}!PREDICT(input)")
    print("   Following official Snowflake Model Registry documentation")
    print("   Ready for testing in Cell 3!")

except Exception as e:
    # Top-level boundary for this demo cell: report the failure and let the
    # notebook continue.
    print(f"Error testing model inference: {e}")
    print("   Please ensure the healthcare risk model is trained and available")


In [None]:
# Test Healthcare Risk Model Predictions - Using Model Registry API
#
# Scores three hand-written patient profiles one at a time and prints the
# predicted risk and round-trip time for each. Relies on `session`, `registry`
# and `time` from the initialization cell and on PRIMARY_MODEL from the
# previous cell.
print("Testing healthcare risk predictions with multiple patients using Model Registry...")

# Test patients with correct feature profiles (matching model signature)
test_patients = [
    {
        "name": "Low Risk Patient", 
        "AGE": 35, "NUM_CONDITIONS": 2, "NUM_MEDICATIONS": 3, "NUM_CLAIMS": 8,
        "MAX_MEDICATION_RISK": 25.0, "HIGH_RISK_MEDICATION_COUNT": 1,
        "WARFARIN_RISK": 10.0, "STATIN_RISK": 15.0,
        "BLEEDING_RISK_EVENTS": 0, "LIVER_RISK_EVENTS": 0, "CARDIAC_RISK_EVENTS": 0,
        "HAS_CARDIOVASCULAR_DISEASE": 0, "HAS_DIABETES": 0, "HAS_KIDNEY_DISEASE": 0, "HAS_LIVER_DISEASE": 0, "HAS_HIGH_RISK_INTERACTION": 0
    },
    {
        "name": "Medium Risk Patient", 
        "AGE": 55, "NUM_CONDITIONS": 5, "NUM_MEDICATIONS": 8, "NUM_CLAIMS": 25,
        "MAX_MEDICATION_RISK": 65.0, "HIGH_RISK_MEDICATION_COUNT": 2,
        "WARFARIN_RISK": 35.0, "STATIN_RISK": 45.0,
        "BLEEDING_RISK_EVENTS": 1, "LIVER_RISK_EVENTS": 0, "CARDIAC_RISK_EVENTS": 1,
        "HAS_CARDIOVASCULAR_DISEASE": 1, "HAS_DIABETES": 0, "HAS_KIDNEY_DISEASE": 0, "HAS_LIVER_DISEASE": 0, "HAS_HIGH_RISK_INTERACTION": 1
    },
    {
        "name": "High Risk Patient", 
        "AGE": 78, "NUM_CONDITIONS": 12, "NUM_MEDICATIONS": 15, "NUM_CLAIMS": 45,
        "MAX_MEDICATION_RISK": 120.0, "HIGH_RISK_MEDICATION_COUNT": 5,
        "WARFARIN_RISK": 85.0, "STATIN_RISK": 95.0,
        "BLEEDING_RISK_EVENTS": 3, "LIVER_RISK_EVENTS": 2, "CARDIAC_RISK_EVENTS": 2,
        "HAS_CARDIOVASCULAR_DISEASE": 1, "HAS_DIABETES": 1, "HAS_KIDNEY_DISEASE": 1, "HAS_LIVER_DISEASE": 0, "HAS_HIGH_RISK_INTERACTION": 1
    }
]

# Feature columns, in the order they are selected into the model input row.
FEATURE_COLUMNS = (
    "AGE",
    "NUM_CONDITIONS",
    "NUM_MEDICATIONS",
    "NUM_CLAIMS",
    "MAX_MEDICATION_RISK",
    "HIGH_RISK_MEDICATION_COUNT",
    "WARFARIN_RISK",
    "STATIN_RISK",
    "BLEEDING_RISK_EVENTS",
    "LIVER_RISK_EVENTS",
    "CARDIAC_RISK_EVENTS",
    "HAS_CARDIOVASCULAR_DISEASE",
    "HAS_DIABETES",
    "HAS_KIDNEY_DISEASE",
    "HAS_LIVER_DISEASE",
    "HAS_HIGH_RISK_INTERACTION",
)

# Output column this notebook looks for first in the inference result.
PREDICTION_COLUMN = "PREDICTED_ADVERSE_EVENT_RISK"


def build_patient_sql(patient, feature_columns=FEATURE_COLUMNS):
    """Build a one-row SELECT exposing a patient's features as named columns.

    Args:
        patient: Mapping that holds a numeric value for every name in
            ``feature_columns``; other keys (such as ``"name"``) are ignored.
        feature_columns: Column names to emit, in order.

    Returns:
        SQL text of the form ``SELECT <value> as <COLUMN>, ...``.

    Raises:
        KeyError: If ``patient`` lacks one of ``feature_columns``.
    """
    # Values are the hard-coded numeric literals defined above, so they are
    # interpolated directly. Do not reuse this for untrusted input.
    select_list = ",\n    ".join(
        f"{patient[column]} as {column}" for column in feature_columns
    )
    return f"SELECT\n    {select_list}"


def find_prediction_value(row_values, expected_column=PREDICTION_COLUMN):
    """Locate the prediction column in a single inference result row.

    Args:
        row_values: Mapping of output column name to value for one row
            (for example the result of ``Row.asDict()``).
        expected_column: Exact column name to look for first.

    Returns:
        ``(column_name, value)`` for the first match, or ``(None, None)``
        when no column looks like a prediction. The value itself may be
        ``None`` if the matched column holds a NULL.
    """
    if expected_column in row_values:
        return expected_column, row_values[expected_column]

    # Fall back to a name heuristic, scanning columns in result order.
    for column_name, value in row_values.items():
        lowered = column_name.lower()
        if (
            ('risk' in lowered and 'predicted' in lowered)
            or 'prediction' in lowered
            or 'output' in lowered
        ):
            return column_name, value

    return None, None


# "\n" (not "\\n") so a blank line is printed rather than a literal backslash-n.
print(f"\nRunning predictions for {len(test_patients)} patients...")

# Fetch the default model version once and reuse it for every patient.
try:
    model_version = registry.get_model(PRIMARY_MODEL).default
    print("Model ready for batch predictions")
except Exception as model_error:
    print(f"Could not get model: {model_error}")
    model_version = None

successful_predictions = 0

for i, patient in enumerate(test_patients, 1):
    try:
        print(f"\nPatient {i} - {patient['name']}:")
        print(f"   Age: {patient['AGE']}, Conditions: {patient['NUM_CONDITIONS']}, Medications: {patient['NUM_MEDICATIONS']}")

        if model_version is None:
            print("   Model not available")
            continue

        # Create patient data with all required features
        patient_data_sql = build_patient_sql(patient)
        patient_df = session.sql(patient_data_sql)

        # Run prediction using Model Registry API; the timing covers the
        # run() call plus collect().
        start_time = time.time()
        predictions = model_version.run(patient_df, function_name="PREDICT")
        result_df = predictions.collect()
        response_time = (time.time() - start_time) * 1000

        if not result_df:
            print("   No prediction returned")
            continue

        result_dict = result_df[0].asDict()
        column_name, risk_score = find_prediction_value(result_dict)

        if column_name is None:
            # Dump the row so an unexpected output schema can be diagnosed.
            print(f"   Available columns: {list(result_dict.keys())}")
            for col_name, value in result_dict.items():
                print(f"      {col_name}: {value}")
            print("   Could not find prediction column")
            continue

        if risk_score is None:
            # A NULL cannot be formatted with :.3f; report it explicitly
            # instead of letting the format call raise TypeError.
            print(f"   Prediction column {column_name} is NULL ({response_time:.1f}ms)")
            continue

        if column_name == PREDICTION_COLUMN:
            print(f"   Risk Score: {risk_score:.3f} ({response_time:.1f}ms)")
        else:
            print(f"   Risk Score ({column_name}): {risk_score:.3f} ({response_time:.1f}ms)")
        successful_predictions += 1

    except Exception as e:
        print(f"   Prediction failed: {e}")