In [None]:
# Environment Setup
import sys
sys.path.append('../src')

from snowflake_connection import get_session
from snowflake.snowpark.functions import col, lit, when, current_timestamp
from snowflake.ml.registry import Registry
import datetime

# Open the session through the project's get_session() helper; the cells
# below all issue their queries through this `session` object.
session = get_session()
print("✅ Environment ready for Snowflake ML Observability")


In [None]:
# Prepare Monitoring Data (Following Official Documentation)
#
# Resolves the monitoring-log table that the MODEL MONITOR reads from. If the
# table produced by notebook 5 cannot be resolved, a synthetic stand-in is built
# from PREPARED_HEALTHCARE_DATA so the rest of the notebook can still run.
print("📊 Setting up monitoring data following official ML Observability workflow...")

# According to docs: "monitoring logs store inference data and predictions with ID, timestamp, features, predictions, and ground truth"
print("🏗️ Creating monitoring logs table structure as per official documentation...")

MONITORING_TABLE = "ADVERSE_EVENT_MONITORING.DEMO_ANALYTICS.PATIENT_RISK_ASSESSMENT"

# Use the comprehensive assessment table from notebook 5 as our monitoring source
try:
    monitoring_source = session.table(MONITORING_TABLE)
    # session.table() only builds a lazy reference and never fails on its own.
    # Resolving the schema forces a round trip to Snowflake, so a missing table
    # is detected here (and the fallback below can actually run).
    schema_info = monitoring_source.schema.fields
    print("✅ Using comprehensive patient assessment data from notebook 5")
except Exception as lookup_error:
    # Fallback: Create monitoring table if notebook 5 hasn't been run
    print("📊 Creating monitoring data (run notebook 5 first for full integration)")
    print(f"   ↳ Reason: {lookup_error}")

    # IF NOT EXISTS (rather than OR REPLACE): if the lookup failed for a reason
    # other than a missing table, an existing table is left untouched.
    #
    # The AGE > 85 branch is tested before AGE > 65; in the opposite order the
    # AGE > 85 branch could never be reached.
    monitoring_setup_sql = f"""
    CREATE TABLE IF NOT EXISTS {MONITORING_TABLE} AS
    SELECT 
        PATIENT_ID,
        AGE,
        NUM_CONDITIONS, 
        NUM_MEDICATIONS,
        NUM_CLAIMS,
        
        -- Actual risk score (ground truth)
        (CASE WHEN AGE < 18 THEN 5.0 
              WHEN AGE > 85 THEN 25.0 
              WHEN AGE > 65 THEN (AGE - 65) * 0.3 
              ELSE 0.0 END +
         CASE WHEN NUM_CONDITIONS * 2 > 30 THEN 30.0 
              ELSE NUM_CONDITIONS * 2 END +
         CASE WHEN NUM_MEDICATIONS * 1.5 > 25 THEN 25.0 
              ELSE NUM_MEDICATIONS * 1.5 END +
         CASE WHEN NUM_CLAIMS > 20 THEN 20.0 
              ELSE NUM_CLAIMS END) AS RISK_SCORE,
        
        -- Synthetic predictions: same formula plus deterministic per-patient noise in [-5, 4]
        (CASE WHEN AGE < 18 THEN 5.0 
              WHEN AGE > 85 THEN 25.0 
              WHEN AGE > 65 THEN (AGE - 65) * 0.3 
              ELSE 0.0 END +
         CASE WHEN NUM_CONDITIONS * 2 > 30 THEN 30.0 
              ELSE NUM_CONDITIONS * 2 END +
         CASE WHEN NUM_MEDICATIONS * 1.5 > 25 THEN 25.0 
              ELSE NUM_MEDICATIONS * 1.5 END +
         CASE WHEN NUM_CLAIMS > 20 THEN 20.0 
              ELSE NUM_CLAIMS END + 
         (ABS(HASH(PATIENT_ID)) % 10 - 5)) AS PREDICTED_RISK,
        
        -- Timestamp column (TIMESTAMP_NTZ as required by docs)
        CURRENT_TIMESTAMP()::TIMESTAMP_NTZ AS PREDICTION_TIMESTAMP
        
    FROM ADVERSE_EVENT_MONITORING.DEMO_ANALYTICS.PREPARED_HEALTHCARE_DATA
    LIMIT 10000
    """

    session.sql(monitoring_setup_sql).collect()
    monitoring_source = session.table(MONITORING_TABLE)
    # Resolve again so a failure to create or read the table is raised here,
    # not on a later, less obvious line.
    schema_info = monitoring_source.schema.fields

# Verify data structure matches documentation requirements
print("📋 Verifying monitoring data structure follows official requirements:")
for field in schema_info:
    print(f"   • {field.name}: {field.datatype}")

# Show sample monitoring data (ID, ground truth, prediction, timestamp)
sample_data = monitoring_source.select([
    "PATIENT_ID", "RISK_SCORE", "PREDICTED_RISK", "PREDICTION_TIMESTAMP"
]).limit(5).to_pandas()

print(f"\n✅ Monitoring data ready: {monitoring_source.count():,} records")
print("📊 Sample monitoring logs (following official structure):")
print(sample_data.to_string(index=False))


In [None]:
# Create MODEL MONITOR (Following Official Documentation)
#
# Looks up the registered XGBoost model and its most recently created version,
# then issues CREATE OR REPLACE MODEL MONITOR against the monitoring-log table.
# Any failure is reported (not raised) so the remaining cells can still run.
print("🔍 Creating MODEL MONITOR following official Snowflake ML documentation...")

# Connect to Model Registry to get our XGBoost model from notebook 5
registry = Registry(session=session, database_name="ADVERSE_EVENT_MONITORING", schema_name="DEMO_ANALYTICS")

# Get the registered XGBoost model (from notebook 5)
model_name = "healthcare_risk_xgboost_regressor"

# Every object is referenced by its fully qualified name so the statement does
# not depend on the session's current database/schema.
monitor_namespace = "ADVERSE_EVENT_MONITORING.DEMO_ANALYTICS"

# Options shared by both CREATE statements below. Regression monitors take the
# *_SCORE_COLUMNS parameters, with column names passed as string literals.
monitor_options_sql = f"""
    SOURCE = {monitor_namespace}.PATIENT_RISK_ASSESSMENT
    BASELINE = {monitor_namespace}.PREPARED_HEALTHCARE_DATA
    TIMESTAMP_COLUMN = PREDICTION_TIMESTAMP
    PREDICTION_SCORE_COLUMNS = ('PREDICTED_RISK')
    ACTUAL_SCORE_COLUMNS = ('RISK_SCORE')
    ID_COLUMNS = ('PATIENT_ID')
    WAREHOUSE = ADVERSE_EVENT_WH
    REFRESH_INTERVAL = '1 hour'
    AGGREGATION_WINDOW = '1 day'
"""


def _with_plain_column_names(frame):
    """Return `frame` with column labels lower-cased and stripped of double quotes.

    SHOW-style results may label columns as `name`, `NAME` or `"name"`;
    normalising them lets the lookups below use one spelling.
    """
    return frame.rename(columns=lambda label: str(label).strip('"').lower())


try:
    # Registry.show_models() returns a pandas DataFrame describing each model.
    registered_models = _with_plain_column_names(registry.show_models())
    registered_names = [] if registered_models.empty else registered_models["name"].tolist()

    # Prefer the exact model registered by notebook 5; otherwise fall back to
    # the first model whose name mentions xgboost.
    exact_matches = [name for name in registered_names if name.lower() == model_name.lower()]
    xgboost_matches = [name for name in registered_names if 'xgboost' in name.lower()]
    candidate_names = exact_matches or xgboost_matches

    if candidate_names:
        actual_model_name = candidate_names[0]
        print(f"📦 Found registered model: {actual_model_name}")

        # Model.show_versions() also returns a pandas DataFrame; the version
        # identifier is in its `name` column.
        model_ref = registry.get_model(actual_model_name)
        version_table = _with_plain_column_names(model_ref.show_versions())

        if version_table.empty:
            raise RuntimeError("No model versions found")

        # Newest first, so "latest" does not depend on the order SHOW returns rows.
        if "created_on" in version_table.columns:
            version_table = version_table.sort_values("created_on", ascending=False)
        latest_version = version_table.iloc[0]["name"]

        print(f"🔄 Using model version: {latest_version}")

        # FUNCTION names the model method whose output is monitored.
        # NOTE(review): assumes the model was logged with a `predict` method — confirm in notebook 5.
        create_monitor_sql = f"""
        CREATE OR REPLACE MODEL MONITOR {monitor_namespace}.HEALTHCARE_RISK_MONITOR
        WITH
            MODEL = {monitor_namespace}.{actual_model_name}
            VERSION = '{latest_version}'
            FUNCTION = 'predict'
            {monitor_options_sql}
        """

        session.sql(create_monitor_sql).collect()
        print("✅ MODEL MONITOR created successfully following official documentation!")
        print("📊 Monitor configuration:")
        print(f"   • Model: {actual_model_name} (v{latest_version})")
        print("   • Type: Regression model monitoring")
        print("   • Refresh: Every 1 hour")
        print("   • Aggregation: 1 day windows")
        print("   • Predictions: PREDICTED_RISK column")
        print("   • Ground Truth: RISK_SCORE column")

    else:
        print("⚠️ XGBoost model not found in registry")
        print("💡 Run notebook 5 first to register the XGBoost model")

        # Create monitor without model reference (for demonstration)
        # NOTE(review): the CREATE MODEL MONITOR reference appears to list
        # MODEL / VERSION / FUNCTION as required, so Snowflake may reject this
        # statement; the except below then reports the error.
        fallback_monitor_sql = f"""
        CREATE OR REPLACE MODEL MONITOR {monitor_namespace}.HEALTHCARE_RISK_MONITOR
        WITH
            {monitor_options_sql}
        """

        session.sql(fallback_monitor_sql).collect()
        print("✅ MODEL MONITOR created (without model reference)")

except Exception as e:
    print(f"⚠️ Monitor creation: {e}")
    print("💡 Ensure notebook 5 has been run to register the XGBoost model")


In [None]:
# Use Native Snowflake Monitoring Functions
#
# Queries the three MODEL_MONITOR_* table functions for HEALTHCARE_RISK_MONITOR
# and prints whatever each returns. Failures are reported, never raised, because
# a freshly created monitor may not have produced any aggregates yet.
print("📈 Using Snowflake's built-in MODEL MONITOR functions...")

# Arguments shared by every metric query: granularity, window start, window end.
# Granularity is expressed in whole days ('1 DAY'), matching the monitor's
# AGGREGATION_WINDOW of '1 day'.
metric_window_sql = """'1 DAY', 
        DATEADD('hour', -24, CURRENT_TIMESTAMP()), 
        CURRENT_TIMESTAMP()"""


def _report_monitor_metric(query, success_message, empty_message, error_label, hints=()):
    """Run one monitor metric query and print its outcome.

    Args:
        query: SQL text selecting from a MODEL_MONITOR_* table function.
        success_message: Printed above the result table when rows come back.
        empty_message: Printed when the query succeeds but returns no rows.
        error_label: Prefix used in the warning printed when the query fails.
        hints: Extra lines printed after the warning.

    Returns:
        The pandas DataFrame of results, or None if the query raised.
    """
    try:
        results = session.sql(query).to_pandas()
    except Exception as e:
        print(f"⚠️ {error_label} note: {e}")
        for hint in hints:
            print(hint)
        return None

    if results.empty:
        print(empty_message)
    else:
        print(success_message)
        print(results.to_string(index=False))
    return results


# Performance Metrics for Regression Models
# 'MAE' is the documented metric name for mean absolute error.
print("1️⃣ Performance Metrics:")
performance_results = _report_monitor_metric(
    f"""
    SELECT * FROM TABLE(MODEL_MONITOR_PERFORMANCE_METRIC(
        'HEALTHCARE_RISK_MONITOR', 
        'MAE', 
        {metric_window_sql}
    ))
    """,
    success_message="✅ Performance metrics retrieved:",
    empty_message="⚠️ No performance data yet (monitor needs time to collect data)",
    error_label="Performance metrics",
    hints=("💡 Monitor needs to run for a period to collect performance data",),
)

# Drift Metrics
# Population Stability Index of the AGE feature against the monitor's baseline.
print("\n2️⃣ Feature Drift Detection:")
drift_results = _report_monitor_metric(
    f"""
    SELECT * FROM TABLE(MODEL_MONITOR_DRIFT_METRIC(
        'HEALTHCARE_RISK_MONITOR', 
        'POPULATION_STABILITY_INDEX', 
        'AGE', 
        {metric_window_sql}
    ))
    """,
    success_message="✅ Drift metrics retrieved:",
    empty_message="⚠️ No drift data yet (monitor needs baseline comparison)",
    error_label="Drift metrics",
    hints=("💡 Drift detection requires baseline data comparison over time",),
)

# Statistical Metrics
# The column must exist in the monitored table, where predictions are stored
# as PREDICTED_RISK.
print("\n3️⃣ Statistical Metrics:")
stat_results = _report_monitor_metric(
    f"""
    SELECT * FROM TABLE(MODEL_MONITOR_STAT_METRIC(
        'HEALTHCARE_RISK_MONITOR', 
        'COUNT', 
        'PREDICTED_RISK', 
        {metric_window_sql}
    ))
    """,
    success_message="✅ Statistical metrics retrieved:",
    empty_message="⚠️ No statistical data yet",
    error_label="Statistical metrics",
)

print("\n💡 Note: These functions provide real-time monitoring data once the MODEL MONITOR is active")


In [None]:
# Setup Native Snowflake Alerts
#
# Creates (if missing) a table that stores alert rows and a stored procedure
# that compares the last hour's mean absolute prediction error against a fixed
# threshold, inserting an alert row when the threshold is exceeded.
print("🚨 Setting up native Snowflake alerting for model monitoring...")

# Create alert notification table
# IF NOT EXISTS keeps previously recorded alerts when the notebook is re-run.
alert_table_sql = """
CREATE TABLE IF NOT EXISTS ADVERSE_EVENT_MONITORING.DEMO_ANALYTICS.MODEL_ALERTS (
    alert_id STRING DEFAULT UUID_STRING(),
    alert_timestamp TIMESTAMP DEFAULT CURRENT_TIMESTAMP(),
    alert_type STRING,
    model_name STRING,
    metric_name STRING,
    metric_value FLOAT,
    threshold_value FLOAT,
    severity STRING,
    message STRING
)
"""

session.sql(alert_table_sql).collect()
print("✅ Alert notification table created")

# Create the drift check as a stored PROCEDURE: it runs a scripting block and
# performs an INSERT, neither of which a SQL UDF body can do.
# Inside SQL statements, scripting variables are referenced with a leading ':'.
# The check reads the monitoring-log table (PATIENT_RISK_ASSESSMENT) and its
# PREDICTED_RISK / RISK_SCORE columns.
alert_function_sql = """
CREATE OR REPLACE PROCEDURE ADVERSE_EVENT_MONITORING.DEMO_ANALYTICS.CHECK_MODEL_DRIFT_ALERT()
RETURNS STRING
LANGUAGE SQL
AS
$$
DECLARE
    -- Example threshold on mean absolute error (synthetic check)
    drift_threshold FLOAT DEFAULT 0.2;
    current_drift FLOAT DEFAULT 0;
BEGIN
    SELECT COALESCE(AVG(ABS(PREDICTED_RISK - RISK_SCORE)), 0)
      INTO :current_drift
      FROM ADVERSE_EVENT_MONITORING.DEMO_ANALYTICS.PATIENT_RISK_ASSESSMENT
     WHERE PREDICTION_TIMESTAMP > DATEADD('hour', -1, CURRENT_TIMESTAMP()::TIMESTAMP_NTZ);

    IF (current_drift > drift_threshold) THEN
        INSERT INTO ADVERSE_EVENT_MONITORING.DEMO_ANALYTICS.MODEL_ALERTS
        (alert_type, model_name, metric_name, metric_value, threshold_value, severity, message)
        SELECT
            'DRIFT_ALERT',
            'healthcare_risk_xgboost_regressor',
            'PREDICTION_DRIFT',
            :current_drift,
            :drift_threshold,
            'HIGH',
            'Model prediction drift detected: ' || TO_VARCHAR(:current_drift) || ' exceeds threshold ' || TO_VARCHAR(:drift_threshold);
        RETURN 'ALERT_TRIGGERED';
    END IF;

    RETURN 'NO_ALERT';
END;
$$
"""

session.sql(alert_function_sql).collect()
print("✅ Drift alert function created")

# Create task to run alert checks (would be scheduled in production)
# Printed only, not executed; a procedure is invoked with CALL.
print("📅 Alert task structure (for production scheduling):")
print("""
CREATE OR REPLACE TASK MODEL_MONITORING_TASK
  WAREHOUSE = ADVERSE_EVENT_WH
  SCHEDULE = 'USING CRON 0 */1 * * * UTC'  -- Every hour
AS
  CALL ADVERSE_EVENT_MONITORING.DEMO_ANALYTICS.CHECK_MODEL_DRIFT_ALERT();
""")

# Test the alert procedure; CALL returns one row whose first column is the
# procedure's return value.
test_alert = session.sql("CALL ADVERSE_EVENT_MONITORING.DEMO_ANALYTICS.CHECK_MODEL_DRIFT_ALERT()").collect()
print(f"🧪 Alert function test: {test_alert[0][0]}")

# Show any alerts
alerts = session.sql("SELECT * FROM ADVERSE_EVENT_MONITORING.DEMO_ANALYTICS.MODEL_ALERTS ORDER BY alert_timestamp DESC LIMIT 5").to_pandas()
if not alerts.empty:
    print("🚨 Recent alerts:")
    print(alerts.to_string(index=False))
else:
    print("✅ No alerts currently - model performing within thresholds")


In [None]:
# Access Snowsight ML Monitoring Dashboard
#
# Prints navigation instructions for the Snowsight monitoring UI, lists the
# model monitors in the demo schema, and computes a 24-hour summary directly
# from the monitoring-log table.
print("📊 Accessing Snowflake's native ML monitoring dashboard...")

# Show how to access the monitoring dashboard
print("🎯 To view comprehensive monitoring in Snowsight:")
print("   1. Navigate to Snowsight → AI & ML → Models")
print("   2. Find your model: healthcare_risk_xgboost_regressor")
print("   3. Click on the model to view details")
print("   4. Go to the 'Monitors' tab to see:")
print("      • Performance metrics (MAE, RMSE)")
print("      • Drift detection (PSI, KS test)")
print("      • Volume metrics (prediction counts)")
print("      • Statistical metrics (mean, std, missing values)")

# Query monitor status
try:
    monitor_status = session.sql("""
        SHOW MODEL MONITORS IN SCHEMA ADVERSE_EVENT_MONITORING.DEMO_ANALYTICS
    """).to_pandas()

    if not monitor_status.empty:
        # Normalise labels, then show only the summary columns that are actually
        # present, so a differently named state column does not raise KeyError
        # and hide a monitor that exists.
        monitor_status.columns = [str(label).strip('"').lower() for label in monitor_status.columns]
        summary_columns = [
            label for label in ("name", "state", "monitor_state", "created_on")
            if label in monitor_status.columns
        ]
        monitor_summary = monitor_status[summary_columns] if summary_columns else monitor_status

        print("\n✅ Active model monitors:")
        print(monitor_summary.to_string(index=False))
    else:
        print("\n⚠️ No active monitors found")

except Exception as e:
    print(f"\n⚠️ Monitor status query: {e}")

# Demonstrate manual monitoring queries for immediate insights
print("\n📈 Manual monitoring queries you can run:")

# Check prediction distribution
# Reads the monitoring-log table (PATIENT_RISK_ASSESSMENT); its prediction and
# ground-truth columns are PREDICTED_RISK and RISK_SCORE.
recent_stats = session.sql("""
    SELECT 
        COUNT(*) as prediction_count,
        AVG(PREDICTED_RISK) as avg_predicted_risk,
        STDDEV(PREDICTED_RISK) as std_predicted_risk,
        MIN(PREDICTED_RISK) as min_predicted_risk,
        MAX(PREDICTED_RISK) as max_predicted_risk,
        AVG(ABS(PREDICTED_RISK - RISK_SCORE)) as mean_absolute_error
    FROM ADVERSE_EVENT_MONITORING.DEMO_ANALYTICS.PATIENT_RISK_ASSESSMENT
    WHERE PREDICTION_TIMESTAMP > DATEADD('hour', -24, CURRENT_TIMESTAMP())
""").to_pandas()

print("📊 Last 24h monitoring summary:")
print(recent_stats.to_string(index=False))

print("\n🎉 Snowflake ML Observability setup complete!")
print("📋 Next steps:")
print("   • Monitor the Snowsight dashboard for ongoing insights")
print("   • Set up automated tasks for regular alert checking")
print("   • Configure external notifications (email, Slack) for alerts")
print("   • Review model performance trends weekly")
