In [None]:
import streamlit as st

# Banner image shown at the top of the notebook; the width is passed straight to st.image.
LOGO_FILE = "Snowflake_Logo.svg"
LOGO_WIDTH = 300
st.image(LOGO_FILE, width=LOGO_WIDTH)


# Honor Health Intelligence Agent - ML Models

**Training 3 Machine Learning Models for Social Determinants of Health (SDOH) and Value-Based Care**

This notebook trains 3 ML models for the Honor Health Intelligence Agent:
1. **READMISSION_RISK_PREDICTOR** - Predicts 30-day readmission risk (2 classes)
2. **HEALTH_OUTCOME_PREDICTOR** - Predicts patient health outcome improvement (3 classes)
3. **SOCIAL_RISK_STRATIFICATION** - Stratifies patients by social risk (3 classes)

---

## Prerequisites
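- Database: `HONORHEALTH_INTELLIGENCE`
- Schema: `ML_MODELS`
- Warehouse: `HONORHEALTH_WH`
- Feature views created in `HONORHEALTH_INTELLIGENCE.ANALYTICS`: `V_READMISSION_RISK_FEATURES`, `V_HEALTH_OUTCOME_PREDICTION_FEATURES`, `V_SOCIAL_RISK_STRATIFICATION_FEATURES`
- Packages: `snowflake-ml-python`, `scikit-learn`, `pandas`, `streamlit`
- File: `Snowflake_Logo.svg` available to the notebook (displayed by the first cell)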


## Setup and Imports


In [None]:
# Import required libraries (standard library first, then third-party)
import warnings

import pandas as pd
from sklearn.metrics import accuracy_score, classification_report
from snowflake.ml.modeling.ensemble import RandomForestClassifier
from snowflake.ml.modeling.linear_model import LogisticRegression
from snowflake.ml.modeling.pipeline import Pipeline
from snowflake.ml.modeling.preprocessing import OneHotEncoder
from snowflake.ml.registry import Registry
from snowflake.snowpark import Session

# Install a catch-all "ignore" filter so library warnings do not clutter cell output.
# NOTE(review): this hides every warning category for the rest of the kernel session.
warnings.simplefilter("ignore")

print("✅ Libraries imported successfully")


In [None]:
# Obtain a Snowpark session through the builder's getOrCreate()
session = Session.builder.getOrCreate()

# Pin the database, schema and warehouse that the rest of the notebook runs against
session.use_database("HONORHEALTH_INTELLIGENCE")
session.use_schema("ML_MODELS")
session.use_warehouse("HONORHEALTH_WH")

print("✅ Session configured")

# Read the context back from the session (not from the literals above) and echo it
active_database = session.get_current_database()
print(f"Database: {active_database}")
active_schema = session.get_current_schema()
print(f"Schema: {active_schema}")
active_warehouse = session.get_current_warehouse()
print(f"Warehouse: {active_warehouse}")


In [None]:
# Model Registry handle, scoped to the database/schema where the models are logged
registry_database = "HONORHEALTH_INTELLIGENCE"
registry_schema = "ML_MODELS"

registry = Registry(
    session=session,
    database_name=registry_database,
    schema_name=registry_schema,
)

print("✅ Model Registry initialized")


---
## Model 1: Readmission Risk Predictor

**Objective**: Predict 30-day hospital readmission risk  
**Labels**: 0=Low Risk (No Readmission), 1=High Risk (Readmission)  
**Algorithm**: Random Forest Classifier  
**Features**: Age, insurance, length of stay, cost, SDOH factors, prior encounters


In [None]:
# Reference the readmission feature view through the session
readmission_df = session.table("HONORHEALTH_INTELLIGENCE.ANALYTICS.V_READMISSION_RISK_FEATURES")

# Report how many rows are available, then print a five-row preview
readmission_row_count = readmission_df.count()
print(f"✅ Loaded {readmission_row_count} records for readmission prediction")
readmission_df.show(5)


In [None]:
# 80/20 train/test split with a fixed seed. ENCOUNTER_ID is an identifier column that is
# not needed for training, so it is removed from both partitions as they are unpacked.
train_readmission, test_readmission = (
    partition.drop("ENCOUNTER_ID")
    for partition in readmission_df.random_split([0.8, 0.2], seed=42)
)

readmission_train_rows = train_readmission.count()
print(f"Training set: {readmission_train_rows} records")
readmission_test_rows = test_readmission.count()
print(f"Test set: {readmission_test_rows} records")


In [None]:
# Readmission classifier: a deliberately small forest (5 trees, depth 5) so the notebook
# runs quickly, seeded through random_state.
readmission_classifier = RandomForestClassifier(
    label_cols=["READMISSION_LABEL"],
    output_cols=["PREDICTED_READMISSION"],
    n_estimators=5,
    max_depth=5,
    random_state=42,
)

# Single-step pipeline; the step name "Classifier" is kept as the pipeline's step label
readmission_pipeline = Pipeline([("Classifier", readmission_classifier)])

print("✅ Readmission risk pipeline created (optimized for speed)")


In [None]:
# Train the readmission prediction model
# The pipeline is fitted on the training split, which still contains the READMISSION_LABEL
# column the classifier was configured with. The two status lines bracket the fit call so
# its start and completion are visible in the cell output.
print("Training readmission prediction model...")
readmission_pipeline.fit(train_readmission)
print("✅ Readmission prediction model trained")


In [None]:
# Evaluate model on test set
# accuracy_score and classification_report are imported in the notebook's import cell, so
# this cell (and the later evaluation cells) no longer depend on an import made here.
test_predictions = readmission_pipeline.predict(test_readmission)

# Only the true label and the predicted label are pulled into pandas for scoring
test_results = test_predictions.select("READMISSION_LABEL", "PREDICTED_READMISSION").to_pandas()

# Class codes and their display names, in matching order (0=No Readmission, 1=Readmission).
# Assumes the label and prediction columns hold these numeric codes - TODO confirm.
readmission_class_ids = [0, 1]
readmission_class_names = ['No Readmission', 'Readmission']

accuracy = accuracy_score(test_results['READMISSION_LABEL'], test_results['PREDICTED_READMISSION'])

print(f"Test Accuracy: {accuracy:.3f}")
print("\nClassification Report:")
# `labels` pins the report to both classes. Without it, classification_report derives the
# classes from the data and raises ValueError when a class is absent from both the test
# labels and the predictions, because the class count no longer matches target_names.
print(classification_report(
    test_results['READMISSION_LABEL'],
    test_results['PREDICTED_READMISSION'],
    labels=readmission_class_ids,
    target_names=readmission_class_names
))


In [None]:
# Register model in Model Registry
# Drop label column from sample data - model signature should only include features
sample_data = train_readmission.drop("READMISSION_LABEL").limit(100)

# Delete existing model if present (force retraining).
# This stays best effort: a failed delete (presumably because the model has not been
# registered yet) must not stop the notebook. The reason is printed instead of being
# silently discarded, and catching Exception rather than using a bare `except` lets
# KeyboardInterrupt / SystemExit propagate.
try:
    registry.delete_model("READMISSION_RISK_PREDICTOR")
    print("Deleted existing model")
except Exception as delete_error:
    print(f"No existing model deleted ({type(delete_error).__name__}: {delete_error})")

# Log the fitted pipeline; the sample rows are passed as sample_input_data
registry.log_model(
    model=readmission_pipeline,
    model_name="READMISSION_RISK_PREDICTOR",
    target_platforms=['WAREHOUSE'],
    sample_input_data=sample_data,
    comment="Predicts 30-day hospital readmission risk (No Readmission/Readmission)"
)

print("✅ READMISSION_RISK_PREDICTOR registered in Model Registry")


---
## Model 2: Health Outcome Predictor

**Objective**: Predict patient health outcome improvement  
**Labels**: 0=Declined, 1=Stable, 2=Improved  
**Algorithm**: Logistic Regression  
**Features**: Age, SDOH factors, baseline value, encounter history, quality score


In [None]:
# Reference the health outcome feature view through the session
outcome_df = session.table("HONORHEALTH_INTELLIGENCE.ANALYTICS.V_HEALTH_OUTCOME_PREDICTION_FEATURES")

# Report how many rows are available, then print a five-row preview
outcome_row_count = outcome_df.count()
print(f"✅ Loaded {outcome_row_count} records for health outcome prediction")
outcome_df.show(5)


In [None]:
# 80/20 train/test split with a fixed seed; the OUTCOME_ID column is removed from both
# partitions as they are unpacked.
train_outcome, test_outcome = (
    partition.drop("OUTCOME_ID")
    for partition in outcome_df.random_split([0.8, 0.2], seed=42)
)

outcome_train_rows = train_outcome.count()
print(f"Training set: {outcome_train_rows} records")
outcome_test_rows = test_outcome.count()
print(f"Test set: {outcome_test_rows} records")


In [None]:
# Health outcome classifier: logistic regression capped at 100 solver iterations
outcome_classifier = LogisticRegression(
    label_cols=["OUTCOME_LABEL"],
    output_cols=["PREDICTED_OUTCOME"],
    max_iter=100,
)

# Single-step pipeline; the step name "Classifier" is kept as the pipeline's step label
outcome_pipeline = Pipeline([("Classifier", outcome_classifier)])

print("✅ Health outcome prediction pipeline created (optimized for speed)")


In [None]:
# Train the health outcome prediction model
# The pipeline is fitted on the training split, which still contains the OUTCOME_LABEL
# column the classifier was configured with. The two status lines bracket the fit call so
# its start and completion are visible in the cell output.
print("Training health outcome prediction model...")
outcome_pipeline.fit(train_outcome)
print("✅ Health outcome prediction model trained")


In [None]:
# Evaluate model
# Score the health outcome pipeline on the held-out split. accuracy_score and
# classification_report must already be imported when this cell runs.
test_predictions = outcome_pipeline.predict(test_outcome)

# Only the true label and the predicted label are pulled into pandas for scoring
test_results = test_predictions.select("OUTCOME_LABEL", "PREDICTED_OUTCOME").to_pandas()

# Class codes and their display names, in matching order (0=Declined, 1=Stable, 2=Improved).
# Assumes the label and prediction columns hold these numeric codes - TODO confirm.
outcome_class_ids = [0, 1, 2]
outcome_class_names = ['Declined', 'Stable', 'Improved']

accuracy = accuracy_score(test_results['OUTCOME_LABEL'], test_results['PREDICTED_OUTCOME'])

print(f"Test Accuracy: {accuracy:.3f}")
print("\nClassification Report:")
# `labels` pins the report to all three classes. Without it, classification_report derives
# the classes from the data and raises ValueError when a class is absent from both the test
# labels and the predictions, because the class count no longer matches target_names.
print(classification_report(
    test_results['OUTCOME_LABEL'],
    test_results['PREDICTED_OUTCOME'],
    labels=outcome_class_ids,
    target_names=outcome_class_names
))


In [None]:
# Register model
# The label column is dropped so the sample passed to the registry contains features only
sample_data = train_outcome.drop("OUTCOME_LABEL").limit(100)

# Delete existing model if present.
# This stays best effort: a failed delete (presumably because the model has not been
# registered yet) must not stop the notebook. The reason is printed instead of being
# silently discarded, and catching Exception rather than using a bare `except` lets
# KeyboardInterrupt / SystemExit propagate.
try:
    registry.delete_model("HEALTH_OUTCOME_PREDICTOR")
    print("Deleted existing model")
except Exception as delete_error:
    print(f"No existing model deleted ({type(delete_error).__name__}: {delete_error})")

# Log the fitted pipeline; the sample rows are passed as sample_input_data
registry.log_model(
    model=outcome_pipeline,
    model_name="HEALTH_OUTCOME_PREDICTOR",
    target_platforms=['WAREHOUSE'],
    sample_input_data=sample_data,
    comment="Predicts patient health outcome improvement (Declined/Stable/Improved)"
)

print("✅ HEALTH_OUTCOME_PREDICTOR registered in Model Registry")


---
## Model 3: Social Risk Stratification Model

**Objective**: Stratify patients by social risk level  
**Labels**: 0=Low Risk, 1=Medium Risk, 2=High Risk  
**Algorithm**: Random Forest Classifier  
**Features**: Employment, income, education, housing, food insecurity, isolation, costs


In [None]:
# Reference the social risk stratification feature view through the session
social_risk_df = session.table("HONORHEALTH_INTELLIGENCE.ANALYTICS.V_SOCIAL_RISK_STRATIFICATION_FEATURES")

# Report how many rows are available, then print a five-row preview
social_risk_row_count = social_risk_df.count()
print(f"✅ Loaded {social_risk_row_count} records for social risk stratification")
social_risk_df.show(5)


In [None]:
# 80/20 train/test split with a fixed seed; the SDOH_ID column is removed from both
# partitions as they are unpacked.
train_social, test_social = (
    partition.drop("SDOH_ID")
    for partition in social_risk_df.random_split([0.8, 0.2], seed=42)
)

social_train_rows = train_social.count()
print(f"Training set: {social_train_rows} records")
social_test_rows = test_social.count()
print(f"Test set: {social_test_rows} records")


In [None]:
# Social risk classifier: a deliberately small forest (5 trees, depth 5) so the notebook
# runs quickly, seeded through random_state.
social_risk_classifier = RandomForestClassifier(
    label_cols=["RISK_LEVEL_LABEL"],
    output_cols=["PREDICTED_RISK_LEVEL"],
    n_estimators=5,
    max_depth=5,
    random_state=42,
)

# Single-step pipeline; the step name "Classifier" is kept as the pipeline's step label
social_risk_pipeline = Pipeline([("Classifier", social_risk_classifier)])

print("✅ Social risk stratification pipeline created (optimized for speed)")


In [None]:
# Train the social risk stratification model
# The pipeline is fitted on the training split, which still contains the RISK_LEVEL_LABEL
# column the classifier was configured with. The two status lines bracket the fit call so
# its start and completion are visible in the cell output.
print("Training social risk stratification model...")
social_risk_pipeline.fit(train_social)
print("✅ Social risk stratification model trained")


In [None]:
# Evaluate model
# Score the social risk pipeline on the held-out split. accuracy_score and
# classification_report must already be imported when this cell runs.
test_predictions = social_risk_pipeline.predict(test_social)

# Only the true label and the predicted label are pulled into pandas for scoring
test_results = test_predictions.select("RISK_LEVEL_LABEL", "PREDICTED_RISK_LEVEL").to_pandas()

# Class codes and their display names, in matching order (0=Low, 1=Medium, 2=High Risk).
# Assumes the label and prediction columns hold these numeric codes - TODO confirm.
risk_class_ids = [0, 1, 2]
risk_class_names = ['Low Risk', 'Medium Risk', 'High Risk']

accuracy = accuracy_score(test_results['RISK_LEVEL_LABEL'], test_results['PREDICTED_RISK_LEVEL'])

print(f"Test Accuracy: {accuracy:.3f}")
print("\nClassification Report:")
# `labels` pins the report to all three classes. Without it, classification_report derives
# the classes from the data and raises ValueError when a class is absent from both the test
# labels and the predictions, because the class count no longer matches target_names.
print(classification_report(
    test_results['RISK_LEVEL_LABEL'],
    test_results['PREDICTED_RISK_LEVEL'],
    labels=risk_class_ids,
    target_names=risk_class_names
))


In [None]:
# Register model
# The label column is dropped so the sample passed to the registry contains features only
sample_data = train_social.drop("RISK_LEVEL_LABEL").limit(100)

# Delete existing model if present.
# This stays best effort: a failed delete (presumably because the model has not been
# registered yet) must not stop the notebook. The reason is printed instead of being
# silently discarded, and catching Exception rather than using a bare `except` lets
# KeyboardInterrupt / SystemExit propagate.
try:
    registry.delete_model("SOCIAL_RISK_STRATIFICATION")
    print("Deleted existing model")
except Exception as delete_error:
    print(f"No existing model deleted ({type(delete_error).__name__}: {delete_error})")

# Log the fitted pipeline; the sample rows are passed as sample_input_data
registry.log_model(
    model=social_risk_pipeline,
    model_name="SOCIAL_RISK_STRATIFICATION",
    target_platforms=['WAREHOUSE'],
    sample_input_data=sample_data,
    comment="Stratifies patients by social risk level (Low/Medium/High Risk)"
)

print("✅ SOCIAL_RISK_STRATIFICATION registered in Model Registry")


---
## Summary and Verification


In [None]:
# List all registered models
models = session.sql("SHOW MODELS IN SCHEMA ML_MODELS").collect()

print("\n" + "="*80)
print("REGISTERED MODELS")
print("="*80)
for model in models:
    print(f"✅ {model['name']}")

# Verify that the three models this notebook is meant to produce are actually present in
# the SHOW MODELS result, so the success banner is only printed when it is true.
expected_model_names = (
    "READMISSION_RISK_PREDICTOR",
    "HEALTH_OUTCOME_PREDICTOR",
    "SOCIAL_RISK_STRATIFICATION",
)
# Compare case-insensitively in case the listing returns names in a different case
registered_model_names = {str(row['name']).upper() for row in models}
missing_model_names = [
    expected_name
    for expected_name in expected_model_names
    if expected_name not in registered_model_names
]

print("\n" + "="*80)
if missing_model_names:
    print("MODEL TRAINING INCOMPLETE")
    print("="*80)
    print(f"\n⚠️ Not found in schema ML_MODELS: {', '.join(missing_model_names)}")
    print("Re-run the training and registration cells for the missing models before continuing.")
else:
    print("MODEL TRAINING COMPLETE")
    print("="*80)
    print("\n3 ML models successfully trained and registered:")
    print("1. READMISSION_RISK_PREDICTOR - Predicts readmission risk (2 classes)")
    print("2. HEALTH_OUTCOME_PREDICTOR - Predicts health outcomes (3 classes)")
    print("3. SOCIAL_RISK_STRATIFICATION - Stratifies social risk (3 classes)")
    print("\nNext steps:")
    print("1. Run honorhealth_07_ml_model_functions.sql to create SQL functions")
    print("2. Run honorhealth_08_intelligence_agent.sql to configure agent")
