In [2]:
import numpy as np
import pandas as pd
import sklearn
import joblib

In [3]:
# ========== LOAD SAVED MODELS ==========
# Directory that holds the three persisted artifacts. Kept in one place so the
# notebook can be pointed at another location without editing every load call.
MODEL_DIR = "/content"

print("Loading saved models...")
# SECURITY: joblib.load unpickles arbitrary Python objects, which can execute
# code on load. Only point MODEL_DIR at artifacts you produced or trust.
rf_model = joblib.load(f"{MODEL_DIR}/health_risk_rf_model.pkl")  # classifier; predict_proba is called on it below
le = joblib.load(f"{MODEL_DIR}/health_risk_label_encoder.pkl")  # presumably the training label encoder (per file name) - not used in the cells shown
SAFE_FEATURES = joblib.load(f"{MODEL_DIR}/health_risk_features.pkl")  # column names used to select/order model inputs
print("✓ Models loaded successfully!\n")


Loading saved models...
✓ Models loaded successfully!



In [4]:



# ========== DEFINE RISK LABELS ==========
# Integer class index -> display name (index 0 is "Low Risk", index 1 is "High Risk").
RISK_LABELS = dict(enumerate(("Low Risk", "High Risk")))

# ========== UNSEEN TEST DATA ==========
# Ten hand-written user records. Every record carries the same sixteen numeric
# readings, in the same key order, plus an "expected" entry holding the risk
# label the author anticipates for that user.
unseen_test_data = [
    dict(  # User 1: HEALTHY
        heart_rate=72, resting_heart_rate=62, hrv_rmssd=45, spo2=98,
        steps=8500, calories_burned=2200,
        sleep_duration=7.5, deep_sleep_ratio=0.22, rem_sleep_ratio=0.25,
        air_quality_index=60, pm2_5=25, noise_level=55, temperature=25,
        sleep_pressure=0.65, parasympathetic_score=0.55, environmental_load=0.45,
        expected="Low Risk",
    ),
    dict(  # User 2: VERY HEALTHY
        heart_rate=68, resting_heart_rate=60, hrv_rmssd=50, spo2=97,
        steps=10000, calories_burned=2400,
        sleep_duration=7.2, deep_sleep_ratio=0.20, rem_sleep_ratio=0.24,
        air_quality_index=70, pm2_5=30, noise_level=50, temperature=26,
        sleep_pressure=0.60, parasympathetic_score=0.58, environmental_load=0.40,
        expected="Low Risk",
    ),
    dict(  # User 3: MODERATELY ACTIVE
        heart_rate=88, resting_heart_rate=75, hrv_rmssd=32, spo2=96,
        steps=5000, calories_burned=1800,
        sleep_duration=6.5, deep_sleep_ratio=0.18, rem_sleep_ratio=0.21,
        air_quality_index=110, pm2_5=45, noise_level=62, temperature=28,
        sleep_pressure=0.72, parasympathetic_score=0.45, environmental_load=0.62,
        expected="Low Risk",
    ),
    dict(  # User 4: AT-RISK
        heart_rate=105, resting_heart_rate=82, hrv_rmssd=22, spo2=92,
        steps=2000, calories_burned=1400,
        sleep_duration=4.8, deep_sleep_ratio=0.10, rem_sleep_ratio=0.14,
        air_quality_index=180, pm2_5=75, noise_level=72, temperature=32,
        sleep_pressure=0.82, parasympathetic_score=0.32, environmental_load=0.78,
        expected="High Risk",
    ),
    dict(  # User 5: SEVERE
        heart_rate=115, resting_heart_rate=88, hrv_rmssd=18, spo2=89,
        steps=1200, calories_burned=1100,
        sleep_duration=4.0, deep_sleep_ratio=0.07, rem_sleep_ratio=0.10,
        air_quality_index=220, pm2_5=100, noise_level=80, temperature=35,
        sleep_pressure=0.94, parasympathetic_score=0.20, environmental_load=0.92,
        expected="High Risk",
    ),
    dict(  # User 6: BORDERLINE
        heart_rate=92, resting_heart_rate=78, hrv_rmssd=28, spo2=94,
        steps=3500, calories_burned=1600,
        sleep_duration=5.5, deep_sleep_ratio=0.13, rem_sleep_ratio=0.18,
        air_quality_index=140, pm2_5=55, noise_level=68, temperature=30,
        sleep_pressure=0.78, parasympathetic_score=0.38, environmental_load=0.70,
        expected="High Risk",
    ),
    dict(  # User 7: STRESSED BUT MANAGING
        heart_rate=85, resting_heart_rate=72, hrv_rmssd=35, spo2=96,
        steps=6500, calories_burned=1900,
        sleep_duration=6.8, deep_sleep_ratio=0.19, rem_sleep_ratio=0.22,
        air_quality_index=95, pm2_5=40, noise_level=60, temperature=27,
        sleep_pressure=0.68, parasympathetic_score=0.50, environmental_load=0.55,
        expected="Low Risk",
    ),
    dict(  # User 8: POOR SLEEP
        heart_rate=98, resting_heart_rate=80, hrv_rmssd=25, spo2=93,
        steps=2500, calories_burned=1500,
        sleep_duration=4.5, deep_sleep_ratio=0.09, rem_sleep_ratio=0.12,
        air_quality_index=170, pm2_5=65, noise_level=75, temperature=33,
        sleep_pressure=0.85, parasympathetic_score=0.30, environmental_load=0.75,
        expected="High Risk",
    ),
    dict(  # User 9: RECOVERING
        heart_rate=80, resting_heart_rate=68, hrv_rmssd=38, spo2=97,
        steps=7000, calories_burned=2000,
        sleep_duration=7.0, deep_sleep_ratio=0.21, rem_sleep_ratio=0.23,
        air_quality_index=85, pm2_5=35, noise_level=58, temperature=26,
        sleep_pressure=0.65, parasympathetic_score=0.52, environmental_load=0.50,
        expected="Low Risk",
    ),
    dict(  # User 10: CRITICAL
        heart_rate=120, resting_heart_rate=92, hrv_rmssd=15, spo2=88,
        steps=800, calories_burned=900,
        sleep_duration=3.5, deep_sleep_ratio=0.05, rem_sleep_ratio=0.08,
        air_quality_index=250, pm2_5=120, noise_level=85, temperature=36,
        sleep_pressure=0.96, parasympathetic_score=0.15, environmental_load=0.95,
        expected="High Risk",
    ),
]

# ========== MAKE PREDICTIONS ==========
print("Making predictions on unseen data...\n")

# Build the model input: keep only the feature columns, in the saved order.
# The "expected" column is dropped by this selection.
df_test = pd.DataFrame(unseen_test_data)
missing_features = [col for col in SAFE_FEATURES if col not in df_test.columns]
if missing_features:
    # Fail with an explicit message instead of pandas' generic "not in index" error.
    raise KeyError(f"Unseen test data is missing model features: {missing_features}")
df_test = df_test[SAFE_FEATURES]

# Column 1 of predict_proba is treated as the "High Risk" probability.
# NOTE(review): this assumes rf_model.classes_ is [0, 1] with 1 meaning
# "High Risk", matching RISK_LABELS; the loaded label encoder `le` is never
# consulted to confirm that mapping - verify against the training notebook.
y_probs = rf_model.predict_proba(df_test)[:, 1]
# Decision cut-off applied to the class-1 probability (at or above -> class 1).
threshold = 0.35
y_pred = (y_probs >= threshold).astype(int)

# ========== EVALUATE RESULTS ==========
# One row per user: predicted vs expected label, class-1 probability, tick/cross.
results = []
for user_number, (user, prob, pred) in enumerate(
    zip(unseen_test_data, y_probs, y_pred), start=1
):
    predicted_risk = RISK_LABELS[int(pred)]
    expected_risk = user["expected"]
    results.append({
        "User": user_number,
        "Predicted": predicted_risk,
        "Expected": expected_risk,
        "Probability": round(float(prob), 3),
        "Correct": "✓" if predicted_risk == expected_risk else "✗",
    })

# Create results DataFrame
df_results = pd.DataFrame(results)

# Derive correctness from the labels themselves rather than from the display
# glyph, so the counts below stay right even if the glyphs are changed.
correct_mask = df_results["Predicted"] == df_results["Expected"]
correct_count = int(correct_mask.sum())
correct_low = int((correct_mask & (df_results["Expected"] == "Low Risk")).sum())
correct_high = int((correct_mask & (df_results["Expected"] == "High Risk")).sum())

# ========== PRINT RESULTS ==========
print("="*80)
print("PREDICTION RESULTS ON UNSEEN DATA")
print("="*80)
print(df_results.to_string(index=False))
print("="*80)

# Calculate accuracy as a percentage of all test users
accuracy = (correct_count / len(unseen_test_data)) * 100
print(f"\n📊 ACCURACY: {correct_count}/{len(unseen_test_data)} correct = {accuracy:.1f}%")
print("="*80)

# Summary
print("\n📈 SUMMARY:")
print(f"  • Correctly predicted Low Risk: {correct_low}")
print(f"  • Correctly predicted High Risk: {correct_high}")
print(f"  • Wrong predictions: {len(unseen_test_data) - correct_count}")
print(f"  • Threshold used: {threshold}")
print("="*80)

Making predictions on unseen data...

PREDICTION RESULTS ON UNSEEN DATA
 User Predicted  Expected  Probability Correct
    1  Low Risk  Low Risk        0.015       ✓
    2  Low Risk  Low Risk        0.027       ✓
    3  Low Risk  Low Risk        0.025       ✓
    4 High Risk High Risk        0.888       ✓
    5 High Risk High Risk        0.878       ✓
    6  Low Risk High Risk        0.084       ✗
    7  Low Risk  Low Risk        0.014       ✓
    8 High Risk High Risk        0.376       ✓
    9  Low Risk  Low Risk        0.009       ✓
   10 High Risk High Risk        0.877       ✓

📊 ACCURACY: 9/10 correct = 90.0%

📈 SUMMARY:
  • Correctly predicted Low Risk: 5
  • Correctly predicted High Risk: 4
  • Wrong predictions: 1
  â€¢ Threshold used: 0.35
