In [1]:
# Load saved ensemble configuration and component models
import os
import pickle
import torch

# Location of the pickled ensemble configuration written by the save step.
CONFIG_PATH = os.path.join("saved_models", "ensemble_config_complete.pkl")

# NOTE(security): pickle.load can execute arbitrary code contained in the file.
# Only point CONFIG_PATH at artifacts produced by a trusted run of the save step.
if os.path.exists(CONFIG_PATH):
    with open(CONFIG_PATH, "rb") as f:
        ensemble_config = pickle.load(f)
else:
    # Fail fast with an actionable message instead of a bare open() error.
    raise FileNotFoundError(f"Ensemble config not found at {CONFIG_PATH}. Run the save step first in combined_model.ipynb.")

# Echo the metadata stored in the config so the reader can confirm which
# ensemble was picked up (missing keys print as None / 0 rather than raising).
print("✅ Loaded ensemble configuration")
print(f"Version: {ensemble_config.get('version')}")
print(f"Created: {ensemble_config.get('created_at')}")
print(f"Description: {ensemble_config.get('description')}")
print(f"#Labels: {len(ensemble_config.get('label_names', []))}")


✅ Loaded ensemble configuration
Version: 1.0
Created: 2025-10-07T11:31:00.928168
Description: Multimodal medical prediction ensemble combining symptom analysis, heart disease prediction, and chest X-ray analysis
#Labels: 24


In [None]:
# ============================================
# 🧠 Load All Saved Models and Components
# ============================================

import pickle
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torchvision.transforms as transforms
from PIL import Image
import re
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer

print("Loading all saved models and components...")


def _load_first_pickle(candidates, missing_message):
    """Unpickle the first candidate file that exists.

    Args:
        candidates: Ordered sequence of ``(path, success_message)`` pairs. Each
            path is tried in turn; a ``FileNotFoundError`` moves on to the next.
        missing_message: Printed when none of the candidate files exist.

    Returns:
        The unpickled object, or ``None`` when no candidate file was found.
        Any error other than ``FileNotFoundError`` propagates to the caller.
    """
    # NOTE(security): pickle.load can execute arbitrary code from the file;
    # these artifacts must come from a trusted training run.
    for path, success_message in candidates:
        try:
            with open(path, "rb") as handle:
                loaded = pickle.load(handle)
            print(success_message)
            return loaded
        except FileNotFoundError:
            continue
    print(missing_message)
    return None


# Heart disease model and its preprocessing artifacts
heart_model = _load_first_pickle(
    [("heart_disease_model.pkl", "✅ Heart disease model loaded")],
    "❌ Heart disease model not found",
)
heart_scaler = _load_first_pickle(
    [("heart_scaler_final.pkl", "✅ Heart scaler loaded")],
    "❌ Heart scaler not found",
)
heart_feature_cols = _load_first_pickle(
    [("heart_feature_columns.pkl", "✅ Heart feature columns loaded")],
    "❌ Heart feature columns not found",
)

# NLP model and vectorizer: prefer the "_fixed" artifact, fall back to the original
symptom_model = _load_first_pickle(
    [
        ("symptos2disease_model_fixed.pkl", "✅ Symptom model loaded"),
        ("symptos2disease_model.pkl", "✅ Symptom model loaded (original)"),
    ],
    "❌ Symptom model not found",
)
tfidf_vectorizer = _load_first_pickle(
    [
        ("tfidf_vectorizer_fixed.pkl", "✅ TF-IDF vectorizer loaded"),
        ("tfidf_vectorizer.pkl", "✅ TF-IDF vectorizer loaded (original)"),
    ],
    "❌ TF-IDF vectorizer not found",
)

# X-ray model weights, mapped onto the CPU.
# NOTE(review): torch.load may unpickle arbitrary objects depending on the
# installed PyTorch version; consider weights_only=True if it is supported — confirm.
try:
    xray_state_dict = torch.load("chest_xray_cnn.pth", map_location='cpu')
except FileNotFoundError:
    print("❌ X-ray model not found")
    xray_state_dict = None
else:
    print("✅ X-ray model state dict loaded")

# Class labels come from the ensemble configuration loaded in the first cell
label_names = ensemble_config.get('label_names', [])
print(f"✅ Label names loaded: {len(label_names)} classes")

print("\n🎉 Model loading completed!")


Loading all saved models and components...
✅ Heart disease model loaded
✅ Heart scaler loaded
✅ Heart feature columns loaded
✅ Symptom model loaded
✅ TF-IDF vectorizer loaded
✅ X-ray model state dict loaded
✅ Label names loaded: 24 classes

🎉 Model loading completed!


https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


In [3]:
# ============================================
# 🧠 Symptom Prediction Function
# ============================================

def predict_symptoms(symptom_texts):
    """
    Predict per-class disease probabilities from free-text symptom descriptions.

    Args:
        symptom_texts: A single string or an iterable of strings. A bare string
            is treated as ONE description (it is not iterated character by
            character).

    Returns:
        The array returned by ``symptom_model.predict_proba`` for the
        preprocessed and vectorized texts, one row per input text.
        If the symptom model or the TF-IDF vectorizer is not loaded, a message
        is printed and an all-zero array of shape
        ``(len(symptom_texts), len(label_names))`` is returned.
        If no texts are given, an empty ``(0, len(label_names))`` array is
        returned without calling the model.
    """
    # Normalise the input to a list so that len() works and a lone string
    # counts as a single document.
    if isinstance(symptom_texts, str):
        symptom_texts = [symptom_texts]
    else:
        symptom_texts = list(symptom_texts)

    if symptom_model is None or tfidf_vectorizer is None:
        print("❌ Symptom model or vectorizer not loaded")
        # One zero row per input text keeps the output aligned with the inputs.
        return np.zeros((len(symptom_texts), len(label_names)))

    if not symptom_texts:
        # Nothing to classify; avoid handing an empty batch to the model.
        return np.zeros((0, len(label_names)))

    # Built once per call instead of once per text.
    stop_words = set(stopwords.words('english'))
    lemmatizer = WordNetLemmatizer()

    def preprocess_text(text):
        # NOTE(review): the order of these substitutions is kept exactly as it
        # was; presumably it mirrors the training-time preprocessing — confirm
        # before changing it.
        text = text.lower()
        text = re.sub(r'[^\w\s]', '', text)   # strip punctuation
        text = re.sub(r'\s+', ' ', text)      # collapse whitespace runs
        text = re.sub(r'\d+', ' ', text)      # replace digit runs with a space

        # Tokenize, drop stop words, lemmatize, and re-join with single spaces.
        tokens = word_tokenize(text)
        return ' '.join(
            lemmatizer.lemmatize(word) for word in tokens if word not in stop_words
        )

    processed_texts = [preprocess_text(text) for text in symptom_texts]

    # Transform using the trained vectorizer
    X_transformed = tfidf_vectorizer.transform(processed_texts)

    # Only the probabilities are needed; no separate hard-label prediction.
    return symptom_model.predict_proba(X_transformed)

print("✅ Symptom prediction function defined")


✅ Symptom prediction function defined


In [None]:
# ============================================
# ❤️ Heart Disease Prediction Function
# ============================================

def predict_heart(input_data):
    """
    Predict heart-disease class probabilities for one or more patient rows.

    Args:
        input_data: One of
            * a CSV path (``str`` or ``os.PathLike``), read with ``pd.read_csv``;
            * a ``pd.DataFrame`` with one or more rows;
            * a ``pd.Series`` holding a single row (index = column names).

    Returns:
        The array returned by ``heart_model.predict_proba`` on the scaled
        features. If the model, scaler or feature-column list is not loaded, a
        message is printed and the neutral default ``[[0.5, 0.5]]`` is returned.
    """
    if heart_model is None or heart_scaler is None or heart_feature_cols is None:
        print("❌ Heart model, scaler, or feature columns not loaded")
        return np.array([[0.5, 0.5]])  # Neutral default

    # Normalise every supported input kind to a fresh DataFrame so the caller's
    # object is never modified.
    if isinstance(input_data, (str, os.PathLike)):
        heart_df = pd.read_csv(input_data)
    elif isinstance(input_data, pd.Series):
        # A single row given as a Series: turn it into a one-row frame.
        heart_df = input_data.to_frame().T.infer_objects()
    else:
        heart_df = input_data.copy()

    # Drop index / identifier / target-like columns when present.
    # NOTE(review): an earlier comment here said 'Unnamed: 0' should be kept,
    # but the code has always dropped it. If that column is listed in
    # heart_feature_cols it is re-created below filled with 0 — confirm which
    # behaviour is intended.
    columns_to_drop = ['Unnamed: 0', 'id', 'num', 'label']
    X_heart = heart_df.drop(columns=columns_to_drop, errors='ignore')

    # Align with the training columns: missing features are filled with 0,
    # extra columns are discarded, and the training column order is enforced.
    X_heart = X_heart.reindex(columns=heart_feature_cols, fill_value=0)

    # Apply the fitted scaler, then predict class probabilities.
    X_heart = heart_scaler.transform(X_heart)
    probs = heart_model.predict_proba(X_heart)
    return probs

print("✅ Heart disease prediction function defined")


✅ Heart disease prediction function defined


In [None]:
# ============================================
# 🫁 X-ray Prediction Function
# ============================================

# Define a simple CNN model architecture (should match the training model)
class SimpleCNN(nn.Module):
    """Small CNN: two 3x3 convolutions, each followed by ReLU and 2x2 max
    pooling, then a single linear layer producing ``num_classes`` logits.

    The linear layer expects ``16 * 56 * 56`` inputs, i.e. a 1-channel
    224x224 image after two rounds of 2x pooling. The attribute names
    (``conv1``, ``conv2``, ``fc1``) are the keys of the module's state dict.
    """

    def __init__(self, num_classes=2):
        super().__init__()
        # Single-channel (grayscale) input; padding=1 keeps the spatial size.
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=8, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(in_channels=8, out_channels=16, kernel_size=3, padding=1)
        # 224 -> 112 -> 56 after the two poolings in forward().
        self.fc1 = nn.Linear(in_features=16 * 56 * 56, out_features=num_classes)

    def forward(self, x):
        # conv -> ReLU -> 2x2 max-pool, applied once per convolution stage
        for conv in (self.conv1, self.conv2):
            x = torch.max_pool2d(torch.relu(conv(x)), 2)
        flat = x.view(x.size(0), -1)
        return self.fc1(flat)

def predict_xray(image_path):
    """
    Predict class probabilities for a chest X-ray image with the CNN weights
    held in ``xray_state_dict``.

    Args:
        image_path: Path (or file object) accepted by ``PIL.Image.open``.

    Returns:
        A ``(1, 2)`` NumPy array of softmax probabilities.
        NOTE(review): the column order is presumably [Normal, Pneumonia]
        (downstream code reads column 1 as pneumonia) — confirm against the
        class ordering used in training.

        The neutral default ``[[0.5, 0.5]]`` is returned when the weights are
        not loaded, and also when anything fails during loading, preprocessing
        or inference (the error is printed, not raised).
    """
    if xray_state_dict is None:
        print("❌ X-ray model not loaded")
        return np.array([[0.5, 0.5]])  # Neutral default

    try:
        # Build the network and restore the trained weights
        model = SimpleCNN(num_classes=2)
        model.load_state_dict(xray_state_dict)
        model.eval()

        # Preprocessing: 224x224, single grayscale channel, tensor
        transform = transforms.Compose([
            transforms.Resize((224, 224)),
            transforms.Grayscale(num_output_channels=1),
            transforms.ToTensor(),
        ])

        # Open the image inside a context manager so the underlying file
        # handle is released as soon as the tensor has been built, instead of
        # lingering until garbage collection.
        with Image.open(image_path) as raw_image:
            image_tensor = transform(raw_image.convert('RGB')).unsqueeze(0)

        # Inference only: no gradients needed
        with torch.no_grad():
            outputs = model(image_tensor)
            probabilities = torch.softmax(outputs, dim=1)

        return probabilities.numpy()

    except Exception as e:
        # Deliberate best-effort: report the problem and fall back to neutral
        # probabilities so the ensemble can still run.
        print(f"Error in X-ray prediction: {e}")
        return np.array([[0.5, 0.5]])

print("✅ X-ray prediction function defined")


✅ X-ray prediction function defined


In [None]:
# ============================================
# 🧩 Unified Ensemble (Multimodal Fusion)
# ============================================

def map_binary_to_multiclass(symptom_probs, heart_probs, xray_probs, label_names,
                             heart_weight=0.3, xray_weight=0.4):
    """
    Fuse the binary heart / X-ray outputs into the multi-class symptom
    probabilities.

    The positive-class probability of the heart model (``heart_probs[0, 1]``)
    is added, scaled by ``heart_weight``, to the "Hypertension" class, and the
    positive-class probability of the X-ray model (``xray_probs[0, 1]``) is
    added, scaled by ``xray_weight``, to the "Pneumonia" class. Each row is then
    re-normalised to sum to 1.

    Args:
        symptom_probs: 2-D array-like of shape ``(rows, n_classes)``. It is
            copied, never modified. Only row 0 receives the contributions.
        heart_probs: 2-D array-like; an empty array contributes 0.
        xray_probs: 2-D array-like; an empty array contributes 0.
        label_names: Sequence of class names aligned with the columns of
            ``symptom_probs``. Labels that are absent are simply skipped.
        heart_weight: Weight of the heart contribution (default 0.3).
        xray_weight: Weight of the X-ray contribution (default 0.4).

    Returns:
        Float array with the same shape as ``symptom_probs`` whose rows sum to
        1. A row with no probability mass at all (e.g. the all-zero symptom
        fallback with nothing added) becomes a uniform distribution instead of
        NaN.
    """
    # Fresh float copy: protects the caller's array and lets += work even when
    # the input holds integers.
    final_probs = np.array(symptom_probs, dtype=float)
    heart_probs = np.asarray(heart_probs)
    xray_probs = np.asarray(xray_probs)

    # Extract relevant (positive-class) probabilities
    heart_disease_prob = heart_probs[0, 1] if heart_probs.size > 0 else 0
    pneumonia_prob = xray_probs[0, 1] if xray_probs.size > 0 else 0

    # list() so that tuples / arrays of labels support .index as well
    labels = list(label_names)
    contributions = (
        ("Hypertension", heart_weight * heart_disease_prob),
        ("Pneumonia", xray_weight * pneumonia_prob),
    )
    for label, boost in contributions:
        if label in labels:
            final_probs[0, labels.index(label)] += boost

    # Normalize back to probabilities, guarding rows whose total is zero so
    # they do not turn into NaN (which would make argmax meaningless).
    row_sums = final_probs.sum(axis=1, keepdims=True)
    empty_rows = row_sums[:, 0] == 0
    row_sums[empty_rows] = 1.0
    final_probs = final_probs / row_sums
    if final_probs.shape[1] > 0:
        final_probs[empty_rows] = 1.0 / final_probs.shape[1]
    return final_probs


def final_multimodal_prediction(symptom_text, heart_row, xray_path, label_names):
    """
    Run the symptom, heart and X-ray predictors and fuse their outputs.

    Returns a ``(label, probabilities)`` pair: the entry of ``label_names`` at
    the argmax of the fused probabilities, and the fused probability array
    itself. Raises ``ValueError`` when the symptom predictor returns nothing;
    an empty heart or X-ray output is replaced by the neutral ``[[0.5, 0.5]]``.
    """
    # All three predictors are invoked up front, in this order.
    symptom_scores = predict_symptoms([symptom_text])
    heart_scores = predict_heart(heart_row)
    xray_scores = predict_xray(xray_path)

    # The symptom output is mandatory ...
    if not len(symptom_scores):
        raise ValueError("Symptom model failed.")
    # ... while the two binary models degrade to a neutral default.
    heart_scores = heart_scores if len(heart_scores) else np.array([[0.5, 0.5]])
    xray_scores = xray_scores if len(xray_scores) else np.array([[0.5, 0.5]])

    fused = map_binary_to_multiclass(symptom_scores, heart_scores, xray_scores, label_names)
    winner = np.argmax(fused)
    return label_names[winner], fused

print("✅ Ensemble prediction functions defined")


✅ Ensemble prediction functions defined


In [None]:
# ============================================
# 🧪 Test the Loaded Models
# ============================================

print("🧪 Testing the loaded models...")

# Smoke-test each predictor on its own. Inputs that are not available on disk
# are skipped with a warning; any exception is reported with a traceback
# instead of aborting the notebook.
try:
    # Symptom model: one free-text description in, one probability row out
    test_symptoms = ["chest pain and shortness of breath"]
    symptom_probs = predict_symptoms(test_symptoms)
    print(f"✅ Symptom prediction: {symptom_probs.shape}")

    # Heart model: first row of the preprocessed CSV, when it is present
    if not os.path.exists("pre_heart.csv"):
        print("⚠️ pre_heart.csv not found, skipping heart test")
    else:
        test_heart = pd.read_csv("pre_heart.csv").iloc[[0]]
        heart_probs = predict_heart(test_heart)
        print(f"✅ Heart prediction: {heart_probs.shape}")

    # X-ray model: one validation image, when it is present
    xray_path = os.path.join("data", "raw", "chest_xray", "chest_xray", "val", "NORMAL", "NORMAL2-IM-1427-0001.jpeg")
    if not os.path.exists(xray_path):
        print("⚠️ X-ray file not found, skipping X-ray test")
    else:
        xray_probs = predict_xray(xray_path)
        print(f"✅ X-ray prediction: {xray_probs.shape}")

    print("\n🎉 All available models tested successfully!")

except Exception as e:
    print(f"❌ Error during testing: {e}")
    import traceback
    traceback.print_exc()


🧪 Testing the loaded models...
✅ Symptom prediction: (1, 24)
✅ Heart prediction: (1, 2)
✅ X-ray prediction: (1, 2)

🎉 All available models tested successfully!


In [None]:
# ============================================
# 🎯 Example Usage - Complete Prediction
# ============================================

print("🎯 Running complete multimodal prediction example...")

# End-to-end demo: one symptom sentence, one heart record and one X-ray image
# are fused into a single prediction. Errors are printed with a traceback
# rather than raised.
try:
    # Free-text symptom description
    symptom_text = "Chest pain and shortness of breath"

    # Heart record: first row of the preprocessed CSV, otherwise an all-zero
    # placeholder row
    if not os.path.exists("pre_heart.csv"):
        print("⚠️ Using dummy heart data")
        heart_row = pd.DataFrame([[0] * 30])  # Dummy data
    else:
        heart_row = pd.read_csv("pre_heart.csv").iloc[[0]]

    # Chest X-ray image used for the demo
    xray_path = os.path.join("data", "raw", "chest_xray", "chest_xray", "val", "NORMAL", "NORMAL2-IM-1427-0001.jpeg")

    # Fuse the three modalities
    prediction, probs = final_multimodal_prediction(symptom_text, heart_row, xray_path, label_names)

    print(f"\n🩺 Final Disease Prediction: {prediction}")
    print(f"📊 Prediction Probabilities:")

    # Five most probable classes, highest first
    top_indices = np.argsort(probs[0])[-5:][::-1]
    for i, idx in enumerate(top_indices):
        print(f"  {i+1}. {label_names[idx]}: {probs[0][idx]:.4f}")

    print("\n✅ Complete prediction example successful!")

except Exception as e:
    print(f"❌ Error in complete prediction: {e}")
    import traceback
    traceback.print_exc()


🎯 Running complete multimodal prediction example...

🩺 Final Disease Prediction: Pneumonia
📊 Prediction Probabilities:
  1. Pneumonia: 0.2705
  2. Dimorphic Hemorrhoids: 0.1572
  3. Hypertension: 0.0983
  4. drug reaction: 0.0768
  5. gastroesophageal reflux disease: 0.0545

✅ Complete prediction example successful!
