# In [2]:
import pickle
# Load the pickled kidney model from the working directory.
# NOTE(security): unpickling executes code embedded in the file, so this
# file must come from a trusted source.
with open('kidney_rf_model.pkl', mode='rb') as model_file:
    model = pickle.load(model_file)


# In [5]:
import numpy as np
import pickle
import pytesseract
from PIL import Image
import re
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import TfidfVectorizer

def _load_pickle(path):
    """Return the object unpickled from *path*, closing the file afterwards.

    NOTE(security): ``pickle.load`` can execute arbitrary code contained in
    the file, so only model files from a trusted source should be loaded.
    """
    with open(path, 'rb') as handle:
        return pickle.load(handle)


# Load trained models, one per disease. The keys are the disease names used
# to select a model elsewhere in this file.
models = {
    "diabetes": _load_pickle('diabetes_predictor.pkl'),
    "heart": _load_pickle('heart_stage_model.pkl'),
    "hypertension": _load_pickle('hypertension_model.pkl'),
    "kidney": _load_pickle('kidney_rf_model.pkl'),
    "liver": _load_pickle('liver_disease_model.pkl'),
}


# Separate model consulted when classifying which disease a report is about.
meta_model = _load_pickle('meta_model.pkl')


# Keywords associated with each disease; the report text is compared against
# these to guess which disease a report concerns. Insertion order is kept as
# written because ties in the similarity ranking resolve to the first entry.
field_names = {
    "diabetes": [
        "glucose",
        "insulin",
        "hba1c",
        "blood sugar",
    ],
    "heart": [
        "cholesterol",
        "blood pressure",
        "ecg",
        "heart rate",
    ],
    "hypertension": [
        "systolic",
        "diastolic",
        "blood pressure",
    ],
    "kidney": [
        "creatinine",
        "urea",
        "gfr",
        "proteinuria",
    ],
    "liver": [
        "bilirubin",
        "alt",
        "ast",
        "alkaline phosphatase",
    ],
}

# Fixed length that extracted feature lists are padded or truncated to.
# NOTE(review): presumably matches the input width the models were trained
# with -- confirm against the training code.
REQUIRED_FEATURES = 10

def extract_text_from_image(image_path):
    """Extract lower-cased text from a medical report image using OCR.

    Args:
        image_path: Path of the image file to read.

    Returns:
        The OCR text converted to lower case, or an empty string when the
        image could not be opened or OCR failed (the error is printed).
    """
    try:
        # The context manager closes the underlying file as soon as OCR is
        # done instead of leaving the handle open until garbage collection.
        with Image.open(image_path) as image:
            text = pytesseract.image_to_string(image)
            return text.lower()
    except Exception as e:
        # Deliberate best-effort boundary: any failure is reported and
        # signalled to the caller as an empty string.
        print(f"Error extracting text: {e}")
        return ""

def preprocess_text(text):
    """Extract numerical features from the text."""
    try:
        features = [float(value) for value in re.findall(r'\d+\.\d+|\d+', text)]
        
       
        if len(features) < REQUIRED_FEATURES:
            features.extend([0] * (REQUIRED_FEATURES - len(features)))
        elif len(features) > REQUIRED_FEATURES:
            features = features[:REQUIRED_FEATURES]
        
        return features
    except ValueError:
        print("Error: No valid numeric data found!")
        return []

def determine_disease_by_similarity(text):
    """Return the disease whose keyword list is most similar to *text*.

    A TF-IDF vectorizer is fitted on the keyword lists in ``field_names``
    (one space-joined document per disease). *text* and each keyword
    document are then vectorized and compared by cosine similarity.

    Args:
        text: Report text to classify.

    Returns:
        The ``field_names`` key with the highest similarity score. On a tie
        the key that comes first in ``field_names`` wins.
    """
    tfidf = TfidfVectorizer().fit([" ".join(fields) for fields in field_names.values()])
    query_vector = tfidf.transform([text])

    scores = {}
    for disease, fields in field_names.items():
        keyword_vector = tfidf.transform([" ".join(fields)])
        scores[disease] = cosine_similarity(query_vector, keyword_vector)[0][0]

    return max(scores, key=scores.get)

def classify_disease(report_features, text):
    """Pick the disease type for a report from its extracted text.

    The result is always the keyword-similarity match from
    ``determine_disease_by_similarity``. The meta-model is consulted only as
    an advisory cross-check: a disagreement is printed, but it never changes
    the returned value.

    Args:
        report_features: Numeric feature list passed to ``meta_model.predict``.
        text: Report text used for the keyword-similarity match.

    Returns:
        The disease name chosen by keyword similarity.
    """
    predicted_disease = determine_disease_by_similarity(text)

    # Previously both branches of the comparison returned the same value, so
    # the meta-model could not affect the result -- yet an error while
    # predicting or indexing would still abort the run. Treat it as advisory.
    disease_names = list(models)
    try:
        meta_prediction = meta_model.predict([report_features])[0]
        # NOTE(review): assumes the meta-model predicts an integer index into
        # the insertion order of `models` -- confirm against its training.
        meta_disease = disease_names[meta_prediction]
    except (IndexError, TypeError, ValueError) as exc:
        print(f"Meta-model cross-check skipped: {exc}")
        return predicted_disease

    if meta_disease != predicted_disease:
        print(
            f"Note: meta-model suggests {meta_disease}, "
            f"keyword match suggests {predicted_disease}."
        )
    return predicted_disease

def verify_disease_type(predicted_disease):
    """Prompt the user to confirm the predicted disease type.

    Args:
        predicted_disease: Disease name shown in the prompt.

    Returns:
        True only when the reply, after trimming surrounding whitespace and
        lower-casing, is exactly "yes"; False for anything else.
    """
    reply = input(f"Is this report about {predicted_disease}? (yes/no): ")
    normalized_reply = reply.strip().lower()
    return normalized_reply == "yes"

def predict_severity(disease, report_features):
    """Predict severity with the model registered for *disease*.

    Args:
        disease: Key into the module-level ``models`` dict.
        report_features: Feature list for a single report; it is wrapped in
            a one-element batch before being passed to ``predict``.

    Returns:
        The first (and only) element of the model's prediction.

    Raises:
        KeyError: If *disease* is not a key of ``models``.
    """
    single_report_batch = [report_features]
    predictions = models[disease].predict(single_report_batch)
    return predictions[0]

def main():
    """Run the interactive pipeline: OCR, feature extraction, classification.

    Asks for an image path, extracts its text, derives numeric features,
    proposes a disease type for the user to confirm, and prints the
    predicted severity. Each stage that fails prints a message and stops.
    """
    report_path = input("Enter the path to your medical report image: ")

    # Stage 1: OCR. An empty string signals that extraction failed.
    ocr_text = extract_text_from_image(report_path)
    if not ocr_text:
        print("Failed to extract text from the report. Exiting.")
        return

    # Stage 2: numeric features. An empty list signals failure.
    features = preprocess_text(ocr_text)
    if not features:
        print("Failed to process report features. Exiting.")
        return

    # Stage 3: propose a disease type and let the user confirm it.
    disease = classify_disease(features, ocr_text)
    confirmed = verify_disease_type(disease)
    if not confirmed:
        print("Report type not confirmed. Exiting.")
        return

    # Stage 4: severity from the disease-specific model.
    result = predict_severity(disease, features)
    print(f"Predicted severity of {disease}: {result}")


if __name__ == "__main__":
    main()


# Output: Report type not confirmed. Exiting.
