In [2]:
import pytesseract
import cv2
import numpy as np
import pandas as pd
import joblib
import pickle
from PIL import Image
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import re


# Load the dataset and replace missing entries with each column's median.
df = pd.read_csv("Hypertension.csv")
df = df.fillna(df.median())

# Separate the 'Risk' target column from the feature columns.
y = df['Risk']
X = df.drop(columns=['Risk'])

# Hold out 20% of the rows; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Fit the scaler on the training split only, then apply it to both splits.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

model = RandomForestClassifier(n_estimators=100, random_state=42).fit(X_train, y_train)


# Persist the model and scaler together in one pickle file...
with open("hypertension_model.pkl", "wb") as file:
    pickle.dump({"model": model, "scaler": scaler}, file)

# ...and read the bundle back, so the objects used below are the ones
# that were actually written to disk.
with open("hypertension_model.pkl", "rb") as file:
    data = pickle.load(file)

model, scaler = data["model"], data["scaler"]


def extract_text_from_image(image_path):
    """Run OCR on an image file and return the recognized text.

    The image is converted to grayscale and binarized before being passed
    to Tesseract.

    Args:
        image_path: Path of the image file to read.

    Returns:
        The string produced by ``pytesseract.image_to_string``.

    Raises:
        FileNotFoundError: If OpenCV could not load the image (the file is
            missing, unreadable, or in an unsupported format).
    """
    image = cv2.imread(image_path)
    # cv2.imread signals failure by returning None instead of raising, which
    # would otherwise surface as an opaque error inside cvtColor.
    if image is None:
        raise FileNotFoundError(
            f"Could not read image (missing or unsupported file): {image_path}"
        )

    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    # With THRESH_OTSU set, OpenCV picks the threshold itself and uses it in
    # place of the 150 passed here; the value is kept only for compatibility.
    _, thresh = cv2.threshold(gray, 150, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
    return pytesseract.image_to_string(thresh)


def parse_report(text):
    """Pull the first 12 numeric values out of OCR text.

    Numbers are unsigned digit runs with an optional decimal part, taken in
    the order they appear in ``text``.

    Args:
        text: Raw text to scan.

    Returns:
        A list of exactly 12 floats, or ``None`` when fewer than 12 numbers
        are present.
    """
    expected_count = 12
    numbers = []
    for match in re.finditer(r'\d+\.?\d*', text):
        if len(numbers) == expected_count:
            break
        numbers.append(float(match.group()))

    if len(numbers) != expected_count:
        return None
    return numbers


def predict_risk_with_explanation(test_data):
    """Predict hypertension risk for one sample and build a text report.

    The model receives the standardized features, while the per-measurement
    checks (blood pressure, cholesterol, triglycerides, BMI, blood sugar)
    are evaluated on the raw, unscaled values, because their thresholds are
    expressed in the original measurement units.

    Args:
        test_data: Sequence of raw feature values for a single sample, in
            the column order the module-level ``scaler`` and ``model`` were
            fitted with.

    Returns:
        A multi-line string with the conclusion, the estimated risk
        percentage, one line per measurement check, and a recommendation.
    """
    raw_features = np.array(test_data).reshape(1, -1)
    # Keep the raw values intact; only the model input is standardized.
    scaled_features = scaler.transform(raw_features)
    # Column 1 of predict_proba is used as the risk probability
    # (assumes it corresponds to the positive class — TODO confirm).
    risk_probability = model.predict_proba(scaled_features)[0][1]
    risk = model.predict(scaled_features)[0]

    # NOTE(review): feature positions are assumed to match the training
    # column order — confirm against the dataset schema.
    raw = raw_features[0]
    bp_systolic, bp_diastolic = raw[6], raw[7]
    bmi, cholesterol, triglycerides = raw[9], raw[5], raw[8]
    blood_sugar = raw[3]

    bp_risk = "🩸 Normal BP." if bp_systolic < 140 and bp_diastolic < 90 else "⚠️ Stage 1 Hypertension or higher."
    cholesterol_risk = "✅ Cholesterol is within normal range." if cholesterol <= 200 else "⚠️ High cholesterol detected."
    triglyceride_risk = "✅ Triglycerides are normal." if triglycerides <= 150 else "⚠️ High triglycerides detected."
    bmi_risk = "✅ Normal BMI." if bmi <= 25 else "⚠️ Overweight."
    sugar_risk = "✅ Normal blood sugar." if blood_sugar < 100 else "⚠️ Elevated blood sugar."

    risk_percentage = round(risk_probability * 100, 2)

    explanation = f"""
    Conclusion: {"🔥 High Risk" if risk == 1 else "🟡 Moderate Risk"}
    - 📊 Estimated Risk: {risk_percentage}%
    - {bp_risk}
    - {cholesterol_risk}
    - {triglyceride_risk}
    - {bmi_risk}
    - {sugar_risk}

    Recommendation: 🥗 Lifestyle modifications (healthy diet, exercise). 💊 Medication may be required if risk persists.
    """
    return explanation

# Run the OCR -> parse -> predict pipeline on a sample report image.
image_path = "hypertension1.jpg"
extracted_text = extract_text_from_image(image_path)
parsed_data = parse_report(extracted_text)

if not parsed_data:
    # parse_report returned None: fewer than 12 numbers were found.
    print("Error: Extracted data does not match the expected format.")
else:
    risk_report = predict_risk_with_explanation(parsed_data)
    print(risk_report)

# Also save the fitted scaler on its own, in a separate pickle file.
with open("hypertension_scaler.pkl", "wb") as file:
    pickle.dump(scaler, file)



    Conclusion: 🔥 High Risk
    - 📊 Estimated Risk: 64.0%
    - 🩸 Normal BP.
    - ⚠️ High cholesterol detected.
    - ✅ Triglycerides are normal.
    - ⚠️ Overweight.
    - ✅ Normal blood sugar.

    Recommendation: 🥗 Lifestyle modifications (healthy diet, exercise). 💊 Medication may be required if risk persists.
    


