In [15]:
import pandas as pd
import numpy as np
import joblib
import pytesseract
from PIL import Image
import re
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score


# --- 1. Load the dataset and discard rows containing missing values ---
file_path = "diabetes.csv"
df = pd.read_csv(file_path).dropna()

# --- 2. Separate the "Outcome" label from the predictor columns ---
y = df["Outcome"]
X = df.drop("Outcome", axis=1)

# --- 3. Hold out 20% of the rows for evaluation (seeded, stratified on y) ---
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)

# --- 4. Standardize features using statistics from the training rows only ---
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# --- 5. Fit a seeded 100-tree random forest on the scaled training data ---
rf_model = RandomForestClassifier(n_estimators=100, random_state=42).fit(
    X_train_scaled, y_train
)

# --- 6. Report accuracy on the held-out rows ---
y_test_pred = rf_model.predict(X_test_scaled)
rf_acc = accuracy_score(y_test, y_test_pred)
print(f"🔥 Random Forest Accuracy: {rf_acc:.2f}")

# --- 7. Persist everything the prediction step loads back from disk ---
for artifact, artifact_path in (
    (rf_model, "random_forest_model.pkl"),
    (scaler, "diabetes_scaler.pkl"),
    (X.columns, "feature_columns.pkl"),  # feature names, in training order
):
    joblib.dump(artifact, artifact_path)

print("✅ Model, scaler, and feature columns saved!")


def predict_from_report(image_path):
    """Estimate diabetes risk from an image of a lab report.

    Loads the saved model, scaler and feature-column list from the working
    directory, runs Tesseract OCR on the image, extracts ``Label: value``
    numbers from the recognized text, scales them and asks the model for a
    class prediction and a class-1 probability.

    Any field that cannot be read from the OCR text is replaced by a
    hard-coded default; when that happens a ``UserWarning`` names the
    affected fields so the caller knows the estimate is partly based on
    placeholder values.

    Parameters
    ----------
    image_path
        Path (or file object) accepted by ``PIL.Image.open``.

    Returns
    -------
    str
        A multi-line, human-readable summary (risk level, estimated risk in
        percent, predicted class and a recommendation). The same text is
        also printed.

    Raises
    ------
    KeyError
        If the saved feature list contains a column for which no extraction
        rule is defined below.
    """
    import warnings

    rf_model = joblib.load("random_forest_model.pkl")
    scaler = joblib.load("diabetes_scaler.pkl")
    feature_columns = joblib.load("feature_columns.pkl")

    # Use a context manager so the image file handle is released as soon as
    # OCR has finished instead of lingering until garbage collection.
    with Image.open(image_path) as report_image:
        ocr_text = pytesseract.image_to_string(report_image)

    def extract_value(pattern, default=None, cast_type=str):
        """Return the first capture of `pattern` in the OCR text, converted
        with `cast_type`; return `default` if absent or not convertible."""
        match = re.search(pattern, ocr_text)
        if match is None:
            return default
        try:
            return cast_type(match.group(1))
        except ValueError:
            # Garbled OCR token: treat it like a missing value, don't crash.
            return default

    # A well-formed decimal: "25", "25.3" or ".5". Unlike the looser
    # "[\d.]+", this never captures a trailing sentence period ("25.3.")
    # or a lone "." — both of which make float() raise ValueError.
    decimal = r"(\d+(?:\.\d+)?|\.\d+)"

    # feature name -> (regex with one capture group, fallback value, converter)
    field_specs = {
        "Pregnancies": (r"Pregnancies:\s*(\d+)", 0, int),
        "Glucose": (r"Glucose:\s*(\d+)", 120, int),
        "BloodPressure": (r"Blood Pressure:\s*(\d+)", 70, int),
        "SkinThickness": (r"Skin Thickness:\s*(\d+)", 20, int),
        "Insulin": (r"Insulin:\s*(\d+)", 80, int),
        "BMI": (r"BMI:\s*" + decimal, 25.0, float),
        "DiabetesPedigreeFunction": (
            r"Diabetes Pedigree Function:\s*" + decimal,
            0.5,
            float,
        ),
        "Age": (r"Age:\s*(\d+)", 30, int),
    }

    extracted_data = {}
    defaulted_fields = []
    for feature, (pattern, fallback, cast_type) in field_specs.items():
        value = extract_value(pattern, cast_type=cast_type)
        if value is None:
            defaulted_fields.append(feature)
            value = fallback
        extracted_data[feature] = value

    if defaulted_fields:
        warnings.warn(
            "Could not read these fields from the report; default values "
            "were used instead: " + ", ".join(defaulted_fields),
            UserWarning,
            stacklevel=2,
        )

    # Build a one-row DataFrame in the training column order. Keeping the
    # column names lets the scaler (fitted on a DataFrame) validate them and
    # avoids scikit-learn's "X does not have valid feature names" warning.
    report_data = pd.DataFrame(
        [[extracted_data[feature] for feature in feature_columns]],
        columns=list(feature_columns),
    )

    report_scaled = scaler.transform(report_data)
    rf_pred = rf_model.predict(report_scaled)
    # Column 1 of predict_proba is used as the probability of the positive
    # class (label 1), matching the label test on rf_pred below.
    rf_proba = rf_model.predict_proba(report_scaled)[0][1]

    risk_percentage = round(rf_proba * 100, 2)

    if risk_percentage >= 75:
        risk_level = "🔥 High Risk"
    elif risk_percentage >= 50:
        risk_level = "⚠️ Moderate Risk"
    else:
        risk_level = "✅ Low Risk"

    prediction_label = "🩸 Diabetic" if rf_pred[0] == 1 else "✅ Non-Diabetic"

    result = f"""
    Conclusion: {risk_level}
    - 📊 Estimated Risk: {risk_percentage}%
    - Prediction: {prediction_label}
    
    Recommendation: 🥗 Maintain a healthy lifestyle. 💊 Consult a doctor if symptoms persist.
    """
    print(result)
    return result


# Persist the prediction callable next to the model artifacts.
# NOTE(review): joblib relies on pickle, and pickle stores a plain function
# as a reference to its module and name rather than its code, so this file
# can only be loaded in a session where `predict_from_report` is already
# defined or importable under the same name.
predictor_file = "diabetes_predictor.pkl"
joblib.dump(predict_from_report, predictor_file)
print("✅ Prediction function saved!")

# Round-trip check: reload the callable and run it on a sample report image.
image_path = "diabetes3.jpg"
loaded_predictor = joblib.load(predictor_file)
loaded_predictor(image_path)


🔥 Random Forest Accuracy: 0.76
✅ Model, scaler, and feature columns saved!
✅ Prediction function saved!

    Conclusion: ✅ Low Risk
    - 📊 Estimated Risk: 9.0%
    - Prediction: ✅ Non-Diabetic
    
    Recommendation: 🥗 Maintain a healthy lifestyle. 💊 Consult a doctor if symptoms persist.
    




'\n    Conclusion: ✅ Low Risk\n    - 📊 Estimated Risk: 9.0%\n    - Prediction: ✅ Non-Diabetic\n    \n    Recommendation: 🥗 Maintain a healthy lifestyle. 💊 Consult a doctor if symptoms persist.\n    '

In [16]:
from sklearn.ensemble import RandomForestClassifier
import pickle


# Tiny hand-written dataset: three 2-feature rows with labels 0/1/0.
# NOTE(review): this rebinds X_train / y_train, which the training cell
# above also assigns, and the classifier here is created without a
# random_state.
X_train = [[1, 2], [3, 4], [5, 6]]
y_train = [0, 1, 0]
model = RandomForestClassifier().fit(X_train, y_train)

# NOTE(review): 'diabetes_predictor.pkl' is the same path the previous cell
# wrote the pickled prediction function to; opening it with 'wb' replaces
# that file with this placeholder classifier.
with open('diabetes_predictor.pkl', 'wb') as f:
    pickle.dump(model, f)

print("Model saved successfully!")


Model saved successfully!
