In [4]:
import pandas as pd
import pytesseract
import cv2
import numpy as np
import pickle
from PIL import Image
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score

MODEL_FILE = "liver_disease_model.pkl"
SCALER_FILE = "liver_scaler.pkl"


def classify_severity(row):
    if row['Total_Bilirubin'] > 3 or row['Alkaline_Phosphotase'] > 300 or row['Aspartate_Aminotransferase'] > 100:
        return 2  
    elif row['Total_Bilirubin'] > 1.2 or row['Alkaline_Phosphotase'] > 200 or row['Aspartate_Aminotransferase'] > 50:
        return 1  
    else:
        return 0  


def preprocess_image(image_path):
    img = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    img = cv2.threshold(img, 128, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)[1]
    return img


def extract_text_from_image(image_path):
    img = preprocess_image(image_path)
    text = pytesseract.image_to_string(img)
    return text


def parse_extracted_text(text):
    data = {}
    lines = text.split('\n')
    for line in lines:
        parts = line.split(':')
        if len(parts) == 2:
            key = parts[0].strip()
            value = parts[1].strip()
            try:
                data[key] = float(value)
            except ValueError:
                pass
    return data

def train_model(file_path):
    df = pd.read_csv(file_path)
    df['Albumin_and_Globulin_Ratio'].fillna(df['Albumin_and_Globulin_Ratio'].median(), inplace=True)
    df['Severity'] = df.apply(classify_severity, axis=1)
    df['Gender'] = df['Gender'].map({'Male': 0, 'Female': 1})

    X = df.drop(columns=['Dataset', 'Severity'])
    y = df['Severity']

 
    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X)

   
    with open(SCALER_FILE, 'wb') as file:
        pickle.dump(scaler, file)

   
    X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42, stratify=y)
    model = RandomForestClassifier(n_estimators=100, random_state=42)
    model.fit(X_train, y_train)

   
    with open(MODEL_FILE, 'wb') as file:
        pickle.dump((model, X.columns), file)

    y_pred = model.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    print(f'Model Accuracy: {accuracy * 100:.2f}%')

def predict_severity_from_image(image_path):
   
    with open(MODEL_FILE, 'rb') as file:
        model, feature_columns = pickle.load(file)

    with open(SCALER_FILE, 'rb') as file:
        scaler = pickle.load(file)

    text = extract_text_from_image(image_path)
    patient_data = parse_extracted_text(text)

    df = pd.DataFrame([patient_data], columns=feature_columns).fillna(0)
    df_scaled = scaler.transform(df)

    severity = model.predict(df_scaled)[0]
    risk_percentage = [10, 40, 80][severity]  # Estimated risk

    findings = []
    if patient_data.get('Total_Bilirubin', 0) > 3:
        findings.append("⚠️ High bilirubin levels detected.")
    if patient_data.get('Alkaline_Phosphotase', 0) > 300:
        findings.append("⚠️ Elevated alkaline phosphatase levels.")
    if patient_data.get('Aspartate_Aminotransferase', 0) > 100:
        findings.append("⚠️ High AST levels detected.")
    if not findings:
        findings.append("✅ Liver function appears normal.")

    recommendations = {
        0: "✅ Maintain a healthy diet and regular check-ups.",
        1: "🥗 Lifestyle modifications recommended. Consider consulting a doctor.",
        2: "🚨 Immediate medical consultation required!"
    }

    result = (
        f"Conclusion: {'🔥 High Risk' if severity == 2 else '⚠️ Moderate Risk' if severity == 1 else '✅ Low Risk'}\n"
        f"- 📊 Estimated Risk: {risk_percentage}%\n"
        f"- " + "\n- ".join(findings) + "\n\n"
        f"Recommendation: {recommendations[severity]}"
    )
    return result

file_path = "liver.csv"
train_model(file_path)


image_path = "liver1.jpg"
severity_prediction = predict_severity_from_image(image_path)
print(severity_prediction)


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['Albumin_and_Globulin_Ratio'].fillna(df['Albumin_and_Globulin_Ratio'].median(), inplace=True)


Model Accuracy: 98.29%
Conclusion: ✅ Low Risk
- 📊 Estimated Risk: 10%
- ✅ Liver function appears normal.

Recommendation: ✅ Maintain a healthy diet and regular check-ups.
