# Liver Disease Prediction using AutoML
## Clean Working Version
This notebook contains only the essential working code for the liver disease prediction system.

In [None]:
# Install required packages
!pip install flaml gradio -q

In [None]:
# Import libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report
from flaml import AutoML
import joblib
import numpy as np
import gradio as gr

In [None]:
# Load and prepare data
print("Loading and preparing dataset...")
raw_data = pd.read_csv('Indian Liver Patient Dataset (ILPD).csv')
print(f"Dataset shape: {raw_data.shape}")
print(f"Target distribution: {raw_data['is_patient'].value_counts()}")

# Handle missing values. The raw frame is kept untouched so that re-running
# this cell always starts from the same input.
data = raw_data.dropna()

# Prepare features and target
X = data.drop(columns=['is_patient'])

# Convert target to binary (1 = patient, 2 = healthy -> 1 = patient, 0 = healthy).
# Series.map() silently turns unmapped labels into NaN, so reject them up front
# instead of training on a corrupted target.
label_map = {1: 1, 2: 0}
unexpected_labels = set(data['is_patient'].unique()) - set(label_map)
if unexpected_labels:
    raise ValueError(
        f"Unexpected values in 'is_patient': {sorted(unexpected_labels, key=str)}"
    )
y = data['is_patient'].map(label_map)

# Convert categorical 'gender' to numeric. dtype=int keeps the dummy column as
# 0/1 integers, matching the integer 'gender_Male' value built at prediction
# time (recent pandas versions would otherwise emit a bool column).
X = pd.get_dummies(X, columns=['gender'], drop_first=True, dtype=int)

print(f"Features: {X.columns.tolist()}")
print(f"Feature shape: {X.shape}")
print(f"Target distribution after mapping: {y.value_counts()}")

In [None]:
# Train AutoML model
print("Training AutoML model...")

# Train-test split with stratification so both splits keep the class balance
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# FLAML AutoML settings
automl = AutoML()
automl_settings = {
    "time_budget": 120,
    "metric": "accuracy",
    "task": "classification",
    "log_file_name": "flaml_liver.log",
    "eval_method": "cv",
    "n_splits": 5,
    # Seed the hyper-parameter search. The search is still bounded by wall-clock
    # time, so runs on different hardware may not explore the same configurations.
    "seed": 42,
}

# Train model
automl.fit(X_train=X_train, y_train=y_train, **automl_settings)

# Evaluate on the held-out split
y_pred = automl.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)

print(f"Best model: {automl.best_estimator}")
print(f"Test Accuracy: {accuracy:.4f}")
print("Classification Report:")
# labels=[0, 1] pins the name order (0 = Healthy, 1 = Patient) explicitly
# instead of relying on whichever labels happen to appear in y_test / y_pred.
print(classification_report(y_test, y_pred, labels=[0, 1],
                            target_names=['Healthy', 'Patient']))

# Save the model together with the exact feature order it was trained on,
# so inference can rebuild the input frame with matching columns.
model_data = {
    'model': automl,
    'feature_columns': X_train.columns.tolist()
}
joblib.dump(model_data, 'flaml_liver_model.pkl')
print("Model saved successfully!")

In [None]:
# Load model and create prediction function
# joblib files are pickle-based: only load artifacts produced by a trusted source.
model_data = joblib.load('flaml_liver_model.pkl')
automl, feature_columns = model_data['model'], model_data['feature_columns']

def predict_liver_disease(age, gender, tot_bilirubin, direct_bilirubin, tot_proteins,
                         albumin, ag_ratio, sgpt, sgot, alkphos):
    """Predict liver disease risk with detailed clinical interpretation.

    Builds a one-row DataFrame from the inputs, orders its columns like the
    module-level ``feature_columns`` and asks the module-level ``automl`` model
    for class probabilities.

    Args:
        age, tot_bilirubin, direct_bilirubin, tot_proteins, albumin, ag_ratio,
        sgpt, sgot, alkphos: Numeric values, or anything ``float()`` accepts
            (for example numeric strings).
        gender: String; "male" (case-insensitive, surrounding whitespace
            ignored) is encoded as ``gender_Male = 1``, anything else as 0.

    Returns:
        str: A formatted multi-line risk report. If anything fails (invalid
        input, model error), a "Prediction Error" message is returned instead
        of raising, so UI callers always receive text.
    """
    try:
        # Convert every numeric input exactly once. The converted values feed
        # both the model and the rule-based checks below, so a numeric string
        # that float() accepts cannot later break a `>` / `<` comparison.
        raw_inputs = {
            'age': age,
            'tot_bilirubin': tot_bilirubin,
            'direct_bilirubin': direct_bilirubin,
            'tot_proteins': tot_proteins,
            'albumin': albumin,
            'ag_ratio': ag_ratio,
            'sgpt': sgpt,
            'sgot': sgot,
            'alkphos': alkphos,
        }
        values = {name: float(raw) for name, raw in raw_inputs.items()}

        # Create input DataFrame
        input_dict = {name: [value] for name, value in values.items()}
        input_dict['gender_Male'] = [1 if gender.strip().lower() == 'male' else 0]

        # Reorder the columns to the order stored alongside the model.
        input_df = pd.DataFrame(input_dict)[feature_columns]

        # Get class probabilities.
        # NOTE(review): assumes column 0 is the healthy class and column 1 the
        # patient class (target mapped to 0/1 before training) - confirm.
        probabilities = automl.predict_proba(input_df)[0]
        healthy_prob = probabilities[0]
        patient_prob = probabilities[1]

        # Interpret results with adjusted thresholds
        if patient_prob > 0.75:
            status = "🚨 HIGH LIVER DISEASE RISK"
            risk_level = "🔴 HIGH RISK"
            confidence = patient_prob
        elif patient_prob > 0.6:
            status = "⚠️ MODERATE LIVER DISEASE RISK"
            risk_level = "🟠 MODERATE RISK"
            confidence = patient_prob
        elif patient_prob > 0.45:
            status = "🟡 LOW-MODERATE RISK"
            risk_level = "🟡 LOW-MODERATE RISK"
            confidence = max(patient_prob, healthy_prob)
        else:
            status = "✅ LOW LIVER DISEASE RISK"
            risk_level = "🟢 LOW RISK"
            confidence = healthy_prob

        # Identify risk factors with simple rule-based cut-offs.
        risk_checks = [
            (values['age'] > 60, "Advanced age"),
            (values['tot_bilirubin'] > 2.0, "Elevated total bilirubin"),
            (values['direct_bilirubin'] > 1.0, "Elevated direct bilirubin"),
            (values['albumin'] < 3.5, "Low albumin"),
            (values['sgpt'] > 56, "Elevated SGPT/ALT"),
            (values['sgot'] > 40, "Elevated SGOT/AST"),
            (values['alkphos'] > 147, "Elevated alkaline phosphatase"),
        ]
        risk_factors = [label for flagged, label in risk_checks if flagged]

        # Build these pieces outside the f-string: a backslash inside an
        # f-string expression is a SyntaxError before Python 3.12.
        if risk_factors:
            risk_factor_text = '• ' + '\n• '.join(risk_factors)
        else:
            risk_factor_text = '• No major risk factors detected'

        if patient_prob > 0.6:
            recommendation = "Consult a healthcare professional for comprehensive liver function assessment."
        else:
            recommendation = "Consider routine monitoring and maintain healthy lifestyle habits."

        # Format result (raw inputs are echoed back as the caller supplied them)
        result = f"""
{status}
{risk_level}
Confidence: {confidence:.1%}

📊 Probability Analysis:
• Disease Risk: {patient_prob:.3f} ({patient_prob:.1%})
• Healthy Score: {healthy_prob:.3f} ({healthy_prob:.1%})

🔍 Clinical Parameters:
• Age: {age} years, Gender: {gender}
• Total Bilirubin: {tot_bilirubin} mg/dL (Normal: 0.2-1.2)
• Direct Bilirubin: {direct_bilirubin} mg/dL (Normal: 0.0-0.3)
• Total Proteins: {tot_proteins} g/dL (Normal: 6.0-8.3)
• Albumin: {albumin} g/dL (Normal: 3.5-5.0)
• SGPT: {sgpt} U/L (Normal: 7-56)
• SGOT: {sgot} U/L (Normal: 10-40)
• Alkaline Phosphatase: {alkphos} U/L (Normal: 44-147)

⚠️ Risk Factors Identified:
{risk_factor_text}

💡 Recommendation:
{recommendation}
        """

        return result.strip()

    except Exception as e:
        # Deliberate best-effort: the UI shows this text instead of a traceback.
        return f"❌ Prediction Error: {str(e)}\nPlease verify all input values are valid numbers."

print("Prediction function created successfully!")

In [None]:
# Smoke-test the prediction function on two hand-picked profiles
print("Testing prediction function with sample cases:")

# Test 1: Healthy case
healthy_case = dict(
    age=28, gender="Female", tot_bilirubin=0.8, direct_bilirubin=0.2,
    tot_proteins=7.5, albumin=4.2, ag_ratio=1.7, sgpt=22, sgot=28, alkphos=75,
)
print("\n=== Test 1: Healthy Young Adult ===")
result1 = predict_liver_disease(**healthy_case)
print(result1)

# Test 2: High risk case
high_risk_case = dict(
    age=58, gender="Male", tot_bilirubin=8.5, direct_bilirubin=4.2,
    tot_proteins=5.5, albumin=2.8, ag_ratio=0.9, sgpt=125, sgot=98, alkphos=235,
)
print("\n=== Test 2: High Risk Patient ===")
result2 = predict_liver_disease(**high_risk_case)
print(result2)

In [None]:
# Create Gradio Interface
print("Creating Gradio interface...")

# Single source of truth for the form defaults. The same values initialise the
# widgets and are restored by the "Clear Fields" button, so the two cannot
# drift apart. Key order matches the argument order of predict_liver_disease.
FORM_DEFAULTS = {
    "age": 45,
    "gender": "Male",
    "tot_bilirubin": 1.2,
    "direct_bilirubin": 0.3,
    "tot_proteins": 7.0,
    "albumin": 4.0,
    "ag_ratio": 1.5,
    "sgpt": 35,
    "sgot": 32,
    "alkphos": 95,
}

with gr.Blocks(
    title="Liver Disease Prediction System",
    theme=gr.themes.Default(primary_hue="red", secondary_hue="orange")
) as demo:
    
    gr.Markdown("""
    # 🏥 Liver Disease Risk Assessment System
    ## AutoML-Powered Clinical Decision Support
    
    This system uses machine learning to assess liver disease risk based on clinical laboratory parameters.
    **For educational and screening purposes only - always consult healthcare professionals for medical decisions.**
    """)
    
    with gr.Row():
        with gr.Column():
            gr.Markdown("### 👤 Patient Information")
            age = gr.Number(label="Age (years)", value=FORM_DEFAULTS["age"], minimum=1, maximum=100)
            gender = gr.Dropdown(choices=["Male", "Female"], label="Gender", value=FORM_DEFAULTS["gender"])
            
            gr.Markdown("### 🧪 Bilirubin Panel")
            tot_bilirubin = gr.Number(label="Total Bilirubin (mg/dL)", value=FORM_DEFAULTS["tot_bilirubin"],
                                      minimum=0.1, info="Normal: 0.2-1.2")
            # minimum is 0.0 (not 0.1): the stated normal range starts at 0.0,
            # so a perfectly normal value must not be rejected by the form.
            direct_bilirubin = gr.Number(label="Direct Bilirubin (mg/dL)", value=FORM_DEFAULTS["direct_bilirubin"],
                                         minimum=0.0, info="Normal: 0.0-0.3")
            
            gr.Markdown("### 🥩 Protein Panel")
            tot_proteins = gr.Number(label="Total Proteins (g/dL)", value=FORM_DEFAULTS["tot_proteins"],
                                     minimum=1.0, info="Normal: 6.0-8.3")
            albumin = gr.Number(label="Albumin (g/dL)", value=FORM_DEFAULTS["albumin"],
                                minimum=0.5, info="Normal: 3.5-5.0")
            ag_ratio = gr.Number(label="A/G Ratio", value=FORM_DEFAULTS["ag_ratio"],
                                 minimum=0.1, info="Normal: 1.1-2.5")
        
        with gr.Column():
            gr.Markdown("### 🧬 Liver Enzyme Panel")
            sgpt = gr.Number(label="SGPT/ALT (U/L)", value=FORM_DEFAULTS["sgpt"], minimum=1, info="Normal: 7-56")
            sgot = gr.Number(label="SGOT/AST (U/L)", value=FORM_DEFAULTS["sgot"], minimum=1, info="Normal: 10-40")
            alkphos = gr.Number(label="Alkaline Phosphatase (U/L)", value=FORM_DEFAULTS["alkphos"],
                                minimum=1, info="Normal: 44-147")
            
            gr.Markdown("### 🎯 Analysis")
            predict_btn = gr.Button("🔬 Analyze Liver Health Risk", variant="primary", size="lg")
            clear_btn = gr.Button("🔄 Clear Fields", variant="secondary")
    
    # Input widgets in the positional order expected by predict_liver_disease;
    # shared by the examples table and both button handlers.
    input_components = [age, gender, tot_bilirubin, direct_bilirubin, tot_proteins,
                        albumin, ag_ratio, sgpt, sgot, alkphos]
    
    # Results
    result_output = gr.Textbox(
        label="📊 Risk Assessment Report",
        lines=15,
        show_copy_button=True,
        placeholder="Enter patient data and click 'Analyze Liver Health Risk' to see results..."
    )
    
    # Example cases
    gr.Markdown("### 📋 Example Test Cases")
    gr.Examples(
        examples=[
            [25, "Female", 0.7, 0.1, 7.8, 4.6, 1.9, 18, 22, 68],  # Healthy
            [62, "Male", 4.5, 2.1, 6.5, 3.2, 1.0, 78, 65, 178],   # Moderate Risk
            [58, "Female", 8.2, 4.0, 5.8, 2.9, 0.9, 125, 98, 235] # High Risk
        ],
        inputs=input_components,
        label="Select a test case:"
    )
    
    # Functions
    def clear_all():
        """Return the default value for every input, plus an empty report."""
        return [*FORM_DEFAULTS.values(), ""]
    
    # Connect buttons
    predict_btn.click(
        fn=predict_liver_disease,
        inputs=input_components,
        outputs=result_output
    )
    
    clear_btn.click(
        fn=clear_all,
        outputs=[*input_components, result_output]
    )
    
    # NOTE(review): the accuracy figure below is hard-coded and not derived from
    # the trained model - verify it against the test accuracy printed at training.
    gr.Markdown("""
    ---
    ### ⚠️ Medical Disclaimer
    This tool is for educational purposes only. Always consult healthcare professionals for medical diagnosis.
    
    ### 📈 Model Info
    - Dataset: Indian Liver Patient Dataset (583 patients)
    - Algorithm: FLAML AutoML
    - Performance: ~87% accuracy
    """)

# Start the app: share=True requests a public link, debug=True enables debug mode
launch_options = {"share": True, "debug": True}
print("Launching Gradio interface...")
demo.launch(**launch_options)