In [None]:
# Import required libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report
import pickle
import joblib

In [None]:
# Read the Liver Patient Dataset training CSV into a DataFrame.
# The path is relative to the working directory; update it to point at your copy.
file_path = "Liver Patient Dataset (LPD)_train.csv"
# The file is decoded as latin1 instead of pandas' default utf-8.
data = pd.read_csv(filepath_or_buffer=file_path, encoding="latin1")

In [None]:
# Clean column names: trim surrounding whitespace, collapse every whitespace run
# into a single underscore, then drop any character that is not a letter, digit
# or underscore.
# regex=True is required: str.replace defaults to regex=False in pandas 2.x, in
# which case both patterns are matched as literal text and the column names come
# back unchanged.
data.columns = (
    data.columns.str.strip()
    .str.replace(r'\s+', '_', regex=True)
    .str.replace(r'[^A-Za-z0-9_]', '', regex=True)
)

In [None]:
# Check column names
# Prints the current column Index so the effect of the cleaning step can be inspected.
print("Column names after cleaning:", data.columns)

Column names after cleaning: Index(['Age of the patient', 'Gender of the patient', 'Total Bilirubin',
       'Direct Bilirubin', 'Alkphos Alkaline Phosphotase',
       'Sgpt Alamine Aminotransferase', 'Sgot Aspartate Aminotransferase',
       'Total Protiens', 'ALB Albumin', 'A/G Ratio Albumin and Globulin Ratio',
       'Result'],
      dtype='object')


In [None]:
# Standardise the gender column name: a bare 'Gender' column, if present, becomes
# 'Gender_of_the_patient'. When no such column exists the frame is left unchanged.
if 'Gender' in data.columns:  # Adjust based on actual column name
    data = data.rename(columns={"Gender": "Gender_of_the_patient"})

In [None]:
# Handle missing values.
# Each column is reassigned (data[col] = ...) rather than calling
# fillna(..., inplace=True) on data[col]: the chained in-place form operates on
# an intermediate object, emits a pandas FutureWarning and, per that warning,
# will no longer update `data` in pandas 3.0.

# float64 columns: fill gaps with the column median
for col in data.select_dtypes(include=['float64']).columns:
    data[col] = data[col].fillna(data[col].median())

# object (string) columns: fill gaps with the most frequent value
for col in data.select_dtypes(include=['object']).columns:
    data[col] = data[col].fillna(data[col].mode()[0])

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  data[col].fillna(data[col].median(), inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  data[col].fillna(data[col].mode()[0], inplace=True)


In [None]:
# Encode all categorical variables (object type)
# Create a LabelEncoder instance for turning category labels into integer codes.
label_encoder = LabelEncoder()


In [None]:
# Encode 'Gender_of_the_patient' and any other categorical (object-dtype) column
# as integer codes.
# A separate LabelEncoder is fitted for every column and kept in `label_encoders`:
# re-fitting one shared encoder would leave only the last column's mapping
# available, so the other columns could not be encoded consistently (or decoded)
# later, e.g. for new inputs at prediction time.
label_encoders = {}
for col in data.select_dtypes(include=['object']).columns:
    label_encoders[col] = LabelEncoder()
    data[col] = label_encoders[col].fit_transform(data[col])

In [None]:
# Map target values to ensure 0 = Not a Liver Patient, 1 = Liver Patient.
# Raw labels: 1 = Liver Patient, 2 = Not a Liver Patient.
result_mapping = {2: 0, 1: 1}
mapped_result = data['Result'].map(result_mapping)

# Series.map turns every value that is missing from the mapping into NaN.
# Fail loudly instead of letting NaN targets through; this also catches an
# accidental second run of this cell, where the already-mapped 0 labels would
# otherwise silently become NaN.
unmapped = mapped_result.isna()
if unmapped.any():
    raise ValueError(
        "Unexpected values in 'Result' (expected only 1 or 2): "
        f"{data.loc[unmapped, 'Result'].unique().tolist()}"
    )
data['Result'] = mapped_result

In [None]:
# Separate the target from the predictors: y is the 'Result' column,
# X is every remaining column.
y = data['Result']
X = data.drop(columns='Result')

In [None]:
# Hold out 20% of the rows as a test set; the fixed seed makes the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Names of the numeric-dtype feature columns, i.e. the ones that will be scaled.
# Note: columns that were label-encoded earlier now hold integers, so they are
# selected here as well.
numeric_columns = X.select_dtypes(include='number').columns

In [None]:

# Scale only numeric columns
# StandardScaler instance; it is fitted on the training split in the next step.
scaler = StandardScaler()

In [None]:
# Standardise the numeric features. The scaler is fitted on the training split
# only and then reused, unchanged, on the test split.
scaled_train_values = scaler.fit_transform(X_train[numeric_columns])
scaled_test_values = scaler.transform(X_test[numeric_columns])
X_train[numeric_columns] = scaled_train_values
X_test[numeric_columns] = scaled_test_values

In [None]:
# Classifiers to train, keyed by the display name used in the printed reports.
# Only the Random Forest is currently enabled.
models = {}
models["Random Forest"] = RandomForestClassifier(random_state=42)
# Disabled alternatives (uncomment to include them in the comparison):
# models["KNN"] = KNeighborsClassifier()
# models["Logistic Regression"] = LogisticRegression(random_state=42, max_iter=500)
# models["Naive Bayes"] = GaussianNB()
# models["SVM"] = SVC(probability=True, random_state=42)

In [None]:
# Fit each classifier on the training split, report its hold-out performance,
# and persist the Random Forest to disk.
for name, model in models.items():
    print(f"\nTraining {name}...")
    model.fit(X_train, y_train)

    # Evaluate on the held-out test split
    y_pred = model.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    print(f"{name} Accuracy: {accuracy:.2f}")
    print(f"Classification Report for {name}:\n")
    print(classification_report(y_test, y_pred))

    # Only the Random Forest is saved; any other model is just evaluated
    if name != "Random Forest":
        continue

    # The same fitted model is written twice: once with pickle, once with joblib
    with open("Random_Forest.pkl", 'wb') as f:
        pickle.dump(model, f)
    joblib.dump(model, "Random_Forest.sav")


Training Random Forest...
Random Forest Accuracy: 1.00
Classification Report for Random Forest:

              precision    recall  f1-score   support

           0       1.00      0.99      1.00      1710
           1       1.00      1.00      1.00      4429

    accuracy                           1.00      6139
   macro avg       1.00      1.00      1.00      6139
weighted avg       1.00      1.00      1.00      6139



In [None]:
# Function for custom input prediction using Random Forest
def predict_custom_input(input_data):
    """Classify one patient record with the in-memory Random Forest.

    Parameters:
        input_data (list): Unscaled feature values for a single patient. They
            are passed through the notebook-level `scaler` before prediction,
            so they must line up with the columns that scaler was fitted on.

    Returns:
        str: "Liver Patient" if the model predicts class 1, otherwise
        "Not a Liver Patient".
    """
    # The scaler expects 2D input, hence the single-row wrapper list
    scaled_row = scaler.transform([input_data])
    forest = models["Random Forest"]
    predicted_class = forest.predict(scaled_row)[0]
    if predicted_class == 1:
        return "Liver Patient"
    return "Not a Liver Patient"

In [None]:
# Smoke-test the helper on one hand-written patient record
custom_input = [70, 0, 0.7, 0.2, 237, 18, 28, 5.8, 2.5, 0.75]  # Example input
result = predict_custom_input(custom_input)
# Header line and result are emitted on separate lines
print("\nCustom Input Prediction Result:", result, sep="\n")


Custom Input Prediction Result:
Not a Liver Patient




In [None]:
import pickle
import numpy as np

In [None]:
# Restore the Random Forest that the training cell saved to Random_Forest.pkl.
# NOTE: unpickling can execute code embedded in the file, so only load pickle
# files that you created yourself or otherwise trust.
with open('Random_Forest.pkl', 'rb') as model_file:
    model = pickle.load(model_file)

In [None]:
# Define a function to predict liver disease using custom input values
def predict_liver_disease(custom_input):
    """
    Classify a single patient record with the loaded Random Forest `model`.

    No scaling happens inside this function: the values are handed to the model
    as given. The model was trained on standardised features, so callers are
    expected to pass values that already went through the fitted scaler.

    Parameters:
        custom_input (array-like): Feature values for one patient. The input is
            reshaped into a single row, whatever its original shape.

    Returns:
        str: "Liver Patient" when the predicted class is 1, otherwise
        "Not a Liver Patient".
    """
    # The model expects a 2D (n_samples, n_features) input; build one row
    single_row = np.array(custom_input).reshape(1, -1)

    predicted_class = model.predict(single_row)[0]

    return "Liver Patient" if predicted_class == 1 else "Not a Liver Patient"

# Example record - replace the values with your actual test case.
# Feature order:
#   Age, Gender (0: Female, 1: Male), Total Bilirubin, Direct Bilirubin,
#   Alkaline Phosphotase, Alamine Aminotransferase, Aspartate Aminotransferase,
#   Total Proteins, Albumin, Albumin/Globulin Ratio
custom_input = [60, 1, 0.6, 0.1, 186, 0, 20, 6.2, 3.3, 1.1]

# Standardise the record with the fitted scaler before it reaches the model
custom_input_scaled = scaler.transform([custom_input])
# (calling model.predict(custom_input_scaled) directly is the equivalent raw call)

# Classify the scaled record and report the human-readable label
result = predict_liver_disease(custom_input_scaled)
print("Prediction Result:", result)

Prediction Result: Not a Liver Patient




In [None]:
from sklearn.preprocessing import StandardScaler
import pickle

# Persist the fitted scaler so new inputs can be standardised with the same
# statistics that were used for the training data
with open('scaler.pkl', 'wb') as scaler_out:
    pickle.dump(scaler, scaler_out)

# Read it straight back in, replacing the in-memory scaler.
# NOTE: only unpickle files you created yourself or otherwise trust.
with open('scaler.pkl', 'rb') as scaler_in:
    scaler = pickle.load(scaler_in)

# Standardise the example record with the reloaded scaler and classify it
custom_input_scaled = scaler.transform([custom_input])
prediction = model.predict(custom_input_scaled)




In [None]:
# Apply scaling to custom input
# Same two steps as the end of the previous cell: standardise the example record
# with the scaler, then predict its class with the loaded model.
custom_input_scaled = scaler.transform([custom_input])
prediction = model.predict(custom_input_scaled)