In [17]:
import pandas as pd 
from sklearn.model_selection import train_test_split, StratifiedKFold, cross_val_score
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import (
    accuracy_score, precision_score, recall_score,
    f1_score, roc_auc_score, confusion_matrix
)


In [18]:
#load cleaned datasets
kidney_df = pd.read_csv("cleaned_kidney.csv")
liver_df = pd.read_csv("cleaned_liver.csv")
parkinsons_df = pd.read_csv("cleaned_parkinsons.csv")

In [19]:
# Drop non-predictive identifier column
if "name" in parkinsons_df.columns:
    parkinsons_df = parkinsons_df.drop(columns=["name"])

In [20]:
kidney_df = kidney_df.drop(columns=["id"])

In [21]:
from sklearn.pipeline import Pipeline

def train_evaluate_model(df, target_col, model_name="Disease"):
    X = df.drop(columns=[target_col])
    y = df[target_col]

    # Train-test split
    X_train, X_test, y_train, y_test = train_test_split(
        X, y,
        test_size=0.2,
        random_state=42,
        stratify=y
    )

    # Pipeline (NO leakage)
    pipe = Pipeline([
        ("scaler", StandardScaler()),
        ("model", LogisticRegression(max_iter=1000))
    ])

    # Train
    pipe.fit(X_train, y_train)

    # Predict
    y_pred = pipe.predict(X_test)
    y_prob = pipe.predict_proba(X_test)[:, 1]

    print(f"\nðŸ“Š {model_name} Model Evaluation")
    print("Accuracy :", accuracy_score(y_test, y_pred))
    print("Precision:", precision_score(y_test, y_pred))
    print("Recall   :", recall_score(y_test, y_pred))
    print("F1 Score :", f1_score(y_test, y_pred))
    print("ROC-AUC  :", roc_auc_score(y_test, y_prob))
    print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))

    # Proper Cross-Validation
    cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
    cv_scores = cross_val_score(
        pipe,
        X,
        y,
        cv=cv,
        scoring="roc_auc"
    )

    print("Cross-validation ROC-AUC:", cv_scores.mean())

    return pipe


In [22]:
parkinsons_model = train_evaluate_model(
    parkinsons_df,
    target_col="status",
    model_name="Parkinson's"
)



ðŸ“Š Parkinson's Model Evaluation
Accuracy : 0.9230769230769231
Precision: 0.9333333333333333
Recall   : 0.9655172413793104
F1 Score : 0.9491525423728814
ROC-AUC  : 0.9241379310344828
Confusion Matrix:
 [[ 8  2]
 [ 1 28]]
Cross-validation ROC-AUC: 0.904955300127714


In [23]:
kidney_model = train_evaluate_model(
    kidney_df,
    target_col="classification",
    model_name="Kidney Disease"
)
kidney_df


ðŸ“Š Kidney Disease Model Evaluation
Accuracy : 0.9875
Precision: 0.967741935483871
Recall   : 1.0
F1 Score : 0.9836065573770492
ROC-AUC  : 0.9986666666666667
Confusion Matrix:
 [[49  1]
 [ 0 30]]
Cross-validation ROC-AUC: 0.9998666666666667


Unnamed: 0,age,bp,sg,al,su,rbc,pc,pcc,ba,bgr,...,pcv,wc,rc,htn,dm,cad,appet,pe,ane,classification
0,48.0,80.0,1.020,1.0,0.0,1,1,0,0,121.000000,...,44.0,7800.0,5.200000,1,1,0,0,0,0,0
1,7.0,50.0,1.020,4.0,0.0,1,1,0,0,148.036517,...,38.0,6000.0,4.707435,0,0,0,0,0,0,0
2,62.0,80.0,1.010,2.0,3.0,1,1,0,0,423.000000,...,31.0,7500.0,4.707435,0,1,0,1,0,1,0
3,48.0,70.0,1.005,4.0,0.0,1,0,1,0,117.000000,...,32.0,6700.0,3.900000,1,0,0,1,1,1,0
4,51.0,80.0,1.010,2.0,0.0,1,1,0,0,106.000000,...,35.0,7300.0,4.600000,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
395,55.0,80.0,1.020,0.0,0.0,1,1,0,0,140.000000,...,47.0,6700.0,4.900000,0,0,0,0,0,0,1
396,42.0,70.0,1.025,0.0,0.0,1,1,0,0,75.000000,...,54.0,7800.0,6.200000,0,0,0,0,0,0,1
397,12.0,80.0,1.020,0.0,0.0,1,1,0,0,100.000000,...,49.0,6600.0,5.400000,0,0,0,0,0,0,1
398,17.0,60.0,1.025,0.0,0.0,1,1,0,0,114.000000,...,51.0,7200.0,5.900000,0,0,0,0,0,0,1


In [24]:
liver_model = train_evaluate_model(
    liver_df,
    target_col="Dataset",
    model_name="Liver Disease"
)


ðŸ“Š Liver Disease Model Evaluation
Accuracy : 0.6923076923076923
Precision: 0.7281553398058253
Recall   : 0.9036144578313253
F1 Score : 0.8064516129032258
ROC-AUC  : 0.7388377037562013
Confusion Matrix:
 [[75  8]
 [28  6]]
Cross-validation ROC-AUC: 0.7542101177820526


In [25]:
#pickle my models
import pickle
with open("kidney_model.pkl", "wb") as f:
    pickle.dump(kidney_model, f)
with open("liver_model.pkl", "wb") as f:
    pickle.dump(liver_model, f) 
with open("parkinsons_model.pkl", "wb") as f:
    pickle.dump(parkinsons_model, f)