In [1]:
pip install scikit-learn


Note: you may need to restart the kernel to use updated packages.


In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report
import joblib

df = pd.read_csv("parkinsons - parkinsons.csv")

df = df.drop(columns=["name"], errors="ignore")



X = df.drop(["status"], axis=1)
y = df["status"]

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)


scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)


model = SVC(kernel="rbf", probability=True)
model.fit(X_train_scaled, y_train)

y_pred = model.predict(X_test_scaled)
acc = accuracy_score(y_test, y_pred)

print(f"Parkinsons Accuracy: {acc:.2f}")
print(classification_report(y_test, y_pred))


joblib.dump(model, "parkinsons_model.joblib")
joblib.dump(scaler, "parkinsons_scaler.joblib")

print("Parkinsons model saved successfully!")


Parkinsons Accuracy: 0.90
              precision    recall  f1-score   support

           0       1.00      0.43      0.60         7
           1       0.89      1.00      0.94        32

    accuracy                           0.90        39
   macro avg       0.94      0.71      0.77        39
weighted avg       0.91      0.90      0.88        39

Parkinsons model saved successfully!


In [4]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
import joblib

df = pd.read_csv("kidney_disease - kidney_disease.csv")

df.columns = df.columns.str.strip()

df = df.dropna()

X = df.drop("classification", axis=1)
y = df["classification"].replace({"ckd": 1, "notckd": 0})

cat_cols = X.select_dtypes(include=["object"]).columns
num_cols = X.select_dtypes(exclude=["object"]).columns


preprocessor = ColumnTransformer([
    ("cat", OneHotEncoder(handle_unknown="ignore"), cat_cols),
    ("num", StandardScaler(), num_cols)
])

model = RandomForestClassifier(n_estimators=100, random_state=42)


pipeline = Pipeline([
    ("prep", preprocessor),
    ("clf", model)
])


X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

pipeline.fit(X_train, y_train)


y_pred = pipeline.predict(X_test)
acc = accuracy_score(y_test, y_pred)

print(f"Kidney Disease Accuracy: {acc:.2f}")
print(classification_report(y_test, y_pred))

joblib.dump(pipeline, "kidney_model.joblib")
print("Kidney model saved successfully!")

  y = df["classification"].replace({"ckd": 1, "notckd": 0})


Kidney Disease Accuracy: 1.00
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        23
           1       1.00      1.00      1.00         9

    accuracy                           1.00        32
   macro avg       1.00      1.00      1.00        32
weighted avg       1.00      1.00      1.00        32

Kidney model saved successfully!


In [5]:
pip install xgboost

Note: you may need to restart the kernel to use updated packages.


In [6]:
pip install imbalanced-learn

Note: you may need to restart the kernel to use updated packages.


In [7]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.compose import ColumnTransformer
from sklearn.metrics import accuracy_score, classification_report
from xgboost import XGBClassifier
from imblearn.over_sampling import SMOTE
import joblib


df = pd.read_csv("indian_liver_patient - indian_liver_patient.csv")
df = df.dropna()

df.rename(columns={"Dataset": "disease"}, inplace=True)
df["disease"] = df["disease"].replace({2: 0, 1: 1})

X = df.drop("disease", axis=1)
y = df["disease"]

cat_cols = ["Gender"]
num_cols = X.columns.drop(cat_cols)

preprocessor = ColumnTransformer([
    ("cat", OneHotEncoder(drop="first"), cat_cols),
    ("num", StandardScaler(), num_cols)
])

X_pre = preprocessor.fit_transform(X)

sm = SMOTE(random_state=42)
X_resampled, y_resampled = sm.fit_resample(X_pre, y)


X_train, X_test, y_train, y_test = train_test_split(
    X_resampled, y_resampled, test_size=0.25, random_state=42
)

pos_weight = (y_train == 0).sum() / (y_train == 1).sum()
print("scale_pos_weight:", pos_weight)


model = XGBClassifier(
    n_estimators=320,
    max_depth=4,
    learning_rate=0.8,
    subsample=0.8,
    colsample_bytree=0.9,
    eval_metric="logloss",
    scale_pos_weight=pos_weight,

    random_state=42
)

model.fit(X_train, y_train)

y_pred = model.predict(X_test)

print("Accuracy:", accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred))

joblib.dump(model, "liver_xgb_recall_model.joblib")
joblib.dump(preprocessor, "liver_preprocessor.joblib")

print("Recall-optimized model saved!")


scale_pos_weight: 0.9528301886792453
Accuracy: 0.8454106280193237
              precision    recall  f1-score   support

           0       0.84      0.88      0.86       111
           1       0.86      0.80      0.83        96

    accuracy                           0.85       207
   macro avg       0.85      0.84      0.84       207
weighted avg       0.85      0.85      0.84       207

Recall-optimized model saved!
