In [43]:
# diabetes_training.py
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, roc_auc_score
from imblearn.over_sampling import SMOTE
import joblib

# Load dataset
df = pd.read_csv("diabetes.csv")

# Replace 0s with NaN for certain columns
cols = ["Glucose", "BloodPressure", "SkinThickness", "Insulin", "BMI"]
df[cols] = df[cols].replace(0, pd.NA)
df.fillna(df.median(), inplace=True)

# Features & target
X = df.drop("Outcome", axis=1)
y = df["Outcome"]

# Balance classes
smote = SMOTE()
X, y = smote.fit_resample(X, y)

# Split & scale
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Train model
model = RandomForestClassifier(random_state=42)
model.fit(X_train, y_train)

# Evaluate
y_pred = model.predict(X_test)
print(classification_report(y_test, y_pred))
print("ROC-AUC:", roc_auc_score(y_test, y_pred))

# Save
joblib.dump(model, "diabetes_model.pkl")
joblib.dump(scaler, "diabetes_scaler.pkl")


  df.fillna(df.median(), inplace=True)


              precision    recall  f1-score   support

           0       0.83      0.75      0.79        99
           1       0.77      0.85      0.81       101

    accuracy                           0.80       200
   macro avg       0.80      0.80      0.80       200
weighted avg       0.80      0.80      0.80       200

ROC-AUC: 0.7994799479947994


['diabetes_scaler.pkl']

**Heart Disease Prediction**

In [44]:
# heart_training.py
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, roc_auc_score
from imblearn.over_sampling import SMOTE
import joblib

df = pd.read_csv("heart.csv")

X = df.drop("target", axis=1)
y = df["target"]

# Balance classes
smote = SMOTE()
X, y = smote.fit_resample(X, y)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

model = RandomForestClassifier(random_state=42)
model.fit(X_train, y_train)

y_pred = model.predict(X_test)
print(classification_report(y_test, y_pred))
print("ROC-AUC:", roc_auc_score(y_test, y_pred))

joblib.dump(model, "heart_model.pkl")
joblib.dump(scaler, "heart_scaler.pkl")


              precision    recall  f1-score   support

           0       1.00      1.00      1.00        94
           1       1.00      1.00      1.00       117

    accuracy                           1.00       211
   macro avg       1.00      1.00      1.00       211
weighted avg       1.00      1.00      1.00       211

ROC-AUC: 1.0


['heart_scaler.pkl']

**Parkinson’s Disease Prediction**

In [45]:
# parkinson_training.py
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, roc_auc_score
import joblib

df = pd.read_csv("parkinsons.csv")

X = df.drop(["name","status"], axis=1)
y = df["status"]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

model = RandomForestClassifier(random_state=42)
model.fit(X_train, y_train)

y_pred = model.predict(X_test)
print(classification_report(y_test, y_pred))
print("ROC-AUC:", roc_auc_score(y_test, y_pred))

joblib.dump(model, "parkinson_model.pkl")
joblib.dump(scaler, "parkinson_scaler.pkl")


              precision    recall  f1-score   support

           0       1.00      0.71      0.83         7
           1       0.94      1.00      0.97        32

    accuracy                           0.95        39
   macro avg       0.97      0.86      0.90        39
weighted avg       0.95      0.95      0.95        39

ROC-AUC: 0.8571428571428572


['parkinson_scaler.pkl']

**Breast Cancer Prediction**

In [46]:
# breast_cancer_training.py
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, roc_auc_score
import joblib

df = pd.read_csv("breast_cancer.csv")

X = df.drop("diagnosis", axis=1)
y = df["diagnosis"].map({"M":1,"B":0})  # Malignant=1, Benign=0

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

model = RandomForestClassifier(random_state=42)
model.fit(X_train, y_train)

y_pred = model.predict(X_test)
print(classification_report(y_test, y_pred))
print("ROC-AUC:", roc_auc_score(y_test, y_pred))

joblib.dump(model, "breast_model.pkl")
joblib.dump(scaler, "breast_scaler.pkl")


  updated_mean = (last_sum + new_sum) / updated_sample_count
  T = new_sum / new_sample_count
  new_unnormalized_variance -= correction**2 / new_sample_count


              precision    recall  f1-score   support

           0       0.96      0.97      0.97        71
           1       0.95      0.93      0.94        43

    accuracy                           0.96       114
   macro avg       0.96      0.95      0.95       114
weighted avg       0.96      0.96      0.96       114

ROC-AUC: 0.9510317720275139


['breast_scaler.pkl']

**Chronic Kidney Disease (CKD) Prediction**

In [47]:
# ckd_training_full_safe.py
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, label_binarize
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, roc_auc_score
from imblearn.over_sampling import SMOTE
import joblib

# Load dataset
df = pd.read_csv("ckd.csv")

# Print columns to identify target
print("Columns:", df.columns)

# Encode yes/no to 1/0
df.replace({"yes": 1, "no": 0}, inplace=True)

# Fill numeric columns with median
numeric_cols = df.select_dtypes(include=['int64', 'float64']).columns
df[numeric_cols] = df[numeric_cols].fillna(df[numeric_cols].median())

# Fill categorical columns with mode
categorical_cols = df.select_dtypes(include=['object']).columns
for col in categorical_cols:
    df[col] = df[col].fillna(df[col].mode()[0])

# Encode categorical columns as numbers
for col in categorical_cols:
    df[col] = pd.factorize(df[col])[0]

# Set target column (update if your dataset uses a different name)
target_col = "classification"  # replace with your actual target column
X = df.drop(target_col, axis=1)
y = df[target_col]

# Check class distribution
class_counts = y.value_counts()
print("Class distribution:\n", class_counts)

# Apply SMOTE if minority class has enough samples
minority_class_count = class_counts.min()
if minority_class_count > 1:
    k_neighbors = min(5, minority_class_count - 1)
    smote = SMOTE(k_neighbors=k_neighbors, random_state=42)
    X, y = smote.fit_resample(X, y)
    print("SMOTE applied.")
else:
    print("SMOTE skipped due to very small minority class.")

# Split data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Scale features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Train Random Forest with balanced class weights
model = RandomForestClassifier(class_weight='balanced', random_state=42)
model.fit(X_train, y_train)

# Predict
y_pred = model.predict(X_test)

# Classification report
print(classification_report(y_test, y_pred))

# ROC-AUC handling multi-class
if len(y.unique()) == 2:
    auc = roc_auc_score(y_test, y_pred)
else:
    # Convert to one-hot for multi-class ROC-AUC
    y_test_bin = label_binarize(y_test, classes=list(y.unique()))
    y_pred_bin = label_binarize(y_pred, classes=list(y.unique()))
    auc = roc_auc_score(y_test_bin, y_pred_bin, multi_class='ovr')
print("ROC-AUC:", auc)

# Save model and scaler
joblib.dump(model, "models/ckd_model.pkl")
joblib.dump(scaler, "ckd_scaler.pkl")


Columns: Index(['id', 'age', 'bp', 'sg', 'al', 'su', 'rbc', 'pc', 'pcc', 'ba', 'bgr',
       'bu', 'sc', 'sod', 'pot', 'hemo', 'pcv', 'wc', 'rc', 'htn', 'dm', 'cad',
       'appet', 'pe', 'ane', 'classification'],
      dtype='object')
Class distribution:
 classification
0    248
2    150
1      2
Name: count, dtype: int64
SMOTE applied.


  df.replace({"yes": 1, "no": 0}, inplace=True)


              precision    recall  f1-score   support

           0       1.00      1.00      1.00        54
           1       1.00      1.00      1.00        48
           2       1.00      1.00      1.00        47

    accuracy                           1.00       149
   macro avg       1.00      1.00      1.00       149
weighted avg       1.00      1.00      1.00       149

ROC-AUC: 1.0


['ckd_scaler.pkl']