In [12]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score

# Load and prepare the dataset
df = pd.read_csv(r"C:\Users\Dhrumil\Desktop\diabetes-classifier\Backend\diabetes_disease_dataset_labeled.csv")
df.fillna(method='ffill', inplace=True)

# Label encoding
le_type = LabelEncoder()
df['Diabetes_Type_Label'] = le_type.fit_transform(df['Diabetes_Type_Label'])

le_heart = LabelEncoder()
le_kidney = LabelEncoder()
le_nerve = LabelEncoder()
le_eye = LabelEncoder()
le_complication = LabelEncoder()

df['Heart_Disease_'] = le_heart.fit_transform(df['Heart_Disease_Risk'])
df['Kidney_Issues'] = le_kidney.fit_transform(df['Kidney_Issues'])
df['Nerve_Damage'] = le_nerve.fit_transform(df['Nerve_Damage'])
df['Eye_Problems'] = le_eye.fit_transform(df['Eye_Problems'])
df['Diabetes_Complications'] = le_complication.fit_transform(df['Diabetes_Complications'])

# Drop unneeded columns
df.drop(columns=["Patient_ID"], inplace=True, errors="ignore")

# Feature columns
features = ['Age', 'BMI', 'Fasting_Glucose', 'HbA1c', 'C_Peptide', 'Insulin_Level', 'Autoantibody_Presence']

# Train models
X = df[features]

# Type model
y_type = df['Diabetes_Type_Label']
X_train, X_test, y_train, y_test = train_test_split(X, y_type, test_size=0.2, random_state=42)
type_model = XGBClassifier(n_estimators=50, max_depth=3, learning_rate=0.1, use_label_encoder=False, eval_metric='mlogloss')
type_model.fit(X_train, y_train)
type_acc = accuracy_score(y_test, type_model.predict(X_test))

# Complication stage model
y_comp = df['Diabetes_Complications']
comp_model = XGBClassifier(n_estimators=50, max_depth=3, learning_rate=0.1, use_label_encoder=False, eval_metric='mlogloss')
comp_model.fit(X, y_comp)
comp_acc = accuracy_score(y_comp, comp_model.predict(X))

# Sub complication models
models = {}
encoders = {
    "Heart_Disease_": le_heart,
    "Kidney_Issues": le_kidney,
    "Nerve_Damage": le_nerve,
    "Eye_Problems": le_eye
}
for col, encoder in encoders.items():
    y = df[col]
    model = XGBClassifier(n_estimators=50, max_depth=3, learning_rate=0.1, use_label_encoder=False, eval_metric='mlogloss')
    model.fit(X, y)
    models[col] = model

# Sample test case with strong signs of complications
sample_input = pd.DataFrame([{
    "Age": 60,
    "BMI": 33.0,
    "Fasting_Glucose": 210,
    "HbA1c": 9.5,
    "C_Peptide": 0.5,
    "Insulin_Level": 5.0,
    "Autoantibody_Presence": 1
}])

# Make predictions
pred_type = type_model.predict(sample_input)[0]
type_label = le_type.inverse_transform([pred_type])[0]

comp_probs = comp_model.predict_proba(sample_input)[0]
comp_label = le_complication.inverse_transform([np.argmax(comp_probs)])[0]

# Sub complication prediction
main_concerns = []
damage_probs = []
for col, model in models.items():
    probs = model.predict_proba(sample_input)[0]
    max_idx = np.argmax(probs)
    label = encoders[col].inverse_transform([max_idx])[0]
    prob = probs[max_idx] * 100
    damage_probs.append(prob)

    if "yes" in label.lower() or "present" in label.lower():
        main_concerns.append(col.replace('_', ' '))
    elif "moderate" in label.lower():
        main_concerns.append(f"{col.replace('_', ' ')}: {label}")

overall_damage = np.mean(damage_probs)

# Print final result
print(f"Diabetes Type: {type_label}")
print(f"Current Stage: {comp_label}\n")
print("Main Concerns:")
if main_concerns:
    for concern in main_concerns:
        print(concern)
else:
    print("No major complications predicted.")
print(f"\nOverall Damage Probability: {overall_damage:.1f}%")
print(f"Model Accuracy (Type Classification): {type_acc * 100:.2f}%")
print(f"Model Accuracy (Complication Risk): {comp_acc * 100:.2f}%")


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Diabetes Type: Type 2 Diabetes
Current Stage: Controlled

Main Concerns:
No major complications predicted.

Overall Damage Probability: 65.6%
Model Accuracy (Type Classification): 57.50%
Model Accuracy (Complication Risk): 85.50%
