In [None]:
import pandas as pd
from sklearn.metrics import roc_auc_score

from src.data_loader import DataLoader
from src.preprocessor_dropna import DropNaPreprocessor
from src.preprocessor_fillmean import FillMeanPreprocessor
from src.features import BMIFeature, AgeGroupFeature
from src.model import Model


: 

In [None]:
# Build a loader for the sample CSV and unpack its train/test split.
loader = DataLoader("data/sample_diabetes_mellitus_data.csv")
train_df, test_df = loader.split_data()

# Report the shape of each split, train first.
for split_label, split_frame in (("Train shape:", train_df), ("Test shape:", test_df)):
    print(split_label, split_frame.shape)


In [None]:

# Preprocess both splits in two stages: DropNaPreprocessor, then
# FillMeanPreprocessor. Within each stage the train split is processed
# before the test split.
# NOTE(review): each split is passed through `process` on its own; if
# FillMeanPreprocessor derives fill values from the frame it is given, the
# test split is filled from test statistics — confirm this is intended.
dropper = DropNaPreprocessor()
train_df, test_df = dropper.process(train_df), dropper.process(test_df)

filler = FillMeanPreprocessor()
train_df, test_df = filler.process(train_df), filler.process(test_df)


In [None]:
# Feature engineering: apply the BMI feature, then the age-group feature,
# to the train split and afterwards to the test split.
bmi_feat = BMIFeature()
age_feat = AgeGroupFeature()

for feature in (bmi_feat, age_feat):
    train_df = feature.transform(train_df)

for feature in (bmi_feat, age_feat):
    test_df = feature.transform(test_df)

# Preview the source columns next to the engineered ones on the train split.
feature_preview = train_df[["height", "weight", "BMI", "age", "age_group"]].head()
print(feature_preview)


In [None]:
# Fit the model on the train split, score the test split, and report ROC-AUC.
feature_cols = ["BMI", "age"]
target_col = "diabetes"

model = Model(
    feature_cols=feature_cols,
    target_col=target_col,
    max_iter=1000,
)
model.train(train_df)

# Store the model output next to the test rows and preview the first few.
test_df["predictions"] = model.predict(test_df)
prediction_preview = test_df[["BMI", "age", "predictions"]].head()
print(prediction_preview)

# ROC-AUC of the stored predictions against the true target column.
# NOTE(review): roc_auc_score ranks samples by score, so this is most
# informative when Model.predict returns probabilities/scores rather than
# hard class labels — confirm against src.model.
y_true = test_df[target_col]
y_score = test_df["predictions"]
auc = roc_auc_score(y_true, y_score)
print("ROC-AUC score:", auc)

