In [1]:
from cruasan.load_data.load_data import load_data
from cruasan.data_cleanup.data_cleanup import fill_mean, drop_nan
from cruasan.features.features import gen_bin, gen_dummy
from cruasan.split_data.split_data import split_data
from cruasan.train_model.train_model import train_model, pred
from cruasan.score.score import roc_score

In [2]:
# Read the sample diabetes-mellitus CSV into the working frame `df`
data_file = 'sample_diabetes_mellitus_data.csv'
df = load_data(data_file)

In [3]:
# Remove missing values for the demographic columns.
# NOTE(review): drop_nan takes (columns, df) while fill_mean takes (df, column).
df = drop_nan(['age', 'gender', 'ethnicity'], df)

# Mean-fill the body-measurement columns, one fill_mean call per column
for measurement in ('height', 'weight'):
    df = fill_mean(df, measurement)

In [4]:
# Per-column count of values still missing after the cleanup step
df.isna().sum()

encounter_id                      0
hospital_id                       0
age                               0
bmi                            1622
elective_surgery                  0
ethnicity                         0
gender                            0
height                            0
hospital_admit_source           850
icu_admit_source                 21
icu_id                            0
icu_stay_type                     0
icu_type                          0
pre_icu_los_days                  0
readmission_status                0
weight                            0
albumin_apache                 6046
apache_2_diagnosis              177
apache_3j_diagnosis              49
apache_post_operative             0
arf_apache                        0
bilirubin_apache               6170
bun_apache                     2372
creatinine_apache              2247
fio2_apache                    6583
gcs_eyes_apache                  65
gcs_motor_apache                 65
gcs_unable_apache           

In [5]:
# Feature generation: gen_dummy runs on 'ethnicity' first, then gen_bin runs
# on 'gender' using the frame that gen_dummy returned.
df = gen_bin(gen_dummy(df, ['ethnicity']), ['gender'])

In [6]:
# Split data
# The target vector and the feature matrix are both given a fresh 0..n-1 index
# so they stay aligned label-for-label (the frame's index has gaps after rows
# were dropped during cleanup). The target column is removed from X so it can
# never be selected as a feature by mistake.
target = 'diabetes_mellitus'
y = df[target].reset_index(drop=True)
X = df.drop(columns=[target]).reset_index(drop=True)

# Guard against silent misalignment between features and labels
assert X.index.equals(y.index), "X and y must share the same index"

X_train, X_test, y_train, y_test = split_data(X, y)

In [7]:
# Fit the model on a fixed subset of feature columns.
# `cols` is reused by the prediction cell to select the same columns from X_test.
cols = [
    'age',
    'height',
    'weight',
    'aids',
    'cirrhosis',
    'hepatic_failure',
    'immunosuppression',
    'leukemia',
    'lymphoma',
    'solid_tumor_with_metastasis',
]
# train_model returns a pair that is rebound to X_train / y_train
X_train, y_train = train_model(X_train[cols], y_train)

In [8]:
# Predict for both partitions. The test frame is narrowed to the training
# columns at the call site; pred hands back both frames plus the predictions.
X_train, X_test, train_pred, test_pred = pred(X_train, X_test[cols])

In [9]:
# ROC AUC for the train and test partitions.
# Arguments are passed as (prediction, label) pairs: train first, then test.
score_train, score_test = roc_score(
    train_pred,
    y_train,
    test_pred,
    y_test,
)

In [10]:
# Display the train-partition score returned by roc_score
score_train

0.6702434388471388

In [11]:
# Display the test-partition score returned by roc_score
score_test

0.6731288040028379