In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, MinMaxScaler, LabelEncoder
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.optimizers import Adam
import tensorflow as tf




In [None]:
# 1. Load Data
try:
    df_diabetes = pd.read_csv('diabetes_data.csv')
    print("Diabetes data loaded successfully.")
except FileNotFoundError:
    print("File diabetes_data.csv not found.")
except Exception as e:
    print(f"An error occurred: {e}")


Diabetes data loaded successfully.


In [None]:
# 2. EDA
print(df_diabetes.head())
print(df_diabetes.info())
print(df_diabetes.isnull().sum()) 
print(df_diabetes['Diabetes'].value_counts()) 

    Age  Sex  HighChol  CholCheck   BMI  Smoker  HeartDiseaseorAttack  \
0   4.0  1.0       0.0        1.0  26.0     0.0                   0.0   
1  12.0  1.0       1.0        1.0  26.0     1.0                   0.0   
2  13.0  1.0       0.0        1.0  26.0     0.0                   0.0   
3  11.0  1.0       1.0        1.0  28.0     1.0                   0.0   
4   8.0  0.0       0.0        1.0  29.0     1.0                   0.0   

   PhysActivity  Fruits  Veggies  HvyAlcoholConsump  GenHlth  MentHlth  \
0           1.0     0.0      1.0                0.0      3.0       5.0   
1           0.0     1.0      0.0                0.0      3.0       0.0   
2           1.0     1.0      1.0                0.0      1.0       0.0   
3           1.0     1.0      1.0                0.0      3.0       0.0   
4           1.0     1.0      1.0                0.0      2.0       0.0   

   PhysHlth  DiffWalk  Stroke  HighBP  Diabetes  
0      30.0       0.0     0.0     1.0       0.0  
1       0.0     

In [None]:
# preprocessing

X_diabetes = df_diabetes.drop('Diabetes', axis=1)
y_diabetes = df_diabetes['Diabetes']

numerical_cols_diabetes = ['Age', 'BMI', 'GenHlth', 'MentHlth', 'PhysHlth']
binary_cols_diabetes = [col for col in X_diabetes.columns if col not in numerical_cols_diabetes]

for col in numerical_cols_diabetes:
    if X_diabetes[col].isnull().any():
        X_diabetes[col] = X_diabetes[col].fillna(X_diabetes[col].median())

scaler_diabetes = StandardScaler() 
X_diabetes[numerical_cols_diabetes] = scaler_diabetes.fit_transform(X_diabetes[numerical_cols_diabetes])

X_train_diabetes, X_test_diabetes, y_train_diabetes, y_test_diabetes = train_test_split(
    X_diabetes, y_diabetes, test_size=0.2, random_state=42, stratify=y_diabetes
)

print(f"Diabetes X_train shape: {X_train_diabetes.shape}")
print(f"Diabetes X_test shape: {X_test_diabetes.shape}")

Diabetes X_train shape: (56553, 17)
Diabetes X_test shape: (14139, 17)


In [None]:
#4. membaut model
model_diabetes = Sequential([
    Dense(128, activation='relu', input_shape=(X_train_diabetes.shape[1],)),
    Dropout(0.3),
    Dense(64, activation='relu'),
    Dropout(0.3),
    Dense(32, activation='relu'),
    Dense(1, activation='sigmoid') 
])

model_diabetes.compile(optimizer=Adam(learning_rate=0.001),
                       loss='binary_crossentropy',
                       metrics=['accuracy'])

print("\nDiabetes Model Summary:")
model_diabetes.summary()

print("\nTraining Diabetes Model...")
history_diabetes = model_diabetes.fit(
    X_train_diabetes, y_train_diabetes,
    epochs=50, 
    batch_size=32,
    validation_split=0.1,
    verbose=0 
)

print(f"Diabetes Training Accuracy: {history_diabetes.history['accuracy'][-1]:.4f}")
print(f"Diabetes Validation Accuracy: {history_diabetes.history['val_accuracy'][-1]:.4f}")



Diabetes Model Summary:
Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 128)               2304      
                                                                 
 dropout (Dropout)           (None, 128)               0         
                                                                 
 dense_1 (Dense)             (None, 64)                8256      
                                                                 
 dropout_1 (Dropout)         (None, 64)                0         
                                                                 
 dense_2 (Dense)             (None, 32)                2080      
                                                                 
 dense_3 (Dense)             (None, 1)                 33        
                                                                 
Total params: 12673 (49.50 KB)

In [None]:
# 7. Evaluasi
loss_diabetes, accuracy_diabetes = model_diabetes.evaluate(X_test_diabetes, y_test_diabetes, verbose=0)
print(f"\nDiabetes Test Loss: {loss_diabetes:.4f}")
print(f"Diabetes Test Accuracy: {accuracy_diabetes:.4f}")


Diabetes Test Loss: 0.5068
Diabetes Test Accuracy: 0.7462


In [None]:
#8 menyimpan model
import os
model_dir_diabetes = './saved_models/diabetes_model'
os.makedirs(model_dir_diabetes, exist_ok=True)
model_diabetes.save(model_dir_diabetes)
print(f"Diabetes model saved to {model_dir_diabetes}")

import joblib
scaler_diabetes_path = './saved_models/diabetes_scaler.pkl'
joblib.dump(scaler_diabetes, scaler_diabetes_path)
print(f"Diabetes scaler saved to {scaler_diabetes_path}")

INFO:tensorflow:Assets written to: ./saved_models/diabetes_model\assets


INFO:tensorflow:Assets written to: ./saved_models/diabetes_model\assets


Diabetes model saved to ./saved_models/diabetes_model
Diabetes scaler saved to ./saved_models/diabetes_scaler.pkl
