In [1]:
import datetime
import os

import numpy as np
import pandas as pd
from joblib import dump
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.callbacks import CSVLogger, TensorBoard
from tensorflow.keras.layers import Dense, Input
from tensorflow.keras.models import Sequential
from tensorflow.keras.utils import set_random_seed


In [2]:
# Each run gets its own timestamped directory under logs/fit/ so that
# successive runs do not write into the same location.
run_stamp = f"{datetime.datetime.now():%Y%m%d-%H%M%S}"
log_dir = f"logs/fit/{run_stamp}"

# Create the directory (and any missing parents); a pre-existing one is fine.
os.makedirs(log_dir, exist_ok=True)


In [3]:
# CSV file to load, given relative to the notebook's working directory.
dataset = "pima-indians-diabetes.data.csv"

# Column labels applied to the file; 'Outcome' is used as the target
# further down, everything else as a feature.
column_names = [
    'Pregnancies',
    'Glucose',
    'BloodPressure',
    'SkinThickness',
    'Insulin',
    'BMI',
    'DiabetesPedigreeFunction',
    'Age',
    'Outcome',
]

# Because explicit names are supplied, the first line of the file is read
# as data rather than as a header (spelled out here with header=None).
df = pd.read_csv(dataset, header=None, names=column_names)

# Preview the first rows as the cell output.
df.head()

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1


In [4]:
# Separate the label column from the feature columns; `df` itself is not
# modified because drop() returns a new frame.
target_column = 'Outcome'
X = df.drop(columns=[target_column])
y = df[target_column]

In [5]:
# Hold out 20% of the rows for testing; the fixed random_state makes the
# partition repeatable from run to run.
split_parts = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)
X_train, X_test, y_train, y_test = split_parts

In [7]:
# Standardise the features. The scaler is fitted on the training split only
# and then applied unchanged to the test split, so no test-set statistics
# leak into the preprocessing.
# NOTE: X_train / X_test are overwritten with the scaled arrays; re-run the
# split cell before re-running this one, otherwise the scaler would be
# re-fitted on already-scaled data.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# dump() writes to the given path but does not create missing parent
# directories, so make sure 'core/' exists first (same pattern as log_dir).
os.makedirs('core', exist_ok=True)

# Persist the fitted scaler so the same transformation can be re-applied
# outside this notebook. Kept as the last expression so the written path is
# shown as the cell output.
dump(scaler, 'core/scaler.joblib')

['core/scaler.joblib']

In [8]:
# Seed the Python, NumPy and backend RNGs before any weights are created so
# that a Restart & Run All yields the same initialisation each time.
set_random_seed(42)

# Small fully-connected binary classifier: 8 input features -> 16 -> 8 -> 1.
# The input shape is declared with an explicit Input layer instead of the
# `input_shape=` argument on the first Dense layer, which current Keras
# versions warn against for Sequential models.
model = Sequential([
    Input(shape=(8,)),
    Dense(16, activation='relu'),
    Dense(8, activation='relu'),
    # Single sigmoid unit: the output is a value in [0, 1] that the later
    # cells treat as the probability of class 1.
    Dense(1, activation='sigmoid')
])


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [9]:
# Configure training: binary cross-entropy loss (matching the single
# sigmoid output), the Adam optimizer with its default settings, and
# accuracy as the reported metric.
model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)


In [10]:
# TensorBoard callback writing into this run's log directory.
tensorboard_cb = TensorBoard(
    log_dir=log_dir,
    histogram_freq=5,
    write_graph=True,
    write_images=True,
    update_freq='epoch',
    profile_batch=2,
)

# Plain-text record of the per-epoch metrics, stored next to the
# TensorBoard files in the same run directory.
csv_logger_cb = CSVLogger(f'{log_dir}/training_log.csv')

# Order matches the original list: TensorBoard first, then the CSV logger.
callbacks = [tensorboard_cb, csv_logger_cb]

In [11]:
# Train for 150 epochs in mini-batches of 16 with the callbacks defined
# earlier attached.
# NOTE: the test split is also passed as validation data, so the val_*
# metrics logged here are computed on the same rows that model.evaluate()
# scores afterwards.
history = model.fit(
    x=X_train,
    y=y_train,
    validation_data=(X_test, y_test),
    epochs=150,
    batch_size=16,
    callbacks=callbacks,
    verbose=1,
)


Epoch 1/150
[1m39/39[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 20ms/step - accuracy: 0.4473 - loss: 0.7339 - val_accuracy: 0.6623 - val_loss: 0.6761
Epoch 2/150
[1m39/39[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.6289 - loss: 0.6669 - val_accuracy: 0.7208 - val_loss: 0.6379
Epoch 3/150
[1m39/39[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.6851 - loss: 0.6260 - val_accuracy: 0.7208 - val_loss: 0.6026
Epoch 4/150
[1m39/39[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.6953 - loss: 0.5846 - val_accuracy: 0.7532 - val_loss: 0.5726
Epoch 5/150
[1m39/39[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.7107 - loss: 0.5474 - val_accuracy: 0.7532 - val_loss: 0.5465
Epoch 6/150
[1m39/39[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - accuracy: 0.7208 - loss: 0.5199 - val_accuracy: 0.7727 - val_loss: 0.5293
Epoch 7/150
[1m39/39[0m [32m━━

In [12]:
# Score the trained model on the held-out split. The result is unpacked as
# (loss, accuracy), matching the single metric given to compile().
evaluation = model.evaluate(X_test, y_test, verbose=0)
loss, accuracy = evaluation

# Report both numbers rounded to two decimals, one per line.
print(
    f"\nTest Accuracy: {accuracy:.2f}",
    f"Test Loss: {loss:.2f}",
    sep="\n",
)



Test Accuracy: 0.70
Test Loss: 0.65


In [13]:
# One hand-written record, with values in the same order as the feature
# columns of X.
sample_data = np.array([[1, 85, 66, 29, 0, 26.6, 0.351, 31]])

# The scaler was fitted on a DataFrame, so give it a frame carrying the same
# column labels instead of a bare array. This keeps scikit-learn's
# feature-name check meaningful and avoids its mismatch warning.
sample_frame = pd.DataFrame(sample_data, columns=X.columns)
sample_scaled = scaler.transform(sample_frame)

prediction = model.predict(sample_scaled)

# The model emits one sigmoid value per row; take the scalar for our single
# row and turn it into a class label with an explicit 0.5 threshold. For
# values in [0, 1] this yields the same label as round(), but always as a
# plain int regardless of how the NumPy scalar type implements rounding.
probability = float(prediction[0][0])
predicted_class = int(probability > 0.5)

print(f"\nPrediction probability: {probability:.2f}")
print(f"Predicted class: {predicted_class} (0 = No diabetes, 1 = Diabetes)")


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 93ms/step

Prediction probability: 1.00
Predicted class: 1 (0 = No diabetes, 1 = Diabetes)


In [14]:
# Persist the trained network next to the saved scaler. The '.h5' suffix
# selects the HDF5 file format; the path is kept as-is because other code
# may load the model from this exact location.
model_path = 'core/diabetes_model.h5'
model.save(model_path)



tf.Tensor(
[[1. 2. 3.]
 [4. 5. 6.]], shape=(2, 3), dtype=float32)
