In [1]:
from typing import List, Dict

import pandas as pd

import tensorflow as tf

import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

import os
import datetime

2024-12-18 16:47:48.634372: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-12-18 16:47:48.747967: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
data = pd.read_csv("./data/diabetes.csv")

In [3]:
X = data.drop('Outcome', axis = 1)
y = data['Outcome']

In [4]:
X.head()

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age
0,6,148,72,35,0,33.6,0.627,50
1,1,85,66,29,0,26.6,0.351,31
2,8,183,64,0,0,23.3,0.672,32
3,1,89,66,23,94,28.1,0.167,21
4,0,137,40,35,168,43.1,2.288,33


In [5]:
y.head()

0    1
1    0
2    1
3    0
4    1
Name: Outcome, dtype: int64

In [6]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.3, random_state = 0)

print(f"Number of samples in training set: {X_train.shape[0]}")
print(f"Number of samples in test set: {X_test.shape[0]}")

X_test, X_validation, y_test, y_validation = train_test_split(X_test,y_test, test_size = 0.3, random_state = 0)

print(f"Number of samples in test set: {X_test.shape[0]}")

Number of samples in training set: 537
Number of samples in test set: 231
Number of samples in test set: 161


In [7]:
tf.random.set_seed(10)
tf.keras.utils.set_random_seed(10)

model = tf.keras.Sequential([tf.keras.layers.Dense(20, activation='relu', input_shape=(8,)), tf.keras.layers.Dense(10, activation='relu'), tf.keras.layers.Dense(2, activation='softmax')])

In [8]:
log_dir = "logs/training/" + datetime.datetime.now().strftime("%Y-%m-%d_%H:%M:%S")
logging_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir)

In [9]:
epochs = 500

model.compile(optimizer = tf.keras.optimizers.Adam(learning_rate = 0.01), loss = 'sparse_categorical_crossentropy', metrics=['accuracy'])

In [10]:
model.fit(X_train, y_train, epochs=epochs, verbose=1, validation_data = (X_validation, y_validation), callbacks=[logging_callback])

Epoch 1/500
Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500
Epoch 9/500
Epoch 10/500
Epoch 11/500
Epoch 12/500
Epoch 13/500
Epoch 14/500
Epoch 15/500
Epoch 16/500
Epoch 17/500
Epoch 18/500
Epoch 19/500
Epoch 20/500
Epoch 21/500
Epoch 22/500
Epoch 23/500
Epoch 24/500
Epoch 25/500
Epoch 26/500
Epoch 27/500
Epoch 28/500
Epoch 29/500
Epoch 30/500
Epoch 31/500
Epoch 32/500
Epoch 33/500
Epoch 34/500
Epoch 35/500
Epoch 36/500
Epoch 37/500
Epoch 38/500
Epoch 39/500
Epoch 40/500
Epoch 41/500
Epoch 42/500
Epoch 43/500
Epoch 44/500
Epoch 45/500
Epoch 46/500
Epoch 47/500
Epoch 48/500
Epoch 49/500
Epoch 50/500
Epoch 51/500
Epoch 52/500
Epoch 53/500
Epoch 54/500
Epoch 55/500
Epoch 56/500
Epoch 57/500
Epoch 58/500
Epoch 59/500
Epoch 60/500
Epoch 61/500
Epoch 62/500
Epoch 63/500
Epoch 64/500
Epoch 65/500
Epoch 66/500
Epoch 67/500
Epoch 68/500
Epoch 69/500
Epoch 70/500
Epoch 71/500
Epoch 72/500
Epoch 73/500
Epoch 74/500
Epoch 75/500
Epoch 76/500
Epoch 77/500
Epoch 78

<keras.src.callbacks.History at 0x7f41d41537c0>

In [13]:
%load_ext tensorboard

The tensorboard extension is already loaded. To reload it, use:
  %reload_ext tensorboard


In [16]:
os.environ["TENSORBOARD_PROXY_URL"] = os.getenv("NB_PREFIX") + "/proxy/6006/"

print("TensorBoard URL:", os.environ["TENSORBOARD_PROXY_URL"])

TensorBoard URL: /notebook/model-trainning/tensorflow-trainning/proxy/6006/


In [17]:
%tensorboard --logdir logs/training

Reusing TensorBoard on port 6006 (pid 6253), started 0:06:41 ago. (Use '!kill 6253' to kill it.)

In [11]:
y_predicted_probabilities = model.predict(X_test)
y_predicted = tf.argmax(y_predicted_probabilities, axis = 1)

print("Classification Report:")
print(classification_report(y_test, y_predicted))

Classification Report:
              precision    recall  f1-score   support

           0       0.79      0.93      0.85       107
           1       0.72      0.45      0.55        47

    accuracy                           0.78       154
   macro avg       0.76      0.69      0.70       154
weighted avg       0.77      0.78      0.76       154



In [14]:
classes = ('No diabetes', 'Diabetes')

def predict(patients: List[Dict]):
    features_as_lists = [list(patient.values()) for patient in patients]
    inputs_array = np.array(features_as_lists)
    prediction_probabilities = model.predict(inputs_array, verbose=0)
    # argmax gets the index of the maximum value in an array
    predictions = [classes[np.argmax(p)] for p in prediction_probabilities]
    return predictions

diabetes_patient = {
    "Pregnancies": 6.0,
    "Glucose": 110.0,
    "BloodPressure": 65.0,
    "SkinThickness": 15.0,
    "Insulin": 1.0,
    "BMI": 45.7,
    "DiabetesPedigreeFunction": 0.627,
    "Age": 50
}

no_diabetes_patient = {
    "Pregnancies": 0,
    "Glucose": 88.0,
    "BloodPressure": 60.0,
    "SkinThickness": 35.0,
    "Insulin": 1.0,
    "BMI": 45.7,
    "DiabetesPedigreeFunction": 0.27,
    "Age": 20
}

predictions = predict([diabetes_patient, no_diabetes_patient])
print(predictions)

['Diabetes', 'No diabetes']
