# data prep

In [2]:
import pandas as pd
import numpy as np

In [3]:
df = pd.read_csv("data/diabetes.csv")

# delete broken data
df = df.dropna(how='any',axis=0)
df

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1
...,...,...,...,...,...,...,...,...,...
763,10,101,76,48,180,32.9,0.171,63,0
764,2,122,70,27,0,36.8,0.340,27,0
765,5,121,72,23,112,26.2,0.245,30,0
766,1,126,60,0,0,30.1,0.349,47,1


In [4]:
OUTPUT_KEY = "Outcome"

X = df.drop(OUTPUT_KEY, axis=1).values
y = df[OUTPUT_KEY].values

In [None]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=1)
X_train

array([[  9.   , 145.   ,  80.   , ...,  37.9  ,   0.637,  40.   ],
       [ 10.   , 129.   ,  62.   , ...,  41.2  ,   0.441,  38.   ],
       [  7.   , 102.   ,  74.   , ...,  37.2  ,   0.204,  45.   ],
       ...,
       [ 13.   , 126.   ,  90.   , ...,  43.4  ,   0.583,  42.   ],
       [  4.   , 171.   ,  72.   , ...,  43.6  ,   0.479,  26.   ],
       [  9.   , 102.   ,  76.   , ...,  32.9  ,   0.665,  46.   ]])

In [14]:
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler().set_output(transform="pandas")
scaled_X_train = scaler.fit_transform(X_train)
scaled_X_test = scaler.fit_transform(X_test)

In [18]:
scaled_X_test

Unnamed: 0,x0,x1,x2,x3,x4,x5,x6,x7
0,0.983349,0.499364,0.221374,0.163148,0.475490,-0.754147,0.414461,1.556484
1,-0.837522,0.951906,-0.568830,-1.461839,-0.755266,-0.742774,-0.942882,-0.952995
2,0.679871,-0.315212,-0.568830,0.225648,-0.755266,-0.867875,-0.864574,-0.520327
3,-0.230565,-1.763347,0.672920,0.288147,-0.755266,0.201165,-0.757262,1.123815
4,-0.837522,-0.104026,0.447147,0.350647,0.885742,0.394502,-0.023485,-0.693394
...,...,...,...,...,...,...,...,...
149,0.679871,-0.616907,-0.568830,-0.274348,-0.262964,-0.651792,-0.020585,-0.087658
150,-0.230565,-0.134195,-0.230171,0.975642,0.521073,0.621957,-1.026991,-0.433793
151,1.286828,2.068176,0.447147,-1.461839,-0.755266,1.736488,-1.064695,0.864214
152,0.376392,0.499364,0.672920,-1.461839,-0.755266,-3.711065,0.394159,3.114093


# model

In [83]:
import tensorflow as tf

model = tf.keras.Sequential([
    tf.keras.layers.Dense(units=64, activation="relu"),
    tf.keras.layers.Dense(units=32, activation="relu"),
    tf.keras.layers.Dense(units=16, activation="relu"),
    tf.keras.layers.Dense(units=1, activation="sigmoid"),
])

model.compile(loss=tf.keras.losses.BinaryCrossentropy(), 
              metrics=[tf.keras.metrics.BinaryAccuracy(threshold=0.5)],
              optimizer=tf.keras.optimizers.Adam(learning_rate=0.001))

model.fit(X_test, y_test, epochs=100)

Epoch 1/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 16ms/step - binary_accuracy: 0.6167 - loss: 2.0807
Epoch 2/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step - binary_accuracy: 0.5342 - loss: 1.2265
Epoch 3/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step - binary_accuracy: 0.5004 - loss: 1.1277
Epoch 4/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step - binary_accuracy: 0.5607 - loss: 0.9492 
Epoch 5/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 28ms/step - binary_accuracy: 0.5932 - loss: 0.8103
Epoch 6/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step - binary_accuracy: 0.6396 - loss: 0.6695 
Epoch 7/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step - binary_accuracy: 0.6708 - loss: 0.6695
Epoch 8/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 30ms/step - binary_accuracy: 0.7064 - loss: 0.582

<keras.src.callbacks.history.History at 0x2b338239950>

In [84]:
model.metrics_names

['loss', 'compile_metrics']

In [None]:
model.evaluate(X_train, y_train)

[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - binary_accuracy: 0.7028 - loss: 0.6842


[0.6560473442077637, 0.7096354365348816]

In [98]:
model.evaluate(X_test, y_test)

[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step - binary_accuracy: 0.8222 - loss: 0.3834


[0.38905516266822815, 0.8311688303947449]

In [99]:
X_test.shape

(154, 8)

In [100]:
X_test[0]

array([  7.   , 136.   ,  74.   ,  26.   , 135.   ,  26.   ,   0.647,
        51.   ])

In [101]:
scaled_X_train.shape

(614, 8)

In [102]:
y_pred = model.predict(X_test)

[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step


In [103]:
y_pred

array([[6.59188926e-01],
       [7.19164386e-02],
       [4.46042120e-01],
       [5.92083950e-03],
       [2.12237343e-01],
       [3.06795925e-01],
       [3.94669473e-01],
       [8.07694998e-03],
       [1.32606521e-01],
       [2.19937578e-01],
       [3.60820115e-01],
       [7.17202201e-02],
       [9.96671617e-01],
       [8.48078012e-01],
       [1.92976296e-02],
       [6.25312090e-01],
       [4.89822358e-01],
       [1.33247986e-01],
       [2.09310725e-01],
       [2.44919151e-01],
       [4.29665387e-01],
       [2.76991159e-01],
       [9.91867602e-01],
       [5.34698248e-01],
       [2.30095908e-01],
       [6.07780337e-01],
       [2.05202907e-01],
       [7.54174709e-01],
       [7.24168792e-02],
       [6.26578331e-01],
       [2.33496264e-01],
       [3.95095646e-02],
       [2.09370460e-02],
       [2.54818290e-01],
       [2.22947463e-01],
       [4.19410884e-01],
       [1.49950281e-01],
       [1.50732890e-01],
       [2.47531980e-02],
       [2.21986353e-01],


# choosing threshold

In [None]:


THRESHOLD = 0.5

for i, pred in enumerate(y_pred):
    if (pred > THRESHOLD) != y_test[i]:
        print(X_test[i], pred, y_test[i])

[  7.    136.     74.     26.    135.     26.      0.647  51.   ] [0.6591889] 0
[  1.    115.     70.     30.     96.     34.6     0.529  32.   ] [0.48982236] 1
[  4.    125.     70.     18.    122.     28.9     1.144  45.   ] [0.4296654] 1
[ 5.    85.    74.    22.     0.    29.     1.224 32.   ] [0.27699116] 1
[  5.    128.     80.      0.      0.     34.6     0.144  45.   ] [0.60778034] 0
[  2.    118.     80.      0.      0.     42.9     0.693  21.   ] [0.23349626] 1
[0.00e+00 1.81e+02 8.80e+01 4.40e+01 5.10e+02 4.33e+01 2.22e-01 2.60e+01] [0.2548183] 1
[  1.    144.     82.     46.    180.     46.1     0.335  46.   ] [0.41941088] 1
[  1.    128.     98.     41.     58.     32.      1.321  33.   ] [0.21579204] 1
[ 4.    95.    64.     0.     0.    32.     0.161 31.   ] [0.44837016] 1
[ 11.    111.     84.     40.      0.     46.8     0.925  45.   ] [0.22092961] 1
[  0.    118.     84.     47.    230.     45.8     0.551  31.   ] [0.17261736] 1
[  0.    123.     72.      0.      0.  