In [1]:
import tensorflow as tf
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix
import pandas as pd


In [2]:
# Load the dataset
df = pd.read_csv('diabetes.csv')
print(df.head())

   Diabetes_binary  HighBP  HighChol  CholCheck   BMI  Smoker  Stroke  \
0              0.0     1.0       1.0        1.0  40.0     1.0     0.0   
1              0.0     0.0       0.0        0.0  25.0     1.0     0.0   
2              0.0     1.0       1.0        1.0  28.0     0.0     0.0   
3              0.0     1.0       0.0        1.0  27.0     0.0     0.0   
4              0.0     1.0       1.0        1.0  24.0     0.0     0.0   

   HeartDiseaseorAttack  PhysActivity  Fruits  ...  AnyHealthcare  \
0                   0.0           0.0     0.0  ...            1.0   
1                   0.0           1.0     0.0  ...            0.0   
2                   0.0           0.0     1.0  ...            1.0   
3                   0.0           1.0     1.0  ...            1.0   
4                   0.0           1.0     1.0  ...            1.0   

   NoDocbcCost  GenHlth  MentHlth  PhysHlth  DiffWalk  Sex   Age  Education  \
0          0.0      5.0      18.0      15.0       1.0  0.0   9.0   

In [3]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 253680 entries, 0 to 253679
Data columns (total 22 columns):
 #   Column                Non-Null Count   Dtype  
---  ------                --------------   -----  
 0   Diabetes_binary       253680 non-null  float64
 1   HighBP                253680 non-null  float64
 2   HighChol              253680 non-null  float64
 3   CholCheck             253680 non-null  float64
 4   BMI                   253680 non-null  float64
 5   Smoker                253680 non-null  float64
 6   Stroke                253680 non-null  float64
 7   HeartDiseaseorAttack  253680 non-null  float64
 8   PhysActivity          253680 non-null  float64
 9   Fruits                253680 non-null  float64
 10  Veggies               253680 non-null  float64
 11  HvyAlcoholConsump     253680 non-null  float64
 12  AnyHealthcare         253680 non-null  float64
 13  NoDocbcCost           253680 non-null  float64
 14  GenHlth               253680 non-null  float64
 15  

In [4]:
df.isnull().sum()

Diabetes_binary         0
HighBP                  0
HighChol                0
CholCheck               0
BMI                     0
Smoker                  0
Stroke                  0
HeartDiseaseorAttack    0
PhysActivity            0
Fruits                  0
Veggies                 0
HvyAlcoholConsump       0
AnyHealthcare           0
NoDocbcCost             0
GenHlth                 0
MentHlth                0
PhysHlth                0
DiffWalk                0
Sex                     0
Age                     0
Education               0
Income                  0
dtype: int64

In [5]:
df_drop = df.drop(['Fruits','Veggies', 'NoDocbcCost', 'Education', 'Income', 'DiffWalk', 'Sex'], axis=1)

In [6]:
df_drop.head()

Unnamed: 0,Diabetes_binary,HighBP,HighChol,CholCheck,BMI,Smoker,Stroke,HeartDiseaseorAttack,PhysActivity,HvyAlcoholConsump,AnyHealthcare,GenHlth,MentHlth,PhysHlth,Age
0,0.0,1.0,1.0,1.0,40.0,1.0,0.0,0.0,0.0,0.0,1.0,5.0,18.0,15.0,9.0
1,0.0,0.0,0.0,0.0,25.0,1.0,0.0,0.0,1.0,0.0,0.0,3.0,0.0,0.0,7.0
2,0.0,1.0,1.0,1.0,28.0,0.0,0.0,0.0,0.0,0.0,1.0,5.0,30.0,30.0,9.0
3,0.0,1.0,0.0,1.0,27.0,0.0,0.0,0.0,1.0,0.0,1.0,2.0,0.0,0.0,11.0
4,0.0,1.0,1.0,1.0,24.0,0.0,0.0,0.0,1.0,0.0,1.0,2.0,3.0,0.0,11.0


In [7]:
df_new = df_drop.dropna()

In [8]:
df_new.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 253680 entries, 0 to 253679
Data columns (total 15 columns):
 #   Column                Non-Null Count   Dtype  
---  ------                --------------   -----  
 0   Diabetes_binary       253680 non-null  float64
 1   HighBP                253680 non-null  float64
 2   HighChol              253680 non-null  float64
 3   CholCheck             253680 non-null  float64
 4   BMI                   253680 non-null  float64
 5   Smoker                253680 non-null  float64
 6   Stroke                253680 non-null  float64
 7   HeartDiseaseorAttack  253680 non-null  float64
 8   PhysActivity          253680 non-null  float64
 9   HvyAlcoholConsump     253680 non-null  float64
 10  AnyHealthcare         253680 non-null  float64
 11  GenHlth               253680 non-null  float64
 12  MentHlth              253680 non-null  float64
 13  PhysHlth              253680 non-null  float64
 14  Age                   253680 non-null  float64
dtype

In [9]:
X = df_new.drop("Diabetes_binary", axis=1).values
y = df_new['Diabetes_binary'].values

In [10]:
# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [23]:
# Define the model using TensorFlow and Keras
model = tf.keras.Sequential([
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dense(256, activation='relu'),
    tf.keras.layers.Dense(256, activation='relu'),
    tf.keras.layers.Dense(1, activation='sigmoid')
])

In [25]:
# Compile the model
model.compile(
             loss=tf.keras.losses.binary_crossentropy,
            optimizer="adam",
            metrics=[
                tf.keras.metrics.BinaryAccuracy(name='accuracy'),
                tf.keras.metrics.Precision(name='precision'),
                tf.keras.metrics.Recall(name='recall')
    ])

In [27]:
# Train the model
model.fit(X_train, y_train, epochs=15, batch_size=10, verbose=1)



Epoch 1/15
[1m20295/20295[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m44s[0m 2ms/step - accuracy: 0.8624 - loss: 0.3233 - precision: 0.5543 - recall: 0.1231
Epoch 2/15
[1m20295/20295[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m44s[0m 2ms/step - accuracy: 0.8631 - loss: 0.3211 - precision: 0.5555 - recall: 0.1162
Epoch 3/15
[1m20295/20295[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m45s[0m 2ms/step - accuracy: 0.8627 - loss: 0.3238 - precision: 0.5613 - recall: 0.1166
Epoch 4/15
[1m20295/20295[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m45s[0m 2ms/step - accuracy: 0.8635 - loss: 0.3210 - precision: 0.5674 - recall: 0.1309
Epoch 5/15
[1m20295/20295[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m46s[0m 2ms/step - accuracy: 0.8641 - loss: 0.3193 - precision: 0.5633 - recall: 0.1386
Epoch 6/15
[1m20295/20295[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m48s[0m 2ms/step - accuracy: 0.8648 - loss: 0.3184 - precision: 0.5571 - recall: 0.1270
Epoch 7/15
[1m20295/20295[

<keras.src.callbacks.history.History at 0x2076c107290>

In [28]:
model.evaluate(X_test, y_test)

[1m1586/1586[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 1ms/step - accuracy: 0.8673 - loss: 0.3153 - precision: 0.5902 - recall: 0.0918


[0.31736600399017334,
 0.8668597936630249,
 0.614800751209259,
 0.09261111915111542]

In [30]:
# Evaluate the model
loss, accuracy, precision, recall = model.evaluate(X_test, y_test)
print("Accuracy:", accuracy)
print("Precision:", precision)
print("Recall:", recall)


[1m1586/1586[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 1ms/step - accuracy: 0.8673 - loss: 0.3153 - precision: 0.5902 - recall: 0.0918
Accuracy: 0.8668597936630249
Precision: 0.614800751209259
Recall: 0.09261111915111542


In [33]:
# Make predictions on the test set
y_pred = model.predict(X_test)
y_pred = tf.round(y_pred)
y_pred 

[1m1586/1586[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 1ms/step


<tf.Tensor: shape=(50736, 1), dtype=float32, numpy=
array([[0.],
       [0.],
       [0.],
       ...,
       [0.],
       [0.],
       [0.]], dtype=float32)>

In [34]:
# Generate the Confusion Matrix
print("Perceptron Confusion Matrix:\n", confusion_matrix(y_test, y_pred))

Perceptron Confusion Matrix:
 [[43333   406]
 [ 6349   648]]
