In [1]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
import pandas as pd
from keras_visualizer import visualizer
from keras import models, layers
from keras.callbacks import EarlyStopping
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import numpy as np

import warnings
warnings.filterwarnings('ignore')

In [2]:
df = pd.read_csv('diabetes.csv')

In [3]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 768 entries, 0 to 767
Data columns (total 9 columns):
 #   Column                    Non-Null Count  Dtype  
---  ------                    --------------  -----  
 0   Pregnancies               768 non-null    int64  
 1   Glucose                   768 non-null    int64  
 2   BloodPressure             768 non-null    int64  
 3   SkinThickness             768 non-null    int64  
 4   Insulin                   768 non-null    int64  
 5   BMI                       768 non-null    float64
 6   DiabetesPedigreeFunction  768 non-null    float64
 7   Age                       768 non-null    int64  
 8   Outcome                   768 non-null    int64  
dtypes: float64(2), int64(7)
memory usage: 54.1 KB


In [4]:
# Example columns: ['Pregnancies', 'Glucose', 'BloodPressure', 'SkinThickness', 
# 'Insulin', 'BMI', 'DiabetesPedigreeFunction', 'Age', 'Outcome']
# Split data into features and labels
X = df.iloc[:, :-1].values  # Features (all columns except the last one)
y = df.iloc[:, -1].values   # Labels (last column)

# Scale features for better model performance
scaler = StandardScaler()
X = scaler.fit_transform(X)

# Split data into training and validation sets
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Define the model
model = Sequential()
model.add(Dense(12, input_dim=X.shape[1], activation='relu'))  # Number of input features = X.shape[1]
model.add(Dense(8, activation='relu'))
model.add(Dense(1, activation='sigmoid'))  # Sigmoid for binary classification

# Compile the model
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# Early stopping to prevent overfitting
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

# Train model
model.fit(X_train, y_train, validation_data=(X_val, y_val), epochs=100, batch_size=10, callbacks=[early_stopping])

Epoch 1/100
[1m62/62[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 12ms/step - accuracy: 0.6444 - loss: 0.6556 - val_accuracy: 0.6104 - val_loss: 0.6803
Epoch 2/100
[1m62/62[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.6907 - loss: 0.6163 - val_accuracy: 0.6494 - val_loss: 0.6277
Epoch 3/100
[1m62/62[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.7202 - loss: 0.5693 - val_accuracy: 0.6883 - val_loss: 0.5871
Epoch 4/100
[1m62/62[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.7497 - loss: 0.5210 - val_accuracy: 0.7403 - val_loss: 0.5612
Epoch 5/100
[1m62/62[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.7915 - loss: 0.4877 - val_accuracy: 0.7273 - val_loss: 0.5458
Epoch 6/100
[1m62/62[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.7354 - loss: 0.5087 - val_accuracy: 0.7403 - val_loss: 0.5381
Epoch 7/100
[1m62/62[0m [32m━━

<keras.src.callbacks.history.History at 0x2a3708c56a0>

In [5]:
visualizer(model, file_format='png', view=True)

In [6]:
# Evaluate the model on the training set
_, train_accuracy = model.evaluate(X_train, y_train, verbose=0)  # Set verbose=0 to suppress output
train_error = 1 - train_accuracy

# Evaluate the model on the validation/testing set
_, test_accuracy = model.evaluate(X_val, y_val, verbose=0)
test_error = 1 - test_accuracy

# Print the results
print(f'Training Accuracy: {train_accuracy * 100:.2f}%')
print(f'Training Error: {train_error * 100:.2f}%')

print(f'Testing Accuracy: {test_accuracy * 100:.2f}%')
print(f'Testing Error: {test_error * 100:.2f}%')

Training Accuracy: 78.01%
Training Error: 21.99%
Testing Accuracy: 75.32%
Testing Error: 24.68%


In [7]:
# Function to get user input
def get_user_input():
    print("Please enter the following details:")
    pregnancies = float(input("Number of pregnancies: "))
    glucose = float(input("Glucose level: "))
    blood_pressure = float(input("Blood pressure: "))
    skin_thickness = float(input("Skin thickness: "))
    insulin = float(input("Insulin level: "))
    bmi = float(input("BMI: "))
    diabetes_pedigree = float(input("Diabetes Pedigree Function: "))
    age = float(input("Age: "))
    
    # Store inputs in a NumPy array and scale it using the same scaler used for training
    user_data = np.array([[pregnancies, glucose, blood_pressure, skin_thickness, 
                           insulin, bmi, diabetes_pedigree, age]])
    user_data = scaler.transform(user_data)  # Scale the input
    return user_data

# Predict whether the user is diabetic or not
def predict_diabetes():
    user_data = get_user_input()
    prediction = model.predict(user_data)
    
    # Sigmoid output gives a probability, so we threshold at 0.5
    if prediction >= 0.5:
        print("The model predicts that you are diabetic.")
    else:
        print("The model predicts that you are not diabetic.")

# Call the prediction function
predict_diabetes()

Please enter the following details:


Number of pregnancies:  7
Glucose level:  56
Blood pressure:  89
Skin thickness:  5
Insulin level:  6
BMI:  20
Diabetes Pedigree Function:  5
Age:  56


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 119ms/step
The model predicts that you are not diabetic.
