<h1>Diabetes Prediction Model - Artificial Neural Network</h1>

In [None]:
%pip install tensorflow scikit-learn numpy pandas

In [2]:
import pandas as pd

# Load the raw dataset; the path is resolved relative to the working directory.
dataset_path = 'diabetes_prediction_dataset.csv'
df = pd.read_csv(dataset_path)

In [None]:
# Preview the first rows of the raw dataset.
df.head()

In [None]:
# Summary statistics (pandas' default describe() output) for a quick sanity check of value ranges.
df.describe()

In [None]:
# Count missing values per column. The raw boolean mask returned by
# df.isnull() is as large as the dataset itself and cannot be read at a glance.
df.isnull().sum()

In [None]:
# Column dtypes and non-null counts before any encoding is applied.
df.info()

In [7]:
## One-hot encode the categorical columns (gender & smoking_history).
## drop_first=True omits the first level of each column, which then acts as
## the implicit baseline category.
df = pd.get_dummies(data=df, drop_first=True)

In [None]:
# Re-check the schema after one-hot encoding to see the generated dummy columns.
df.info()

In [9]:
# Feature matrix: every column except the target.
X = df.drop(columns=["diabetes"])

In [10]:
# Target vector: the diabetes label column.
y = df.loc[:, "diabetes"]

In [11]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for testing. stratify=y keeps the proportion of
# diabetic / non-diabetic rows the same in both splits, so the test metrics are
# not skewed by an unlucky draw; random_state makes the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)

In [12]:
from sklearn.preprocessing import StandardScaler

# Feature scaler with default settings; it is fitted on the training split in the next cell
# and reused later for the unseen patient data.
scaler = StandardScaler()

In [13]:
# Learn the scaling statistics from the training split only, then apply the
# same transformation to both splits so no test information leaks into the fit.
# NOTE: X_train / X_test are rebound to the scaled results here, so this cell
# is meant to run once per kernel session.
scaler.fit(X_train)
X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)

<h5>Building Neural Network</h5>

In [14]:
## Importing Neural Network libraries
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

In [None]:
## Building Neural Network
# Seed Python, NumPy and TensorFlow so weight initialisation is repeatable
# between runs (same seed value as the train/test split).
tf.keras.utils.set_random_seed(42)

model = Sequential([
    # Explicit Input object declaring the number of features per sample; this
    # replaces the deprecated `input_shape=` argument on the first Dense layer.
    tf.keras.Input(shape=(X_train.shape[1],)),
    Dense(32, activation='relu'),    # first hidden layer
    Dense(16, activation='relu'),    # second hidden layer
    Dense(1, activation='sigmoid'),  # output layer: sigmoid for binary classification
])

In [16]:
## Compiling the model
# Binary cross-entropy pairs with the single sigmoid output unit; accuracy is
# the only extra metric tracked alongside the loss.
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

In [None]:
## Training model
# validation_split=0.2 holds back 20% of the training data for per-epoch
# validation; the returned History object is kept for the accuracy plot.
history = model.fit(
    X_train,
    y_train,
    validation_split=0.2,
    epochs=30,
    batch_size=32,
    verbose=1,
)

<h5>Evaluating Model</h5>

In [None]:
## Evaluating model
# evaluate() returns the loss followed by the metrics passed to compile(),
# i.e. [binary cross-entropy, accuracy] on the held-out test split.
test_loss, test_accuracy = model.evaluate(X_test, y_test)

# Binary cross-entropy is an unbounded loss value, not a ratio, so it is
# reported as a plain number; only accuracy is meaningful as a percentage.
print(f'Test Loss: {test_loss:.4f}, Test Accuracy: {test_accuracy * 100:.2f}%')

<h5>Making predictions on new data</h5>

In [19]:
# Six hand-written patient records used to demonstrate inference. Each tuple
# is one patient, with fields in the order given by `patient_fields`.
patient_fields = [
    "gender",
    "age",
    "hypertension",
    "heart_disease",
    "smoking_history",
    "bmi",
    "HbA1c_level",
    "blood_glucose_level",
]
patient_rows = [
    ("Male", 50, 0, 1, "former", 28.5, 6.3, 140),
    ("Female", 42, 1, 0, "never", 31.2, 5.9, 120),
    ("Male", 63, 1, 1, "current", 34.0, 7.2, 160),
    ("Female", 56, 0, 0, "former", 29.4, 5.7, 110),
    ("Male", 38, 0, 0, "never", 25.8, 5.6, 100),
    ("Female", 36, 1, 1, "never", 29.71, 10.3, 130),
]
unseen_data = pd.DataFrame(patient_rows, columns=patient_fields)

In [None]:
# Inspect dtypes of the hand-written records before encoding them.
unseen_data.info()

In [21]:
# One-hot encode the new records WITHOUT drop_first. drop_first removes the
# first level *present in this frame*, which for this small batch is
# smoking_history "current" rather than the baseline level dropped from the
# training data. The later reindex step would then fill that column with 0 and
# silently encode the current smoker as the baseline category. Keeping every
# dummy here and letting the reindex select the training columns avoids that.
unseen_data = pd.get_dummies(unseen_data, drop_first=False)

In [27]:
# Use the exact feature columns (names and order) the scaler and model were
# trained on. Deriving them from X keeps this step in sync with the training
# schema instead of relying on a hand-maintained copy of the column list.
columns = X.columns.tolist()

# Align the new records to the training layout: dummy columns that do not occur
# in this small batch are added as all-zero, and columns the model never saw
# are discarded.
unseen_data = unseen_data.reindex(columns=columns, fill_value=0)

In [28]:
# Apply the scaler fitted on the training split (no re-fitting) to the new
# records, with columns selected in the training order.
unseen_features = unseen_data.loc[:, columns]
unseen_data_scaled = scaler.transform(unseen_features)

In [None]:
## Making predictions: the model's sigmoid output for each scaled patient record
predictions = model.predict(unseen_data_scaled)

In [None]:
# Turn the sigmoid outputs into hard 0/1 labels: a value strictly above the
# threshold is labelled 1 (diabetic), anything else 0.
decision_threshold = 0.5
above_threshold = predictions > decision_threshold
binary_predictions = above_threshold.astype(int)

In [None]:
# Report one line per patient, numbered from 1 in input order.
for patient_number, label in enumerate(binary_predictions, start=1):
    status = 'Diabetic' if label == 1 else 'Non-Diabetic'
    print(f"Patient {patient_number} is {status}")

In [None]:
import matplotlib.pyplot as plt

# Training vs. validation accuracy per epoch, drawn on an explicit Axes object.
fig, ax = plt.subplots()
accuracy_curves = (
    ('accuracy', 'Training Accuracy'),
    ('val_accuracy', 'Validation Accuracy'),
)
for metric_key, curve_label in accuracy_curves:
    ax.plot(history.history[metric_key], label=curve_label)
ax.set_xlabel('Epochs')
ax.set_ylabel('Accuracy')
ax.legend()
ax.set_title('Model Accuracy')
plt.show()