<a href="https://colab.research.google.com/github/priya1804/THE-PIMA-INDIAN-DIABETES-DATABASE/blob/main/MODEL.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
# Import necessary libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from keras.models import Sequential
from keras.layers import LSTM, Dense, Conv1D, Flatten, MaxPooling1D
from keras.callbacks import EarlyStopping

# Load the dataset
dataset = pd.read_csv("diabetes.csv")

# Handle missing values (replace zeros with median values)
features_to_replace = ["Glucose", "BloodPressure", "SkinThickness", "Insulin", "BMI"]
dataset[features_to_replace] = dataset[features_to_replace].replace(0, np.nan)
dataset.fillna(dataset.median(), inplace=True)

# Split features and target
X = dataset.drop("Outcome", axis=1)
y = dataset["Outcome"]

# Standardize the features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Split into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

# Logistic Regression
log_reg = LogisticRegression(max_iter=1000, class_weight='balanced', random_state=42)
log_reg.fit(X_train, y_train)
log_reg_preds = log_reg.predict(X_test)

# Random Forest
rf = RandomForestClassifier(n_estimators=150, max_depth=10, class_weight='balanced', random_state=42)
rf.fit(X_train, y_train)
rf_preds = rf.predict(X_test)

# Naive Bayes
nb = GaussianNB()
nb.fit(X_train, y_train)
nb_preds = nb.predict(X_test)

# LSTM
X_train_lstm = X_train.reshape(X_train.shape[0], 1, X_train.shape[1])
X_test_lstm = X_test.reshape(X_test.shape[0], 1, X_test.shape[1])

lstm_model = Sequential([
    LSTM(64, input_shape=(X_train_lstm.shape[1], X_train_lstm.shape[2]), return_sequences=False),
    Dense(32, activation='relu'),
    Dense(1, activation='sigmoid')
])
lstm_model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)
lstm_model.fit(X_train_lstm, y_train, validation_split=0.2, epochs=50, batch_size=32, callbacks=[early_stopping], verbose=0)
lstm_preds = (lstm_model.predict(X_test_lstm) > 0.5).astype(int).flatten()

# CNN
X_train_cnn = X_train.reshape(X_train.shape[0], X_train.shape[1], 1)
X_test_cnn = X_test.reshape(X_test.shape[0], X_test.shape[1], 1)

cnn_model = Sequential([
    Conv1D(32, kernel_size=2, activation='relu', input_shape=(X_train_cnn.shape[1], 1)),
    MaxPooling1D(pool_size=2),
    Flatten(),
    Dense(64, activation='relu'),
    Dense(1, activation='sigmoid')
])
cnn_model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
cnn_model.fit(X_train_cnn, y_train, validation_split=0.2, epochs=50, batch_size=32, callbacks=[early_stopping], verbose=0)
cnn_preds = (cnn_model.predict(X_test_cnn) > 0.5).astype(int).flatten()

# Model Results
models = {
    "Logistic Regression": log_reg_preds,
    "Random Forest": rf_preds,
    "Naive Bayes": nb_preds,
    "LSTM": lstm_preds,
    "CNN": cnn_preds
}

for model_name, predictions in models.items():
    print(f"{model_name} Accuracy: {accuracy_score(y_test, predictions) * 100:.2f}%")
    print(f"{model_name} Classification Report:\n{classification_report(y_test, predictions)}")
    print(f"{model_name} Confusion Matrix:\n{confusion_matrix(y_test, predictions)}\n")

  super().__init__(**kwargs)


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step 
Logistic Regression Accuracy: 70.13%
Logistic Regression Classification Report:
              precision    recall  f1-score   support

           0       0.81      0.70      0.75        99
           1       0.57      0.71      0.63        55

    accuracy                           0.70       154
   macro avg       0.69      0.70      0.69       154
weighted avg       0.72      0.70      0.71       154

Logistic Regression Confusion Matrix:
[[69 30]
 [16 39]]

Random Forest Accuracy: 74.68%
Random Forest Classification Report:
              precision    recall  f1-score   support

           0       0.83      0.77      0.80        99
           1       0.63      0.71      0.67        55

    accuracy                           0.75       154
   macro avg       0.73      0.74      0.73       154
weighted avg       0.76      0.75      0.75       154

Random Forest Confusion Matrix:
[[76 23]
 [16 39]]

Naive Bayes Accu

In [4]:
def user_input_prediction():
    print("Enter the following details to predict diabetes:")
    try:
        Pregnancies = float(input("Number of Pregnancies: "))
        Glucose = float(input("Glucose Level: "))
        BloodPressure = float(input("Blood Pressure: "))
        SkinThickness = float(input("Skin Thickness: "))
        Insulin = float(input("Insulin Level: "))
        BMI = float(input("BMI: "))
        DiabetesPedigreeFunction = float(input("Diabetes Pedigree Function: "))
        Age = float(input("Age: "))

        user_data = np.array([[Pregnancies, Glucose, BloodPressure, SkinThickness, Insulin, BMI, DiabetesPedigreeFunction, Age]])
        user_data_scaled = scaler.transform(user_data)

        # Predict with each model
        print("\nPredictions:")
        print("Logistic Regression:", log_reg.predict(user_data_scaled)[0])
        print("Random Forest:", rf.predict(user_data_scaled)[0])
        print("Naive Bayes:", nb.predict(user_data_scaled)[0])

        # For LSTM
        user_data_lstm = user_data_scaled.reshape(1, 1, user_data_scaled.shape[1])
        print("LSTM:", int(lstm_model.predict(user_data_lstm) > 0.5))

        # For CNN
        user_data_cnn = user_data_scaled.reshape(1, user_data_scaled.shape[1], 1)
        print("CNN:", int(cnn_model.predict(user_data_cnn) > 0.5))
    except ValueError:
        print("Invalid input. Please enter numeric values.")


user_input_prediction()

Enter the following details to predict diabetes:
Number of Pregnancies: 2
Glucose Level: 120
Blood Pressure: 70
Skin Thickness: 20
Insulin Level: 85
BMI: 30.5
Diabetes Pedigree Function: 0.45
Age: 35

Predictions:
Logistic Regression: 0
Random Forest: 0
Naive Bayes: 0
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step
LSTM: 0
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step
CNN: 0


  print("LSTM:", int(lstm_model.predict(user_data_lstm) > 0.5))
  print("CNN:", int(cnn_model.predict(user_data_cnn) > 0.5))
