## Step 1



importing all the libraries used in this assignment


In [55]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split, KFold, cross_val_score
from sklearn.preprocessing import StandardScaler
from sklearn.naive_bayes import GaussianNB
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report
import pickle

## Step 2

pre processing the data(using standard scaler)

In [56]:
df = pd.read_csv("diabetes.csv")

X = df[['Age', 'Glucose', 'Insulin', 'BMI']]
y = df['Outcome']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)


## Step 3


Training Naive Bayes model

In [57]:
naive_bayes_model = GaussianNB()
naive_bayes_model.fit(X_train,y_train)
y_pred_nb=naive_bayes_model.predict(X_test)

## Step 4

Training MLP model(Using sklearnâ€™s MLPClassifier)

In [58]:
mlp_model = MLPClassifier(
    hidden_layer_sizes=(50,),
    activation='relu',
    solver='adam',
    max_iter=1000,
    random_state=42
)

mlp_model.fit(X_train, y_train)
y_pred_mlp = mlp_model.predict(X_test)

Training MLP model(custom implementation)

In [59]:
class SimplePerceptron:
    def __init__(self, learning_rate=0.01, epochs=1000):
        self.lr = learning_rate
        self.epochs = epochs

    def fit(self, X, y):
        self.weights = np.zeros(X.shape[1])
        self.bias = 0

        for _ in range(self.epochs):
            for idx, x_i in enumerate(X):
                linear_output = np.dot(x_i, self.weights) + self.bias
                y_pred = np.sign(linear_output)

                if y_pred != y[idx]:
                    self.weights += self.lr * y[idx] * x_i
                    self.bias += self.lr * y[idx]

    def predict(self, X):
        linear_output = np.dot(X, self.weights) + self.bias
        return np.sign(linear_output)


In [60]:
y_train_p = np.where(y_train == 0, -1, 1)
y_test_p  = np.where(y_test == 0, -1, 1)

custom_mlp = SimplePerceptron(learning_rate=0.01, epochs=1000)
custom_mlp.fit(X_train, y_train_p)

y_pred_custom = custom_mlp.predict(X_test)
y_pred_custom_binary = np.where(y_pred_custom == -1, 0, 1)


## Step 5

Evaluation of the model


In [61]:
import pandas as pd
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

metrics_table = pd.DataFrame({
    "Naive Bayes": [
        accuracy_score(y_test, y_pred_nb),
        precision_score(y_test, y_pred_nb),
        recall_score(y_test, y_pred_nb),
        f1_score(y_test, y_pred_nb),
        None
    ],
    "MLP Classifier": [
        accuracy_score(y_test, y_pred_mlp),
        precision_score(y_test, y_pred_mlp),
        recall_score(y_test, y_pred_mlp),
        f1_score(y_test, y_pred_mlp),
        None
    ],
    "Custom Perceptron": [
        accuracy_score(y_test, y_pred_custom_binary),
        precision_score(y_test, y_pred_custom_binary),
        recall_score(y_test, y_pred_custom_binary),
        f1_score(y_test, y_pred_custom_binary),
        None
    ]
}, index=["Accuracy", "Precision", "Recall", "F1 Score", " "])

metrics_table


Unnamed: 0,Naive Bayes,MLP Classifier,Custom Perceptron
Accuracy,0.746753,0.753247,0.727273
Precision,0.648148,0.660377,0.632653
Recall,0.636364,0.636364,0.563636
F1 Score,0.642202,0.648148,0.596154
,,,


## Step 6

saving the trained models

In [62]:
with open("naive_bayes_model.pkl", "wb") as f:
    pickle.dump(naive_bayes_model, f)


with open("mlp_model.pkl", "wb") as f:
    pickle.dump(mlp_model, f)

with open("custom_perceptron_model.pkl", "wb") as f:
    pickle.dump(custom_mlp, f)


## step 9

k fold validation for each model


In [63]:
kf = KFold(n_splits=5, shuffle=True, random_state=42)

nb_scores = cross_val_score(naive_bayes_model, X, y, cv=kf)
print("Naive Bayes CV Accuracy:", nb_scores.mean())

mlp_scores = cross_val_score(mlp_model, X, y, cv=kf)
print("MLP Classifier CV Accuracy:", mlp_scores.mean())

with open("scaler.pkl", "wb") as f:
    pickle.dump(scaler, f)

Naive Bayes CV Accuracy: 0.7539512774806892
MLP Classifier CV Accuracy: 0.6497580850522027


## Step 10

Analysis of the models

The MLP Classifier achieved the highest test accuracy (0.7532), slightly better than Naive Bayes (0.7467), with similar precision and F1-scores. However, Naive Bayes performed the best in cross-validation (0.7539), showing more stable and consistent behavior across different folds. The custom perceptron had the lowest metrics because it is a simple linear model and cannot capture the nonlinear relationships in the dataset. Overall, Naive Bayes is the most reliable model, while MLP gives the strongest performance on the test split.