In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.naive_bayes import GaussianNB
from sklearn.linear_model import Perceptron
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
import pickle


In [None]:
pip install scikit-learn

In [None]:
# Read the diabetes data and discard every row that contains a missing value
df = pd.read_csv("diabetes.csv").dropna()

# Predictor columns fed to the models, and the label column they must predict
X = df.loc[:, ['Age', 'Glucose', 'Insulin', 'BMI']]
y = df.loc[:, 'Outcome']

In [None]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


In [None]:
# Learn the scaling statistics from the training rows only, then apply that
# same transformation to both partitions so no test information leaks in
scaler = StandardScaler().fit(X_train)
X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)


In [None]:
# Train a Gaussian Naive Bayes classifier on the scaled training rows,
# then label the held-out rows with it
nb_model = GaussianNB().fit(X_train, y_train)
nb_predictions = nb_model.predict(X_test)

In [None]:
# Score the Naive Bayes predictions on the held-out rows.
# Precision, recall and F1 are all averaged with per-class support weighting.
nb_accuracy = accuracy_score(y_test, nb_predictions)
nb_precision, nb_recall, nb_f1 = (
    scorer(y_test, nb_predictions, average='weighted')
    for scorer in (precision_score, recall_score, f1_score)
)

print("Naive Bayes Metrics:")
print(f"Accuracy: {nb_accuracy:.2f}, Precision: {nb_precision:.2f}, Recall: {nb_recall:.2f}, F1 Score: {nb_f1:.2f}")

In [None]:
import numpy as np

def unit_step_function(x):
    """Heaviside-style activation: 1 where ``x`` is strictly positive, else 0.

    Works element-wise on arrays as well as on scalars; note that an input of
    exactly 0 maps to 0.
    """
    is_positive = np.greater(x, 0)
    return np.where(is_positive, 1, 0)

class CustomPerceptron:
    """Binary perceptron classifier trained with the online perceptron rule.

    Parameters
    ----------
    learning_rate : float, default=0.001
        Step size applied to every weight/bias correction.
    n_iter : int, default=5000
        Maximum number of passes (epochs) over the training data.

    Attributes
    ----------
    weights : numpy.ndarray or None
        One coefficient per feature; ``None`` until ``fit`` has been called.
    bias : float or None
        Intercept term; ``None`` until ``fit`` has been called.
    """

    def __init__(self, learning_rate=0.001, n_iter=5000):
        self.learning_rate = learning_rate
        self.n_iter = n_iter
        self.activation_function = unit_step_function
        self.weights = None
        self.bias = None

    def fit(self, X, y):
        """Learn weights and bias from the training data.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Training features. NumPy arrays and pandas DataFrames are both
            accepted; the input is converted to a float array first, so rows
            (not DataFrame column labels) are iterated.
        y : array-like of shape (n_samples,)
            Training labels. Any value greater than 0 is treated as class 1,
            everything else as class 0.
        """
        X = np.asarray(X, dtype=float)
        # Binarize the labels and drop any pandas index so that samples and
        # targets are paired purely by position.
        y_ = np.where(np.asarray(y) > 0, 1, 0)

        # Start from all-zero weights, one per feature
        _, n_features = X.shape
        self.weights = np.zeros(n_features)
        self.bias = 0.0

        for _ in range(self.n_iter):
            updated = False
            for x_i, target in zip(X, y_):
                linear_output = np.dot(x_i, self.weights) + self.bias
                error = target - self.activation_function(linear_output)

                # Only misclassified samples move the decision boundary
                if error != 0:
                    step = self.learning_rate * error
                    self.weights += step * x_i
                    self.bias += step
                    updated = True

            # An epoch without a single correction means every later epoch
            # would be a no-op as well, so stopping here cannot change the
            # learned parameters.
            if not updated:
                break

    def predict(self, X):
        """Return the predicted class (0 or 1) for every row of ``X``.

        The linear score ``X @ weights + bias`` is passed through the
        activation function (a unit step by default).
        """
        linear_output = np.dot(np.asarray(X, dtype=float), self.weights) + self.bias
        return self.activation_function(linear_output)


In [None]:
if __name__ == "__main__":
    # matplotlib is only needed for the plot produced by this cell
    import matplotlib.pyplot as plt

    def accuracy(y_true, y_pred):
        """Return the fraction of predictions that equal the true labels."""
        # Compare by position so a pandas index on either side cannot interfere
        n_correct = np.sum(np.asarray(y_true) == np.asarray(y_pred))
        return n_correct / len(y_true)

    # Rebuild the hold-out split and scaling from the cleaned frame so this
    # cell always starts from unscaled features.
    X = df[['Age', 'Glucose', 'Insulin', 'BMI']]
    y = df['Outcome']
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)

    # Train the hand-written perceptron and score it on the held-out rows
    p = CustomPerceptron(learning_rate=0.01, n_iter=1000)
    p.fit(X_train, y_train)
    predictions = p.predict(X_test)

    print("Perceptron classification accuracy", accuracy(y_test, predictions))

    # Scatter the first two standardized features (Age, Glucose), colored by label
    fig, ax = plt.subplots()
    ax.scatter(X_train[:, 0], X_train[:, 1], marker="o", c=y_train)
    ax.set_xlabel("Age (standardized)")
    ax.set_ylabel("Glucose (standardized)")
    ax.set_title("Custom perceptron: decision boundary in the Age/Glucose plane")

    x0_1 = np.amin(X_train[:, 0])
    x0_2 = np.amax(X_train[:, 0])

    # The model has four weights; the line drawn here is the slice of the
    # decision hyperplane where the two remaining standardized features
    # (Insulin, BMI) are 0, i.e. w0*x0 + w1*x1 + b = 0.
    if p.weights[1] != 0:
        x1_1 = (-p.weights[0] * x0_1 - p.bias) / p.weights[1]
        x1_2 = (-p.weights[0] * x0_2 - p.bias) / p.weights[1]
        ax.plot([x0_1, x0_2], [x1_1, x1_2], "k")
    elif p.weights[0] != 0:
        # A zero Glucose weight makes the boundary vertical in this plane
        ax.axvline(-p.bias / p.weights[0], color="k")

    ymin = np.amin(X_train[:, 1])
    ymax = np.amax(X_train[:, 1])
    ax.set_ylim([ymin - 3, ymax + 3])

    plt.show()


In [None]:
# Fit scikit-learn's Perceptron on the scaled training rows
perceptron_model = Perceptron().fit(X_train, y_train)

# Label the held-out rows with each classifier
nb_predictions = nb_model.predict(X_test)
perceptron_predictions = perceptron_model.predict(X_test)

# The three support-weighted scores reported next to plain accuracy
weighted_scorers = (precision_score, recall_score, f1_score)

# Naive Bayes scores
nb_accuracy = accuracy_score(y_test, nb_predictions)
nb_precision, nb_recall, nb_f1 = (
    scorer(y_test, nb_predictions, average='weighted')
    for scorer in weighted_scorers
)

# Perceptron scores
perceptron_accuracy = accuracy_score(y_test, perceptron_predictions)
perceptron_precision, perceptron_recall, perceptron_f1 = (
    scorer(y_test, perceptron_predictions, average='weighted')
    for scorer in weighted_scorers
)

print("Naive Bayes Metrics:")
print(f"Accuracy: {nb_accuracy:.2f}, Precision: {nb_precision:.2f}, Recall: {nb_recall:.2f}, F1 Score: {nb_f1:.2f}")

print("\nPerceptron Metrics:")
print(f"Accuracy: {perceptron_accuracy:.2f}, Precision: {perceptron_precision:.2f}, Recall: {perceptron_recall:.2f}, F1 Score: {perceptron_f1:.2f}")


In [None]:
# Save Naive Bayes model
with open("naive_bayes_model.pkl", "wb") as file:
    pickle.dump(nb_model, file)

# Save Perceptron model
with open("perceptron_model_.pkl", "wb") as file:
    pickle.dump(perceptron_model, file)

# Both models were trained on standardized features, so the fitted scaler is
# saved alongside them; without it the pickled models cannot be applied to
# raw feature values later on.
with open("scaler.pkl", "wb") as file:
    pickle.dump(scaler, file)

print("Models saved successfully.")


In [None]:
from sklearn.model_selection import KFold

# Define k for k-fold cross-validation
k = 5
kf = KFold(n_splits=k, shuffle=True, random_state=42)

# Initialize lists to store the per-fold results
nb_accuracy, nb_precision, nb_recall, nb_f1 = [], [], [], []
perceptron_accuracy, perceptron_precision, perceptron_recall, perceptron_f1 = [], [], [], []

# KFold yields positional row indices. Plain arrays are indexed by position,
# whereas DataFrame[...] would look the indices up as column labels and
# Series[...] as index labels (which have gaps once rows were dropped).
X_values = np.asarray(X, dtype=float)
y_values = np.asarray(y)

# Perform k-fold cross-validation
for train_index, test_index in kf.split(X_values):
    # Standardize inside each fold, fitting the scaler on the training part
    # only, to mirror the preprocessing of the hold-out evaluation without
    # leaking test statistics.
    fold_scaler = StandardScaler()
    X_fold_train = fold_scaler.fit_transform(X_values[train_index])
    X_fold_test = fold_scaler.transform(X_values[test_index])
    y_fold_train, y_fold_test = y_values[train_index], y_values[test_index]

    # Fresh estimators per fold, so the hold-out models fitted (and pickled)
    # earlier are left untouched.

    # Naive Bayes Model Evaluation
    nb_fold_model = GaussianNB().fit(X_fold_train, y_fold_train)
    nb_y_pred = nb_fold_model.predict(X_fold_test)

    nb_accuracy.append(accuracy_score(y_fold_test, nb_y_pred))
    nb_precision.append(precision_score(y_fold_test, nb_y_pred, average='weighted', zero_division=1))
    nb_recall.append(recall_score(y_fold_test, nb_y_pred, average='weighted', zero_division=1))
    nb_f1.append(f1_score(y_fold_test, nb_y_pred, average='weighted', zero_division=1))

    # Perceptron Model Evaluation
    perceptron_fold_model = Perceptron().fit(X_fold_train, y_fold_train)
    perceptron_y_pred = perceptron_fold_model.predict(X_fold_test)

    perceptron_accuracy.append(accuracy_score(y_fold_test, perceptron_y_pred))
    perceptron_precision.append(precision_score(y_fold_test, perceptron_y_pred, average='weighted', zero_division=1))
    perceptron_recall.append(recall_score(y_fold_test, perceptron_y_pred, average='weighted', zero_division=1))
    perceptron_f1.append(f1_score(y_fold_test, perceptron_y_pred, average='weighted', zero_division=1))

# Report the mean of every metric across the folds
print("Naive Bayes Model Performance (k-fold):")
print(f"Accuracy: {np.mean(nb_accuracy):.2f}, Precision: {np.mean(nb_precision):.2f}, Recall: {np.mean(nb_recall):.2f}, F1 Score: {np.mean(nb_f1):.2f}")

print("\nPerceptron Model Performance (k-fold):")
print(f"Accuracy: {np.mean(perceptron_accuracy):.2f}, Precision: {np.mean(perceptron_precision):.2f}, Recall: {np.mean(perceptron_recall):.2f}, F1 Score: {np.mean(perceptron_f1):.2f}")
