<a href="https://colab.research.google.com/github/mbilal1267/Machine-Learning-LAB/blob/main/KNN_and_Naive_Bayes.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import pandas as pd

# Naive Bayes Classifier Implementation
class NaiveBayes:
    """Gaussian Naive Bayes classifier for numeric feature matrices.

    Each feature is modelled, per class, as an independent normal
    distribution whose mean and variance are estimated from the training
    rows of that class.

    Parameters
    ----------
    var_smoothing : float, default 1e-9
        Fraction of the largest per-feature variance of the training data
        that is added to every per-class variance. This keeps the variance
        strictly positive when a feature is constant within a class (which
        would otherwise cause a division by zero). Pass 0 to disable.
    """

    def __init__(self, var_smoothing=1e-9):
        self.var_smoothing = var_smoothing

    def fit(self, X_train, y_train):
        """Estimate per-class feature means, variances and class priors.

        Parameters
        ----------
        X_train : array-like of shape (n_samples, n_features)
        y_train : array-like of shape (n_samples,)
        """
        X_train = np.asarray(X_train, dtype=float)
        y_train = np.asarray(y_train)

        self.classes = np.unique(y_train)
        self.mean = {}
        self.variance = {}
        self.prior = {}

        # Variance floor, scaled to the data so it is negligible for
        # well-behaved features. Fall back to a scale of 1.0 when every
        # feature is constant over the whole training set.
        largest_variance = float(np.var(X_train, axis=0).max())
        epsilon = self.var_smoothing * (largest_variance if largest_variance > 0 else 1.0)

        for cls in self.classes:
            X_cls = X_train[y_train == cls]
            self.mean[cls] = np.mean(X_cls, axis=0)
            self.variance[cls] = np.var(X_cls, axis=0) + epsilon
            self.prior[cls] = X_cls.shape[0] / X_train.shape[0]

    def gaussian_density(self, class_idx, x):
        """Return the per-feature normal density of ``x`` under class ``class_idx``."""
        mean = self.mean[class_idx]
        var = self.variance[class_idx]
        numerator = np.exp(- (x - mean) ** 2 / (2 * var))
        denominator = np.sqrt(2 * np.pi * var)
        return numerator / denominator

    def _log_gaussian_density(self, class_idx, x):
        """Return the per-feature log-density of ``x`` under class ``class_idx``.

        Computed directly in log space: taking ``log`` of ``gaussian_density``
        underflows to ``log(0) = -inf`` for points far from the class mean,
        which makes all classes tie.
        """
        mean = self.mean[class_idx]
        var = self.variance[class_idx]
        x = np.asarray(x, dtype=float)
        return -0.5 * np.log(2 * np.pi * var) - (x - mean) ** 2 / (2 * var)

    def predict(self, X_test):
        """Score every sample against every class.

        Returns
        -------
        list of tuple
            One ``(sample, predicted_class, posteriors)`` tuple per sample,
            where ``posteriors`` maps each class to its unnormalized
            log-posterior (log prior + summed per-feature log-likelihood).
        """
        predictions = []
        for x in X_test:
            posteriors = {}
            for cls in self.classes:
                prior = np.log(self.prior[cls])
                conditional = np.sum(self._log_gaussian_density(cls, x))
                posteriors[cls] = prior + conditional
            predictions.append((x, max(posteriors, key=posteriors.get), posteriors))
        return predictions

    def predict_classes(self, X_test):
        """Return only the predicted class label for each sample, as a list."""
        return [prediction for _, prediction, _ in self.predict(X_test)]

# Load the dataset
file_path = '/content/diabetes.xlsx'
data = pd.read_excel(file_path)

# Display column names
print("Columns in the dataset:", data.columns)

# Split data and perform classification for different split ratios.
# Each entry is (train fraction, test fraction); only the test fraction is
# passed to train_test_split, the train fraction is used for display.
splits = [(0.7, 0.3), (0.8, 0.2)]
X = data.drop('Outcome', axis=1).values
y = data['Outcome'].values

for split_ratio in splits:
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=split_ratio[1], random_state=42)

    # Naive Bayes predictions: one (sample, predicted class, scores) tuple per test row
    nb = NaiveBayes()
    nb.fit(X_train, y_train)
    nb_results = nb.predict(X_test)

    # Output formatting: show the first five test rows only
    print(f"\nNaive Bayes Predictions with {split_ratio[0] * 100:.0f}% train and {split_ratio[1] * 100:.0f}% test split:")
    for i, (test_point, prediction, log_posteriors) in enumerate(nb_results[:5], 1):
        print(f"Test Point {i}: {test_point}")
        print(f"  Predicted Class: {prediction}")
        # The scores are log prior + summed log-likelihood, i.e. unnormalized
        # log-posteriors (hence negative), not probabilities.
        print(f"  Class Log-Posteriors (unnormalized): {log_posteriors}\n")

    # Calculate and display accuracy, reusing the predictions computed above
    # instead of classifying the whole test set a second time.
    y_pred = [prediction for _, prediction, _ in nb_results]
    accuracy = accuracy_score(y_test, y_pred)
    print(f"Accuracy: {accuracy * 100:.2f}%\n")

# User input prediction
def user_input_prediction(nb):
    """Prompt for one value per feature and print the Naive Bayes prediction.

    Reads the feature names from the module-level ``data`` frame, asks for a
    numeric value for each, and classifies the resulting single sample.

    Parameters
    ----------
    nb : fitted classifier exposing ``predict(samples)`` that returns
        ``(sample, predicted_class, scores)`` tuples.
    """
    # Select features by name, matching how the training matrix is built
    # (`data.drop('Outcome', axis=1)`), rather than assuming 'Outcome' is last.
    feature_columns = [column for column in data.columns if column != 'Outcome']

    user_input = []
    print("\n--- Predict Diabetes Outcome ---")
    for column in feature_columns:
        # Re-prompt on a typo instead of aborting the whole sequence.
        while True:
            try:
                value = float(input(f"Enter value for {column}: "))
                break
            except ValueError:
                print("  Invalid number, please try again.")
        user_input.append(value)

    # predict() returns (sample, class, scores); drop the echoed sample.
    prediction, log_posteriors = nb.predict([np.array(user_input)])[0][1:]
    print(f"\nUser Input: {user_input}")
    print(f"Predicted Class: {prediction}")
    # Scores are unnormalized log-posteriors, not probabilities.
    print(f"Class Log-Posteriors (unnormalized): {log_posteriors}")

# Call user input prediction function.
# NOTE: `nb` is the name left bound by the split loop above, so this uses the
# model fitted on the last entry of `splits` (the 80% train / 20% test split).
user_input_prediction(nb)


Columns in the dataset: Index(['Pregnancies', 'Glucose', 'BloodPressure', 'SkinThickness', 'Insulin',
       'BMI', 'DiabetesPedigreeFunction', 'Age', 'Outcome'],
      dtype='object')

Naive Bayes Predictions with 70% train and 30% test split:
Test Point 1: [  6.    98.    58.    33.   190.    34.     0.43  43.  ]
  Predicted Class: 0
  Class Probabilities: {0: -28.115894202944464, 1: -29.15233900512451}

Test Point 2: [  2.    112.     75.     32.      0.     35.7     0.148  21.   ]
  Predicted Class: 0
  Class Probabilities: {0: -27.308653393020844, 1: -30.06515998791621}

Test Point 3: [  2.    108.     64.      0.      0.     30.8     0.158  21.   ]
  Predicted Class: 0
  Class Probabilities: {0: -27.513058324447, 1: -31.07608137722258}

Test Point 4: [  8.    107.     80.      0.      0.     24.6     0.856  34.   ]
  Predicted Class: 0
  Class Probabilities: {0: -29.398533608635848, 1: -30.972797159262}

Test Point 5: [  7.   136.    90.     0.     0.    29.9    0.21  50.  ]
  Pr

In [None]:
# Function to implement KNN algorithm
def knn_predict(X_train, y_train, X_test, k):
    """Classify samples by majority vote among their k nearest neighbours.

    Neighbours are ranked by Euclidean distance. When several classes share
    the highest vote count, the smallest label (in sorted order) wins.

    Parameters
    ----------
    X_train : array-like of shape (n_train, n_features)
    y_train : array-like of shape (n_train,)
        Class labels; any dtype that ``np.unique`` can sort.
    X_test : array-like of shape (n_test, n_features) or (n_features,)
        A single 1-D sample is treated as one row.
    k : int
        Number of neighbours to consult. If it exceeds ``n_train``, all
        training points vote.

    Returns
    -------
    numpy.ndarray of shape (n_test,)
        Predicted labels, with the dtype of the labels in ``y_train``.

    Raises
    ------
    ValueError
        If ``k`` is smaller than 1 or the training set is empty.
    """
    # Cast to float so unsigned/narrow integer features cannot wrap around
    # in the subtraction below.
    X_train = np.asarray(X_train, dtype=float)
    X_test = np.atleast_2d(np.asarray(X_test, dtype=float))
    y_train = np.asarray(y_train).ravel()

    if k < 1:
        raise ValueError(f"k must be at least 1, got {k}")
    if X_train.shape[0] == 0:
        raise ValueError("X_train must contain at least one sample")

    # Calculate distances between test points and all training points;
    # broadcasting yields an (n_test, n_train, n_features) difference cube.
    distances = np.sqrt(((X_train - X_test[:, np.newaxis]) ** 2).sum(axis=2))
    # Find the indices of the k nearest neighbors
    nearest_neighbor_ids = np.argsort(distances, axis=1)[:, :k]

    # Encode labels as 0..n_classes-1 so bincount also works for negative,
    # fractional or string labels; `classes` is sorted, so argmax keeps the
    # smallest-label tie-break.
    classes, encoded_labels = np.unique(y_train, return_inverse=True)
    nearest_neighbor_codes = encoded_labels.reshape(-1)[nearest_neighbor_ids]

    # Predict the class by majority vote
    winners = [np.bincount(codes, minlength=len(classes)).argmax() for codes in nearest_neighbor_codes]
    return classes[np.array(winners, dtype=int)]

# Read the diabetes spreadsheet into a DataFrame
file_path = '/content/diabetes.xlsx'
data = pd.read_excel(file_path)

# Show which columns were loaded
print("Columns in the dataset:", data.columns)

# Every column except 'Outcome' is a feature; 'Outcome' is the label.
# Each entry of `splits` is (train fraction, test fraction).
splits = [(0.7, 0.3), (0.8, 0.2)]
X = data.drop('Outcome', axis=1).values
y = data['Outcome'].values

# Evaluate the hand-written KNN once per split ratio
for split_ratio in splits:
    X_train, X_test, y_train, y_test = train_test_split(
        X,
        y,
        test_size=split_ratio[1],
        random_state=42,
    )

    # The neighbour count is requested interactively for each split
    k = int(
        input(f"Enter the value of k for the KNN classifier (split {split_ratio[0] * 100:.0f}% train / {split_ratio[1] * 100:.0f}% test): ")
    )

    # Classify the held-out rows
    y_pred = knn_predict(X_train, y_train, X_test, k)

    # Accuracy is the share of test rows whose prediction equals the true label
    accuracy = (y_pred == y_test).mean()
    print(f"\nAccuracy of the KNN classifier with k={k} on split {split_ratio[0] * 100:.0f}% train / {split_ratio[1] * 100:.0f}% test: {accuracy * 100:.2f}%\n")

# Function to predict based on user input
def user_input_prediction_knn(X_train, y_train, k):
    """Prompt for one value per feature and print the KNN prediction.

    Reads the feature names from the module-level ``data`` frame, asks for a
    numeric value for each, and classifies the resulting single sample with
    ``knn_predict``.

    Parameters
    ----------
    X_train : training feature matrix passed through to ``knn_predict``.
    y_train : training labels passed through to ``knn_predict``.
    k : number of neighbours passed through to ``knn_predict``.
    """
    # Select features by name, matching how the training matrix is built
    # (`data.drop('Outcome', axis=1)`), rather than assuming 'Outcome' is last.
    feature_columns = [column for column in data.columns if column != 'Outcome']

    user_input = []
    print("\n--- Predict Diabetes Outcome using KNN ---")
    for column in feature_columns:
        # Re-prompt on a typo instead of aborting the whole sequence.
        while True:
            try:
                value = float(input(f"Enter value for {column}: "))
                break
            except ValueError:
                print("  Invalid number, please try again.")
        user_input.append(value)

    # knn_predict expects a 2-D (n_samples, n_features) array.
    user_input = np.array(user_input).reshape(1, -1)
    # One row in, one label out: take it as a scalar so the comparison below
    # does not depend on the truth value of a 1-element array.
    user_prediction = knn_predict(X_train, y_train, user_input, k)[0]
    class_name = 'Class 1' if user_prediction == 1 else 'Class 0'
    print(f"\nUser Input: {user_input.flatten()}")
    print(f"The KNN classifier predicts: {class_name}")

# Call user input prediction function.
# NOTE: `X_train`, `y_train` and `k` are the names left bound by the split
# loop above, i.e. the training data and k of the last entry of `splits`
# (the 80% train / 20% test split).
user_input_prediction_knn(X_train, y_train, k)


Columns in the dataset: Index(['Pregnancies', 'Glucose', 'BloodPressure', 'SkinThickness', 'Insulin',
       'BMI', 'DiabetesPedigreeFunction', 'Age', 'Outcome'],
      dtype='object')

Accuracy of the KNN classifier with k=3 on split 70% train / 30% test: 67.53%

