In [1]:
# Lab Assignment - 6
# Gaussian Naïve Bayes Classifier and GridSearchCV

# Importing Required Libraries
import numpy as np
import pandas as pd
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import GridSearchCV

# ------------------------------------------------------------
# Q1. Gaussian Naïve Bayes Classifier on the Iris Dataset
# ------------------------------------------------------------

# Section banner so the cell output is self-describing.
print("=== Gaussian Naïve Bayes Classifier on Iris Dataset ===")

# Feature matrix and integer class labels of the Iris dataset.
iris = datasets.load_iris()
X, y = iris.data, iris.target

# Hold out 30% of the rows for evaluation; the fixed seed makes the split
# (and therefore every metric printed below) reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

# ============================================================
# (i) Step-by-Step Implementation of Gaussian Naïve Bayes
# ============================================================

print("\n--- Step-by-Step Implementation ---")

class GaussianNaiveBayesManual:
    """Gaussian Naïve Bayes classifier written from first principles.

    Every feature is modelled, per class, as an independent normal
    distribution whose mean and variance are estimated from the training rows
    of that class. A sample is assigned to the class with the largest
    log-posterior ``log P(c) + sum_j log N(x_j | mean_cj, var_cj)``.

    Parameters
    ----------
    var_smoothing : float, default=1e-9
        Portion of the largest per-feature variance of the training data that
        is added to every class variance. This keeps the variance strictly
        positive when a feature is constant within a class, which would
        otherwise cause a division by zero.

    Attributes
    ----------
    classes : ndarray
        Sorted unique labels seen in ``fit``.
    mean, var, priors : dict
        Per-class feature means, smoothed feature variances and class priors,
        keyed by class label.
    """

    def __init__(self, var_smoothing=1e-9):
        self.var_smoothing = var_smoothing

    def fit(self, X, y):
        """Estimate per-class means, variances and priors from training data.

        Parameters
        ----------
        X : array-like
            Training samples, one row per sample.
        y : array-like
            Class label of each row of ``X``.

        Raises
        ------
        ValueError
            If the training set is empty or ``X`` and ``y`` disagree in length.
        """
        X = np.asarray(X, dtype=float)
        y = np.asarray(y)
        if X.shape[0] == 0:
            raise ValueError("Cannot fit on an empty training set.")
        if X.shape[0] != y.shape[0]:
            raise ValueError(
                f"X and y have inconsistent lengths: {X.shape[0]} != {y.shape[0]}"
            )

        # Same smoothing rule as sklearn's GaussianNB: a tiny fraction of the
        # largest feature variance. If every feature is constant the largest
        # variance is 0, so fall back to the raw smoothing value.
        epsilon = self.var_smoothing * X.var(axis=0).max()
        if epsilon == 0:
            epsilon = self.var_smoothing

        self.classes = np.unique(y)
        self.mean = {}
        self.var = {}
        self.priors = {}
        for c in self.classes:
            X_c = X[y == c]
            self.mean[c] = X_c.mean(axis=0)
            self.var[c] = X_c.var(axis=0) + epsilon
            self.priors[c] = X_c.shape[0] / X.shape[0]

    def gaussian_density(self, class_idx, x):
        """Return the per-feature normal density of ``x`` under class ``class_idx``."""
        mean = self.mean[class_idx]
        var = self.var[class_idx]
        numerator = np.exp(-((x - mean) ** 2) / (2 * var))
        denominator = np.sqrt(2 * np.pi * var)
        return numerator / denominator

    def _joint_log_likelihood(self, class_idx, x):
        """Return ``log P(class_idx) + sum_j log N(x_j | mean, var)`` for one sample."""
        mean = self.mean[class_idx]
        var = self.var[class_idx]
        # Log-density written out analytically. Taking log(exp(...)) instead
        # underflows to log(0) = -inf for samples far from the class mean,
        # which makes all classes tie.
        log_density = -0.5 * (np.log(2 * np.pi * var) + (x - mean) ** 2 / var)
        return np.log(self.priors[class_idx]) + np.sum(log_density)

    def predict_single(self, x):
        """Return the label with the highest log-posterior for one sample ``x``."""
        x = np.asarray(x, dtype=float)
        posteriors = [self._joint_log_likelihood(c, x) for c in self.classes]
        return self.classes[np.argmax(posteriors)]

    def predict(self, X):
        """Return an array with the predicted label of every row of ``X``."""
        # np.asarray makes a DataFrame iterate by row rather than by column name.
        return np.array([self.predict_single(x) for x in np.asarray(X)])

def _print_evaluation(accuracy_heading, y_true, y_pred):
    """Print accuracy, confusion matrix and classification report for one model.

    Parameters
    ----------
    accuracy_heading : str
        Text printed in front of the accuracy value.
    y_true : array-like
        Ground-truth labels.
    y_pred : array-like
        Labels predicted by the model under evaluation.
    """
    print(accuracy_heading, accuracy_score(y_true, y_pred))
    print("\nConfusion Matrix:\n", confusion_matrix(y_true, y_pred))
    print("\nClassification Report:\n", classification_report(y_true, y_pred))


# Train the hand-written classifier and score it on the held-out rows.
gnb_manual = GaussianNaiveBayesManual()
gnb_manual.fit(X_train, y_train)
y_pred_manual = gnb_manual.predict(X_test)
_print_evaluation("Accuracy (Manual Implementation):", y_test, y_pred_manual)

# ============================================================
# (ii) Using In-built GaussianNB Classifier
# ============================================================

print("\n--- Using In-built GaussianNB Classifier ---")

# Same split, scikit-learn's reference implementation, for comparison.
gnb_builtin = GaussianNB()
gnb_builtin.fit(X_train, y_train)
y_pred_builtin = gnb_builtin.predict(X_test)
_print_evaluation("Accuracy (In-built GaussianNB):", y_test, y_pred_builtin)

=== Gaussian Naïve Bayes Classifier on Iris Dataset ===

--- Step-by-Step Implementation ---
Accuracy (Manual Implementation): 0.9777777777777777

Confusion Matrix:
 [[19  0  0]
 [ 0 12  1]
 [ 0  0 13]]

Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00        19
           1       1.00      0.92      0.96        13
           2       0.93      1.00      0.96        13

    accuracy                           0.98        45
   macro avg       0.98      0.97      0.97        45
weighted avg       0.98      0.98      0.98        45


--- Using In-built GaussianNB Classifier ---
Accuracy (In-built GaussianNB): 0.9777777777777777

Confusion Matrix:
 [[19  0  0]
 [ 0 12  1]
 [ 0  0 13]]

Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00        19
           1       1.00      0.92      0.96        13
           2       0.93      1.00      0.96        13

    accuracy                           0.98        45
   macro avg       0.98      0.97      0.97        45
weighted avg       0.98      0.98      0.98        45

In [2]:
# ------------------------------------------------------------
# Q2. GridSearchCV for Finding Best K in KNN Classifier
# ------------------------------------------------------------

print("\n=== GridSearchCV for KNN Classifier ===")

# Load another dataset (e.g., Breast Cancer Dataset)
cancer = datasets.load_breast_cancer()
X_cancer = cancer.data
y_cancer = cancer.target

# Split BEFORE scaling. Fitting the scaler on the full dataset lets the mean
# and standard deviation of the held-out rows leak into training, which makes
# the test score optimistic. With the same test_size and random_state the
# split should select the same rows as before; only the scaling changes.
X_train_raw_c, X_test_raw_c, y_train_c, y_test_c = train_test_split(
    X_cancer, y_cancer, test_size=0.3, random_state=42
)

# Standardize using statistics learned from the training rows only.
scaler = StandardScaler()
X_train_c = scaler.fit_transform(X_train_raw_c)
X_test_c = scaler.transform(X_test_raw_c)
# Kept so any later reference to X_scaled keeps working; it is now scaled with
# the training statistics rather than with statistics of the full dataset.
X_scaled = scaler.transform(X_cancer)
# NOTE: inside cross-validation the scaler has still seen every training fold.
# Wrap the scaler and KNN in a Pipeline if fold-level isolation is required.

# Initialize KNN Classifier
knn = KNeighborsClassifier()

# Candidate values of K: 1..30. Plain Python ints are used so that
# best_params_ prints as {'n_neighbors': 8} instead of np.int64(8).
param_grid = {'n_neighbors': list(range(1, 31))}

# Perform Grid Search with 5-fold Cross Validation on the training rows
grid_search = GridSearchCV(knn, param_grid, cv=5, scoring='accuracy')
grid_search.fit(X_train_c, y_train_c)

# Best Parameters and Score
print("Best K value found by GridSearchCV:", grid_search.best_params_)
print("Best Cross-Validation Accuracy:", grid_search.best_score_)

# Score the best estimator on the untouched test rows
best_knn = grid_search.best_estimator_
y_pred_knn = best_knn.predict(X_test_c)

# Evaluate
print("\nTest Accuracy with Best K:", accuracy_score(y_test_c, y_pred_knn))
print("\nConfusion Matrix:\n", confusion_matrix(y_test_c, y_pred_knn))
print("\nClassification Report:\n", classification_report(y_test_c, y_pred_knn))


=== GridSearchCV for KNN Classifier ===
Best K value found by GridSearchCV: {'n_neighbors': np.int64(8)}
Best Cross-Validation Accuracy: 0.9647784810126583

Test Accuracy with Best K: 0.9649122807017544

Confusion Matrix:
 [[ 60   3]
 [  3 105]]

Classification Report:
               precision    recall  f1-score   support

           0       0.95      0.95      0.95        63
           1       0.97      0.97      0.97       108

    accuracy                           0.96       171
   macro avg       0.96      0.96      0.96       171
weighted avg       0.96      0.96      0.96       171

