In [1]:
# Q1 (i): Step-by-step implementation of a Gaussian Naïve Bayes classifier
import numpy as np
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Load the Iris dataset and pull out the feature matrix and target labels
iris = load_iris()
X = iris.data
y = iris.target

# Hold out 30% of the samples for testing; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

class GaussianNaiveBayes:
    """Gaussian Naive Bayes classifier implemented with NumPy.

    Each feature is modelled, per class, as an independent normal
    distribution. Prediction picks the class with the largest
    log-posterior ``log P(c) + sum_j log N(x_j | mean_cj, var_cj)``.

    Parameters
    ----------
    var_smoothing : float, default=1e-9
        Fraction of the largest overall feature variance that is added to
        every per-class variance. This keeps the variances strictly positive
        so a feature that is constant within a class does not cause a
        division by zero.

    Attributes (set by ``fit``)
    ---------------------------
    classes : ndarray of shape (n_classes,)   -- sorted unique labels
    mean    : ndarray of shape (n_classes, n_features)
    var     : ndarray of shape (n_classes, n_features), smoothed
    priors  : ndarray of shape (n_classes,)   -- class frequencies
    """

    def __init__(self, var_smoothing=1e-9):
        self.var_smoothing = var_smoothing

    def fit(self, X, y):
        """Estimate per-class means, variances and priors.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
        y : array-like of shape (n_samples,)

        Returns
        -------
        self : GaussianNaiveBayes
            The fitted estimator, so calls can be chained.

        Raises
        ------
        ValueError
            If ``X`` is not a non-empty 2-D array or if ``X`` and ``y``
            disagree on the number of samples.
        """
        # Accept plain lists as well as arrays; boolean masking needs ndarrays.
        X = np.asarray(X, dtype=float)
        y = np.asarray(y)
        if X.ndim != 2 or X.size == 0:
            raise ValueError("X must be a non-empty 2-D array")
        if y.shape[0] != X.shape[0]:
            raise ValueError("X and y must contain the same number of samples")

        self.classes = np.unique(y)
        n_classes = len(self.classes)
        n_features = X.shape[1]
        self.mean = np.zeros((n_classes, n_features))
        self.var = np.zeros((n_classes, n_features))
        self.priors = np.zeros(n_classes)

        # Variance floor proportional to the data scale. If every feature is
        # constant the scale is 0, so fall back to the raw smoothing value.
        epsilon = self.var_smoothing * X.var(axis=0).max()
        if not epsilon > 0:
            epsilon = self.var_smoothing

        for idx, c in enumerate(self.classes):
            X_c = X[y == c]
            self.mean[idx, :] = X_c.mean(axis=0)
            self.var[idx, :] = X_c.var(axis=0) + epsilon
            self.priors[idx] = X_c.shape[0] / float(X.shape[0])
        return self

    def _calculate_log_likelihood(self, class_idx, x):
        """Return the per-feature Gaussian log-density of ``x`` under a class."""
        mean = self.mean[class_idx]
        var = self.var[class_idx]
        # Working in log space avoids exp() underflowing to 0 far from the mean.
        return -0.5 * np.log(2 * np.pi * var) - ((x - mean) ** 2) / (2 * var)

    def _calculate_likelihood(self, class_idx, x):
        """Return the per-feature Gaussian density of ``x`` under a class."""
        mean = self.mean[class_idx]
        var = self.var[class_idx]
        numerator = np.exp(-((x - mean) ** 2) / (2 * var))
        denominator = np.sqrt(2 * np.pi * var)
        return numerator / denominator

    def _calculate_posterior(self, x):
        """Return the label whose log-posterior is largest for sample ``x``."""
        posteriors = []
        for idx, _ in enumerate(self.classes):
            prior = np.log(self.priors[idx])
            conditional = np.sum(self._calculate_log_likelihood(idx, x))
            posteriors.append(prior + conditional)
        return self.classes[np.argmax(posteriors)]

    def predict(self, X):
        """Predict a class label for every row of ``X``.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)

        Returns
        -------
        list
            One predicted label per row, in input order.
        """
        X = np.asarray(X, dtype=float)
        return [self._calculate_posterior(x) for x in X]

# Fit the hand-written classifier on the training split
gnb = GaussianNaiveBayes()
gnb.fit(X=X_train, y=y_train)

# Label the held-out samples and report the fraction predicted correctly
y_pred = gnb.predict(X=X_test)
print(f"Accuracy: {accuracy_score(y_test, y_pred)}")


Accuracy: 0.9777777777777777


In [2]:
# ii) Using scikit-learn's built-in Gaussian Naïve Bayes classifier
from sklearn.naive_bayes import GaussianNB

# Build scikit-learn's Gaussian Naive Bayes estimator.
# Note: this rebinds the name `gnb` to a new object.
gnb = GaussianNB()

# Fit on the training split
gnb.fit(X=X_train, y=y_train)

# Label the held-out samples and report the fraction predicted correctly
y_pred = gnb.predict(X=X_test)
print(f"Accuracy: {accuracy_score(y_test, y_pred)}")



Accuracy: 0.9777777777777777


In [3]:
# 2. Hyperparameter Tuning with GridSearchCV
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import GridSearchCV

# Candidate neighbourhood sizes: K = 1, 2, ..., 10
param_grid = {'n_neighbors': np.arange(1, 11)}

# Base KNN estimator whose n_neighbors is being tuned
knn = KNeighborsClassifier()

# 5-fold cross-validated search over the grid, ranked by accuracy
grid_search = GridSearchCV(
    estimator=knn,
    param_grid=param_grid,
    scoring='accuracy',
    cv=5,
)

# Run the search using only the training split
grid_search.fit(X_train, y_train)

# Report the winning K and its mean cross-validation accuracy
print(f"Best K: {grid_search.best_params_}")
print(f"Best Score: {grid_search.best_score_}")

# Evaluate the best estimator found by the search on the held-out test split
best_knn = grid_search.best_estimator_
y_pred = best_knn.predict(X_test)
print(f"Test Accuracy with Best K: {accuracy_score(y_test, y_pred)}")


Best K: {'n_neighbors': 1}
Best Score: 0.9523809523809523
Test Accuracy with Best K: 1.0
