In [1]:
#1 (i) Gaussian Naive Bayes: step-by-step implementation from scratch
import numpy as np
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Load the Iris dataset and pull out the feature matrix and class labels.
iris = load_iris()
X, y = iris.data, iris.target

# Hold out 20% of the rows for testing; the fixed random_state keeps the
# split identical from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

class ManualGaussianNB:
    """Gaussian Naive Bayes classifier implemented from scratch with NumPy.

    Every feature is modelled, per class, as an independent normal
    distribution. ``fit`` estimates the per-class feature means, variances
    and class priors; ``predict`` returns, for each sample, the class with
    the largest log-posterior.

    Attributes set by ``fit``:
        classes: sorted array of the distinct labels found in ``y``.
        mean:    dict mapping class label -> per-feature mean vector.
        var:     dict mapping class label -> per-feature population
                 variance (``np.var`` with its default ``ddof=0``).
        priors:  dict mapping class label -> fraction of training rows
                 that carry that label.
    """

    # Added to every variance so that a feature which is constant within a
    # class (variance 0) does not cause a division by zero.
    eps = 1e-6

    def fit(self, X, y):
        """Estimate per-class statistics from the training data.

        Args:
            X: array-like of shape (n_samples, n_features).
            y: array-like of shape (n_samples,) holding the class labels.

        Returns:
            self, so that calls can be chained.
        """
        # Coerce to arrays so boolean-mask indexing also works for lists.
        X = np.asarray(X)
        y = np.asarray(y)

        self.classes = np.unique(y)
        self.mean = {}
        self.var = {}
        self.priors = {}

        for c in self.classes:
            X_c = X[y == c]
            self.mean[c] = np.mean(X_c, axis=0)
            self.var[c] = np.var(X_c, axis=0)
            self.priors[c] = X_c.shape[0] / X.shape[0]

        return self

    def gaussian_pdf(self, x, mean, var):
        """Return the normal density of ``x`` under N(mean, var + eps).

        Works element-wise on arrays. The value underflows to 0.0 when ``x``
        is far from ``mean``; use ``_log_gaussian_pdf`` when a logarithm of
        the density is needed.
        """
        smoothed_var = var + self.eps
        numerator = np.exp(-(x - mean) ** 2 / (2 * smoothed_var))
        denominator = np.sqrt(2 * np.pi * smoothed_var)
        return numerator / denominator

    def _log_gaussian_pdf(self, x, mean, var):
        """Return log N(x; mean, var + eps) computed without exponentiating.

        Evaluating the logarithm analytically avoids the ``log(0) = -inf``
        that ``np.log(gaussian_pdf(...))`` produces once the density
        underflows.
        """
        smoothed_var = var + self.eps
        return (
            -0.5 * np.log(2 * np.pi * smoothed_var)
            - (x - mean) ** 2 / (2 * smoothed_var)
        )

    def predict(self, X):
        """Predict a class label for every row of ``X``.

        Args:
            X: array-like of shape (n_samples, n_features).

        Returns:
            Array of shape (n_samples,) with one label from ``self.classes``
            per row. Ties go to the class that comes first in
            ``self.classes``.

        Raises:
            ValueError: if ``X`` is non-empty and not two-dimensional.
        """
        X = np.asarray(X, dtype=float)
        if X.size == 0:
            return np.array([], dtype=self.classes.dtype)
        if X.ndim != 2:
            raise ValueError(
                "X must be 2-D with shape (n_samples, n_features); "
                "got %d dimension(s)" % X.ndim
            )

        # One row per class, one column per sample:
        #   log P(c) + sum over features of log p(x_j | c)
        log_posteriors = np.stack([
            np.log(self.priors[c])
            + np.sum(self._log_gaussian_pdf(X, self.mean[c], self.var[c]), axis=1)
            for c in self.classes
        ])
        return self.classes[np.argmax(log_posteriors, axis=0)]

# Train the from-scratch classifier on the training split, then measure how
# many held-out labels it predicts correctly.
model = ManualGaussianNB()
model.fit(X_train, y_train)

y_pred = model.predict(X_test)
manual_accuracy = accuracy_score(y_test, y_pred)
print("Manual Gaussian NB Accuracy:", manual_accuracy)


Manual Gaussian NB Accuracy: 1.0


In [2]:
#1 (ii) Gaussian Naive Bayes using scikit-learn's built-in GaussianNB
from sklearn.naive_bayes import GaussianNB

# Fit scikit-learn's GaussianNB on the same training split and score it on
# the same held-out rows.
model = GaussianNB()
model.fit(X_train, y_train)

y_pred = model.predict(X_test)
sklearn_accuracy = accuracy_score(y_test, y_pred)
print("Sklearn GaussianNB Accuracy:", sklearn_accuracy)



Sklearn GaussianNB Accuracy: 1.0


In [3]:
#2 K-Nearest Neighbours: choose n_neighbors with a 5-fold grid search
from sklearn.model_selection import GridSearchCV
from sklearn.neighbors import KNeighborsClassifier

# Candidate neighbourhood sizes: k = 1, 2, ..., 20.
k_values = list(range(1, 21))
param_grid = {'n_neighbors': k_values}
knn = KNeighborsClassifier()

# Try every candidate k and score each one by accuracy over 5
# cross-validation folds.
# Note: the search runs on the full dataset (X, y), not only on the
# training split.
grid = GridSearchCV(
    estimator=knn,
    param_grid=param_grid,
    scoring='accuracy',
    cv=5,
)
grid.fit(X, y)

print("Best K value:", grid.best_params_)
print("Best Accuracy:", grid.best_score_)


Best K value: {'n_neighbors': 6}
Best Accuracy: 0.9800000000000001
