In [1]:
import numpy as np
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score


In [2]:
iris = load_iris()
X = iris.data
y = iris.target


In [4]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

In [6]:
classes = np.unique(y_train)
mean = {}
var = {}
prior = {} #P(y) 

for c in classes:
    X_c = X_train[y_train == c]
    mean[c] = X_c.mean(axis=0)
    var[c] = X_c.var(axis=0)
    prior[c] = X_c.shape[0] / X_train.shape[0]

In [7]:
def gaussian_pdf(x, mean, var):
    eps = 1e-6  # to prevent division by zero
    coeff = 1.0 / np.sqrt(2.0 * np.pi * var + eps)
    exponent = np.exp(- (x - mean)**2 / (2 * var + eps))
    return coeff * exponent

In [8]:
def predict(X):
    y_pred = []
    for x in X:
        posteriors = []
        for c in classes:
            prior_prob = np.log(prior[c])
            class_conditional = np.sum(np.log(gaussian_pdf(x, mean[c], var[c])))
            posterior = prior_prob + class_conditional
            posteriors.append(posterior)
        y_pred.append(classes[np.argmax(posteriors)])
    return np.array(y_pred)

In [9]:
y_pred = predict(X_test)
print("Accuracy (Manual Gaussian NB):", accuracy_score(y_test, y_pred))

Accuracy (Manual Gaussian NB): 0.9777777777777777


In [5]:
#directly using Gaussian Naive Bayes 
from sklearn.naive_bayes import GaussianNB

# Create and train the model
gnb = GaussianNB()
gnb.fit(X_train, y_train)

# Predict and evaluate
y_pred_builtin = gnb.predict(X_test)
print("Accuracy (Built-in GaussianNB):", accuracy_score(y_test, y_pred_builtin))


Accuracy (Built-in GaussianNB): 0.9777777777777777


In [10]:
from sklearn.datasets import load_wine
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import GridSearchCV, train_test_split

wine = load_wine()
X, y = wine.data, wine.target

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

knn = KNeighborsClassifier()
param_grid = {'n_neighbors': np.arange(1, 21)}  # Try K=1 to 20
grid_search = GridSearchCV(knn, param_grid, cv=5, scoring='accuracy')
grid_search.fit(X_train, y_train)

print("Best K value:", grid_search.best_params_)
print("Best Cross-Validation Accuracy:", grid_search.best_score_)

# Evaluate on test set
best_knn = grid_search.best_estimator_
test_accuracy = best_knn.score(X_test, y_test)
print("Test Set Accuracy with best K:", test_accuracy)


Best K value: {'n_neighbors': 1}
Best Cross-Validation Accuracy: 0.7023333333333334
Test Set Accuracy with best K: 0.7962962962962963


In [None]:
#Explanation:
'''
param_grid ---> defines which parameters to search (here, n_neighbors).

cv=5 â†’ 5-fold cross-validation.

GridSearchCV tries all combinations and returns the best model.'''