In [5]:
# 1. Part 1: step-by-step implementation of Gaussian Naive Bayes from scratch
# Step 1: Import libraries
import numpy as np
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report


In [7]:
# Step 2: Load the Iris dataset
# Keep the loaded dataset object as `iris`; X holds its feature matrix and y its labels.
iris = load_iris()
X, y = iris.data, iris.target

In [9]:
# Step 3: Split the dataset
# Hold out 20% of the samples for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


In [11]:
# Step 4: Implement Gaussian Naive Bayes from scratch
class GaussianNaiveBayes:
    """Gaussian Naive Bayes classifier implemented with NumPy.

    Each feature is modelled, per class, as an independent normal
    distribution. Prediction picks the class with the largest log-posterior
    ``log P(c) + sum_j log N(x_j | mean_cj, var_cj)``.

    Parameters
    ----------
    var_smoothing : float, default=1e-9
        Fraction of the largest per-feature variance of the training data that
        is added to every class variance. This keeps the variances strictly
        positive, so a feature that is constant within a class does not cause
        a division by zero.

    Attributes (set by ``fit``)
    ---------------------------
    classes : ndarray
        Sorted unique class labels seen in ``y``.
    mean : dict
        Maps class label -> per-feature mean vector.
    var : dict
        Maps class label -> per-feature variance vector (smoothing included).
    priors : dict
        Maps class label -> fraction of training rows with that label.
    """

    def __init__(self, var_smoothing=1e-9):
        self.var_smoothing = var_smoothing

    def fit(self, X, y):
        """Estimate per-class means, variances and priors.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
        y : array-like of shape (n_samples,)

        Returns
        -------
        self : GaussianNaiveBayes
            The fitted estimator, so calls can be chained.

        Raises
        ------
        ValueError
            If ``X`` contains no samples.
        """
        X = np.asarray(X, dtype=float)
        y = np.asarray(y)
        if X.shape[0] == 0:
            raise ValueError("Cannot fit GaussianNaiveBayes on an empty dataset.")

        self.classes = np.unique(y)
        self.mean = {}
        self.var = {}
        self.priors = {}

        # Variance floor relative to the data scale; fall back to an absolute
        # floor when every feature is constant (largest variance is 0).
        largest_var = X.var(axis=0).max()
        self.epsilon = self.var_smoothing * (largest_var if largest_var > 0 else 1.0)

        for c in self.classes:
            X_c = X[y == c]
            self.mean[c] = X_c.mean(axis=0)
            self.var[c] = X_c.var(axis=0) + self.epsilon
            self.priors[c] = X_c.shape[0] / X.shape[0]
        return self

    def _log_pdf(self, class_idx, x):
        """Return the per-feature Gaussian log-density of ``x`` under a class.

        Working in log space avoids the underflow of ``exp(-large)`` to 0,
        which would otherwise turn into ``log(0) = -inf`` for every class.
        ``x`` may be a single sample or a 2-D batch; broadcasting handles both.
        """
        mean = self.mean[class_idx]
        var = self.var[class_idx]
        return -0.5 * (np.log(2.0 * np.pi * var) + (x - mean) ** 2 / var)

    def _pdf(self, class_idx, x):
        """Return the per-feature Gaussian density of ``x`` under a class."""
        return np.exp(self._log_pdf(class_idx, x))

    def predict(self, X):
        """Predict the most probable class label for every row of ``X``.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)

        Returns
        -------
        ndarray of shape (n_samples,)
            Predicted labels, drawn from ``self.classes``.
        """
        X = np.asarray(X, dtype=float)
        if X.shape[0] == 0:
            # Nothing to score; return an empty array with the label dtype.
            return self.classes[:0]

        # One column of log-posteriors per class, in the order of self.classes.
        log_posteriors = np.column_stack([
            np.log(self.priors[c]) + self._log_pdf(c, X).sum(axis=1)
            for c in self.classes
        ])
        return self.classes[np.argmax(log_posteriors, axis=1)]

In [13]:
# Step 5: Train and evaluate
# Fit the from-scratch model on the training split, then score it on the held-out split.
gnb_scratch = GaussianNaiveBayes()
gnb_scratch.fit(X_train, y_train)
y_pred_scratch = gnb_scratch.predict(X_test)

# Each entry pairs the printed label with the metric function that produces its value.
scratch_report = [
    ("Accuracy (Scratch):", accuracy_score),
    ("Confusion Matrix:\n", confusion_matrix),
    ("Classification Report:\n", classification_report),
]
for label, metric in scratch_report:
    print(label, metric(y_test, y_pred_scratch))

Accuracy (Scratch): 1.0
Confusion Matrix:
 [[10  0  0]
 [ 0  9  0]
 [ 0  0 11]]
Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00        10
           1       1.00      1.00      1.00         9
           2       1.00      1.00      1.00        11

    accuracy                           1.00        30
   macro avg       1.00      1.00      1.00        30
weighted avg       1.00      1.00      1.00        30



In [15]:
# Part 2: using scikit-learn's built-in GaussianNB
from sklearn.naive_bayes import GaussianNB

In [17]:
# Create and train the model (scikit-learn's fit() returns the fitted estimator)
gnb_builtin = GaussianNB().fit(X_train, y_train)

# Predictions on the held-out split
y_pred_builtin = gnb_builtin.predict(X_test)


In [19]:
# Evaluation
# Each entry pairs the printed label with the metric function that produces its value.
builtin_report = [
    ("Accuracy (Built-in):", accuracy_score),
    ("Confusion Matrix:\n", confusion_matrix),
    ("Classification Report:\n", classification_report),
]
for label, metric in builtin_report:
    print(label, metric(y_test, y_pred_builtin))

Accuracy (Built-in): 1.0
Confusion Matrix:
 [[10  0  0]
 [ 0  9  0]
 [ 0  0 11]]
Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00        10
           1       1.00      1.00      1.00         9
           2       1.00      1.00      1.00        11

    accuracy                           1.00        30
   macro avg       1.00      1.00      1.00        30
weighted avg       1.00      1.00      1.00        30



In [21]:
# 2. Tune K for a K-nearest-neighbors classifier with GridSearchCV
from sklearn.datasets import load_iris
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import GridSearchCV

In [23]:
# Load dataset
iris = load_iris()
X, y = iris.data, iris.target

# Create KNN model
knn = KNeighborsClassifier()

# Define parameter grid for K: candidate neighbor counts 1 through 20
k_candidates = np.arange(1, 21)
param_grid = {'n_neighbors': k_candidates}

In [25]:
# Apply GridSearchCV
# Score every candidate K with 5-fold cross-validation on the full dataset.
grid_search = GridSearchCV(estimator=knn, param_grid=param_grid, cv=5)
grid_search.fit(X, y)

# Best K value
best_k = grid_search.best_params_['n_neighbors']
print("Best K value found:", best_k)
print("Best Cross-Validation Score:", grid_search.best_score_)


Best K value found: 6
Best Cross-Validation Score: 0.9800000000000001
