<a href="https://colab.research.google.com/github/katyayani-jha/ML-LAB-CS12/blob/main/ML_Lab_6.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Gaussian Naive Bayes Step-by-Step

In [None]:
import numpy as np
import pandas as pd
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report

In [None]:
# Load the Iris dataset and pull out the feature matrix and the label vector.
iris = datasets.load_iris()
X, y = iris.data, iris.target

# Hold out 20% of the samples for testing. Stratifying on y keeps the class
# proportions equal in both splits; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)

In [None]:
# Estimate the per-class Gaussian parameters: one mean and one (population)
# variance per feature, computed only from the training rows of that class.
# Both arrays end up with one row per class and one column per feature.
classes = np.unique(y_train)

means = np.array([X_train[y_train == c].mean(axis=0) for c in classes])
variances = np.array([X_train[y_train == c].var(axis=0) for c in classes])

# Variance smoothing: a feature that is constant within a class has zero
# variance, which would make the Gaussian PDF divide by zero and turn the
# posterior into NaN. Adding a tiny fraction of the largest overall feature
# variance (the scheme behind GaussianNB's default var_smoothing=1e-9) keeps
# the variances positive without noticeably changing the estimates.
variances += 1e-9 * X_train.var(axis=0).max()

In [None]:
# Gaussian probability density function (PDF)
def gaussian_pdf(x, mean, var):
    """Evaluate the univariate Gaussian (normal) density element-wise.

    Computes ``exp(-(x - mean)**2 / (2 * var)) / sqrt(2 * pi * var)``.

    Parameters
    ----------
    x : scalar or array-like
        Point(s) at which to evaluate the density.
    mean : scalar or array-like
        Mean(s) of the distribution.
    var : scalar or array-like
        Variance(s) of the distribution (the variance, not the standard
        deviation). Values are not validated: a zero or negative variance
        produces NaN through NumPy rather than raising an exception.

    Returns
    -------
    numpy scalar or numpy.ndarray
        Density value(s), with the broadcast shape of the three inputs.
    """
    # Coerce to arrays so plain Python lists/tuples work too; without this,
    # `x - mean` on two lists raises TypeError.
    x = np.asarray(x)
    mean = np.asarray(mean)
    var = np.asarray(var)

    numerator = np.exp(-((x - mean) ** 2) / (2 * var))
    denominator = np.sqrt(2 * np.pi * var)
    return numerator / denominator

In [None]:
# Class priors P(c): the fraction of training samples carrying each label,
# stored in the same order as `classes`.
priors = np.empty(len(classes))
for position, label in enumerate(classes):
    priors[position] = (y_train == label).mean()

In [None]:
def predict(X):
    """Classify each row of X with the fitted Gaussian Naive Bayes parameters.

    For every sample the unnormalised log-posterior of each class is

        log P(c) + sum_j log N(x_j | means[c, j], variances[c, j])

    and the label with the highest score is chosen. The Gaussian log-density
    is evaluated directly in log space instead of taking ``log`` of the
    density: for points far from a class mean the density underflows to 0,
    and ``log(0) = -inf`` for every class would make the choice arbitrary.

    Reads the module-level ``classes``, ``priors``, ``means`` and
    ``variances`` (one row per class, one column per feature).

    Parameters
    ----------
    X : iterable of feature vectors
        Samples to classify, one per row.

    Returns
    -------
    numpy.ndarray
        One predicted label per sample, taken from ``classes``.
    """
    # Terms that do not depend on the sample are computed once up front.
    log_priors = np.log(priors)
    log_norm = np.sum(np.log(2.0 * np.pi * variances), axis=1)

    predictions = []
    for x in X:
        # Variance-scaled squared distance from x to each class mean,
        # summed over features -> one value per class.
        scaled_sq_dist = np.sum((x - means) ** 2 / variances, axis=1)
        log_posteriors = log_priors - 0.5 * (log_norm + scaled_sq_dist)
        # Map the winning position back to its label so the result is correct
        # even when the labels are not 0..n_classes-1.
        predictions.append(classes[np.argmax(log_posteriors)])
    return np.array(predictions)

# Classify the held-out test samples with the hand-written predict() above.
y_pred_step_by_step = predict(X_test)

In [None]:
# Score the step-by-step classifier against the true test labels.
classification_report_step_by_step = classification_report(y_test, y_pred_step_by_step)
accuracy_step_by_step = accuracy_score(y_test, y_pred_step_by_step)

# Print the overall accuracy, a blank line, then the per-class report.
print("Accuracy: ", accuracy_step_by_step, end="\n\n")
print(classification_report_step_by_step)

Accuracy:  0.9666666666666667

              precision    recall  f1-score   support

           0       1.00      1.00      1.00        10
           1       1.00      0.90      0.95        10
           2       0.91      1.00      0.95        10

    accuracy                           0.97        30
   macro avg       0.97      0.97      0.97        30
weighted avg       0.97      0.97      0.97        30



Gaussian Naive Bayes (scikit-learn built-in)

In [None]:
from sklearn.naive_bayes import GaussianNB

In [None]:
# Train scikit-learn's Gaussian Naive Bayes on the training split. fit()
# returns the estimator itself, so construction and training are chained.
gnb = GaussianNB().fit(X_train, y_train)

# Predict labels for the held-out test samples.
y_pred_inbuilt = gnb.predict(X_test)

In [None]:
# Score the built-in classifier against the true test labels.
classification_report_inbuilt = classification_report(y_test, y_pred_inbuilt)
accuracy_inbuilt = accuracy_score(y_test, y_pred_inbuilt)

# Print the overall accuracy, a blank line, then the per-class report.
print("Accuracy: ", accuracy_inbuilt, end="\n\n")
print(classification_report_inbuilt)

Accuracy:  0.9666666666666667

              precision    recall  f1-score   support

           0       1.00      1.00      1.00        10
           1       1.00      0.90      0.95        10
           2       0.91      1.00      0.95        10

    accuracy                           0.97        30
   macro avg       0.97      0.97      0.97        30
weighted avg       0.97      0.97      0.97        30



KNN hyperparameter tuning with GridSearchCV

In [None]:
import numpy as np
from sklearn import datasets
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score

In [None]:
# Reload Iris so this section can run on its own: features in X, labels in y.
iris = datasets.load_iris()
X, y = iris.data, iris.target

# 80/20 train/test split, stratified on the labels, with a fixed seed so the
# split is repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)

In [None]:
# Candidate neighbourhood sizes: every integer K from 1 through 30.
param_grid = {'n_neighbors': np.arange(1, 31)}

# Exhaustive search over K: each candidate is scored by accuracy under
# 5-fold cross-validation on the training data.
knn = KNeighborsClassifier()
grid_search = GridSearchCV(
    estimator=knn,
    param_grid=param_grid,
    scoring='accuracy',
    cv=5,
)
grid_search.fit(X_train, y_train)

# best_params_ is a dict of the winning settings; best_score_ is the
# cross-validated accuracy achieved with them.
best_k = grid_search.best_params_
best_score = grid_search.best_score_

print("Best k:", best_k['n_neighbors'])
print("Best score:", best_score)

Best k: 6
Best score: 0.9833333333333334


In [None]:
# The search keeps the winning KNN model (refit on the whole training split
# by default); use it to label the held-out test samples.
best_knn = grid_search.best_estimator_
y_pred = best_knn.predict(X_test)

# Accuracy on data never seen during the search; left as the cell's last
# expression so the notebook displays it.
test_accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
test_accuracy

0.9666666666666667