In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score
import numpy as np

# Location of the Iris CSV on the mounted Google Drive (Colab).
file_path = '/content/drive/MyDrive/Colab Notebooks/DATASETS/iris.data.csv'

# header=None makes pandas treat the first line as data and label the
# columns with integers instead of names.
iris_data = pd.read_csv(file_path, header=None)

# Every column except the last is used as a feature; the last one is the label.
feature_columns = iris_data.iloc[:, :-1]
label_column = iris_data.iloc[:, -1]
X = feature_columns.values
y = label_column.values

# Map the raw labels to integer codes; the encoder is kept so the codes can
# be translated back later.
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

# Hold out 20% of the rows for testing; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y_encoded,
    test_size=0.2,
    random_state=42,
)

class GaussianNBManual:
    """Gaussian Naive Bayes classifier implemented with NumPy.

    Each feature is modelled, per class, as an independent normal
    distribution. Prediction picks the class with the largest log-posterior
    (log prior plus the sum of per-feature log-likelihoods).

    Args:
        var_smoothing: Fraction of the largest overall feature variance that
            is added to every per-class variance. This keeps the variances
            strictly positive so that a feature which is constant within a
            class does not produce a division by zero.
    """

    def __init__(self, var_smoothing=1e-9):
        self.var_smoothing = var_smoothing

    def fit(self, X, y):
        """Estimate per-class means, variances and priors.

        Args:
            X: Training samples, one row per sample.
            y: Class label of each row of ``X``.

        Returns:
            The fitted instance, so calls can be chained.

        Raises:
            ValueError: If ``X`` is empty or ``X`` and ``y`` disagree on the
                number of samples.
        """
        X = np.asarray(X, dtype=float)
        y = np.asarray(y)
        if X.shape[0] == 0:
            raise ValueError("Cannot fit on an empty training set.")
        if X.shape[0] != y.shape[0]:
            raise ValueError(
                f"X has {X.shape[0]} samples but y has {y.shape[0]} labels."
            )

        # Variance floor proportional to the data scale. If every feature is
        # constant the proportional floor is zero, so fall back to the raw
        # smoothing value to stay strictly positive.
        epsilon = self.var_smoothing * np.max(np.var(X, axis=0))
        if epsilon <= 0:
            epsilon = self.var_smoothing

        self.classes = np.unique(y)
        self.means = {}
        self.variances = {}
        self.priors = {}

        for c in self.classes:
            X_c = X[y == c]
            self.means[c] = np.mean(X_c, axis=0)
            self.variances[c] = np.var(X_c, axis=0) + epsilon
            self.priors[c] = X_c.shape[0] / X.shape[0]
        return self

    def predict(self, X):
        """Return the predicted class label for every row of ``X``."""
        X = np.asarray(X, dtype=float)
        predictions = [self._predict_instance(x) for x in X]
        return np.array(predictions)

    def _predict_instance(self, x):
        """Return the class with the highest log-posterior for one sample."""
        log_posteriors = [
            np.log(self.priors[c]) + np.sum(self._gaussian_log_likelihood(x, c))
            for c in self.classes
        ]
        return self.classes[np.argmax(log_posteriors)]

    def _gaussian_log_likelihood(self, x, c):
        """Return the per-feature log normal density of ``x`` under class ``c``.

        Working in log space avoids the underflow of ``exp`` to zero (and the
        resulting ``log(0) = -inf``) for samples far from the class mean.
        """
        mean = self.means[c]
        variance = self.variances[c]
        return -0.5 * np.log(2.0 * np.pi * variance) - (x - mean) ** 2 / (2.0 * variance)

    def _gaussian_likelihood(self, x, c):
        """Return the per-feature normal density of ``x`` under class ``c``."""
        return np.exp(self._gaussian_log_likelihood(x, c))

# --- Hand-written classifier -------------------------------------------
# Fit on the training split, then predict the held-out rows.
gnb_manual = GaussianNBManual()
gnb_manual.fit(X_train, y_train)
y_pred_manual = gnb_manual.predict(X_test)

# Accuracy is the fraction of test predictions that equal the true labels.
manual_accuracy = (y_pred_manual == y_test).mean()
print(f"Step-by-step Gaussian Naïve Bayes Accuracy: {manual_accuracy * 100:.2f}%")

# --- scikit-learn reference implementation -----------------------------
# Same training and test data, so the two accuracies are directly comparable.
gnb_builtin = GaussianNB()
y_pred_builtin = gnb_builtin.fit(X_train, y_train).predict(X_test)
builtin_accuracy = accuracy_score(y_test, y_pred_builtin)
print(f"In-built Gaussian Naïve Bayes Accuracy: {builtin_accuracy * 100:.2f}%")


In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score

# Location of the Iris CSV on the mounted Google Drive (Colab).
file_path = '/content/drive/MyDrive/Colab Notebooks/DATASETS/iris.data.csv'

# header=None makes pandas treat the first line as data.
data = pd.read_csv(file_path, header=None)

# All columns but the last are features; the last column is the label.
X = data.iloc[:, :-1]
y = data.iloc[:, -1]

# 80/20 split; stratify=y keeps the class proportions the same in both parts.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)

# Candidate neighbourhood sizes: k = 1 .. 20.
param_grid = {'n_neighbors': range(1, 21)}

knn = KNeighborsClassifier()

# 5-fold cross-validated search over k, scored by accuracy, using all cores.
grid_search = GridSearchCV(
    estimator=knn,
    param_grid=param_grid,
    cv=5,
    scoring='accuracy',
    n_jobs=-1,
)
grid_search.fit(X_train, y_train)

# Pull out the winning configuration and its cross-validation score.
best_knn = grid_search.best_estimator_
best_k = grid_search.best_params_['n_neighbors']
best_score = grid_search.best_score_

# Score the selected model on the held-out test split.
knn_test_predictions = best_knn.predict(X_test)
test_accuracy = accuracy_score(y_test, knn_test_predictions)

print("Best k:", best_k)
print("Best cross-validation accuracy:", best_score)
print("Test set accuracy:", test_accuracy)
