In [15]:
import numpy as np
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

In [16]:
# Load the Iris dataset and pull out the feature matrix and label vector.
iris = load_iris()
X, y = iris.data, iris.target

# Quick sanity check of what was loaded.
print("Features:", iris.feature_names)
print("Classes:", iris.target_names)
print("Shape of data:", X.shape)

Features: ['sepal length (cm)', 'sepal width (cm)', 'petal length (cm)', 'petal width (cm)']
Classes: ['setosa' 'versicolor' 'virginica']
Shape of data: (150, 4)


In [17]:
# Hold out 30% of the samples for testing; the fixed seed keeps the split
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

In [18]:
def separate_by_class(X, y):
    """Group the rows of ``X`` by their class label in ``y``.

    Parameters
    ----------
    X : array-like
        Samples, one per row (first axis is indexed by sample).
    y : array-like
        Class label for each row of ``X``.

    Returns
    -------
    dict
        Maps each distinct label (as returned by ``np.unique``) to a NumPy
        array holding the rows of ``X`` that carry that label.
    """
    # Coerce to arrays so list inputs work: a plain Python list cannot be
    # indexed with the boolean mask produced by `y == c`.
    X = np.asarray(X)
    y = np.asarray(y)
    return {c: X[y == c] for c in np.unique(y)}

def summarize_by_class(X, y):
    """Compute per-class, per-feature Gaussian statistics.

    The samples are first grouped by label with ``separate_by_class``; for
    every group, each feature column is reduced to a ``(mean, variance)``
    pair using ``np.mean`` and ``np.var`` (NumPy's default, i.e. population
    variance).

    Returns
    -------
    dict
        Maps each class label to a list of ``(mean, variance)`` tuples, one
        tuple per feature, in feature order.
    """
    summaries = {}
    for label, rows in separate_by_class(X, y).items():
        feature_stats = []
        # zip(*rows) transposes the group so we iterate column by column.
        for column in zip(*rows):
            feature_stats.append((np.mean(column), np.var(column)))
        summaries[label] = feature_stats
    return summaries

def gaussian_prob(x, mean, var, eps=1e-6):
    """Evaluate the Gaussian density of ``x`` under ``N(mean, var + eps)``.

    Parameters
    ----------
    x : float or array-like
        Point(s) at which to evaluate the density.
    mean : float
        Mean of the distribution.
    var : float
        Variance of the distribution.
    eps : float, optional
        Small constant added to ``var`` so that a zero-variance feature does
        not cause a division by zero. Defaults to ``1e-6``.

    Returns
    -------
    float or ndarray
        ``exp(-(x - mean)**2 / (2 * v)) / sqrt(2 * pi * v)`` with
        ``v = var + eps``.
    """
    # Smooth the variance once and reuse it, so the normalising coefficient
    # and the exponent describe the same distribution.
    smoothed_var = var + eps
    coeff = 1 / np.sqrt(2 * np.pi * smoothed_var)
    exponent = np.exp(-((x - mean) ** 2) / (2 * smoothed_var))
    return coeff * exponent

def calculate_class_probs(summaries, X):
    """Score a single sample against every class.

    For each class in ``summaries`` the score is the product, over features,
    of ``gaussian_prob(X[i], mean_i, var_i)``. No class prior is multiplied
    in, so the values are unnormalised likelihoods rather than posteriors.

    Parameters
    ----------
    summaries : dict
        Maps class label to a sequence of ``(mean, variance)`` pairs, one
        per feature.
    X : sequence
        Feature values of one sample, indexable by feature position.

    Returns
    -------
    dict
        Maps each class label to its likelihood score.
    """
    probs = {}
    for label, feature_stats in summaries.items():
        likelihood = 1
        for idx, (mean, var) in enumerate(feature_stats):
            likelihood *= gaussian_prob(X[idx], mean, var)
        probs[label] = likelihood
    return probs

def predict(summaries, X):
    """Return the class label with the highest score for one sample.

    Scores come from ``calculate_class_probs``; when several classes share
    the top score, the first one in ``summaries`` iteration order wins.
    """
    scores = calculate_class_probs(summaries, X)
    best_label, _ = max(scores.items(), key=lambda item: item[1])
    return best_label

In [19]:
# Estimate the per-class feature statistics on the training split, then
# classify every held-out sample one at a time.
summaries = summarize_by_class(X_train, y_train)
y_pred_manual = [predict(summaries, sample) for sample in X_test]

print(
    "Manual GaussianNB Accuracy:",
    accuracy_score(y_test, y_pred_manual),
)

Manual GaussianNB Accuracy: 0.9777777777777777


In [23]:
from sklearn.naive_bayes import GaussianNB

# Reference run with scikit-learn's GaussianNB on the same split, for
# comparison with the hand-written implementation.
model = GaussianNB().fit(X_train, y_train)
y_pred_builtin = model.predict(X_test)

print(
    "In-built GaussianNB Accuracy:",
    accuracy_score(y_test, y_pred_builtin),
)

In-built GaussianNB Accuracy: 0.9777777777777777


In [27]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score

# Reload Iris and make a stratified 70/30 split.
# NOTE: this rebinds X_train / X_test / y_train / y_test, replacing the
# non-stratified split created earlier in the notebook.
iris = load_iris()
X = iris.data
y = iris.target
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, stratify=y, random_state=42
)

# 5-fold cross-validated search over k = 1..20 neighbours.
knn = KNeighborsClassifier()
param_grid = {'n_neighbors': range(1, 21)}
grid = GridSearchCV(estimator=knn, param_grid=param_grid, cv=5)
grid.fit(X_train, y_train)

# Evaluate the best estimator found by the search on the held-out test set.
best_knn = grid.best_estimator_
y_pred = best_knn.predict(X_test)

print("Best params:", grid.best_params_)
print("CV best score:", grid.best_score_)
print("Test accuracy:", accuracy_score(y_test, y_pred))

Best params: {'n_neighbors': 9}
CV best score: 0.980952380952381
Test accuracy: 0.9555555555555556
