In [9]:
import pandas as pd 
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.datasets import load_iris

In [6]:
class NaiveBayes:
    """Gaussian Naive Bayes classifier.

    Each feature is modelled as an independent normal distribution per class.
    Prediction picks the class with the largest log-posterior,
    ``log P(class) + sum_j log N(x_j | mean_j, var_j)``.

    Parameters
    ----------
    var_epsilon : float, default 1e-9
        Absolute constant added to every per-class variance so that a
        feature that is constant within a class does not cause a division
        by zero.

    Attributes
    ----------
    classes : numpy.ndarray or None
        Sorted unique class labels seen by ``fit`` (``None`` before fitting).
    mean, var, priors : dict
        Per-class feature means, smoothed feature variances and prior
        probabilities, keyed by class label.
    """

    def __init__(self, var_epsilon=1e-9):
        self.var_epsilon = var_epsilon
        self.classes = None         # Unique class labels
        self.mean = {}              # Mean of each feature per class
        self.var = {}               # Variance of each feature per class
        self.priors = {}            # Prior probability of each class

    def fit(self, X, y):
        """Estimate per-class feature statistics and class priors.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Numeric training samples (NumPy array, nested list or DataFrame).
        y : array-like of shape (n_samples,)
            Class label of each training sample.
        """
        # Normalise the inputs once so lists and pandas objects behave exactly
        # like NumPy arrays (positional masking, population variance ddof=0).
        X = np.asarray(X, dtype=float)
        y = np.asarray(y)

        self.classes = np.unique(y)

        # Start from empty tables so a refit never keeps statistics for
        # labels that only existed in a previous call.
        self.mean, self.var, self.priors = {}, {}, {}

        n_samples = len(X)
        for c in self.classes:
            X_c = X[y == c]                                  # Rows belonging to class c
            self.mean[c] = X_c.mean(axis=0)
            self.var[c] = X_c.var(axis=0) + self.var_epsilon  # Keep variance strictly positive
            self.priors[c] = len(X_c) / n_samples

    def predict(self, X):
        """Return the most probable class label for every sample in ``X``.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Samples to classify.

        Returns
        -------
        numpy.ndarray of shape (n_samples,)
            Predicted labels, taken from ``self.classes``.

        Raises
        ------
        RuntimeError
            If the model has not been fitted yet.
        """
        if self.classes is None:
            raise RuntimeError("NaiveBayes.predict() called before fit()")

        X = np.asarray(X, dtype=float)
        if X.ndim == 1:
            # Treat a flat vector as n samples of a single value each, which
            # is how the per-sample formulation broadcasts a scalar sample.
            X = X[:, np.newaxis]
        if X.shape[0] == 0:
            return self.classes[:0]  # Nothing to classify

        feature_axes = tuple(range(1, X.ndim))
        log_posterior = np.empty((X.shape[0], len(self.classes)))

        for k, c in enumerate(self.classes):
            mean, var = self.mean[c], self.var[c]
            # Log of the Gaussian density, evaluated for all samples at once.
            log_likelihood = -((X - mean) ** 2) / (2 * var) - 0.5 * np.log(2 * np.pi * var)
            log_posterior[:, k] = np.log(self.priors[c]) + log_likelihood.sum(axis=feature_axes)

        # argmax returns the first maximum, so ties resolve to the lowest
        # class label in sorted order.
        return self.classes[np.argmax(log_posterior, axis=1)]


In [None]:
# Load the Iris dataset: feature matrix and integer class targets.
data = load_iris()
X = data.data
y = data.target

# Hold out 20% of the samples for evaluation; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=0,
)

# Fit the classifier on the training split and label the held-out samples.
model = NaiveBayes()
model.fit(X_train, y_train)
predictions = model.predict(X_test)

# Accuracy is the fraction of held-out samples whose prediction matches the true label.
print("Accuracy:", np.mean(predictions == y_test))

Accuracy: 0.9666666666666667
