In [1]:
import numpy as np

class NaiveBayes:
    """Gaussian Naive Bayes classifier.

    Each feature is modelled as an independent normal distribution per
    class. Prediction picks the class with the largest log-posterior
    ``log P(y) + sum_j log P(x_j | y)``.

    Args:
        var_smoothing: Fraction of the largest per-feature variance of the
            training data that is added to every per-class variance. This
            keeps the variances strictly positive, so a feature that is
            constant within a class no longer yields a 0/0 (NaN) density.

    Attributes set by ``fit``:
        classes: Sorted array of the distinct labels seen in ``y``.
        priors: Dict mapping class label -> empirical class frequency.
        mean: Dict mapping class label -> per-feature mean vector.
        var: Dict mapping class label -> smoothed per-feature variance.
    """

    def __init__(self, var_smoothing=1e-9):
        self.var_smoothing = var_smoothing

    def fit(self, X, y):
        """Estimate class priors and per-class feature means/variances.

        Args:
            X: Array-like of shape (n_samples, n_features).
            y: Array-like of n_samples class labels.

        Raises:
            ValueError: If ``X`` is not two-dimensional.
        """
        X = np.asarray(X, dtype=float)
        y = np.asarray(y)
        if X.ndim != 2:
            raise ValueError(
                f"X must be 2-D (n_samples, n_features), got ndim={X.ndim}"
            )
        n_samples = X.shape[0]
        self.classes = np.unique(y)

        # Additive variance floor, scaled to the data. Falls back to the raw
        # smoothing value when the data has no spread at all, so the floor
        # is never zero.
        max_var = float(np.var(X, axis=0).max()) if X.size else 0.0
        epsilon = self.var_smoothing * (max_var if max_var > 0 else 1.0)

        self.priors = {}
        self.mean = {}
        self.var = {}

        for c in self.classes:
            X_c = X[y == c]  # samples belonging to class c
            self.priors[c] = X_c.shape[0] / n_samples
            self.mean[c] = X_c.mean(axis=0)
            self.var[c] = X_c.var(axis=0) + epsilon

    def _pdf(self, class_idx, x):
        """Return the per-feature Gaussian density of ``x`` under a class."""
        mean = self.mean[class_idx]
        var = self.var[class_idx]
        numerator = np.exp(-((x - mean) ** 2) / (2 * var))
        denominator = np.sqrt(2 * np.pi * var)
        return numerator / denominator

    def _log_pdf(self, class_idx, x):
        """Return the per-feature Gaussian log-density of ``x`` under a class.

        Computed directly in log space: ``log(_pdf(...))`` underflows to
        ``-inf`` for points far from the mean, which makes all classes tie.
        """
        mean = self.mean[class_idx]
        var = self.var[class_idx]
        return -0.5 * (np.log(2.0 * np.pi * var) + (x - mean) ** 2 / var)

    def _log_posteriors(self, X):
        """Return unnormalised log-posteriors, shape (n_samples, n_classes).

        Columns follow the order of ``self.classes``.
        """
        columns = [
            np.log(self.priors[c]) + self._log_pdf(c, X).sum(axis=1)
            for c in self.classes
        ]
        return np.stack(columns, axis=1)

    def _predict_single(self, x):
        """Return the most probable class label for one sample ``x``."""
        row = np.atleast_1d(np.asarray(x, dtype=float)).reshape(1, -1)
        scores = self._log_posteriors(row)[0]
        return self.classes[np.argmax(scores)]

    def predict(self, X):
        """Return the predicted class label for every row of ``X``.

        Args:
            X: Array-like of shape (n_samples, n_features). A 1-D input is
                treated as a sequence of scalar samples, each broadcast
                against the per-feature parameters (the same result as
                iterating over its elements one by one).

        Returns:
            1-D array with one label per sample.

        Raises:
            ValueError: If ``X`` has more than two dimensions.
        """
        X = np.asarray(X, dtype=float)
        if X.ndim == 1:
            X = X[:, None]
        elif X.ndim != 2:
            raise ValueError(
                f"X must be 1-D or 2-D, got ndim={X.ndim}"
            )
        best = np.argmax(self._log_posteriors(X), axis=1)
        return self.classes[best]


# ----------------------------
# Example Run
# ----------------------------
if __name__ == "__main__":
    from sklearn.datasets import load_iris
    from sklearn.model_selection import train_test_split

    # Fetch scikit-learn's Iris dataset: feature matrix and integer targets.
    data = load_iris()
    X = data.data
    y = data.target

    # Hold out 20% of the samples for evaluation; the fixed seed makes the
    # split reproducible between runs.
    X_train, X_test, y_train, y_test = train_test_split(
        X,
        y,
        test_size=0.2,
        random_state=42,
    )

    # Fit the classifier on the training portion only.
    nb = NaiveBayes()
    nb.fit(X_train, y_train)

    # Label the held-out samples.
    predictions = nb.predict(X_test)

    # Accuracy = fraction of held-out samples whose label was predicted
    # correctly.
    hits = predictions == y_test
    acc = np.sum(hits) / len(y_test)
    print("Accuracy:", acc)


Accuracy: 1.0
