# Naive Bayes

Probabilistic classifier based on Bayes' theorem with the "naive" assumption that every pair of features is conditionally independent given the class label. It is called naive because it simplifies the calculation by assuming that, once the class is known, the value of one feature tells us nothing about the value of any other feature.

In [1]:
import numpy as np
from sklearn import datasets
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

class NaiveBayes:
    """Gaussian Naive Bayes classifier.

    Each feature is modelled, per class, as an independent normal
    distribution whose mean and variance are estimated from the training
    data. Prediction picks the class with the largest log-posterior.

    Args:
        var_smoothing: Fraction of the largest per-feature variance of the
            training data that is added to every per-class variance. This
            keeps variances strictly positive so that a feature which is
            constant within a class does not produce a division by zero.

    Attributes set by ``fit``:
        classes: Sorted array of the distinct labels seen in ``y``.
        mean: Array of shape (n_classes, n_features) with per-class means.
        var: Array of shape (n_classes, n_features) with smoothed per-class
            variances.
        priors: Array of shape (n_classes,) with the class frequencies.
    """

    def __init__(self, var_smoothing=1e-9):
        self.var_smoothing = var_smoothing

    def fit(self, X, y):
        """Estimate per-class means, variances and priors.

        Args:
            X: Array-like of shape (n_samples, n_features).
            y: Array-like of shape (n_samples,) with the class labels.

        Raises:
            ValueError: If ``X`` is not 2-D, ``y`` is not 1-D, their lengths
                differ, or there are no training samples.
        """
        X = np.asarray(X, dtype=np.float64)
        y = np.asarray(y)
        if X.ndim != 2:
            raise ValueError("X must be a 2-D array of shape (n_samples, n_features)")
        if y.ndim != 1 or y.shape[0] != X.shape[0]:
            raise ValueError("y must be a 1-D array with one label per row of X")
        if X.shape[0] == 0:
            raise ValueError("cannot fit on an empty training set")

        n_samples, n_features = X.shape
        self.classes = np.unique(y)
        n_classes = len(self.classes)

        self.mean = np.zeros((n_classes, n_features), dtype=np.float64)
        self.var = np.zeros((n_classes, n_features), dtype=np.float64)
        self.priors = np.zeros(n_classes, dtype=np.float64)

        for index, _class in enumerate(self.classes):
            X_c = X[y == _class]
            self.mean[index, :] = X_c.mean(axis=0)
            self.var[index, :] = X_c.var(axis=0)
            self.priors[index] = X_c.shape[0] / float(n_samples)

        # Scale the smoothing term by the data's largest feature variance so
        # it is negligible for well-behaved features regardless of units.
        largest_var = X.var(axis=0).max() if n_features else 0.0
        epsilon = self.var_smoothing * largest_var
        if epsilon == 0.0:
            # Every feature is constant over the whole training set; fall
            # back to the raw smoothing value so variances stay positive.
            epsilon = self.var_smoothing
        self.var += epsilon

    def _probability_density(self, index, x):
        """Probability density function of a normal distribution.

        Kept for backward compatibility; prediction uses
        ``_log_probability_density`` because this value underflows to zero
        for points far away from the class mean.
        """
        mean = self.mean[index]
        var = self.var[index]
        numerator = np.exp(-((x - mean) ** 2) / (2 * var))
        denominator = np.sqrt(2 * np.pi * var)
        return numerator / denominator

    def _log_probability_density(self, index, x):
        """Return the per-feature log-density of ``x`` under class ``index``.

        Computed analytically rather than as ``log(pdf(x))`` so that tiny
        densities stay finite instead of becoming ``log(0) = -inf``.
        """
        mean = self.mean[index]
        var = self.var[index]
        return -0.5 * (np.log(2.0 * np.pi * var) + (x - mean) ** 2 / var)

    def _predict(self, x):
        """Return the label with the highest log-posterior for one sample."""
        log_priors = np.log(self.priors)
        log_posteriors = [
            log_priors[i] + np.sum(self._log_probability_density(i, x))
            for i in range(len(self.classes))
        ]
        return self.classes[np.argmax(log_posteriors)]

    def predict(self, X):
        """Predict a label for every row of ``X``.

        Args:
            X: Array-like of shape (n_samples, n_features).

        Returns:
            A list with one predicted label per row of ``X``.
        """
        X = np.asarray(X, dtype=np.float64)
        return [self._predict(x) for x in X]

# Synthetic two-class problem. The same seed is used for data generation and
# for the train/test split so that the run is reproducible.
RANDOM_STATE = 10

X, y = datasets.make_classification(
    n_samples=1000,
    n_features=10,
    n_classes=2,
    random_state=RANDOM_STATE,
)

# Hold out 30% of the samples for evaluation.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=RANDOM_STATE,
)

# Fit on the training split, then score on the held-out split.
classifier = NaiveBayes()
classifier.fit(X_train, y_train)
predictions = classifier.predict(X_test)

accuracy = accuracy_score(y_true=y_test, y_pred=predictions)
print("Accuracy:", accuracy)


Accuracy: 0.83
