# Gaussian Naive Bayes Classifier

In [15]:
import pandas as pd
import numpy as np
from sklearn.naive_bayes import GaussianNB
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

In [18]:
# Load the iris feature matrix and label vector, then hold out 20% of the
# samples for evaluation. The fixed seed keeps the split reproducible.
x, y = load_iris(return_X_y=True)
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=0
)

In [21]:
# Reference result: fit scikit-learn's GaussianNB on the training split and
# measure its accuracy on the held-out samples.
classifier1 = GaussianNB().fit(x_train, y_train)
y_predicted = classifier1.predict(x_test)
accuracy = accuracy_score(y_test, y_predicted)
print("Accuracy:", accuracy)

Accuracy: 0.9666666666666667


# Gaussian Naive Bayes from Scratch with Python

In [36]:
class GaussianNBClassifier:
    """Gaussian Naive Bayes classifier built on NumPy.

    Every feature is modelled as an independent normal distribution per
    class. ``fit`` estimates the class priors and the per-class feature means
    and variances; ``predict`` returns, for each sample, the class with the
    highest log-posterior.
    """

    # Added to every variance so that a feature which is constant within a
    # class does not cause a division by zero.
    _VARIANCE_FLOOR = 1e-9

    def fit(self, x, y):
        """Estimate priors, means and variances from the training data.

        Args:
            x: Array-like of shape (num_samples, num_features).
            y: Array-like of shape (num_samples,) holding the class labels.
        """
        x = np.asarray(x, dtype=float)
        y = np.asarray(y)

        self.classes = np.unique(y)
        self.num_classes = len(self.classes)
        self.num_features = x.shape[1]

        # p(y)
        self.priors = np.zeros(self.num_classes)

        # p(x|y)
        self.means = np.zeros((self.num_classes, self.num_features))
        self.variances = np.zeros((self.num_classes, self.num_features))
        for i, c in enumerate(self.classes):
            x_c = x[y == c]
            self.priors[i] = x_c.shape[0] / x.shape[0]
            self.means[i] = x_c.mean(axis=0)
            self.variances[i] = x_c.var(axis=0)

    def predict(self, x):
        """Return the most probable class label for every row of ``x``.

        Args:
            x: Array-like of shape (num_samples, num_features).

        Returns:
            Array of shape (num_samples,) with entries taken from
            ``self.classes``.
        """
        x = np.asarray(x, dtype=float)
        log_posteriors = np.zeros((x.shape[0], self.num_classes))

        for i in range(self.num_classes):
            log_prior = np.log(self.priors[i])
            # Sum the per-feature log-densities directly. Taking the log of
            # the density itself underflows to -inf for samples that lie far
            # from a class mean, which makes every class tie.
            log_likelihood = self._log_gaussian_pdf(
                x, self.means[i], self.variances[i]
            ).sum(axis=1)
            log_posteriors[:, i] = log_prior + log_likelihood

        return self.classes[np.argmax(log_posteriors, axis=1)]

    # The method used to be spelled ``predic``; keep that name working.
    predic = predict

    def _log_gaussian_pdf(self, x, mean, variance):
        """Return the element-wise log of the normal density."""
        variance = variance + self._VARIANCE_FLOOR
        return -0.5 * np.log(2 * np.pi * variance) - (x - mean) ** 2 / (2 * variance)

    def _gaussian_pdf(self, x, mean, variance):
        """Return the element-wise normal density (variance is floored)."""
        return np.exp(self._log_gaussian_pdf(x, mean, variance))

In [38]:
class GaussianNBClassifier:
    """Gaussian Naive Bayes classifier implemented from scratch with NumPy.

    Each feature is treated as conditionally independent given the class and
    normally distributed. ``fit`` stores the class priors together with the
    per-class feature means and variances; ``predict`` picks the class with
    the largest log-posterior for each sample.

    Args:
        eps: Constant added to every variance before it is used, so that a
            feature which is constant within a class does not cause a
            division by zero.
    """

    def __init__(self, eps=1e-9):
        self.eps = eps

    def fit(self, x, y):
        """Estimate priors, means and variances from the training data.

        Args:
            x: Array-like of shape (num_samples, num_features).
            y: Array-like of shape (num_samples,) holding the class labels.
        """
        # Accept lists as well as arrays; boolean masking below needs ndarrays.
        x = np.asarray(x, dtype=float)
        y = np.asarray(y)

        self.classes = np.unique(y)
        self.num_classes = len(self.classes)
        self.num_features = x.shape[1]

        # p(y)
        self.priors = np.zeros(self.num_classes)

        # p(x|y)
        self.means = np.zeros((self.num_classes, self.num_features))
        self.variances = np.zeros((self.num_classes, self.num_features))
        for i, c in enumerate(self.classes):
            x_c = x[y == c]
            self.priors[i] = x_c.shape[0] / x.shape[0]
            self.means[i] = x_c.mean(axis=0)
            self.variances[i] = x_c.var(axis=0)

    def predict(self, x):
        """Return the most probable class label for every row of ``x``.

        Args:
            x: Array-like of shape (num_samples, num_features).

        Returns:
            Array of shape (num_samples,) with entries taken from
            ``self.classes``.
        """
        x = np.asarray(x, dtype=float)
        num_samples = x.shape[0]
        posteriors = np.zeros((num_samples, self.num_classes))

        for i in range(self.num_classes):
            prior = np.log(self.priors[i])
            # Work with log-densities directly: log(pdf) underflows to -inf
            # for samples far from a class mean, which would make all classes
            # tie and argmax fall back to the first one.
            likelihood = np.sum(
                self._log_gaussian_pdf(x, self.means[i], self.variances[i]),
                axis=1,
            )
            posteriors[:, i] = prior + likelihood

        return self.classes[np.argmax(posteriors, axis=1)]

    def _log_gaussian_pdf(self, x, mean, variance):
        """Return the element-wise log of the normal density."""
        smoothed = variance + self.eps
        return -0.5 * np.log(2 * np.pi * smoothed) - (x - mean) ** 2 / (2 * smoothed)

    def _gaussian_pdf(self, x, mean, variance):
        """Return the element-wise normal density with smoothed variance."""
        return np.exp(self._log_gaussian_pdf(x, mean, variance))


In [39]:
# Train the from-scratch classifier on the same training split and report its
# accuracy on the same held-out samples used for the scikit-learn model.
classifier2 = GaussianNBClassifier()
classifier2.fit(x_train, y_train)
y_predicted2 = classifier2.predict(x_test)
accuracy2 = accuracy_score(y_test, y_predicted2)
print("Accuracy:", accuracy2)

Accuracy: 0.9666666666666667
