**Naive Bayes**

First, a reference example using scikit-learn's `GaussianNB`, which serves as the baseline for the from-scratch implementation below.

In [1]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB

# Baseline: fit scikit-learn's Gaussian Naive Bayes on a 50/50 split of iris
# (fixed random_state so the split is reproducible) and count test errors.
X, y = load_iris(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.5, random_state=0
)

gnb = GaussianNB()
gnb.fit(X_train, y_train)
y_pred = gnb.predict(X_test)

# Element-wise comparison yields a boolean array; summing it counts the misses.
print(
    "Number of mislabeled points out of a total %d points : %d"
    % (len(X_test), (y_pred != y_test).sum())
)

Number of mislabeled points out of a total 75 points : 4


In [16]:
import numpy as np

class MyNB:
    """Gaussian Naive Bayes classifier implemented with NumPy.

    Each feature is modelled as an independent normal distribution per class.
    Prediction picks the class with the largest (log) posterior.

    Parameters
    ----------
    var_smoothing : float, default=1e-9
        Fraction of the largest feature variance in the training data that is
        added to every per-class variance. This keeps the model finite when a
        feature is constant within a class (the same stabiliser scikit-learn's
        ``GaussianNB`` uses).

    Attributes
    ----------
    classes : ndarray
        Sorted unique class labels seen in ``fit``.
    mu : dict
        Per-class feature means, keyed by class label.
    sigma : dict
        Per-class (smoothed) feature standard deviations, keyed by class label.
    py : dict
        Per-class prior probabilities, keyed by class label.
    """

    def __init__(self, var_smoothing=1e-9):
        self.var_smoothing = var_smoothing

    def fit(self, X, y):
        """Estimate per-class means, standard deviations and priors.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
        y : array-like of shape (n_samples,)

        Returns
        -------
        self : MyNB
            The fitted estimator, so calls can be chained.
        """
        X = np.asarray(X, dtype=float)
        y = np.asarray(y)

        self.classes = np.unique(y)
        self.mu = {}
        self.sigma = {}
        self.py = {}

        # Without this additive term a zero within-class variance makes the
        # density 0/0 = NaN, and argmax then silently selects the NaN column.
        epsilon = self.var_smoothing * np.var(X, axis=0).max()

        for y_class in self.classes:
            X_in_class = X[y == y_class]
            self.mu[y_class] = np.mean(X_in_class, axis=0)
            self.sigma[y_class] = np.sqrt(np.var(X_in_class, axis=0) + epsilon)
            self.py[y_class] = len(X_in_class) / len(X)

        return self

    def predict(self, X):
        """Return the maximum a posteriori class label for each row of ``X``.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)

        Returns
        -------
        ndarray of shape (n_samples,)
            Predicted labels, drawn from ``self.classes``.
        """
        X = np.asarray(X, dtype=float)

        log_posteriors = []
        for y_class in self.classes:
            mu = self.mu[y_class]
            var = self.sigma[y_class] ** 2
            # Work in log space: a raw product of many densities underflows to
            # 0.0 for every class on wide data, which makes argmax meaningless.
            log_likelihood = -0.5 * np.sum(
                np.log(2.0 * np.pi * var) + (X - mu) ** 2 / var, axis=1
            )
            log_posteriors.append(log_likelihood + np.log(self.py[y_class]))

        MAP = np.argmax(np.column_stack(log_posteriors), axis=1)
        return self.classes[MAP]

# Evaluate the hand-written classifier on the same train/test split as the
# scikit-learn baseline so the two error counts are directly comparable.
gnb = MyNB()
gnb.fit(X_train, y_train)
y_pred = gnb.predict(X_test)

# Element-wise comparison yields a boolean array; summing it counts the misses.
print(
    "Number of mislabeled points out of a total %d points : %d"
    % (len(X_test), (y_pred != y_test).sum())
)


Number of mislabeled points out of a total 75 points : 4
