### Bernoulli Naive-Bayes Model

In [1]:
import numpy as np
import pandas as pd

np.set_printoptions(precision=4, suppress=True)

In [2]:
np.random.seed(0)

data = np.random.rand(200,5)
data = np.round(data, decimals=0)

dataX = data[:, :-1]
datay = data[:, -1] * np.random.randint(1,4, size=(len(data), 1))

X_train = dataX[:150, :-1]
y_train = datay[:150, -1]

X_test = dataX[150:, :-1]
y_test = datay[150:, -1]

In [3]:
X_test.shape, y_test.shape

((50, 3), (50,))

Base Model

In [4]:
from sklearn.naive_bayes import BernoulliNB
gnb = BernoulliNB()
y_pred = gnb.fit(X_train, y_train).predict(X_test)

In [5]:
np.sum(y_pred == y_test) / len(y_test)

0.38

In [6]:
class Naive_Bayes:
    """Implements Bernoulli Naive-Bayes with Laplace Smoothing for multi-class target"""
    def compute_Px1yi(self, X, y, class_no):
        return (np.sum(X[y==class_no], axis=0) + 1) / (np.sum(y==class_no) + self.no_feat)
    
    def compute_Px0yi(self, X, y, class_no):
        return 1 - self.compute_Px1yi(X, y, class_no)
    
    def compute_Pyi(self, y, class_no):
        n = len(y)
        return (1/n) * np.sum(y==class_no)
    
    def fit(self, X, y):
        self.no_feat = X.shape[1]
        self.classes = (np.unique(y)).astype(int)
        self.no_classes = len(self.classes)
        
        # compute Px=i|y=i
        self.PhiXYd = dict()
        for class_id in range(self.no_classes):
            PhiX1yi = self.compute_Px1yi(X, y, self.classes[class_id])
            PhiX0yi = self.compute_Px0yi(X, y, self.classes[class_id])
            
            self.PhiXYd['PhiX1y'+str(class_id)] = PhiX1yi
            self.PhiXYd['PhiX0y'+str(class_id)] = PhiX0yi
        
        # compute Py=i
        self.Phiyi = dict()
        for class_id in range(self.no_classes):
            Phiyi = self.compute_Pyi(y, self.classes[class_id])
            
            self.Phiyi['Phiy' + str(class_id)] = Phiyi

    def predict_proba(self, X):
        n, d = X.shape

        assert(d == self.no_feat)
        
        # Initialize final Probablities of X belonging to the class
        self.Pyj = np.empty((n, self.no_classes)) 

        for class_id in range(self.no_classes):
            Pxyj_i = np.zeros(X.shape)
            for col in range(d):
                np.put(Pxyj_i[:, col], np.where(X[:, col] == 1), self.PhiXYd['PhiX1y'+str(class_id)][col])
                np.put(Pxyj_i[:, col], np.where(X[:, col] == 0), self.PhiXYd['PhiX0y'+str(class_id)][col])
            
            # Compute Px|y
            Pxyj = np.multiply.reduce(Pxyj_i, axis=1)
            
            # Compute Py|x = Px|y * Py
            Pyj = Pxyj * self.Phiyi['Phiy' + str(class_id)]
            
            self.Pyj[:, class_id] = Pyj
        return self.Pyj
    
    def predict(self, X):
        proba = self.predict_proba(X)
        class_indexes = np.argmax(proba, axis=1)
        
        # Replace index with class predictions
        vfunc = np.vectorize(lambda x: self.classes[x])
        class_predictions = vfunc(class_indexes)
        return class_predictions
    
    def generate_new_feature(self):
        "Source: https://www.quora.com/How-do-I-generate-data-using-a-Naive-Bayes-model"
        pass           
    

In [7]:
nb = Naive_Bayes()

In [8]:
nb.fit(X_train, y_train)

In [9]:
predictions = nb.predict(X_test)

In [10]:
predictions

array([1, 2, 2, 1, 2, 3, 3, 3, 2, 2, 1, 1, 1, 3, 3, 2, 2, 1, 3, 1, 1, 2,
       1, 2, 2, 3, 2, 1, 3, 1, 2, 2, 2, 2, 1, 1, 2, 2, 3, 1, 1, 3, 1, 3,
       2, 1, 1, 1, 3, 1])

In [11]:
np.sum(predictions == y_test) / len(y_test)

0.38

In [12]:
nb.predict_proba(X_test)[0:5]

array([[0.0589, 0.0298, 0.0434],
       [0.0368, 0.0454, 0.0322],
       [0.0368, 0.0454, 0.0322],
       [0.045 , 0.0324, 0.0264],
       [0.0368, 0.0454, 0.0322]])