In [1]:
import math
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from scipy.stats import norm
from sklearn.metrics import confusion_matrix

In [2]:
variable1 = np.random.randint(2, size = 1000)
variable2 = np.random.randint(2, size = 1000)
Class = np.random.randint(2, size = 1000)

In [3]:
df = pd.DataFrame({'X1': variable1, 'X2': variable2, 'y': Class})

In [4]:
X = df.values[:, :-1]
y = df.values[:, -1]

In [5]:
class NaiveBayes: 
    
    def __init__(self, X, y, gamma = 0):
        
        self.X = X
        self.y = y
        self.gamma = gamma     # initially laplace smoothening variable (gamma) set to zero 
        
    def trainTestSplit(self):
        #splitting the sample 
        self.X_train, self.X_test, self.y_train, self.y_test = train_test_split(self.X, self.y, 
                                                                                test_size = 0.4,
                                                                                random_state = 10)
        #Adding new test data which is not present in train data to check laplace smoothening
        X_test_new = np.array([[0, 2], [2, 0], [1, 2], [2, 1], [2, 2]])
        self.X_test = np.vstack([self.X_test, X_test_new])
        
        #Corresponding y_test values 
        y_test_new = np.array([0, 1, 0, 1, 0])
        self.y_test = np.hstack([y, y_test_new])
        
           
    def probability(self, X, prior, dist1, dist2):
        return prior * dist1 * dist2                 #Calculating the probability

    def fit(self):
        
        self.trainTestSplit()                      # Splitting the data 
        print("X_test:\n", self.X_test)

        self.X0_train = self.X_train[self.y_train == 0]     #Splitting the dataframe based on class 0
        self.X1_train = self.X_train[self.y_train == 1]     ##Splitting the dataframe based on class 1
        
        # X[0], class= 0
        self.X0_y0_train = self.X0_train[:, 0]
        # X[1], class 0
        self.X1_y0_train = self.X0_train[:, 1]
        # X[0], class= 1
        self.X0_y1_train = self.X1_train[:, 0]
        # X[1], class= 1
        self.X1_y1_train = self.X1_train[:, 1]
        
        # Calculating the likelihoods
        
        # for class 0
        self.X0_0_y0_train = self.X0_y0_train[self.X0_y0_train == 0]
        self.X0_1_y0_train = self.X0_y0_train[self.X0_y0_train == 1]
        self.X1_0_y0_train = self.X1_y0_train[self.X1_y0_train == 0]
        self.X1_1_y0_train = self.X1_y0_train[self.X1_y0_train == 1]
        
        #  for class 1
        self.X0_0_y1_train = self.X0_y1_train[self.X0_y1_train == 0]
        self.X0_1_y1_train = self.X0_y1_train[self.X0_y1_train == 1]
        self.X1_0_y1_train = self.X1_y1_train[self.X1_y1_train == 0]
        self.X1_1_y1_train = self.X1_y1_train[self.X1_y1_train == 1]
              
        # calculating priors on y
        self.prior0 = len(self.X0_train)/len(self.X_train)    #for class 0 
        self.prior1 = len(self.X1_train)/len(self.X_train)    #for class 1 
        
        # m is the number of features in dataset
        m0 = len(np.unique(self.X_test[:, 0]))      # m value for class 0 
        m1 = len(np.unique(self.X_test[:, 1]))      # m value for class 1 
        print("m0:", m0)
        print("m1:", m1)

               
        # calculating likelihood functions for x0 and x1 - class 0
        self.likelihood_X0_0_y0 = (len(self.X0_0_y0_train)+self.gamma)/(len(self.X0_train)+(m0 * self.gamma)) 
        self.likelihood_X0_1_y0 = (len(self.X0_1_y0_train)+self.gamma)/(len(self.X0_train)+(m0* self.gamma))
        self.likelihood_X1_0_y0 = (len(self.X1_0_y0_train)+self.gamma)/(len(self.X0_train)+(m1 * self.gamma))
        self.likelihood_X1_1_y0 = (len(self.X1_1_y0_train)+self.gamma)/(len(self.X0_train)+(m1 * self.gamma))
        self.likelihood_X0_new_y0 = (0 + self.gamma)/(len(self.X0_train)+(m0* self.gamma))
        self.likelihood_X1_new_y0 = (0 + self.gamma)/(len(self.X0_train)+(m1* self.gamma))
        
         # calculating likelihood functions for x0 and x1 - class 1       
        self.likelihood_X0_0_y1 = (len(self.X0_0_y1_train)+self.gamma)/(len(self.X1_train)+(m0 * self.gamma))
        self.likelihood_X0_1_y1 = (len(self.X0_1_y1_train)+self.gamma)/(len(self.X1_train)+(m0 * self.gamma))
        self.likelihood_X1_0_y1 = (len(self.X1_0_y1_train)+self.gamma)/(len(self.X1_train)+(m1 * self.gamma))
        self.likelihood_X1_1_y1 = (len(self.X1_1_y1_train)+self.gamma)/(len(self.X1_train)+(m1 * self.gamma))
        self.likelihood_X0_new_y1 = (0 + self.gamma)/(len(self.X1_train)+(m0* self.gamma))
        self.likelihood_X1_new_y1 = (0 + self.gamma)/(len(self.X1_train)+(m1* self.gamma))
        
    def predict(self):
        
        y_hat = []
        
        for sample, target in zip(self.X_test, self.y_test):
            
            # if sample belongs to class 0

            if sample[0] == 1:
                self.dist_X0y0 = self.likelihood_X0_1_y0 #X0=1|y=0
            elif sample[0] == 0:
                self.dist_X0y0 = self.likelihood_X0_0_y0 #X0=0|y=0
            else:
                self.dist_X0y0 = self.likelihood_X0_new_y0   #X0=new/y=0
                
            if sample[1] == 1:
                self.dist_X1y0 = self.likelihood_X1_1_y0 #X1=1|y=0
            elif sample[1] == 0:
                self.dist_X1y0 = self.likelihood_X1_0_y0 #X1=0|y=0
            else: 
                self.dist_X1y0 = self.likelihood_X1_new_y0  #X1=new/y=0
                
            # if sample belongs to class 1
            
            if sample[0] == 1:
                self.dist_X0y1 = self.likelihood_X0_1_y1 #X0=1|y=1
            elif sample[0] == 0:
                self.dist_X0y1 = self.likelihood_X0_0_y1 #X0=0|y=1
            else: 
                self.dist_X0y1 = self.likelihood_X0_new_y1 #X0=new/y=1
                 
            if sample[1] == 1:
                self.dist_X1y1 = self.likelihood_X1_1_y1 #X1=1|y=1
            elif sample[1] == 0:
                self.dist_X1y1 = self.likelihood_X1_0_y1 #X1=0|y=1  
            else:
                self.dist_X1y1 = self.likelihood_X1_new_y1   #X1=new/y=1
                           
            py0 = self.probability(sample, self.prior0, self.dist_X0y0, self.dist_X1y0)  # Probability belong to class 0
            py1 = self.probability(sample, self.prior1, self.dist_X0y1, self.dist_X1y1)  # probability belong to class 1 
            
            
            print('P(y=0| %s) = %.3f' % (sample, py0*100))
            print('P(y=1| %s) = %.3f' % (sample, py1*100))
            print('Model predicted class {} and the truth was: {} \n'.format(np.argmax([py0*100, py1*100]), target)) 

# Without laplace smoothening 

In [6]:
nb = NaiveBayes(X, y, gamma = 0)

In [7]:
nb.fit()

X_test:
 [[0 1]
 [1 1]
 [1 1]
 [0 0]
 [0 1]
 [1 1]
 [0 0]
 [1 0]
 [1 1]
 [0 0]
 [1 1]
 [0 0]
 [0 0]
 [0 0]
 [0 0]
 [0 1]
 [0 1]
 [0 1]
 [0 1]
 [1 1]
 [1 1]
 [1 0]
 [0 0]
 [1 1]
 [1 1]
 [0 0]
 [0 1]
 [0 0]
 [1 1]
 [0 0]
 [1 1]
 [1 0]
 [0 1]
 [0 0]
 [1 1]
 [1 0]
 [0 1]
 [1 1]
 [1 0]
 [0 1]
 [0 1]
 [1 0]
 [1 0]
 [0 1]
 [0 0]
 [0 0]
 [0 0]
 [1 1]
 [1 0]
 [1 1]
 [1 0]
 [1 1]
 [1 1]
 [1 1]
 [0 1]
 [0 0]
 [0 0]
 [1 0]
 [1 0]
 [1 1]
 [0 1]
 [1 1]
 [0 0]
 [1 0]
 [1 1]
 [0 0]
 [0 0]
 [0 0]
 [1 0]
 [0 1]
 [1 1]
 [0 0]
 [0 0]
 [1 0]
 [0 1]
 [1 0]
 [1 1]
 [1 1]
 [0 0]
 [0 0]
 [1 1]
 [0 1]
 [0 1]
 [1 0]
 [1 0]
 [0 1]
 [1 1]
 [1 0]
 [0 1]
 [0 1]
 [0 0]
 [1 1]
 [1 1]
 [0 1]
 [0 0]
 [1 0]
 [0 0]
 [1 1]
 [1 0]
 [1 0]
 [0 0]
 [1 0]
 [0 0]
 [1 0]
 [1 0]
 [1 0]
 [0 0]
 [1 1]
 [0 0]
 [1 1]
 [1 1]
 [1 1]
 [1 1]
 [0 1]
 [1 0]
 [0 1]
 [0 0]
 [1 0]
 [0 0]
 [1 1]
 [0 0]
 [0 1]
 [0 0]
 [1 1]
 [0 1]
 [0 0]
 [0 0]
 [1 0]
 [0 1]
 [1 0]
 [0 1]
 [1 0]
 [0 0]
 [1 0]
 [0 0]
 [0 1]
 [0 0]
 [0 1]
 [1 1]
 [0 1]
 [0 0]
 [1 

In [8]:
nb.predict()

P(y=0| [0 1]) = 13.083
P(y=1| [0 1]) = 12.250
Model predicted class 0 and the truth was: 1 

P(y=0| [1 1]) = 12.083
P(y=1| [1 1]) = 12.583
Model predicted class 1 and the truth was: 1 

P(y=0| [1 1]) = 12.083
P(y=1| [1 1]) = 12.583
Model predicted class 1 and the truth was: 0 

P(y=0| [0 0]) = 13.083
P(y=1| [0 0]) = 12.250
Model predicted class 0 and the truth was: 0 

P(y=0| [0 1]) = 13.083
P(y=1| [0 1]) = 12.250
Model predicted class 0 and the truth was: 1 

P(y=0| [1 1]) = 12.083
P(y=1| [1 1]) = 12.583
Model predicted class 1 and the truth was: 0 

P(y=0| [0 0]) = 13.083
P(y=1| [0 0]) = 12.250
Model predicted class 0 and the truth was: 1 

P(y=0| [1 0]) = 12.083
P(y=1| [1 0]) = 12.583
Model predicted class 1 and the truth was: 1 

P(y=0| [1 1]) = 12.083
P(y=1| [1 1]) = 12.583
Model predicted class 1 and the truth was: 0 

P(y=0| [0 0]) = 13.083
P(y=1| [0 0]) = 12.250
Model predicted class 0 and the truth was: 1 

P(y=0| [1 1]) = 12.083
P(y=1| [1 1]) = 12.583
Model predicted class 1 

P(y=0| [0 1]) = 13.083
P(y=1| [0 1]) = 12.250
Model predicted class 0 and the truth was: 0 

P(y=0| [0 0]) = 13.083
P(y=1| [0 0]) = 12.250
Model predicted class 0 and the truth was: 1 

P(y=0| [1 1]) = 12.083
P(y=1| [1 1]) = 12.583
Model predicted class 1 and the truth was: 1 

P(y=0| [0 1]) = 13.083
P(y=1| [0 1]) = 12.250
Model predicted class 0 and the truth was: 0 

P(y=0| [0 0]) = 13.083
P(y=1| [0 0]) = 12.250
Model predicted class 0 and the truth was: 1 

P(y=0| [1 0]) = 12.083
P(y=1| [1 0]) = 12.583
Model predicted class 1 and the truth was: 0 

P(y=0| [0 1]) = 13.083
P(y=1| [0 1]) = 12.250
Model predicted class 0 and the truth was: 1 

P(y=0| [0 1]) = 13.083
P(y=1| [0 1]) = 12.250
Model predicted class 0 and the truth was: 1 

P(y=0| [1 0]) = 12.083
P(y=1| [1 0]) = 12.583
Model predicted class 1 and the truth was: 1 

P(y=0| [1 1]) = 12.083
P(y=1| [1 1]) = 12.583
Model predicted class 1 and the truth was: 0 

P(y=0| [0 1]) = 13.083
P(y=1| [0 1]) = 12.250
Model predicted class 0 

As we can see, without laplace smoothening new record [2,0], [0,2], [1, 2], [2, 1], [2, 2] have probability 0. 

# With Laplace Smoothening

In [9]:
nb = NaiveBayes(X, y, gamma=5)

In [10]:
nb.fit()

X_test:
 [[0 1]
 [1 1]
 [1 1]
 [0 0]
 [0 1]
 [1 1]
 [0 0]
 [1 0]
 [1 1]
 [0 0]
 [1 1]
 [0 0]
 [0 0]
 [0 0]
 [0 0]
 [0 1]
 [0 1]
 [0 1]
 [0 1]
 [1 1]
 [1 1]
 [1 0]
 [0 0]
 [1 1]
 [1 1]
 [0 0]
 [0 1]
 [0 0]
 [1 1]
 [0 0]
 [1 1]
 [1 0]
 [0 1]
 [0 0]
 [1 1]
 [1 0]
 [0 1]
 [1 1]
 [1 0]
 [0 1]
 [0 1]
 [1 0]
 [1 0]
 [0 1]
 [0 0]
 [0 0]
 [0 0]
 [1 1]
 [1 0]
 [1 1]
 [1 0]
 [1 1]
 [1 1]
 [1 1]
 [0 1]
 [0 0]
 [0 0]
 [1 0]
 [1 0]
 [1 1]
 [0 1]
 [1 1]
 [0 0]
 [1 0]
 [1 1]
 [0 0]
 [0 0]
 [0 0]
 [1 0]
 [0 1]
 [1 1]
 [0 0]
 [0 0]
 [1 0]
 [0 1]
 [1 0]
 [1 1]
 [1 1]
 [0 0]
 [0 0]
 [1 1]
 [0 1]
 [0 1]
 [1 0]
 [1 0]
 [0 1]
 [1 1]
 [1 0]
 [0 1]
 [0 1]
 [0 0]
 [1 1]
 [1 1]
 [0 1]
 [0 0]
 [1 0]
 [0 0]
 [1 1]
 [1 0]
 [1 0]
 [0 0]
 [1 0]
 [0 0]
 [1 0]
 [1 0]
 [1 0]
 [0 0]
 [1 1]
 [0 0]
 [1 1]
 [1 1]
 [1 1]
 [1 1]
 [0 1]
 [1 0]
 [0 1]
 [0 0]
 [1 0]
 [0 0]
 [1 1]
 [0 0]
 [0 1]
 [0 0]
 [1 1]
 [0 1]
 [0 0]
 [0 0]
 [1 0]
 [0 1]
 [1 0]
 [0 1]
 [1 0]
 [0 0]
 [1 0]
 [0 0]
 [0 1]
 [0 0]
 [0 1]
 [1 1]
 [0 1]
 [0 0]
 [1 

In [11]:
nb.predict()

P(y=0| [0 1]) = 12.658
P(y=1| [0 1]) = 11.867
Model predicted class 0 and the truth was: 1 

P(y=0| [1 1]) = 11.721
P(y=1| [1 1]) = 12.179
Model predicted class 1 and the truth was: 1 

P(y=0| [1 1]) = 11.721
P(y=1| [1 1]) = 12.179
Model predicted class 1 and the truth was: 0 

P(y=0| [0 0]) = 12.658
P(y=1| [0 0]) = 11.867
Model predicted class 0 and the truth was: 0 

P(y=0| [0 1]) = 12.658
P(y=1| [0 1]) = 11.867
Model predicted class 0 and the truth was: 1 

P(y=0| [1 1]) = 11.721
P(y=1| [1 1]) = 12.179
Model predicted class 1 and the truth was: 0 

P(y=0| [0 0]) = 12.658
P(y=1| [0 0]) = 11.867
Model predicted class 0 and the truth was: 1 

P(y=0| [1 0]) = 11.721
P(y=1| [1 0]) = 12.179
Model predicted class 1 and the truth was: 1 

P(y=0| [1 1]) = 11.721
P(y=1| [1 1]) = 12.179
Model predicted class 1 and the truth was: 0 

P(y=0| [0 0]) = 12.658
P(y=1| [0 0]) = 11.867
Model predicted class 0 and the truth was: 1 

P(y=0| [1 1]) = 11.721
P(y=1| [1 1]) = 12.179
Model predicted class 1 

P(y=0| [0 0]) = 12.658
P(y=1| [0 0]) = 11.867
Model predicted class 0 and the truth was: 0 

P(y=0| [0 1]) = 12.658
P(y=1| [0 1]) = 11.867
Model predicted class 0 and the truth was: 0 

P(y=0| [1 0]) = 11.721
P(y=1| [1 0]) = 12.179
Model predicted class 1 and the truth was: 0 

P(y=0| [0 1]) = 12.658
P(y=1| [0 1]) = 11.867
Model predicted class 0 and the truth was: 1 

P(y=0| [1 0]) = 11.721
P(y=1| [1 0]) = 12.179
Model predicted class 1 and the truth was: 1 

P(y=0| [0 1]) = 12.658
P(y=1| [0 1]) = 11.867
Model predicted class 0 and the truth was: 1 

P(y=0| [1 1]) = 11.721
P(y=1| [1 1]) = 12.179
Model predicted class 1 and the truth was: 0 

P(y=0| [1 1]) = 11.721
P(y=1| [1 1]) = 12.179
Model predicted class 1 and the truth was: 1 

P(y=0| [1 1]) = 11.721
P(y=1| [1 1]) = 12.179
Model predicted class 1 and the truth was: 0 

P(y=0| [0 0]) = 12.658
P(y=1| [0 0]) = 11.867
Model predicted class 0 and the truth was: 1 

P(y=0| [0 0]) = 12.658
P(y=1| [0 0]) = 11.867
Model predicted class 0 

As we can see, without laplace smoothening new record [2,0], [0,2], [1, 2], [2, 1], [2, 2] have nonzero probability. 