In [None]:
import numpy as np
from sklearn import datasets
from sklearn import model_selection
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.naive_bayes import GaussianNB

In [None]:
class NaiveBayes:
    """Naive Bayes classifier for discrete features with add-one smoothing.

    After ``fit`` the learned counts are stored in ``self.prob``:

    * ``prob["total_count"]`` -- number of training samples;
    * ``prob[c]["count"]``    -- number of training samples of class ``c``;
    * ``prob[c][j][v]``       -- number of class-``c`` samples whose ``j``-th
      feature (1-based index) equals ``v``.
    """

    def __init__(self):
        # Not read anywhere in this class; kept so the instance layout is unchanged.
        self.__cont_val_features = []
        self.prob = {}

    def fit(self, X_train, Y_train):
        """Tabulate per-class feature-value counts from the training data.

        Parameters
        ----------
        X_train : array-like of shape (n_samples, n_features)
            Discrete feature values.
        Y_train : array-like of shape (n_samples,)
            Class label of every sample.

        The value table of each feature is built from ``X_train`` only, so the
        model no longer depends on (or leaks information from) a notebook-level
        variable.
        """
        X_train = np.asarray(X_train)
        Y_train = np.asarray(Y_train)

        self.prob = {"total_count": len(Y_train)}
        num_features = X_train.shape[1]
        # Distinct values of each feature; identical for every class, so
        # compute them once instead of once per class.
        feature_values = [np.unique(X_train[:, j]) for j in range(num_features)]

        # np.unique yields the classes in sorted order, which makes the
        # tie-breaking in prediction deterministic.
        for cur_class in np.unique(Y_train):
            X_train_current = X_train[Y_train == cur_class]
            class_counts = {"count": len(X_train_current)}
            for j in range(1, num_features + 1):
                column = X_train_current[:, j - 1]
                class_counts[j] = {
                    value: int((column == value).sum())
                    for value in feature_values[j - 1]
                }
            self.prob[cur_class] = class_counts

    def __predict_one(self, X):
        """Return the class with the highest smoothed log-posterior for one sample."""
        best_p = None
        best_class = None
        for cur_class, class_counts in self.prob.items():
            # The "total_count" entry holds a plain number, not a class table.
            if not isinstance(class_counts, dict):
                continue
            class_count = class_counts["count"]
            # Probabilities can be tiny and their product underflows, so sum
            # logarithms instead of multiplying the probabilities.
            cur_prob = np.log(class_count) - np.log(self.prob["total_count"])

            for j, value in enumerate(X, start=1):
                value_counts = class_counts[j]
                # A value that was not tabulated during fit counts as zero
                # occurrences; add-one smoothing keeps its probability non-zero.
                seen = value_counts.get(value, 0)
                cur_prob += np.log(seen + 1) - np.log(class_count + len(value_counts))

            # Strict ">" keeps the first class encountered when scores tie.
            if best_p is None or cur_prob > best_p:
                best_p = cur_prob
                best_class = cur_class

        return best_class

    def predict(self, X_test):
        """Predict a class label for every row of ``X_test``; returns a numpy array."""
        return np.array([self.__predict_one(x) for x in X_test])

    def score(self, Y_truth, Y_pred):
        """Return the accuracy: the fraction of positions where ``Y_pred`` equals ``Y_truth``.

        Predictions are compared position by position. Comparing only how often
        each label occurs would rate a completely wrong permutation of the
        labels as perfect.

        Raises
        ------
        ValueError
            If the two inputs do not have the same shape.
        ZeroDivisionError
            If ``Y_truth`` is empty.
        """
        Y_truth = np.asarray(Y_truth)
        Y_pred = np.asarray(Y_pred)
        if Y_truth.shape != Y_pred.shape:
            raise ValueError("Y_truth and Y_pred must have the same shape")
        accurate = int((Y_truth == Y_pred).sum())
        return accurate / len(Y_truth)

In [None]:
def makeLabelled(column):
    """Replace every value of ``column`` in place by a bin label 0-3.

    The cut points are 0.5x, 1x and 1.5x the column mean: values below the
    first cut become 0, below the second 1, below the third 2, and everything
    else 3. The same (mutated) ``column`` object is returned.
    """
    mean_value = column.mean()
    low_cut, mid_cut, high_cut = 0.5 * mean_value, mean_value, 1.5 * mean_value
    # All masks are evaluated on the original values before anything is
    # overwritten; np.select takes the first matching condition, mirroring an
    # if/elif chain.
    below = [column < low_cut, column < mid_cut, column < high_cut]
    column[:] = np.select(below, [0, 1, 2], default=3)
    return column


In [None]:
# Load the Iris dataset: X is the feature matrix, Y the class labels.
iris = datasets.load_iris()
X, Y = iris.data, iris.target

In [None]:
# Discretise every feature column into the labels 0-3. Each column is copied
# from the untouched iris.data before binning, so re-running this cell always
# produces the same X instead of re-binning values that were already labelled.
X = np.column_stack(
    [makeLabelled(iris.data[:, i].copy()) for i in range(iris.data.shape[-1])]
)

In [None]:
# Hold out 25% of the samples for testing; the fixed random_state keeps the
# split the same on every run.
X_train, X_test, Y_train, Y_test = model_selection.train_test_split(
    X,
    Y,
    test_size=0.25,
    random_state=0,
)

In [None]:
# Train the hand-written Naive Bayes classifier on the training split.
clf1= NaiveBayes()
clf1.fit(X_train, Y_train)

In [None]:
# Inspect the learned count tables
# (nested dict: class -> 1-based feature index -> feature value -> count).
clf1.prob

In [None]:
# Predict on both splits first, then report the score of each.
Y_test_pred = clf1.predict(X_test)
Y_train_pred = clf1.predict(X_train)

print("Test Score: ", clf1.score(Y_test, Y_test_pred))
print("Train Score: ", clf1.score(Y_train, Y_train_pred))

In [None]:
# Classification report and confusion matrix of the hand-written classifier
# on the held-out split. classification_report / confusion_matrix are
# imported in the notebook's top import cell.
print(classification_report(Y_test, Y_test_pred))
print(confusion_matrix(Y_test, Y_test_pred))

# Sklearn Naive Bayes

In [None]:
# Fit scikit-learn's GaussianNB on the same training split for comparison.
clf2 = GaussianNB()
clf2.fit(X_train, Y_train)

In [None]:
# Predict on both splits first, then report the score of each.
Y_test_pred_skl = clf2.predict(X_test)
Y_train_pred_skl = clf2.predict(X_train)

print("Test Score: ", clf2.score(X_test, Y_test))
print("Train Score: ", clf2.score(X_train, Y_train))

In [None]:
# Classification report and confusion matrix of the scikit-learn model on the
# held-out split.
print(classification_report(Y_test,Y_test_pred_skl))
print(confusion_matrix(Y_test,Y_test_pred_skl))