In [1]:
import numpy as np

In [4]:
def fit(x_train,y_train):
    result={}
    class_values=set(y_train) #distinct values of class
    for current_class in class_values:
        #Now every class will have a dictionary i.e, different features
        result[current_class]={}
        #total data
        result["total_data"]=len(y_train)
        #We have to select that training data which have current class
        current_class_rows=(y_train==current_class)#We will get boolean values
        x_train_current=x_train[current_class_rows]
        y_train_current=y_train[current_class_rows]
        num_features=x_train.shape[1]#no of features
        #total count of training data which belongs to current class 
        result[current_class]["total_count"]=len(y_train_current)
        for j in range(1,num_features+1):
            #Now for each features,we will create new dictinary to store the different values of that feature
            result[current_class][j]={}
            all_possible_values=set(x_train[:,j-1]) #distinct values of a feature
            for current_value in all_possible_values:
                #Now every distinct values wiil have some count
                result[current_class][j][current_value]=(x_train_current[:,j-1]==current_value).sum()
    return result
    #it will give us a dictionary
                

In [5]:
def probability(dictionary,x,current_class):
    output=np.log(dictionary[current_class]["total_count"])-np.log(dictionary["total_data"]) 
    num_features=len(dictionary[current_class].keys())-1 #because "total count is an extra key"
    for j in range(1,num_features+1):
        xj=x[j-1]
        count_current_class_with_value_xj=dictionary[current_class][j][xj]
        count_current_class=dictionary[current_class]["total_count"]+len(dictionary[current_class][j].keys())
        current_xj_probability=np.log(count_current_class_with_value_xj)-np.log(count_current_class)
        output=output+current_xj_probability
    return output

In [6]:
def predictSinglePoint(dicionary,x):
    #For single point we have predict which class it belongs to
    classes=dictionary.keys()
    best_p=-1000 #best probability
    best_class=-1
    first_run=True  # in the first run we are absolutely going to update
    for current_class in classes:
        if(current_class=="total_data"):
            continue
        #what is the probability that this x belongs to this current class
        p_current_class=probability(dictionary,x,current_class)
        if (first_run or p_current_class>best_p):
            best_p=p_current_class
            best_class=current_class
        first_run=False 
    return best_class

In [7]:
def predict(dictionary,x_test):
    #we are going to predict the class for the testing data
    y_pred=[]
    for x in x_test:
        x_class=predictSinglePoint(dictionary,x) #predict the class
        y_pred.append(x_class)
    return y_pred

In [11]:
 def makeLabelled(column):
        second_limit=column.mean()
        first_limit=0.5*second_limit
        third_limit=1.5*second_limit
        for i in range(0,len(column)):
            if(column[i]<first_limit):
                column[i]=0
            elif(column[i]<second_limit):
                column[i]=1
            elif(column[i]<third_limit):
                column[i]=2
            else:
                column[i]=3
        return column

In [12]:
from sklearn import datasets
iris=datasets.load_iris()
x=iris.data
y=iris.target

In [13]:
for i in range(0,x.shape[-1]):
    x[:,i]=makeLabelled(x[:,i])

In [14]:
from sklearn import model_selection
x_train,x_test,y_train,y_test=model_selection.train_test_split(x,y,test_size=0.25,random_state=0)

In [15]:
dictionary=fit(x_train,y_train)

In [16]:
y_pred=predict(dictionary,x_test)

  current_xj_probability=np.log(count_current_class_with_value_xj)-np.log(count_current_class)


In [17]:
from sklearn.metrics import classification_report, confusion_matrix
print(classification_report(y_test,y_pred))
print(confusion_matrix(y_test,y_pred))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00        13
           1       0.94      1.00      0.97        16
           2       1.00      0.89      0.94         9

    accuracy                           0.97        38
   macro avg       0.98      0.96      0.97        38
weighted avg       0.98      0.97      0.97        38

[[13  0  0]
 [ 0 16  0]
 [ 0  1  8]]


In [18]:
from sklearn.naive_bayes import GaussianNB
clf=GaussianNB()
clf.fit(x_train,y_train)
y_pred=clf.predict(x_test)
print(classification_report(y_test,y_pred))
print(confusion_matrix(y_test,y_pred))

              precision    recall  f1-score   support

           0       1.00      0.85      0.92        13
           1       0.76      1.00      0.86        16
           2       1.00      0.67      0.80         9

    accuracy                           0.87        38
   macro avg       0.92      0.84      0.86        38
weighted avg       0.90      0.87      0.87        38

[[11  2  0]
 [ 0 16  0]
 [ 0  3  6]]


In [None]:
from sklearn.naive_bayes import MultinomialNB
clf=GaussNB()
clf.fit(x_train,y_train)
y_pred=clf.predict(x_test)
print(classification_report(y_test,y_pred))
print(confusion_matrix(y_test,y_pred))