# Own implementation of Multinomial Naive Bayes

In [1]:
import numpy as np
import pandas as pd
import math
from sklearn.naive_bayes import MultinomialNB

# Loading the DataFrames

In [2]:
# Read the training and test tables from CSV files in the working directory.
train_df, test_df = (
    pd.read_csv(csv_path)
    for csv_path in ("TrainDataFrame.csv", "TestDataFrame.csv")
)

In [3]:
# Number of columns in the training table minus two: the cells below slice
# the features as columns 1..-1, i.e. everything but the first and last column.
v = len(train_df.columns) - 2

# Implementation

In [4]:
class MultiNB:
    """Multinomial Naive Bayes classifier with add-one (Laplace) smoothing.

    For every class label the model stores the summed value of each feature
    column, plus the sum over all features under the key ``"TOTAL"``.
    Prediction picks the class with the smallest negative log-likelihood.
    No class-prior term is added, so all classes are treated as equally
    likely a priori.

    Note: because the per-class total is stored next to the feature counts
    under the key ``"TOTAL"``, a feature column with that exact name would
    be overwritten by it.
    """

    def __init__(self):
        # label -> {feature name: summed count, ..., "TOTAL": sum of counts}.
        # Stays None until fit() has been called.
        self.dd = None

    def fit(self, train_df):
        """Accumulate per-class feature counts from a labelled frame.

        Args:
            train_df: DataFrame whose ``"Output"`` column holds the class
                label; every other column is treated as a feature count.
        """
        labels = train_df["Output"]
        # Drop the label by name so the column's position does not matter.
        features = train_df.drop(columns="Output")

        self.dd = {}
        # dict.fromkeys de-duplicates while keeping first-appearance order,
        # which makes tie-breaking in helper() reproducible across runs.
        for label in dict.fromkeys(labels):
            counts = features.loc[labels == label].sum()
            self.dd[label] = counts.to_dict()
            self.dd[label]["TOTAL"] = counts.sum()

    def probability(self, data, category_d, total):
        """Return the negative log-likelihood of one sample under one class.

        Args:
            data: Mapping feature name -> count for the sample; must contain
                every feature key present in ``category_d``.
            category_d: Per-class counts as stored in ``self.dd[label]``.
            total: Smoothing denominator (class total + number of features).

        Returns:
            ``-sum(data[f] * log((category_d[f] + 1) / total))`` over all
            features ``f``; smaller means more likely.
        """
        neg_log_likelihood = 0
        for feature, count in category_d.items():
            if feature == "TOTAL":
                continue
            # count * log(p) rather than log(p ** count): the power underflows
            # to 0.0 for large counts, which would turn the score into inf.
            neg_log_likelihood -= data[feature] * np.log((count + 1) / total)

        return neg_log_likelihood

    def helper(self, data):
        """Return the label with the smallest negative log-likelihood.

        Args:
            data: Mapping feature name -> count for a single sample.

        Returns:
            The best-scoring class label; on ties the label seen first during
            ``fit`` wins.  Returns ``""`` when no class has a finite score
            (e.g. the model was fitted on an empty frame).

        Raises:
            AttributeError: If the model has not been fitted.
        """
        if self.dd is None:
            raise AttributeError("MultiNB is not fitted; call fit() first")

        best_label = ""
        best_score = math.inf

        for label, counts in self.dd.items():
            # Add-one smoothing adds 1 per feature to the denominator; the
            # "TOTAL" entry is bookkeeping, not a feature, hence the -1.
            n_features = len(counts) - 1
            score = self.probability(data, counts, counts["TOTAL"] + n_features)
            if score < best_score:
                best_score = score
                best_label = label

        return best_label

    def predict(self, x_t):
        """Predict a label for every row of ``x_t``.

        Args:
            x_t: DataFrame containing (at least) the feature columns seen
                during ``fit``.

        Returns:
            List of predicted labels, one per row, in row order.
        """
        return [self.helper(row.to_dict()) for _, row in x_t.iterrows()]

    def score(self, x_test, y_test):
        """Return the fraction of rows in ``x_test`` predicted correctly.

        Args:
            x_test: DataFrame of feature columns.
            y_test: True labels (Series, list or array), compared by position.

        Raises:
            ValueError: If ``y_test`` and the predictions differ in length.
        """
        # Object arrays give plain element-wise equality for any label type
        # and work whether y_test is a Series, a list or an ndarray.
        y_pred = np.asarray(self.predict(x_test), dtype=object)
        y_true = np.asarray(y_test, dtype=object)
        if y_pred.shape != y_true.shape:
            raise ValueError(
                f"Lengths must match: {len(y_pred)} predictions "
                f"vs {len(y_true)} labels"
            )

        return np.mean(y_pred == y_true)

# Comparison

In [None]:
# Features are every column except the first and the last; the last column
# holds the labels.
x, y = train_df.iloc[:, 1:-1], train_df.iloc[:, -1]

# MultiNB.fit takes a single frame that carries the labels in an "Output"
# column, so attach them to a separate copy of the features.
xa = x.assign(Output=y)

mnb1 = MultiNB()
mnb1.fit(xa)

mnb2 = MultinomialNB()
mnb2.fit(x, y)

# Same column layout for the held-out data.
x_t, y_t = test_df.iloc[:, 1:-1], test_df.iloc[:, -1]

print("Score with own implementation of Multinomial Naive Bayes =", mnb1.score(x_t, y_t))
print("Score with sklearn Multinomial Naive Bayes =", mnb2.score(x_t, y_t))

