In [1]:
from sklearn.datasets import load_iris
iris=load_iris()

In [2]:
X=iris.data
y=iris.target

In [3]:
print(X.shape)
print(y.shape)

(150, 4)
(150,)


In [4]:
print(y)

[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2
 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
 2 2]


In [5]:
import pandas as pd
df=pd.DataFrame(X,columns=iris.feature_names)

In [6]:
df.head()

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm)
0,5.1,3.5,1.4,0.2
1,4.9,3.0,1.4,0.2
2,4.7,3.2,1.3,0.2
3,4.6,3.1,1.5,0.2
4,5.0,3.6,1.4,0.2


In [7]:
print(df.shape)

(150, 4)


In [8]:
print(type(X))

<class 'numpy.ndarray'>


In [9]:
from sklearn.model_selection import train_test_split

In [10]:
X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.3)

In [11]:
print(X_train.shape,y_train.shape)
print(X_test.shape,y_test.shape)

(105, 4) (105,)
(45, 4) (45,)


In [12]:
import numpy as np

In [13]:
print(np.unique(y_train))

[0 1 2]


### Multinomial Naive Bayes From Scratch

<img src="i1.jpg">

In [14]:
class Multinomailnb:
    #constructor
    def __init__(self,alpha=1):
        self.alpha=alpha;
    
    def fit(self, X_train, y_train):
        m, n = X_train.shape
        self._classes = np.unique(y_train)
        n_classes = len(self._classes)

        # init: Prior & Likelihood
        self._priors = np.zeros(n_classes)
        self._likelihoods = np.zeros((n_classes, n))

        # Get Prior and Likelihood
        for idx, c in enumerate(self._classes):
            X_train_c = X_train[c == y_train]
            self._priors[idx] = X_train_c.shape[0] / m 
            self._likelihoods[idx, :] = ((X_train_c.sum(axis=0)) + self.alpha) / (np.sum(X_train_c.sum(axis=0) + self.alpha))
    
    def calc_likelihood(self, cls_likeli, x_test):
        return np.log(cls_likeli) * x_test
    
    def _predict(self, x_test):
        # Calculate posterior for each class
        posteriors = []
        for idx, c in enumerate(self._classes):
            prior_c = np.log(self._priors[idx])
            likelihoods_c = self.calc_likelihood(self._likelihoods[idx,:], x_test)
            posteriors_c = np.sum(likelihoods_c) + prior_c
            posteriors.append(posteriors_c)
            
        return self._classes[np.argmax(posteriors)]
    
    def predict(self, X_test):
        return [self._predict(x_test) for x_test in X_test]

    def score(self, X_test, y_test):
        y_pred = self.predict(X_test)
        return np.sum(y_pred == y_test)/len(y_test)


In [15]:
mnb=Multinomailnb()

In [16]:
mnb.fit(X_train,y_train)

In [17]:
mnb.score(X_train,y_train)

0.9619047619047619

In [18]:
mnb.predict(X_test)

[0,
 0,
 1,
 2,
 0,
 1,
 0,
 2,
 0,
 1,
 0,
 1,
 1,
 2,
 0,
 2,
 2,
 0,
 1,
 1,
 0,
 2,
 0,
 2,
 2,
 2,
 1,
 1,
 1,
 2,
 0,
 1,
 0,
 0,
 1,
 0,
 0,
 2,
 0,
 0,
 2,
 2,
 2,
 2,
 0]

In [19]:
mnb.score(X_test,y_test)

0.9555555555555556

### Multinomial Naive bayes using sklearn

In [20]:
from sklearn.naive_bayes import MultinomialNB

In [21]:
mnb=MultinomialNB()

In [22]:
mnb.fit(X_train,y_train)

MultinomialNB()

In [23]:
mnb.predict(X_test)

array([0, 0, 1, 2, 0, 1, 0, 2, 0, 1, 0, 1, 1, 2, 0, 2, 2, 0, 1, 1, 0, 2,
       0, 2, 2, 2, 1, 1, 1, 2, 0, 1, 0, 0, 1, 0, 0, 2, 0, 0, 2, 2, 2, 2,
       0])

In [24]:
mnb.score(X_test,y_test)

0.9555555555555556