## Practical 4 : Naive Bayes using `sklearn`

In [1]:
import numpy as np
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import BernoulliNB, MultinomialNB, GaussianNB
from sklearn.metrics import confusion_matrix, accuracy_score
import matplotlib.pyplot as plt

### Loading data

In [2]:
# Fetch the iris feature matrix and label vector as plain arrays.
X, y = load_iris(return_X_y=True)

# Deterministic split: even-indexed rows are used for training,
# odd-indexed rows are held out for testing.
X_train, X_test = X[::2], X[1::2]
y_train, y_test = y[::2], y[1::2]

### Gaussian NB Model

In [3]:
# Build and train the Gaussian Naive Bayes classifier in one step.
# `fit` returns the fitted estimator, which is echoed as the cell output.
gnb_model = GaussianNB().fit(X_train, y_train)
gnb_model

GaussianNB()

In [4]:
# Predict class labels for the held-out (odd-indexed) test rows.
y_pred_gnb = gnb_model.predict(X_test)

In [5]:
# Rows are the true classes, columns are the predicted classes.
confusion_matrix(y_true=y_test, y_pred=y_pred_gnb)

array([[25,  0,  0],
       [ 0, 24,  1],
       [ 0,  2, 23]])

In [6]:
# Fraction of test rows whose predicted label equals the true label.
accuracy_score(y_true=y_test, y_pred=y_pred_gnb)

0.96

### Bernoulli NB Model

In [7]:
# Toy corpus: five short documents stored as a 1-D array of strings.
# Note: this rebinds X and y, which previously held the iris data.
X = np.array([
    'Tirth Tirth Patel',
    'Tirth Hihoriya Ramesh',
    'Hihoriya Ashesh Ramesh',
    'Tirth Ashesh Patel',
    'Ramesh Hihoriya',
])

# Binary class label for each document, in the same order as X.
y = np.array([1, 0, 1, 1, 0])

In [8]:
# binary=True records only presence/absence of each token (0 or 1),
# which is the feature type BernoulliNB is fitted on below.
cv = CountVectorizer(binary=True)
# NOTE: X is rebound from the raw strings to the dense numeric matrix, so
# re-running this cell by itself would feed that matrix back into the
# vectorizer; re-run the cell that defines the corpus first.
X = cv.fit_transform(X).toarray()

In [9]:
# Learned mapping from (lower-cased) token to its column index in X.
cv.vocabulary_

{'tirth': 4, 'patel': 2, 'hihoriya': 1, 'ramesh': 3, 'ashesh': 0}

In [10]:
# One row per document, one column per vocabulary term; entries are 0/1
# because the vectorizer was created with binary=True.
X

array([[0, 0, 1, 0, 1],
       [0, 1, 0, 1, 1],
       [1, 1, 0, 1, 0],
       [1, 0, 1, 0, 1],
       [0, 1, 0, 1, 0]])

In [11]:
# Build and train the Bernoulli Naive Bayes classifier on the binary
# term-presence matrix. `fit` returns the fitted estimator, which is
# echoed as the cell output.
bnb_model = BernoulliNB().fit(X, y)
bnb_model

BernoulliNB()

In [12]:
# Predictions are made on the same five documents the model was fitted on,
# so the metrics computed from them are training-set metrics.
y_pred_bnb = bnb_model.predict(X)

In [13]:
# Rows are the true classes, columns are the predicted classes.
confusion_matrix(y_true=y, y_pred=y_pred_bnb)

array([[2, 0],
       [1, 2]])

In [14]:
# Fraction of documents whose predicted label equals the true label.
accuracy_score(y_true=y, y_pred=y_pred_bnb)

0.8

### Multinomial NB Model

In [15]:
# Rebuild the raw toy corpus as a 1-D array of strings; X was overwritten
# with a numeric matrix by the earlier vectorization step.
X = np.array([
    'Tirth Tirth Patel',
    'Tirth Hihoriya Ramesh',
    'Hihoriya Ashesh Ramesh',
    'Tirth Ashesh Patel',
    'Ramesh Hihoriya',
])

# Binary class label for each document, in the same order as X.
y = np.array([1, 0, 1, 1, 0])

In [16]:
# Default settings keep the raw occurrence count of each token, which is
# the feature type MultinomialNB is fitted on below.
cv = CountVectorizer()
# NOTE: X is rebound from the raw strings to the dense numeric matrix, so
# re-running this cell by itself would feed that matrix back into the
# vectorizer; re-run the cell that defines the corpus first.
X = cv.fit_transform(X).toarray()

In [17]:
# Learned mapping from (lower-cased) token to its column index in X.
cv.vocabulary_

{'tirth': 4, 'patel': 2, 'hihoriya': 1, 'ramesh': 3, 'ashesh': 0}

In [18]:
# One row per document, one column per vocabulary term; entries are term
# counts (e.g. 'tirth' occurs twice in the first document).
X

array([[0, 0, 1, 0, 2],
       [0, 1, 0, 1, 1],
       [1, 1, 0, 1, 0],
       [1, 0, 1, 0, 1],
       [0, 1, 0, 1, 0]])

In [19]:
# Build and train the Multinomial Naive Bayes classifier on the term-count
# matrix. `fit` returns the fitted estimator, which is echoed as the cell
# output.
mnb_model = MultinomialNB().fit(X, y)
mnb_model

MultinomialNB()

In [20]:
# Predictions are made on the same five documents the model was fitted on,
# so the metrics computed from them are training-set metrics.
y_pred_mnb = mnb_model.predict(X)

In [21]:
# Rows are the true classes, columns are the predicted classes.
confusion_matrix(y_true=y, y_pred=y_pred_mnb)

array([[2, 0],
       [1, 2]])

In [22]:
# Fraction of documents whose predicted label equals the true label.
accuracy_score(y_true=y, y_pred=y_pred_mnb)

0.8