## Naive Bayes

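Naive Bayes is a technique that has remained popular over the years despite its name, which refers to its "naive" assumption that the features are conditionally independent given the class. It is especially appropriate when the dimension of the feature space is high, making density estimation unattractive.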

In [1]:
# Naive Bayes Classifier

from sklearn import datasets
from sklearn.naive_bayes import GaussianNB

# Load the iris data set and split it into features and class labels.
iris = datasets.load_iris()
X, y = iris.data, iris.target

# Fit a Gaussian naive Bayes model. fit() returns the estimator itself,
# so `model` and `classifier` refer to the same fitted object.
classifier = GaussianNB()
model = classifier.fit(X, y)

# One new sample with four feature values; the outer list makes it 2-D,
# i.e. a batch containing a single row.
observation = [[4, 4, 4, 0.4]]
predicted_class = model.predict(observation)

print("The class of observation is", predicted_class)

The class of observation is [1]


## Text Classification

In [3]:
# Classifier for discrete and count features
# Text Classification using bags of words or tf-idf
import numpy as np
from sklearn.naive_bayes import MultinomialNB
from sklearn.feature_extraction.text import CountVectorizer

# Three tiny documents; the first two belong to class 0, the last to class 1.
text = np.array(['I Love Brazil, Brazil', 'Brazil is best', 'Germany beats both'])

# Turn the documents into a bag-of-words count matrix (one column per word).
count = CountVectorizer()
bag_of_words = count.fit_transform(text)

X = bag_of_words.toarray()
y = np.array([0, 0, 1])

# Class priors must form a probability distribution. The original weights
# (0.25, 0.5) only sum to 0.75, so normalize them; this keeps the intended
# 1:2 ratio between the classes and therefore the same predictions.
prior_weights = np.array([0.25, 0.5])
class_prior = prior_weights / prior_weights.sum()

classifier = MultinomialNB(class_prior=class_prior)

model = classifier.fit(X, y)

# One new document expressed as word counts. Columns follow the fitted
# vocabulary (inspect count.vocabulary_); with the default tokenizer the
# non-zero entries at indices 3 and 5 correspond to "brazil" and "is".
observation = [[0, 0, 0, 1, 0, 1, 0]]

model.predict(observation)

array([0])

In [15]:
# Naive Bayes for Binary Features
import numpy as np
from sklearn.naive_bayes import BernoulliNB

# Seeded generator so the synthetic data (and everything derived from it)
# is identical on every Restart & Run All.
rng = np.random.default_rng(0)

# 100 samples with 3 binary features each, and a binary target vector.
X = rng.integers(2, size=(100, 3))
y = rng.integers(2, size=100)

# Class priors must form a probability distribution. The original weights
# (0.25, 0.5) only sum to 0.75, so normalize them while keeping the 1:2 ratio.
prior_weights = np.array([0.25, 0.5])
class_prior = prior_weights / prior_weights.sum()

classifier = BernoulliNB(class_prior=class_prior)
model = classifier.fit(X, y)
y

array([0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 0, 0, 1,
       1, 0, 1, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 0, 1, 0, 0, 1, 0, 0, 0, 1,
       1, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 1,
       0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1,
       1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0])