In [3]:
#### Training a Classifier for Continuous Features

##You have only continuous features and you want to train a naive Bayes classifier

# Load libraries
from sklearn import datasets
from sklearn.naive_bayes import GaussianNB

# Load data
iris = datasets.load_iris()
features = iris.data
target = iris.target

# Create Gaussian Naive Bayes object
classifer = GaussianNB()
# Train model
model = classifer.fit(features, target)
model.class_prior_
model.class_count_

array([50., 50., 50.])

In [4]:
# Create new observation
new_observation = [[ 4, 4, 4, 0.4]]
# Predict class
model.predict(new_observation)

array([1])

In [5]:
### customize the priors

# Create Gaussian Naive Bayes object with prior probabilities of each class
clf = GaussianNB(priors=[0.25, 0.25, 0.5])
# Train model
model = classifer.fit(features, target)

In [6]:
# Create new observation
new_observation = [[ 4, 4, 4, 0.4]]
# Predict class
model.predict(new_observation)

array([1])

In [7]:
model.predict_proba(new_observation)

array([[1.34715602e-38, 9.99949727e-01, 5.02727760e-05]])

In [9]:
### Training a Classifier for Discrete and Count Features

# Load libraries
import numpy as np
from sklearn.naive_bayes import MultinomialNB
from sklearn.feature_extraction.text import CountVectorizer

# Create text
text_data = np.array(['I love Brazil. Brazil!',
'Brazil is best',
'Germany beats both'])

In [11]:
# Create bag of words
count = CountVectorizer()
bag_of_words = count.fit_transform(text_data)
bag_of_words

<3x7 sparse matrix of type '<class 'numpy.int64'>'
	with 8 stored elements in Compressed Sparse Row format>

In [12]:
# Create feature matrix
features = bag_of_words.toarray()
features

array([[0, 0, 0, 2, 0, 0, 1],
       [0, 1, 0, 1, 0, 1, 0],
       [1, 0, 1, 0, 1, 0, 0]], dtype=int64)

In [15]:
# Create target vector
target = np.array([0,0,1])

# Create multinomial naive Bayes object with prior probabilities of each class
classifer = MultinomialNB(class_prior=[0.25, 0.5])

# Train model
model = classifer.fit(features, target)
model.class_prior 

[0.25, 0.5]

In [17]:
# Create new observation
new_observation = [[0, 0, 0, 1, 0, 1, 0]]

# Predict new observation's class
model.predict(new_observation)

array([0])

In [18]:
###Training a Naive Bayes Classifier for Binary Features

# Load libraries
import numpy as np
from sklearn.naive_bayes import BernoulliNB

In [20]:
# Create three binary features
features = np.random.randint(2, size=(100, 3))

In [22]:
# Create a binary target vector
target = np.random.randint(2, size=(100, 1)).ravel()
target

array([0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1,
       1, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1,
       0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1,
       1, 1, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 0,
       0, 0, 0, 1, 0, 1, 1, 0, 1, 1, 0, 0])

In [25]:
# Create Bernoulli Naive Bayes object with prior probabilities of each class
classifer = BernoulliNB(class_prior=[0.25, 0.5])

# Train model
model = classifer.fit(features, target)

In [26]:
## Calibrating Predicted Probabilities

# Load libraries
from sklearn import datasets
from sklearn.naive_bayes import GaussianNB
from sklearn.calibration import CalibratedClassifierCV

# Load data
iris = datasets.load_iris()
features = iris.data
target = iris.target

# Create Gaussian Naive Bayes object
classifer = GaussianNB()

# Create calibrated cross-validation with sigmoid calibration
classifer_sigmoid = CalibratedClassifierCV(classifer, cv=2, method='sigmoid')

# Calibrate probabilities
classifer_sigmoid.fit(features, target)

CalibratedClassifierCV(base_estimator=GaussianNB(priors=None,
                                                 var_smoothing=1e-09),
                       cv=2, method='sigmoid')

In [27]:

# Create new observation
new_observation = [[ 2.6, 2.6, 2.6, 0.4]]
# View calibrated probabilities
classifer_sigmoid.predict_proba(new_observation)

array([[0.31859969, 0.63663466, 0.04476565]])

In [28]:
# Train a Gaussian naive Bayes then predict class probabilities
classifer.fit(features, target).predict_proba(new_observation)

array([[2.31548432e-04, 9.99768128e-01, 3.23532277e-07]])