In [1]:
import numpy as np

from sklearn.naive_bayes import BernoulliNB
from sklearn import datasets
from sklearn.naive_bayes import GaussianNB
from sklearn.calibration import CalibratedClassifierCV
from sklearn.naive_bayes import BernoulliNB
from sklearn.linear_model import LogisticRegression

from sklearn.preprocessing import StandardScaler

In [2]:
# Load data
iris = datasets.load_iris()
X = iris.data
y = iris.target

Attribute Information:
1. sepal length in cm
2. sepal width in cm
3. petal length in cm
4. petal width in cm
5. class: 
   - Iris Setosa: 0
   - Iris Versicolour: 1
   - Iris Virginica: 2

**Train Gaussian Naive Bayes Classifier**

In [3]:
# Create Gaussian Naive Bayes object with prior probabilities of each class
gnb = GaussianNB()
# Train model
model_gnb = gnb.fit(X, y)
# Mean accuracy on the given test data and labels.
model_gnb.score(X, y)

0.96

In [4]:
# Create new observation
observation_v1 = [[4.9, 3.1, 1.5, 0.2]]  #"Iris-setosa"
observation_v2 = [[6, 7, 8, 9]]  #"Iris-setosa"

In [5]:
# Predict class
model_gnb.predict(observation_v1), model_gnb.predict_proba(observation_v1), model_gnb.predict(observation_v2), model_gnb.predict_proba(observation_v2)

(array([0]),
 array([[1.00000000e+00, 3.04840351e-17, 6.32348452e-25]]),
 array([2]),
 array([[0.00000000e+000, 1.92066107e-206, 1.00000000e+000]]))

**Calibrate Predicted Probabilities**

In [7]:
# Create calibrated cross-validation with sigmoid calibration
sigmoid = CalibratedClassifierCV(model_gnb, cv=2, method='sigmoid')

# Calibrate probabilities
model_sigmoid = sigmoid.fit(X, y)

In [8]:
# View calibrated probabilities
model_sigmoid.predict(observation_v1), model_sigmoid.predict_proba(observation_v1), model_sigmoid.predict(observation_v2), model_sigmoid.predict_proba(observation_v2) 

(array([0]),
 array([[0.91953506, 0.04131332, 0.03915162]]),
 array([2]),
 array([[0.0195019 , 0.04473133, 0.93576677]]))

**Multinomial Logistic Regression**

<br>$P(y_i=k \mid X)={\frac {e^{\beta_{k}x_{i}}}{{\sum_{j=1}^{K}}e^{\beta_{j}x_{i}}}}$

where $P(y_i=k \mid X)$ is the probability the i-th observation’s target value, yi is class k, and K is the total number of classes. One practical advantage of the MLR is that its predicted probabilities using the predict_proba method are more reliable (i.e. better calibrated).

In [9]:
# Standarize features
scaler = StandardScaler()
X_std = scaler.fit_transform(X)

In [10]:
# Create one-vs-rest logistic regression object
mlr = LogisticRegression(random_state=0, multi_class='multinomial', solver='newton-cg')

# Train model
model_mlr = mlr.fit(X_std, y)

In [11]:
observation_v1 = [[4.9, 3.1, 1.5, 0.2]]  #"Iris-setosa"
observation_v2 = [[6, 7, 8, 9]]

model_mlr.predict(observation_v1), model_mlr.predict_proba(observation_v1), model_mlr.predict(observation_v2), model_mlr.predict_proba(observation_v2) 

(array([1]),
 array([[1.96125634e-04, 7.53070356e-01, 2.46733518e-01]]),
 array([2]),
 array([[3.47553906e-30, 3.39809192e-20, 1.00000000e+00]]))

**Bernoulli Naive Bayes Classifier**
<br>The Bernoulli naive Bayes classifier assumes that all our features are binary such that they take only two values (e.g. a nominal categorical feature that has been one-hot encoded).

In [12]:
# Create three binary features
X = np.random.randint(2, size=(100, 3))

# Create a binary target vector
y = np.random.randint(2, size=(100, 1)).ravel()

In [13]:
X[0:10], y[0:10]

(array([[0, 0, 1],
        [1, 1, 0],
        [1, 0, 0],
        [1, 1, 0],
        [0, 1, 1],
        [0, 1, 0],
        [1, 1, 1],
        [1, 0, 0],
        [1, 0, 0],
        [0, 0, 1]]), array([0, 1, 1, 1, 0, 1, 0, 0, 1, 1]))

In [14]:
# Create Bernoulli Naive Bayes object with prior probabilities of each class
bnb = BernoulliNB(class_prior=[0.25, 0.5])
# Train model
model_bnb = bnb.fit(X, y)

In [15]:
observation_v1 = [[0, 0, 1]]
observation_v2 = [[-4, 7, 8]]

In [16]:
model_bnb.predict(observation_v1), model_bnb.predict_proba(observation_v1), model_bnb.predict(observation_v2), model_bnb.predict_proba(observation_v2) 

(array([1]),
 array([[0.28611845, 0.71388155]]),
 array([1]),
 array([[0.27099945, 0.72900055]]))