
# Naive Bayes classifier -  Visualisation

Naive Bayes classifier

Computing the posterior probability that a sample *x* belongs to class **c** using **Bayes'** rule.

\begin{align}P(y_c|x)= \frac{P(x|y_c)P(y_c)}{P(x)}\end{align}


This script demonstrates the Naive Bayes classifier using three examples:

* Example 1: Iris dataset
* Example 2: Breast Cancer
* Example 3: Digit Classification


In [None]:
# libraries
# numpy for the accuracy computation, matplotlib for the figures, and
# scikit-learn for the example datasets and the train/test split.

import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.model_selection import train_test_split

# Print the installed spkit version so the output records which release was used.
import spkit
print('spkit version :', spkit.__version__)

# Naive Bayes classifier used in the examples of this file.
from spkit.ml import NaiveBayes

## Example 1: Iris dataset



In [None]:
# Load the Iris dataset and separate the feature matrix from the labels.
data = datasets.load_iris()
X, y = data.data, data.target

# Keep 30% of the samples for testing. No random_state is passed, so the
# split (and therefore the printed accuracies) can differ between runs.
Xt, Xs, yt, ys = train_test_split(X, y, test_size=0.3)

print('Shapes: ', Xt.shape, yt.shape, Xs.shape, ys.shape)

# --- Fit the model (estimate the parameters) ---
model = NaiveBayes()
model.fit(Xt, yt)

# --- Predicted labels and accuracy on the training and test splits ---
ytp = model.predict(Xt)
ysp = model.predict(Xs)

# Accuracy = fraction of predictions that match the true labels.
print('Training Accuracy : ', np.mean(ytp == yt))
print('Testing  Accuracy : ', np.mean(ysp == ys))

# --- Predicted probabilities ---
ytpr = model.predict_prob(Xt)
yspr = model.predict_prob(Xs)

# Probabilities for the first training sample, then the predicted label and
# probabilities for the first test sample.
print(ytpr[0])
print(model.predict(Xs[0]), model.predict_prob(Xs[0]))

# --- Estimated parameters :: $\mu$, $\sigma$ ---
print('model parameters')
print(model.parameters)

# --- Visualising the distributions ---
# Setting class and feature names is optional.
model.set_class_labels(data.target_names)
model.set_feature_names(data.feature_names)

# show=False is passed to VizPx; the figure is displayed explicitly with
# plt.show() once the title and layout have been set.
fig = plt.figure(figsize=(12, 10))
model.VizPx(show=False)
plt.suptitle('Example 1: Iris dataset')
plt.tight_layout()
plt.show()

## Example 2: Breast Cancer



In [None]:
# Load the Breast Cancer dataset and separate features from labels.
data = datasets.load_breast_cancer()
X, y = data.data, data.target

# Keep 30% of the samples for testing. No random_state is passed, so the
# split can differ between runs.
Xt, Xs, yt, ys = train_test_split(X, y, test_size=0.3)

print(Xt.shape, yt.shape, Xs.shape, ys.shape)

# --- Fit the model (estimate the parameters) ---
model = NaiveBayes()
model.fit(Xt, yt)

# --- Accuracy on the training and test splits ---
ytp = model.predict(Xt)
ysp = model.predict(Xs)

# Accuracy = fraction of predictions that match the true labels.
print('Training Accuracy : ', np.mean(ytp == yt))
print('Testing  Accuracy : ', np.mean(ysp == ys))

# --- Estimated parameters :: $\mu$, $\sigma$ ---
# Only the entry stored under key/index 0 is printed here.
print('model parameters')
print(model.parameters[0])

# Use the dataset's class names as labels (feature names are not set here).
model.set_class_labels(data.target_names)

# --- Visualising the first 16 features (indices 0..15) ---
fig = plt.figure(figsize=(12, 10))
model.VizPx(nfeatures=range(16), show=False)
plt.suptitle('Example 2: Breast Cancer')
plt.tight_layout()
plt.show()

# --- Visualising the next 14 features (indices 16..29) ---
fig = plt.figure(figsize=(12, 10))
model.VizPx(nfeatures=range(16, 30), show=False)
plt.suptitle('Example 2: Breast Cancer')
plt.tight_layout()
plt.show()

## Example 3: Digit Classification



In [None]:
# Load the digits dataset and separate features from labels.
data = datasets.load_digits()
X, y = data.data, data.target

# Optional (disabled): drop features with zero variance (constant value).
# X = X[:, X.var(0) > 0]

# Keep 30% of the samples for testing. No random_state is passed, so the
# split can differ between runs.
Xt, Xs, yt, ys = train_test_split(X, y, test_size=0.3)

print(Xt.shape, yt.shape, Xs.shape, ys.shape)

# --- Fit the model (estimate the parameters) ---
model = NaiveBayes()
model.fit(Xt, yt)

# --- Accuracy on the training and test splits ---
ytp = model.predict(Xt)
ysp = model.predict(Xs)

# Accuracy = fraction of predictions that match the true labels.
print('Training Accuracy : ', np.mean(ytp == yt))
print('Testing  Accuracy : ', np.mean(ysp == ys))

# --- Predicted probabilities for the first test sample ---
print(model.predict(Xs[0]), model.predict_prob(Xs[0]))

# Show the first test sample as an 8x8 grayscale image next to its prediction.
plt.imshow(Xs[0].reshape((8, 8)), cmap='gray')
plt.axis('off')
plt.show()
print('Prediction', model.predict(Xs[0]))

# --- Visualising the distributions of features 5..18 ---
fig = plt.figure(figsize=(12, 10))
model.VizPx(nfeatures=range(5, 19), show=False)
plt.suptitle('Example 3: Digit Classification')
plt.tight_layout()
plt.show()