<a href="https://colab.research.google.com/github/sherna90/inteligencia_artificial/blob/master/8-naive_bayes_tensorflow.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Naive Bayes Tensorflow

In [None]:
import tensorflow as tf

In [None]:
# Print the installed TensorFlow version.
print(tf.__version__)

Carga y prepara el conjunto de datos [MNIST](http://yann.lecun.com/exdb/mnist/).

In [None]:
# Handle to the Keras MNIST dataset module.
mnist = tf.keras.datasets.mnist

# load_data() is unpacked into (inputs, labels) pairs for the train and test splits.
(x_train, y_train), (x_test, y_test) = mnist.load_data()
# Display the shape of the training inputs before they are flattened to 784 columns.
x_train.shape

In [None]:
# Flatten every image into a 784-element row; -1 lets NumPy infer the number of samples.
x_train=x_train.reshape((-1,784))
x_test=x_test.reshape((-1,784))

Entrena un modelo generativo multinomial:

In [None]:
from sklearn.naive_bayes import GaussianNB,MultinomialNB,BernoulliNB

# Multinomial naive Bayes on the raw pixel intensities (alpha=1.0 is additive smoothing).
mnb = MultinomialNB(alpha=1.0)
mnb.fit(x_train, y_train)
# Predicted labels for the held-out test images.
y_pred_naive_multinomial = mnb.predict(x_test)

In [None]:
# Class labels known to the fitted multinomial model.
mnb.classes_

Entrena un modelo generativo gaussiano:

In [None]:
from sklearn import preprocessing 

# Fit the scaler on the training split only, then apply the same transform to the test split.
scaler = preprocessing.StandardScaler()
x_train_normalized = scaler.fit_transform(x_train)
x_test_normalized = scaler.transform(x_test)

# Gaussian naive Bayes on the standardized features: fit first, then predict the test split.
gnb = GaussianNB()
gnb.fit(x_train_normalized, y_train)
y_pred_naive_gaussian = gnb.predict(x_test_normalized)

In [None]:
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix
from sklearn.metrics import ConfusionMatrixDisplay

# Two panels in the top row of a 2x2 grid, one per model.
fig = plt.figure(figsize=(15,15))
ax1 = fig.add_subplot(2, 2, 1)
ax2 = fig.add_subplot(2, 2, 2)

# normalize='pred' normalizes the matrix over the predicted labels (columns).
cm = confusion_matrix(y_test, y_pred_naive_multinomial, normalize='pred')
cmg = confusion_matrix(y_test, y_pred_naive_gaussian, normalize='pred')

multinomial_display = ConfusionMatrixDisplay(cm, display_labels=mnb.classes_)
multinomial_display.plot(ax=ax1)
ax1.set_title('Multinomial Naive Bayes')

gaussian_display = ConfusionMatrixDisplay(cmg, display_labels=gnb.classes_)
gaussian_display.plot(ax=ax2)
ax2.set_title('Gaussian Naive Bayes')

In [None]:
from sklearn.metrics import classification_report

# Text report of classification metrics for the multinomial model's test predictions.
print(classification_report(y_test, y_pred_naive_multinomial))


In [None]:
# Text report of classification metrics for the Gaussian model's test predictions.
print(classification_report(y_test, y_pred_naive_gaussian))

In [None]:
import numpy as np

# Binarize the pixels: 1 where the intensity is above 128, 0 otherwise.
# The builtin `int` replaces `np.int`, an alias deprecated in NumPy 1.20 and
# removed in NumPy 1.24 (where it raises AttributeError); the dtype is the same.
# NOTE: these two names held the standardized features before this cell and
# are overwritten here with the binarized data.
x_train_normalized = (x_train>128).astype(int)
x_test_normalized=  (x_test>128).astype(int)

# Bernoulli naive Bayes on the binary pixels (alpha=1.0 is additive smoothing).
bnb = BernoulliNB(alpha=1.0)
y_pred_naive_bernoulli = bnb.fit(x_train_normalized, y_train).predict(x_test_normalized)

In [None]:
# Text report of classification metrics for the Bernoulli model's test predictions.
print(classification_report(y_test, y_pred_naive_bernoulli))

# Naive Bayes  Tensorflow Probability

In [None]:
import tensorflow as tf
import tensorflow_probability as tfp
import tensorflow.keras as keras
tfd = tfp.distributions


# Reload the data, split between train and test sets.
(x_train, y_train), (x_test, y_test) = keras.datasets.mnist.load_data()

# Flatten every image into a 784-element row; -1 lets NumPy infer the number of samples.
x_train=x_train.reshape((-1,784))
x_test=x_test.reshape((-1,784))
# Report the flattened shape and the size of each split.
print("x_train shape:", x_train.shape)
print(x_train.shape[0], "train samples")
print(x_test.shape[0], "test samples")


In [None]:
# Shape of the flattened training inputs: (n_samples, 784).
x_train.shape

In [None]:
# Binarize the pixels: 1 where the intensity is above 128, 0 otherwise.
# The builtin `int` replaces `np.int`, an alias deprecated in NumPy 1.20 and
# removed in NumPy 1.24 (where it raises AttributeError); the dtype is the same.
x_train_binary=(x_train>128).astype(int)
x_test_binary=(x_test>128).astype(int)


In [None]:
def get_prior(y):
    """Estimate the class prior distribution from a vector of labels.

    Args:
        y: 1-D array-like of class labels.

    Returns:
        A ``tfd.Categorical`` whose k-th probability is the relative
        frequency of the k-th smallest distinct label found in ``y``.

    Raises:
        ValueError: if ``y`` is empty.
    """
    y = np.asarray(y)
    if y.size == 0:
        raise ValueError('Cannot estimate class priors from an empty label array.')
    # Sorted distinct labels and how often each one occurs, in a single pass.
    _, counts = np.unique(y, return_counts=True)
    probs = counts / y.size
    # Report the priors themselves; the sum (always 1) was printed before,
    # which did not match the message.
    print('The class priors are {}'.format(probs))
    priors=tfd.Categorical(probs=probs)
    return priors

In [None]:
# Build the categorical class prior from the training labels.
prior=get_prior(y_train)

In [None]:
# Inspect the class probabilities stored in the prior.
prior.probs

In [None]:
# Bar chart of the class prior, one bar per distinct training label.
labels=np.unique(y_train)
# Bar positions follow the number of classes found in the data instead of a
# hard-coded 10, so the bars and the tick labels always have matching lengths.
positions = np.arange(len(labels))
fig_prior, ax_prior = plt.subplots()
ax_prior.bar(positions, prior.probs.numpy())
ax_prior.set_xlabel("Class")
ax_prior.set_ylabel("Prior probability")
ax_prior.set_title("Class prior distribution")
ax_prior.set_xticks(positions)
ax_prior.set_xticklabels(labels)
plt.show()

Los datos originales de las imágenes contienen regiones donde los valores siempre son cero. Sin suavizado, la probabilidad estimada de esos píxeles sería exactamente cero y su logaritmo sería $-\infty$. Una forma de suavizar la estimación de las probabilidades condicionales es usar el suavizado de Laplace (https://en.wikipedia.org/wiki/Additive_smoothing).

In [None]:
def laplace_smoothing(binary_data,labels,alpha=1):
    """Estimate per-class Bernoulli pixel probabilities with additive smoothing.

    For each class c and pixel j the estimate is
    ``(count of ones in pixel j among class-c samples + alpha) / (N_c + 2*alpha)``,
    where ``N_c`` is the number of samples labelled c.

    Args:
        binary_data: array-like of shape (n_samples, n_pixels) with 0/1 entries.
        labels: array-like of shape (n_samples,) with the class of each sample.
            Any label values are accepted; they need not be 0..K-1.
        alpha: additive smoothing pseudo-count (default 1).

    Returns:
        ``theta``: float array of shape (n_classes, n_pixels). Row k belongs
        to the k-th smallest distinct label in ``labels``.
    """
    binary_data = np.asarray(binary_data)
    labels = np.asarray(labels)
    classes = np.unique(labels)
    n_pixels = binary_data.shape[1]
    theta = np.zeros([len(classes), n_pixels])
    # Iterate over the labels actually present rather than assuming 0..K-1;
    # with other labels the old range-based loop matched no sample at all.
    for row, c_k in enumerate(classes):
        class_mask = (labels == c_k)
        n_samples = class_mask.sum()  # number of samples in this class
        ones_per_pixel = binary_data[class_mask, :].sum(axis=0)
        theta[row, :] = (ones_per_pixel + alpha) / (n_samples + alpha * 2)
    return theta

In [None]:
# Smoothed per-class pixel probabilities from the binarized training images (alpha=1).
theta=laplace_smoothing(x_train_binary,y_train,1)

In [None]:
# Show row 0 of theta as a 28x28 image.
plt.imshow(theta[0].reshape((28,28)))

In [None]:
# Show row 1 of theta as a 28x28 image.
plt.imshow(theta[1].reshape((28,28)))

In [None]:
# Show row 7 of theta as a 28x28 image.
plt.imshow(theta[7].reshape((28,28)))

In [None]:
def get_class_conditionals(probs):
    """Build the Bernoulli class-conditional distribution.

    Args:
        probs: success probabilities passed straight to ``tfd.Bernoulli``.

    Returns:
        A ``tfd.Bernoulli`` parameterized by ``probs``.
    """
    return tfd.Bernoulli(probs=probs)

In [None]:
# Bernoulli class-conditional distribution parameterized by the smoothed estimates.
class_conditionals=get_class_conditionals(theta)

In [None]:
# Display the distribution object.
class_conditionals

In [None]:
# Draw one sample from the class-conditional Bernoulli and convert it to a NumPy array.
digits_sample=class_conditionals.sample(1).numpy()

In [None]:
# 4x3 grid of panels, one sampled image per digit class.
rows, cols = 4, 3
titles = [
    'digit 0', 'digit 1', 'digit 2',
    'digit 3', 'digit 4', 'digit 5',
    'digit 6', 'digit 7', 'digit 8',
    'digit 9',
]
axes = []
fig = plt.figure(figsize=(8, 8))
for i, subplot_title in enumerate(titles):
    # Index i along the second axis selects the sample drawn for class i.
    mv_samples = digits_sample[:, i, :]
    panel = fig.add_subplot(rows, cols, i + 1)
    axes.append(panel)
    panel.set_title(subplot_title)
    panel.imshow(mv_samples.reshape([28, 28]), cmap='Greys')
    panel.axis('off')
plt.show()

In [None]:
def predict_sample(prior, class_conditionals, sample):
    """Compute the posterior class probabilities for a single sample.

    Args:
        prior: distribution over classes exposing ``logits_parameter()``.
        class_conditionals: distribution exposing ``log_prob``; its result for
            ``sample`` is summed over axis 1 to get one log-likelihood per class.
        sample: one observation to classify.

    Returns:
        NumPy array of posterior class probabilities, normalized over the
        last axis.
    """
    # Log-likelihood of every pixel under every class, summed per class.
    pixel_log_liks = class_conditionals.log_prob(sample)
    class_log_liks = tf.reduce_sum(pixel_log_liks, axis=1)
    # Log prior, cast so both terms share a dtype.
    log_prior = tf.cast(prior.logits_parameter(), pixel_log_liks.dtype)
    log_joint = log_prior + class_log_liks
    # Normalize in log space (log-sum-exp) before exponentiating.
    log_evidence = tf.math.reduce_logsumexp(log_joint, axis=-1, keepdims=True)
    return tf.math.exp(log_joint - log_evidence).numpy()

In [None]:
# Posterior class probabilities for the first binarized test image.
prob=predict_sample(prior,class_conditionals,x_test_binary[0])

In [None]:
# Index of the most probable class.
prob.argmax()

In [None]:
# True label of the first test image, for comparison with the prediction.
y_test[0]

In [None]:
def predict_class(prior, class_conditionals, x):
    """Predict the most probable class index for every sample in ``x``.

    Args:
        prior: class prior forwarded to ``predict_sample``.
        class_conditionals: class-conditional distribution forwarded to
            ``predict_sample``.
        x: iterable of samples; each one is classified independently.

    Returns:
        List with the arg-max class index of each sample, in input order.
    """
    return [
        predict_sample(prior, class_conditionals, sample).argmax()
        for sample in x
    ]

In [None]:
# Predict a class for every binarized test image (one predict_sample call per image).
y_pred_tfp=predict_class(prior, class_conditionals, x_test_binary)

In [None]:
# Text report of classification metrics for the TensorFlow Probability predictions.
print(classification_report(y_test, y_pred_tfp))

In [None]:
# Create the 15x15 axes explicitly and hand it to the display.
# ConfusionMatrixDisplay.plot() opens its own default-sized figure when no
# `ax` is given, which left the figure from plt.figure(figsize=(15,15)) empty.
fig_tfp, ax_tfp = plt.subplots(figsize=(15,15))
# normalize='pred' normalizes the matrix over the predicted labels (columns).
cm_tfp = confusion_matrix(y_test, y_pred_tfp,normalize='pred')
ConfusionMatrixDisplay(cm_tfp,display_labels=np.unique(y_train)).plot(ax=ax_tfp)

# https://jaketae.github.io/study/bayes-multi-bandit/

In [None]:
def get_betabernoulli_class_conditionals(binary_data,labels,alpha=1,beta=1):
    """Build Bernoulli class conditionals from a Beta-Bernoulli posterior.

    Each (class, pixel) pair gets a Beta(alpha, beta) prior, updated with the
    observed counts to ``Beta(alpha + ones, beta + N_c - ones)``. The
    Bernoulli probability is the posterior mean,
    ``(alpha + ones) / (alpha + beta + N_c)``, i.e. the Beta-Bernoulli
    posterior predictive.

    The posterior mode is not used: it is only defined when both
    concentrations exceed 1, so with the default alpha = beta = 1 it is
    undefined or degenerate (exactly 0 or 1) for pixels that are always off
    or always on within a class.

    Args:
        binary_data: array-like of shape (n_samples, n_pixels) with 0/1 entries.
        labels: array-like of shape (n_samples,) with the class of each sample.
            Any label values are accepted; they need not be 0..K-1.
        alpha: prior pseudo-count for ones (default 1).
        beta: prior pseudo-count for zeros (default 1).

    Returns:
        A ``tfd.Bernoulli`` whose ``probs`` have shape (n_classes, n_pixels).
        Row k belongs to the k-th smallest distinct label in ``labels``.
    """
    binary_data = np.asarray(binary_data)
    labels = np.asarray(labels)
    classes = np.unique(labels)
    n_pixels = binary_data.shape[1]
    alpha_posterior = np.zeros([len(classes), n_pixels])
    beta_posterior = np.zeros([len(classes), n_pixels])
    # Iterate over the labels actually present rather than assuming 0..K-1.
    for row, c_k in enumerate(classes):
        class_mask = (labels == c_k)
        n_samples = class_mask.sum()  # number of samples in this class
        ones_per_pixel = binary_data[class_mask, :].sum(axis=0)
        alpha_posterior[row, :] = alpha + ones_per_pixel
        beta_posterior[row, :] = beta + n_samples - ones_per_pixel
    posterior = tfd.Beta(alpha_posterior, beta_posterior)
    class_conditionals = tfd.Bernoulli(probs=posterior.mean())
    return class_conditionals

In [None]:
# Class conditionals from the Beta-Bernoulli model with prior parameters alpha=1 and beta=1.
class_conditionals_beta=get_betabernoulli_class_conditionals(x_train_binary,y_train,alpha=1,beta=1)

In [None]:
# Draw one sample from the Beta-Bernoulli class conditionals and convert it to a NumPy array.
digits_sample=class_conditionals_beta.sample(1).numpy()

In [None]:
# 4x3 grid of panels, one sampled image per digit class.
rows, cols = 4, 3
titles = [
    'digit 0', 'digit 1', 'digit 2',
    'digit 3', 'digit 4', 'digit 5',
    'digit 6', 'digit 7', 'digit 8',
    'digit 9',
]
axes = []
fig = plt.figure(figsize=(8, 8))
for i, subplot_title in enumerate(titles):
    # Index i along the second axis selects the sample drawn for class i.
    mv_samples = digits_sample[:, i, :]
    panel = fig.add_subplot(rows, cols, i + 1)
    axes.append(panel)
    panel.set_title(subplot_title)
    panel.imshow(mv_samples.reshape([28, 28]), cmap='Greys')
    panel.axis('off')
plt.show()

# https://www.tensorflow.org/probability/examples/Probabilistic_Layers_VAE