# Implementing Naïve Bayes from scratch

In [4]:
import numpy as np
from collections import defaultdict

In [5]:
# Toy training set: each row is one sample, each column one binary (0/1) feature.
X_train = np.array([
        [0, 1, 1],
        [0, 0, 1],
        [0, 0, 0],
        [1, 1, 0]])
# Class label of each training row, listed in the same order as the rows of X_train.
Y_train = ['Y', 'N', 'Y', 'Y']

In [7]:
# Single sample (one row, three features) whose class is to be predicted.
X_test = np.array([[1,1,0]])

In [4]:
def get_label_indices(lables):
    """Group sample positions by their class label.

    Parameters
    ----------
    lables : iterable of hashable
        Class label of each sample, in sample order.

    Returns
    -------
    collections.defaultdict
        Maps every distinct label to the list of 0-based positions at which
        it occurs, in ascending order. Labels appear in first-seen order.
    """
    positions_by_label = defaultdict(list)
    for position, current_label in enumerate(lables):
        positions_by_label[current_label].append(position)
    return positions_by_label

In [5]:
# Map each class label to the row positions of its training samples.
lables_indices = get_label_indices(Y_train)
print('label indices:\n',lables_indices)

label indices:
 defaultdict(<class 'list'>, {'Y': [0, 2, 3], 'N': [1]})


In [6]:
def get_prior(label_indices):
    """Estimate class priors as relative label frequencies.

    Parameters
    ----------
    label_indices : dict
        Maps each label to the collection of sample positions carrying it
        (only the length of each collection is used).

    Returns
    -------
    dict
        Maps each label to ``count(label) / total number of samples``.
    """
    counts = {label: len(indices) for label, indices in label_indices.items()}
    total_count = sum(counts.values())
    return {label: count / total_count for label, count in counts.items()}

In [7]:
# Class priors: the fraction of training samples carrying each label.
prior = get_prior(lables_indices)
print('prior:\n',prior)

prior:
 {'Y': 0.75, 'N': 0.25}


## Worked example: smoothed likelihood for label 'Y'

In [8]:
# Hand-computed smoothed likelihood for label 'Y' (training rows 0, 2 and 3):
# (per-feature count of ones + smoothing of 1) / (3 samples + 2 * smoothing of 1).
# This is the same formula get_likelihood applies when called with smoothing=1.
(X_train[[0,2,3],:].sum(axis=0)+1)/(3+2*1)

array([0.4, 0.6, 0.4])

In [9]:
def get_likelihood(features,label_indices,smoothing=0):
    """Estimate, per label, the probability that each feature equals 1.

    Parameters
    ----------
    features : numpy.ndarray
        2-D array indexed as ``features[rows, :]``; one row per sample.
    label_indices : dict
        Maps each label to the row positions of the samples carrying it.
    smoothing : int or float, default 0
        Additive smoothing constant: it is added to every per-feature count,
        and twice its value is added to the per-label sample count.

    Returns
    -------
    dict
        Maps each label to a 1-D array holding
        ``(column sum over the label's rows + smoothing) /
        (number of rows + 2 * smoothing)`` for every feature.
    """
    def smoothed_rate(rows):
        # Column-wise total over this label's rows, with smoothing applied.
        positives = features[rows, :].sum(axis=0) + smoothing
        return positives / (len(rows) + 2 * smoothing)

    return {label: smoothed_rate(rows) for label, rows in label_indices.items()}

In [10]:
# Additive smoothing constant handed to get_likelihood (added to every per-feature count).
smoothing = 1
likelihood = get_likelihood(X_train,lables_indices,smoothing)
print('Likelihood :\n',likelihood)

Likelihood :
 {'Y': array([0.4, 0.6, 0.4]), 'N': array([0.33333333, 0.33333333, 0.66666667])}


In [20]:
def get_posterior(X,prior,likelihood):
    """Compute normalised posterior class probabilities for binary samples.

    For every sample the unnormalised posterior of a label is
    ``prior[label] * prod_i p_i``, where ``p_i`` is ``likelihood[label][i]``
    when feature ``i`` of the sample is truthy and
    ``1 - likelihood[label][i]`` otherwise.

    The product is accumulated as a sum of logarithms and normalised with the
    log-sum-exp trick. Multiplying the raw probabilities underflows to 0 for
    long feature vectors, which turns the normalisation into 0/0 and yields
    ``nan`` for every label.

    Parameters
    ----------
    X : iterable of sequences
        Samples to score; each sample is a sequence of truthy/falsy feature
        values.
    prior : dict
        Maps each label to its prior probability.
    likelihood : dict
        Maps each label to an array-like of per-feature probabilities that
        the feature is 1 given the label. Every label here must also be a
        key of ``prior``.

    Returns
    -------
    list of dict
        One dict per sample, mapping each label of ``prior`` to its posterior
        probability (a float). The values of a dict sum to 1. If every label
        has probability zero for a sample the posterior is undefined and all
        of its values are ``nan``.

    Raises
    ------
    KeyError
        If ``likelihood`` contains a label that is missing from ``prior``.
    IndexError
        If a sample has more features than a likelihood vector.
    """
    # Probabilities of exactly 0 legitimately map to log(0) = -inf; silence
    # the divide-by-zero warning numpy would otherwise emit for them.
    with np.errstate(divide='ignore'):
        log_prior = {label: np.log(float(p)) for label, p in prior.items()}
        log_on = {}   # log P(feature = 1 | label)
        log_off = {}  # log P(feature = 0 | label)
        for label, likelihood_label in likelihood.items():
            probs = np.asarray(likelihood_label, dtype=float)
            log_on[label] = np.log(probs)
            log_off[label] = np.log1p(-probs)

    posteriors = []
    for x in X:
        log_posterior = dict(log_prior)
        for label in likelihood:
            for index, bool_value in enumerate(x):
                table = log_on[label] if bool_value else log_off[label]
                log_posterior[label] += table[index]

        peak = max(log_posterior.values(), default=-np.inf)
        if peak == -np.inf:
            # No label can explain this sample: nothing to normalise.
            posteriors.append({label: float('nan') for label in log_posterior})
            continue

        # Shift by the largest log-posterior so the biggest term is exp(0) = 1
        # and the normalising sum can never underflow to zero.
        weights = {label: np.exp(value - peak) for label, value in log_posterior.items()}
        total = sum(weights.values())
        posteriors.append({label: float(weight / total) for label, weight in weights.items()})
    return posteriors

In [21]:
# Posterior class probabilities for X_test: a list with one label -> probability dict per row.
posterior = get_posterior(X_test,prior,likelihood)
print('Posterior: \n',posterior)

Posterior: 
 [{'Y': 0.9210360075805433, 'N': 0.07896399241945673}]


# Implementing Naïve Bayes with scikit-learn

In [1]:
from sklearn.naive_bayes import BernoulliNB

In [2]:
# alpha=1.0 is scikit-learn's additive smoothing parameter (the from-scratch code used smoothing = 1);
# fit_prior=True makes the model learn the class priors from the training data.
clf = BernoulliNB(alpha=1.0,fit_prior=True)

In [6]:
# Fit the scikit-learn model on the same toy training set used by the from-scratch version.
clf.fit(X_train,Y_train)

BernoulliNB()

In [9]:
# Class probabilities for X_test. Columns follow clf.classes_ (sorted labels, so 'N' then 'Y'),
# which is why the order is reversed relative to the from-scratch posterior dict.
pred_prob = clf.predict_proba(X_test)
print('Probability of test : \n',pred_prob)

Probability of test : 
 [[0.07896399 0.92103601]]


In [10]:
# Predicted class label for each row of X_test.
pred = clf.predict(X_test)
print('Prediction: \n',pred)

Prediction: 
 ['Y']
