# Naive Bayes 
- Probablistic Classifier 
- Applies Bayes Theorem
- Naive Assumption : Features are mututally Independent
- Selects Class with highest Probablity
- P(y) : Prior Probablity --> Frequency of each class
- P(xi | y) : Class Condtional Probablity --> Model with gaussian
### Theorem
    - **Main Formula:** P (A|B) = P(B|A) * P(A) / P(B)
    - **For our case:**  P (y|X) = P(X|y) * P(y) / P(X) : X: Feature Vector
### Training
    - Calculate Mean, var prior (frequency) for each class
### Testing
    - Calulate posteroe for each class using
    - y = argmax(y) log(P(x1 |y)) +.... log(P(xn |y)) + log(P(y)) and Gaussian formula
    - Choose class with highest posterior probablity

In [1]:
# importing math library
import numpy as np

In [19]:
# Naive Bayes Class

class NaiveBayes:

    def fit(self, X, y):
        n_samples, n_features = X.shape
        self._classess = np.unique(y)
        n_classess = len(self._classess)

        # Calculating Mean , variance and prior for every class
        self._mean = np.zeros((n_classess, n_features), dtype=np.float64)
        self._var = np.zeros((n_classess, n_features), dtype=np.float64)
        self._prior = np.zeros(n_classess, dtype=np.float64)

        # 
        for idx, c in enumerate(self._classess):
            X_c = X[y == c]
            self._mean[idx, :] = X_c.mean(axis=0)
            self._var[idx, :] = X_c.var(axis=0)
            self._prior[idx] = X_c.shape[0] / float(n_samples)


    def predict(self, X_test):
        y_pred = [self._predict(x) for x in X_test]
        return np.array(y_pred)

    def _predict(self, x):
        posteriors = []

        # Calc posterior probab for each class
        for idx, c in enumerate(self._classess):
            prior = np.log(self._prior[idx])
            posterior = np.sum(np.log(self._pdf(idx, x)))
            posterior = posterior + prior
            posteriors.append(posterior)

        # return highest posteriro
        return self._classess[np.argmax(posteriors)]

    def _pdf(self, class_idx, x):
        mean = self._mean[class_idx]
        var = self._var[class_idx]
        numerator = np.exp(-(x-mean)**2 /(2 * var))
        denominator = np.sqrt(2*np.pi*var)
        return numerator / denominator

## Setting Up Accuracy Model

In [4]:
def accuracy (preds, y_test):
    return np.sum(preds == y_test) / len(y_test)

## Settin up Dataset

In [5]:
from sklearn import datasets
from sklearn.model_selection import train_test_split

In [6]:
data = datasets.load_breast_cancer()
X,y = data.data, data.target
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=1234)

## Setting Up Model

In [20]:
NB = NaiveBayes()
NB.fit(X_train, y_train)
preds = NB.predict(X_test)
print(f"Accuracy Score : {accuracy(preds, y_test)}")

Accuracy Score : 0.8936170212765957


  posterior = np.sum(np.log(self._pdf(idx, x)))


## Comparing To Sklearn Naive bayes

In [23]:
from sklearn.naive_bayes import GaussianNB

In [24]:
MNB = GaussianNB()
MNB.fit(X_train, y_train)
preds = MNB.predict(X_test)
print(f"Accuracy Score : {accuracy(preds, y_test)}")

Accuracy Score : 0.898936170212766
