# Naive Bayes Classifier

## Import Libraries:

In [5]:
import numpy as np
from util import get_data
from datetime import datetime
from scipy.stats import norm
from scipy.stats import multivariate_normal as mvn

## Implementation, mathematics:
##### We assume independence between features (the Naive Bayes case).

**The formula (independent features).** When features are independent, the joint density $p(x \mid \mu, \sigma^2)$ is simply the product of the individual densities of each feature. The formula becomes:

$$p(x \mid \mu, \sigma^2) = \prod_{i=1}^{d} \frac{1}{\sqrt{2\pi\sigma_i^2}} \exp\left( -\frac{(x_i - \mu_i)^2}{2\sigma_i^2} \right)$$

Where:
* $d$ is the number of features.
* $\mu_i$ is the mean of the $i$-th feature.
* $\sigma_i^2$ is the variance of the $i$-th feature.

##### Why does the covariance matrix change?

In a general (Non-Naive) Gaussian model, we use a full covariance matrix $\Sigma$. However, under the independence assumption:
1) Diagonal Matrix:  The covariance between different features $i$ and $j$ ($i \neq j$) is $0$.
2) Structure of $\Sigma$: The matrix $\Sigma$ becomes a diagonal matrix, where only the variances ($\sigma^2$) stay on the main diagonal.

$$\Sigma = \begin{bmatrix} \sigma_1^2 & 0 & \dots & 0 \\ 0 & \sigma_2^2 & \dots & 0 \\ \vdots & \vdots & \ddots & \vdots \\ 0 & 0 & \dots & \sigma_d^2 \end{bmatrix}$$

In [6]:
class NaiveBayes(object):
    """Gaussian Naive Bayes classifier.

    Every class is modelled by an axis-aligned Gaussian (one mean and one
    variance per feature). Prediction picks the class with the largest
    log-posterior, ``log p(x | y) + log p(y)``.
    """

    def fit(self, X, Y, smoothing = 10e-3):
        """Estimate per-class feature means, variances and class priors.

        Args:
            X: 2-D array-like of samples, one row per sample.
            Y: 1-D array-like of class labels aligned with the rows of X.
            smoothing: Constant added to every per-feature variance so that
                a constant feature does not produce a zero variance.
        """
        X = np.asarray(X)
        Y = np.asarray(Y)

        # Per-class Gaussian parameters, e.g.
        # self.gaussians = {0: {"mean": [3.41, 4.42, 5.23], "variance": [1.34, 1.23, 1.53]},
        #                   1: {"mean": [...], "variance": [...]}}
        self.gaussians = dict()

        # Class priors, e.g. self.priors = {0: p(Y = 0), 1: p(Y = 1)}
        self.priors = dict()

        # Unique values of Y
        for c in np.unique(Y):
            current_x = X[Y == c]
            self.gaussians[c] = {
                "mean": current_x.mean(axis=0),
                "variance": current_x.var(axis=0) + smoothing,
            }
            # P(Y) - prior. Dividing by a float keeps this a true ratio even
            # where `/` on two ints would truncate.
            self.priors[c] = len(current_x) / float(len(Y))

    def predict(self, X):
        """Return the most probable class label for every row of X.

        Args:
            X: 2-D array-like of samples, one row per sample.

        Returns:
            1-D array of predicted labels, one per row of X.
        """
        X = np.asarray(X)
        N, _ = X.shape

        # Columns of P are indexed by position in `classes`, not by the label
        # value, so labels do not have to be the integers 0..K-1.
        classes = sorted(self.gaussians)
        P = np.zeros((N, len(classes)))
        for k, c in enumerate(classes):
            g = self.gaussians[c]
            # log P(X|Y) + log P(Y). A 1-D `cov` is interpreted by scipy as
            # the diagonal of the covariance matrix, which is exactly the
            # Naive Bayes independence assumption.
            P[:, k] = mvn.logpdf(X, mean=g["mean"], cov=g["variance"]) + np.log(self.priors[c])
        return np.asarray(classes)[np.argmax(P, axis=1)]

    def score(self, X, Y):
        """Return the fraction of rows of X whose predicted label equals Y."""
        P = self.predict(X)
        return np.mean(P == Y)

## Load data, run all code

In [None]:
if __name__ == '__main__':
    # Load the data and split it in half: the first half is used for
    # training, the second half for testing.
    # NOTE(review): 10000 is presumably a cap on the number of samples
    # returned by get_data - confirm against util.get_data.
    X, Y = get_data(10000)
    # Floor division: slice bounds must be integers, and `/` yields a float.
    Ntrain = len(Y) // 2
    Xtrain, Ytrain = X[:Ntrain], Y[:Ntrain]
    Xtest, Ytest = X[Ntrain:], Y[Ntrain:]

    model = NaiveBayes()

    # Time the fitting step.
    t0 = datetime.now()
    model.fit(Xtrain, Ytrain)
    print("Training time: ", datetime.now() - t0)

    # Time prediction + scoring on the held-out half.
    t0 = datetime.now()
    print("Test accuracy:", model.score(Xtest, Ytest))
    print("Time to compute test accuracy:", (datetime.now() - t0), "Test size:", len(Ytest))