# Non-Naive Bayes Classifier

## Import Libraries:

In [5]:
import numpy as np
from util import get_data
from datetime import datetime
from scipy.stats import norm
from scipy.stats import multivariate_normal as mvn

#### In the Non-Naive Bayes approach, we do not assume independence between features. Instead, we model the relationships and correlations between them using the full structure of the Multivariate Gaussian distribution.

#### The Formula (Dependent Features)

When features are dependent, the class-conditional density $p(x \mid y)$ cannot be factored into a product of per-feature densities. Instead, for each class we use the general form of the Multivariate Normal distribution:

$$p(x | \mu, \Sigma) = \frac{1}{(2\pi)^{d/2} |\Sigma|^{1/2}} \exp\left( -\frac{1}{2} (x - \mu)^T \Sigma^{-1} (x - \mu) \right)$$

Where:
* $d$ is the number of features.
* $\mu$ is the mean vector for all features.
* $\Sigma$ is the Full Covariance Matrix.
* $|\Sigma|$ is the determinant of the covariance matrix.
* $\Sigma^{-1}$ is the inverse of the covariance matrix.

#### Why use a Full Covariance Matrix?

In a Non-Naive model, we acknowledge that features often influence one another.
1) Non-Zero Covariance: The covariance between different features $i$ and $j$ ($i \neq j$) is estimated from the data and may be positive, negative, or zero, rather than being fixed at zero.
2) Structure of $\Sigma$: The matrix $\Sigma$ is a full square matrix where the off-diagonal elements capture the "non-naive" dependencies between variables.

$$\Sigma = \begin{bmatrix} \sigma_1^2 & \text{cov}(1, 2) & \dots & \text{cov}(1, d) \\ \text{cov}(2, 1) & \sigma_2^2 & \dots & \text{cov}(2, d) \\ \vdots & \vdots & \ddots & \vdots \\ \text{cov}(d, 1) & \text{cov}(d, 2) & \dots & \sigma_d^2 \end{bmatrix}$$

In [6]:
class NaiveBayes(object):
    """Gaussian Bayes classifier using a full covariance matrix per class.

    The class name is kept for backward compatibility; the model itself does
    not assume feature independence. Each class is described by a
    multivariate normal (mean vector + full covariance matrix) and a prior.
    """

    def fit(self, X, Y, smoothing=10e-3):
        """Estimate the per-class mean, covariance and prior.

        Args:
            X: 2-D numpy array of samples, one row per sample.
            Y: 1-D numpy array of labels aligned with the rows of ``X``.
            smoothing: value added to the diagonal of every covariance
                matrix so that it stays invertible.
        """
        _, D = X.shape

        # label -> {"mean": mean vector, "covariance": D x D matrix}
        self.gaussians = dict()
        # label -> P(Y = label)
        self.priors = dict()

        # One Gaussian and one prior per distinct label in Y.
        for c in set(Y):
            current_x = X[Y == c]
            self.gaussians[c] = {
                "mean": current_x.mean(axis=0),
                # np.cov expects variables in rows, hence the transpose.
                "covariance": np.cov(current_x.T) + np.eye(D) * smoothing,
            }
            # P(Y = c): fraction of training samples with this label.
            # Convert before dividing so the result is never truncated.
            self.priors[c] = float(len(current_x)) / len(Y)

    def predict(self, X):
        """Return the most probable label for each row of ``X``.

        Args:
            X: 2-D numpy array of samples, one row per sample.

        Returns:
            1-D numpy array holding, for every row, the label whose
            log-likelihood plus log-prior is largest.
        """
        N, _ = X.shape
        # Fix a column order for the labels so they need not be 0..K-1.
        classes = sorted(self.gaussians)
        P = np.zeros((N, len(classes)))
        for k, c in enumerate(classes):
            g = self.gaussians[c]
            # log P(X | Y = c) + log P(Y = c); the evidence term is the same
            # for every class and can be dropped for the argmax.
            P[:, k] = (
                mvn.logpdf(X, mean=g["mean"], cov=g["covariance"])
                + np.log(self.priors[c])
            )
        return np.asarray(classes)[np.argmax(P, axis=1)]

    def score(self, X, Y):
        """Return the fraction of rows in ``X`` whose prediction equals ``Y``."""
        P = self.predict(X)
        return np.mean(P == Y)

## Load data, run all code

In [None]:
if __name__ == '__main__':
    # Load the data set (the argument is presumably a sample limit -- see
    # util.get_data) and split it in half: first half trains, second tests.
    X, Y = get_data(10000)
    # Floor division keeps Ntrain an int; "/" yields a float on Python 3,
    # which is not a valid slice index.
    Ntrain = len(Y) // 2
    Xtrain, Ytrain = X[:Ntrain], Y[:Ntrain]
    Xtest, Ytest = X[Ntrain:], Y[Ntrain:]

    model = NaiveBayes()

    # Time the fitting step.
    t0 = datetime.now()
    model.fit(Xtrain, Ytrain)
    print("Training time: ", datetime.now() - t0)

    # Time prediction + scoring on the held-out half.
    t0 = datetime.now()
    print("Test accuracy:", model.score(Xtest, Ytest))
    print("Time to compute test accuracy:", (datetime.now() - t0), "Test size:", len(Ytest))