# Naive Bayes Family of Distributions

In [1]:
import numpy as np
import pandas as pd
from scipy.stats import multivariate_normal as mvn

In [2]:
# Create Gaussian Naive Bayes Classifier
class GaussNB():
    """Gaussian naive Bayes classifier.

    Each class is modelled by an axis-aligned Gaussian: one mean and one
    variance per feature, with features treated as independent given the
    class.  Class labels may be arbitrary integers; they do not have to be
    the contiguous range ``0..K-1``.
    """

    def fit(self, X, y, epsilon=1e-2):
        """Estimate per-class feature means, variances and priors.

        Parameters
        ----------
        X : array-like of shape (N, D)
            Training samples, one row per observation.
        y : array-like of shape (N,)
            Class labels; they are cast to ``int``.
        epsilon : float, default 1e-2
            Constant added to every variance so that a feature which is
            constant within a class does not yield a zero variance.
        """
        X = np.asarray(X)
        labels = np.asarray(y).astype(int)

        self.likelihoods = dict()
        self.priors = dict()
        # Sorted label array: fixes the column order used by ``predict`` and
        # lets argmax positions be mapped back to the real labels.
        self.classes_ = np.unique(labels)
        self.K = set(self.classes_.tolist())

        for k in self.classes_.tolist():
            X_k = X[labels == k, :]
            mu_k = X_k.mean(axis=0)
            # A 1-D "cov" is interpreted by scipy as the diagonal of the
            # covariance matrix, which is exactly the naive assumption.
            self.likelihoods[k] = {"mean": mu_k, "cov": X_k.var(axis=0) + epsilon}
            self.priors[k] = len(X_k) / len(X)

    def predict(self, X):
        """Return the most probable class label for each row of ``X``.

        Parameters
        ----------
        X : array-like of shape (N, D)

        Returns
        -------
        numpy.ndarray of shape (N,)
            Predicted labels, taken from the labels seen in ``fit``.
        """
        X = np.asarray(X)
        N = X.shape[0]
        P_hat = np.zeros((N, len(self.classes_)))

        # Column position ``col`` is decoupled from the label value ``k`` so
        # that labels such as {1, 3} do not index out of bounds.
        for col, k in enumerate(self.classes_.tolist()):
            l = self.likelihoods[k]
            P_hat[:, col] = mvn.logpdf(X, l["mean"], l["cov"]) + np.log(self.priors[k])

        return self.classes_[P_hat.argmax(axis=1)]

    def accuracy(self, X, y):
        """Return the fraction of rows in ``X`` whose prediction equals ``y``."""
        y_hat = self.predict(X)
        return np.mean(np.asarray(y) == y_hat)

In [3]:
# Create Gaussian Bayes Classifier
class GaussBayes():
    """Gaussian Bayes classifier with a full covariance matrix per class.

    Unlike the naive variant, feature correlations within a class are
    modelled.  Class labels may be arbitrary integers; they do not have to be
    the contiguous range ``0..K-1``.
    """

    def fit(self, X, y, epsilon=1e-2):
        """Estimate per-class means, covariance matrices and priors.

        Parameters
        ----------
        X : array-like of shape (N, D)
            Training samples, one row per observation.
        y : array-like of shape (N,)
            Class labels; they are cast to ``int``.
        epsilon : float, default 1e-2
            Ridge term: ``epsilon * I`` is added to every covariance matrix
            so that it stays invertible.
        """
        X = np.asarray(X)
        labels = np.asarray(y).astype(int)

        self.likelihoods = dict()
        self.priors = dict()
        # Sorted label array: fixes the column order used by ``predict`` and
        # lets argmax positions be mapped back to the real labels.
        self.classes_ = np.unique(labels)
        self.K = set(self.classes_.tolist())

        for k in self.classes_.tolist():
            X_k = X[labels == k, :]  # all samples of class k
            N_k, D = X_k.shape
            mu_k = X_k.mean(axis=0)  # per-feature mean
            # Centre both factors so the scatter matrix is symmetric.
            centered = X_k - mu_k
            # Unbiased estimator divides by N_k - 1; for a single-sample
            # class the scatter is all zeros, so dividing by 1 avoids a
            # ZeroDivisionError and leaves only the ridge term.
            dof = max(N_k - 1, 1)
            cov_k = np.matmul(centered.T, centered) / dof + epsilon * np.identity(D)
            self.likelihoods[k] = {"mean": mu_k, "cov": cov_k}
            self.priors[k] = len(X_k) / len(X)

    def predict(self, X):
        """Return the most probable class label for each row of ``X``.

        Parameters
        ----------
        X : array-like of shape (N, D)

        Returns
        -------
        numpy.ndarray of shape (N,)
            Predicted labels, taken from the labels seen in ``fit``.
        """
        X = np.asarray(X)
        N = X.shape[0]
        P_hat = np.zeros((N, len(self.classes_)))  # N rows x K columns

        # Column position ``col`` is decoupled from the label value ``k`` so
        # that labels such as {2, 7} do not index out of bounds.
        for col, k in enumerate(self.classes_.tolist()):
            l = self.likelihoods[k]
            P_hat[:, col] = mvn.logpdf(X, l["mean"], l["cov"]) + np.log(self.priors[k])

        return self.classes_[P_hat.argmax(axis=1)]

    def accuracy(self, X, y):
        """Return the fraction of rows in ``X`` whose prediction equals ``y``."""
        y_hat = self.predict(X)
        return np.mean(np.asarray(y) == y_hat)

In [4]:
# Create Multinomial Naive Bayes Classifier
class MultinoNB():
    """Multinomial naive Bayes classifier for count-valued features.

    Class labels may be arbitrary integers; predictions are returned as the
    original labels, not as positions in an internal list.
    """

    def fit(self, X, y, epsilon=1e-3):
        """Estimate per-class feature log-probabilities and log-priors.

        Parameters
        ----------
        X : array-like of shape (N, D)
            Feature matrix; rows are summed per class, so entries are
            presumably non-negative counts.
        y : array-like of shape (N,)
            Class labels; they are cast to ``int``.
        epsilon : float, default 1e-3
            Additive smoothing applied to every per-class feature total so
            that no feature ends up with probability zero.
        """
        X = np.asarray(X)
        labels = np.asarray(y).astype(int)

        # Sorted label array: row i of ``self.sum`` / ``self.log_prob`` and
        # entry i of ``self.priors`` all refer to ``self.classes_[i]``.
        self.classes_ = np.unique(labels)
        self.K = set(self.classes_.tolist())
        self.sum = []
        self.priors = []

        N = X.shape[0]
        for k in self.classes_.tolist():
            X_k = X[labels == k, :]
            self.sum.append(X_k.sum(axis=0) + epsilon)
            self.priors.append(np.log(len(X_k) / N))

        totals = np.array(self.sum)
        # Normalise each class row to a probability vector, then take logs.
        self.log_prob = np.log(totals / totals.sum(axis=1, keepdims=True))

    def predict(self, X):
        """Return the most probable class label for each row of ``X``.

        Parameters
        ----------
        X : array-like of shape (N, D)

        Returns
        -------
        numpy.ndarray of shape (N,)
            Predicted labels, taken from the labels seen in ``fit``.
        """
        X = np.asarray(X)
        # Row n, column c: sum_d x[n, d] * log p[c, d] + log prior[c].
        P_hat = X @ self.log_prob.T + np.asarray(self.priors)
        return self.classes_[np.argmax(P_hat, axis=1)]

    def accuracy(self, X, y):
        """Return the fraction of rows in ``X`` whose prediction equals ``y``."""
        y_hat = self.predict(X)
        return np.mean(np.asarray(y) == y_hat)

In [5]:
# Create Bernoulli Naive Bayes Classifier
class BernoNB():
    """Bernoulli naive Bayes classifier for binary (0/1) features.

    Class labels may be arbitrary integers; predictions are returned as the
    original labels, not as positions in an internal list.
    """

    def fit(self, X, y, epsilon=1e-2):
        """Estimate per-class feature activation probabilities and log-priors.

        Parameters
        ----------
        X : array-like of shape (N, D)
            Feature matrix.  NOTE(review): the probability estimate is only
            meaningful when entries are 0 or 1 - confirm against callers.
        y : array-like of shape (N,)
            Class labels; they are cast to ``int``.
        epsilon : float, default 1e-2
            Smoothing: ``epsilon`` is added to each per-class feature total
            and ``2 * epsilon`` to each class size, which keeps the estimated
            probabilities strictly between 0 and 1 for binary input.
        """
        X = np.asarray(X)
        labels = np.asarray(y).astype(int)

        # Sorted label array: row i of ``self.probs`` and entry i of
        # ``self.priors`` / ``self.class_obs`` refer to ``self.classes_[i]``.
        self.classes_ = np.unique(labels)
        self.K = set(self.classes_.tolist())
        self.sum = []
        self.priors = []
        self.class_obs = []

        N = X.shape[0]
        for k in self.classes_.tolist():
            X_k = X[labels == k, :]
            self.sum.append(X_k.sum(axis=0) + epsilon)
            self.priors.append(np.log(len(X_k) / N))
            self.class_obs.append(len(X_k) + 2 * epsilon)

        # Divide each class row of totals by that class's smoothed size.
        self.probs = np.array(self.sum) / np.array(self.class_obs)[:, np.newaxis]

    def predict(self, X):
        """Return the most probable class label for each row of ``X``.

        Parameters
        ----------
        X : array-like of shape (N, D)

        Returns
        -------
        numpy.ndarray of shape (N,)
            Predicted labels, taken from the labels seen in ``fit``.
        """
        X = np.asarray(X)
        # Logs are computed once here instead of once per input row.
        log_on = np.log(self.probs)
        log_off = np.log(1 - self.probs)
        # |x - 1| equals (1 - x) for binary x, i.e. the "feature absent" mask.
        P_hat = X @ log_on.T + np.abs(X - 1) @ log_off.T + np.asarray(self.priors)
        return self.classes_[np.argmax(P_hat, axis=1)]

    def accuracy(self, X, y):
        """Return the fraction of rows in ``X`` whose prediction equals ``y``."""
        y_hat = self.predict(X)
        return np.mean(np.asarray(y) == y_hat)