In [197]:
import numpy as np
import math

In [198]:
class NaiveBayes:
  def __init__(self, ):
    self.classes = 0

  
  def fit(self,X_train, y_train):
    n_sample, n_features = X_train.shape
    self.classes = np.unique(y_train)
    n_classes = len(self.classes)

    self.mean = np.zeros((n_classes, n_features), dtype = np.float64) #initializing the mean ndarray
    self.var = np.zeros((n_classes, n_features), dtype = np.float64) #initializing the variance ndarray
    self.prior = np.zeros(n_classes, dtype = np.float64) #initializing the prior ndarray 

    for c in self.classes:
      X_train_c = X_train[c == y_train]
      self.mean[c, :] = X_train_c.mean(axis = 0)
      self.var[c, :] = X_train_c.mean(axis = 0)
      self.prior[c] = X_train_c.shape[0]/ n_sample

  def predict(self, X_test):
    y_predict = [self._predict(x) for x in X_test]
    return y_predict

  def _predict(self, x):
    posterior_list = []
    for c in self.classes:

      #prior
      prior_of_class = np.log(self.prior[c])

      #class probability
      mean_of_class = self.mean[c]
      var_of_class = self.var[c]
      num = np.exp(-(x-mean_of_class)**2) / 2*var_of_class
      deno  = np.sqrt(2*math.pi*var_of_class)
      pdf = num/deno
      class_prob = np.sum(np.log(pdf))

      #posterior probability
      posterior = prior_of_class + class_prob
      posterior_list.append(posterior)

    return self.classes[np.argmax(posterior_list)]

def accuracy(y_true, y_pred):
  return np.sum([y_true==y_pred])/len(y_true)

In [199]:
from sklearn import datasets
from sklearn.model_selection import train_test_split
X,y = datasets.make_classification(n_samples=1000, n_features=5, n_classes=2, random_state=42)
X_train, X_test, y_train, y_test = train_test_split(X,y)

nb = NaiveBayes()
nb.fit(X_train, y_train)
y_pred = nb.predict(X_test)
accuracy(y_test, y_pred)



0.516

In [200]:
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score

In [201]:
gnb = GaussianNB()
gnb.fit(X_train, y_train)
gnb.predict(X_test)
accuracy_score(y_test, y_pred)

0.516