<a href="https://colab.research.google.com/github/nkrj01/Models-from-scratch/blob/main/Naivebayes.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Importing Dependencies

In [18]:
import numpy as np
from sklearn.datasets import load_breast_cancer
import matplotlib.pyplot as plt
from scipy.stats import norm as gaussian
from sklearn.model_selection import train_test_split

Loading example data

In [19]:
# Load the breast-cancer dataset that ships with scikit-learn.
data_obj = load_breast_cancer()

# Feature matrix, and the target turned into a single column so that it can
# be glued onto the features.
X, y = data_obj["data"], data_obj["target"][:, np.newaxis]

# One combined array: every column but the last is a feature, the last
# column is the class label.
data = np.concatenate((X, y), axis=1)
print(data.shape)

(569, 31)


Creating Naive Bayes estimator class

In [37]:
class naiveBayes:
  """
  Gaussian Naive Bayes classifier.

  Each feature is modelled, independently per class, as a normal
  distribution whose mean and standard deviation are estimated from the
  training data. Prediction picks the class with the largest
  log-posterior: log(prior) + sum of per-feature log-densities.
  """

  def __init__(self, min_std=1e-9) -> None:
    """
    min_std: positive lower bound applied to the standard deviations when
      evaluating densities. A feature that is constant within a class has
      std == 0, for which the normal density is undefined; the floor keeps
      the log-density finite. The fitted parameters themselves are not
      modified.

    Raises ValueError if min_std is not strictly positive.
    """
    if not min_std > 0:
      raise ValueError("min_std must be a positive number")
    self.min_std = min_std
    self.parameters = None  # populated by fit()

  def fit(self, data):
    """
    Given the train data, this function fits the model and returns
    relevant parameters of the model.

    data: 2-D array-like; every column but the last holds features, the
      last column holds the class label.

    Returns (and stores in self.parameters) a dict with the keys
      "mean"   : {label: per-feature mean of that class}
      "std"    : {label: per-feature (population) std of that class}
      "prior"  : {label: fraction of training rows with that label}
      "labels" : sorted array of the distinct labels

    Raises ValueError if data has no rows.
    """
    data = np.asarray(data)
    X = data[:, :-1]
    y = data[:, -1]
    if y.shape[0] == 0:
      # An empty model could never produce a prediction; fail here rather
      # than later inside predict().
      raise ValueError("cannot fit naiveBayes on an empty data set")

    labels, counts = np.unique(y, return_counts=True)

    mean = {}
    std = {}
    prior = {}
    for label, count in zip(labels, counts):
      X_label = X[y == label]
      mean[label] = np.mean(X_label, axis=0)
      std[label] = np.std(X_label, axis=0)
      prior[label] = int(count) / len(y)

    self.parameters = {
        "mean" : mean,
        "std" : std,
        'prior' : prior,
        'labels': labels
    }

    return self.parameters

  def _joint_log_likelihood(self, X):
    """
    Log-posterior (up to a shared constant) of every class for every row.

    X: float array of shape (n_samples, n_features).
    Returns an array of shape (n_samples, n_classes) whose columns follow
    the order of self.parameters["labels"].
    """
    params = self.parameters
    labels = params["labels"]
    means = np.stack([params["mean"][label] for label in labels])
    stds = np.stack([params["std"][label] for label in labels])
    stds = np.maximum(stds, self.min_std)  # guard against zero variance
    log_priors = np.log([params["prior"][label] for label in labels])

    if X.shape[1] != means.shape[1]:
      raise ValueError(
          "expected %d features per sample, got %d"
          % (means.shape[1], X.shape[1]))

    # Work with the log-density directly instead of log(pdf): the pdf
    # underflows to 0 for distant points, the log-density does not.
    # Broadcast to (n_samples, n_classes, n_features).
    z = (X[:, np.newaxis, :] - means) / stds
    log_pdf = -0.5 * z ** 2 - np.log(stds) - 0.5 * np.log(2.0 * np.pi)
    return log_pdf.sum(axis=2) + log_priors

  def predict(self, x):
    """
    After fitting the model, this function can be called to predict
    new test data.

    x: a single sample (1-D, n_features) or a batch (2-D,
      n_samples x n_features).

    Returns the predicted label for a 1-D input, or an array of labels
    (one per row) for a 2-D input. Ties go to the smallest label.

    Raises AttributeError if the model has not been fitted, and
    ValueError if x is not 1-D/2-D or has the wrong number of features.
    """
    params = getattr(self, "parameters", None)
    if params is None:
      raise AttributeError(
          "naiveBayes instance is not fitted yet; call fit() first")

    x = np.asarray(x, dtype=float)
    if x.ndim not in (1, 2):
      raise ValueError("x must be a 1-D sample or a 2-D batch of samples")

    scores = self._joint_log_likelihood(np.atleast_2d(x))
    # argmax returns the first maximum, i.e. the smallest label on a tie.
    predicted = params["labels"][np.argmax(scores, axis=1)]
    return predicted[0] if x.ndim == 1 else predicted

  def accuracy(self, x_test, y_test):
    """
    This function calculates the accuracy on the test set, i.e. the
    fraction of rows whose predicted label equals the true label.

    x_test: 2-D array-like of shape (n_samples, n_features).
    y_test: array-like with n_samples labels (any shape; it is flattened).

    Raises ValueError if x_test and y_test disagree on the sample count.
    """
    x_test = np.atleast_2d(np.asarray(x_test, dtype=float))
    y_true = np.asarray(y_test).ravel()
    if y_true.shape[0] != x_test.shape[0]:
      raise ValueError("x_test and y_test must have the same number of samples")

    y_predict = self.predict(x_test)
    return np.mean(y_predict == y_true)

Testing model's performance

In [39]:
# Stratified 80/20 split; the fixed seed keeps the split reproducible.
x_train, x_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)

# fit() expects features and label in one array, label in the last column.
data_train = np.concatenate((x_train, y_train.reshape(-1, 1)), axis=1)

nb = naiveBayes()
nb.fit(data_train)

# Sanity check on the first held-out sample, then the overall test accuracy.
y_predict = nb.predict(x_test[0])
print(y_predict)
print(nb.accuracy(x_test, y_test))

0.0
0.9298245614035088
