Creating a Naive Bayes classifier from scratch involves understanding and implementing the mathematical foundations of the algorithm. Below is a Python implementation of a simple Naive Bayes classifier for a classification problem:

# Steps:
* **Calculate Prior Probabilities:** The probabilities of each class in the dataset.
* **Calculate Likelihood:** The conditional probability of each feature value given a class.
* **Apply Bayes' Theorem:** Combine the prior and likelihood to calculate the posterior probabilities.
* **Predict:** Assign the class with the highest posterior probability.



In [1]:
import numpy as np
import pandas as pd


In [12]:
class NaiveBayesClassifier :

  def __init__(self):
    """
    Constructor for Initialising the parameters
    """
    self.classes = None
    self.prior_prob={}
    self.likelihood={}




  def fit(self, X, y):
    """
    This method is used to fit the naive bayes model on the training data
    """

    # Calculate the no of uique classes
    self.classes = np.unique(y)

    # Set no of rows and col
    n_rows , n_col= X.shape

    #Iterate through each class
    for c in self.classes:
         # Extract the class data
         class_data = X[y==c]

         # Calculate the prior probability
         self.prior_prob[c]= len(class_data)/n_rows

         # Calculate the likelihood
         self.likelihood[c]={
             features : {
                 value : (np.sum(class_data[:,features]==value))/len(class_data)
                 for value in np.unique(class_data[:,features])
             }for features in range(n_col)
         }


  def predict(self , X):
      """
      This method is used to predict the class of new data points
      """
      # Initialise the prediction list
      prediction = []

      # Computaion for each row in the input sample
      for x in X:
        posterior_prob= {}
        # Calculate prob for each class
        for c in self.classes:
          # initially P(y|X) = log(P(y))
          post_prob= np.log(self.prior_prob[c])
          # P(y|X) += log(P(X|y))
          for f in range(len(x)):
            lh = self.likelihood[c][f]
            post_prob+=np.log(lh.get(x[f], 1e-6))
          posterior_prob[c]= post_prob
        prediction.append(max(posterior_prob , key = posterior_prob.get))
      return np.array(prediction)






In [13]:
data = np.array([
    [1, 'S'],
    [1, 'M'],
    [1, 'M'],
    [1, 'S'],
    [0, 'S'],
    [0, 'M'],
    [0, 'M'],
    [0, 'L'],
    [0, 'L'],
    [1, 'L']
])
labels = np.array(['Yes', 'Yes', 'No', 'No', 'No', 'No', 'Yes', 'No', 'Yes', 'Yes'])

# Preprocessing: Convert categorical features to numeric
df = pd.DataFrame(data, columns=["Feature1", "Feature2"])
df["Feature2"] = df["Feature2"].astype('category').cat.codes
data = df.values

# Train the classifier
nb = NaiveBayesClassifier()
nb.fit(data, labels)

# Test the classifier
test_data = np.array([[1, 0], [0, 2], [1, 1]])
predictions = nb.predict(test_data)

print("Predictions:", predictions)

Predictions: ['Yes' 'No' 'No']
