<a href="https://colab.research.google.com/github/tahawarsi360/NLP_assignment/blob/main/NaiveBayesClassifier.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Apply Naïve Bayes Classifier to classify the given testing data, and predict the output
class of the given sample.

In [13]:
X = [[1, 1, 0, 1, 1, 1],
    [0, 1, 0, 1, 1, 0],
    [0, 1, 0, 0, 0, 0],
    [1, 0, 0, 1, 1, 1],
    [0, 0, 1, 0, 0, 1],
    [1, 1, 0, 1, 1, 0]]

y = ["Yes", "No", "Yes", "Yes", "No", "No"]

In [14]:
def train_naive_bayes(X, y):
  # Calculate class probabilities
  class_counts = {}
  for label in y:
    if label in class_counts:
      class_counts[label] += 1
    else:
      class_counts[label] = 1

  class_probabilities = {}
  for label, count in class_counts.items():
    class_probabilities[label] = count / len(y)

  # Calculate feature probabilities
  feature_counts = {}
  for i in range(len(X)):
    features = X[i]
    label = y[i]
    if label not in feature_counts:
      feature_counts[label] = {}
    for j, feature in enumerate(features):
      if j not in feature_counts[label]:
        feature_counts[label][j] = {}
      if feature in feature_counts[label][j]:
        feature_counts[label][j][feature] += 1
      else:
        feature_counts[label][j][feature] = 1

  feature_probabilities = {}
  for label, feature_counts_per_label in feature_counts.items():
    feature_probabilities[label] = {}
    for feature_index, feature_counts_per_index in feature_counts_per_label.items():
      feature_probabilities[label][feature_index] = {}
      total_counts = sum(feature_counts_per_index.values())
      for feature, count in feature_counts_per_index.items():
        feature_probabilities[label][feature_index][feature] = count / total_counts

  return class_probabilities, feature_probabilities

In [15]:
def predict_naive_bayes(class_probabilities, feature_probabilities, X):
  y_pred = []
  for features in X:
    max_prob = -1
    predicted_label = None
    for label, class_prob in class_probabilities.items():
      prob = class_prob
      for feature_index, feature in enumerate(features):
        if feature_index in feature_probabilities[label] and feature in feature_probabilities[label][feature_index]:
          prob *= feature_probabilities[label][feature_index][feature]
      if prob > max_prob:
        max_prob = prob
        predicted_label = label
    y_pred.append(predicted_label)
  return y_pred

In [16]:
x_test = [[1, 0, 1, 1, 1, 1]]

In [17]:
class_probabilities, feature_probabilities = train_naive_bayes(X, y)

In [18]:
print( predict_naive_bayes(class_probabilities, feature_probabilities, x_test))

['Yes']


Naive bayes using laplace smoothing

In [19]:
def train_naive_bayes_with_smoothing(X, y, alpha=1):
    # Calculate class probabilities
    class_counts = {}
    for label in y:
        if label in class_counts:
            class_counts[label] += 1
        else:
            class_counts[label] = 1

    class_probabilities = {}
    for label, count in class_counts.items():
        class_probabilities[label] = (count + alpha) / (len(y) + alpha * len(class_counts))

    # Calculate feature probabilities
    feature_counts = {}
    for i in range(len(X)):
        features = X[i]
        label = y[i]
        if label not in feature_counts:
            feature_counts[label] = {}
        for j, feature in enumerate(features):
            if j not in feature_counts[label]:
                feature_counts[label][j] = {}
            if feature in feature_counts[label][j]:
                feature_counts[label][j][feature] += 1
            else:
                feature_counts[label][j][feature] = 1

    feature_probabilities = {}
    for label, feature_counts_per_label in feature_counts.items():
        feature_probabilities[label] = {}
        for feature_index, feature_counts_per_index in feature_counts_per_label.items():
            feature_probabilities[label][feature_index] = {}
            total_counts = sum(feature_counts_per_index.values())
            for feature, count in feature_counts_per_index.items():
                # applying laplace smothing alpha = 1 to probabilities
                feature_probabilities[label][feature_index][feature] = (count + alpha) / (total_counts + alpha * len(feature_counts_per_index))

    return class_probabilities, feature_probabilities

In [20]:
class_probabilities, feature_probabilities = train_naive_bayes_with_smoothing(X, y)

In [21]:
print( predict_naive_bayes(class_probabilities, feature_probabilities, x_test))

['Yes']
