In [None]:
import numpy as np

In [None]:
class NaiveBayes:
  """Gaussian Naive Bayes classifier for numeric features.

  Every feature is modelled, separately for each class, as a normal
  distribution whose mean and population variance (ddof=0) are estimated
  from the training data. Features are treated as conditionally independent
  given the class, and prediction returns the class with the largest
  log-posterior.

  Parameters
  ----------
  min_var : float, default 1e-9
      Lower bound applied to each per-class variance when a density is
      evaluated. Variances above the bound are used unchanged; the bound
      only prevents a division by zero when a feature is constant within a
      class.

  Attributes set by ``fit``
  -------------------------
  classes : ndarray
      Sorted unique labels (from ``np.unique``).
  count : int
      Number of classes.
  feature_nums : int
      Number of feature columns.
  rows : int
      Number of training rows.
  mean, var : ndarray of shape (count, feature_nums)
      Per-class feature means and variances, one row per class.
  prior : ndarray of shape (count,)
      Relative frequency of each class in the training data.
  """

  def __init__(self, min_var=1e-9):
    self.min_var = min_var

  def cal_mean_var(self, features, target):
    """Estimate per-class mean and variance of every feature.

    Parameters
    ----------
    features : pandas.DataFrame
        Numeric feature columns, one row per sample.
    target : array-like
        Class label for each row of ``features``.

    Returns
    -------
    (mean, var) : tuple of ndarray
        Both of shape (n_classes, n_features).
    """
    # groupby sorts the group keys, which matches the ordering np.unique
    # produces for self.classes, so row i belongs to self.classes[i].
    grouped = features.groupby(target)
    # Use the groupby reductions directly: they always reduce per column,
    # whereas apply(np.mean) can collapse a whole group to a single scalar.
    self.mean = grouped.mean().to_numpy()
    # ddof=0 keeps the population variance that np.var computes by default.
    self.var = grouped.var(ddof=0).to_numpy()
    return self.mean, self.var

  def _class_params(self, class_idx):
    """Return (mean, variance) of one class, with the variance floored at min_var."""
    return self.mean[class_idx], np.maximum(self.var[class_idx], self.min_var)

  def pdf(self, class_idx, x):
    """Gaussian density of each feature value in ``x`` under class ``class_idx``.

    Computes exp(-(x - mean)^2 / (2 * var)) / sqrt(2 * pi * var) element-wise.
    """
    mean, var = self._class_params(class_idx)
    numerator = np.exp(-((x - mean) ** 2) / (2 * var))
    denominator = np.sqrt(2 * np.pi * var)
    return numerator / denominator

  def _log_pdf(self, class_idx, x):
    """Log of ``pdf`` computed directly, so tiny densities do not underflow to log(0)."""
    mean, var = self._class_params(class_idx)
    return -0.5 * (np.log(2 * np.pi * var) + (x - mean) ** 2 / var)

  def cal_prior(self, features, target):
    """Estimate class priors as the fraction of training rows in each class.

    Returns an ndarray of shape (n_classes,), ordered like ``self.classes``.
    """
    # Divide by the length of the data actually passed in, so the method
    # does not depend on fit() having stored self.rows beforehand.
    self.prior = (features.groupby(target).size() / len(features)).to_numpy()
    return self.prior

  def cal_posterior(self, x):
    """Return the most probable class label for a single sample ``x``.

    The score of class i is log(prior_i) + sum_j log p(x_j | class i); the
    shared evidence term is omitted because it does not affect the argmax.
    """
    log_posteriors = [
        np.log(self.prior[i]) + np.sum(self._log_pdf(i, x))
        for i in range(self.count)
    ]
    return self.classes[np.argmax(log_posteriors)]

  def fit(self, features, target):
    """Learn class labels, priors and per-class Gaussian parameters.

    Parameters
    ----------
    features : pandas.DataFrame
        Numeric feature columns, one row per sample.
    target : array-like
        Class label for each row of ``features``.
    """
    self.classes = np.unique(target)
    self.count = len(self.classes)
    self.feature_nums = features.shape[1]
    self.rows = features.shape[0]

    self.cal_mean_var(features, target)
    self.cal_prior(features, target)

  def predict(self, features):
    """Predict a class label for every row of ``features``.

    Accepts a DataFrame or any 2-D array-like of numbers and returns a list
    with one label per row.
    """
    samples = np.asarray(features, dtype=float)
    return [self.cal_posterior(sample) for sample in samples]

In [None]:
import pandas as pd
# Location of the Iris CSV (presumably a mounted Google Drive in Colab — adjust for other environments).
IRIS_CSV_PATH = '/content/drive/MyDrive/Data Science datasets/Iris.csv'
# Read the dataset into a DataFrame.
iris = pd.read_csv(IRIS_CSV_PATH)

In [None]:
# Preview the first five rows to check which columns were loaded.
iris.head(n=5)

Unnamed: 0,Id,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,Species
0,1,5.1,3.5,1.4,0.2,Iris-setosa
1,2,4.9,3.0,1.4,0.2,Iris-setosa
2,3,4.7,3.2,1.3,0.2,Iris-setosa
3,4,4.6,3.1,1.5,0.2,Iris-setosa
4,5,5.0,3.6,1.4,0.2,Iris-setosa


In [None]:
# Create an instance of the NaiveBayes class defined earlier in this notebook;
# it holds no fitted parameters until fit() is called.
nb = NaiveBayes()

In [None]:
from sklearn.model_selection import train_test_split
# Hold out 30% of the rows for testing; random_state=0 makes the shuffle repeatable.
# The whole frame is split here, so the feature columns still have to be
# selected from X_train / X_test before fitting and predicting.
species = iris['Species']
X_train, X_test, y_train, y_test = train_test_split(
    iris,
    species,
    test_size=0.3,
    random_state=0,
)

In [None]:
# Train on positional columns 1-4 of the split frame
# (presumably the four measurement columns — confirm against iris.columns).
train_features = X_train.iloc[:, [1, 2, 3, 4]]
nb.fit(train_features, y_train)

In [None]:
# Predict on the held-out rows, using positional columns 1-4 as the features.
test_features = X_test.iloc[:, [1, 2, 3, 4]]
y_pred = nb.predict(test_features)

In [None]:
from sklearn import metrics
# Fraction of held-out samples whose predicted label equals the true label.
score = metrics.accuracy_score(y_true=y_test, y_pred=y_pred)
score

0.9777777777777777

In [None]:
# Confusion matrix of the held-out predictions: rows are true classes,
# columns are predicted classes.
cf = metrics.confusion_matrix(y_true=y_test, y_pred=y_pred)
cf

array([[16,  0,  0],
       [ 0, 18,  0],
       [ 0,  1, 10]])