In [175]:
import numpy as np
from sklearn import datasets
from sklearn.model_selection import train_test_split


In [176]:
# One seed drives both the synthetic data generation and the train/test split,
# so a fresh "Restart Kernel -> Run All" rebuilds the same dataset and the same
# accuracy. Previously only the split was seeded and the data changed every run.
SEED = 3119

# Synthetic 3-class problem: 10,000 samples, 10 features, 3 of them informative.
X, Y = datasets.make_classification(
    n_samples=10000,
    n_features=10,
    n_informative=3,
    n_classes=3,
    random_state=SEED,
)

# Hold out 20% of the samples for evaluation.
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=SEED)


In [177]:
class Naive_Bayes:
    """Gaussian Naive Bayes classifier for continuous features.

    Each feature is modelled, per class, as an independent normal distribution
    whose mean and variance are estimated from the training data. A sample is
    assigned to the class with the largest log-posterior
    ``log P(class) + sum_j log N(x_j | mean_j, var_j)``.

    All scoring is done in log-space. Taking ``log`` of an already evaluated
    density breaks down for samples far from a class mean: the density
    underflows to 0.0, every class scores ``-inf`` and the arg-max silently
    falls back to the first class.

    Parameters
    ----------
    var_smoothing : float, default=1e-9
        Fraction of the largest per-feature variance of the training data that
        is added to every class variance. This keeps the model finite when a
        feature is constant within a class (zero variance).

    Attributes set by ``fit``
    -------------------------
    _classes : sorted unique labels seen during training.
    mean, var : arrays of shape (n_classes, n_features); ``var`` includes the
        smoothing term ``epsilon``.
    prior : array of shape (n_classes,) with the empirical class frequencies.
    epsilon : the absolute value that was added to the variances.
    n_samples, n_features : shape of the training matrix.
    """

    def __init__(self, var_smoothing=1e-9):
        self.var_smoothing = var_smoothing

    def fit(self, X, Y):
        """Estimate per-class feature means, variances and class priors.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
        Y : array-like of shape (n_samples,)

        Raises
        ------
        ValueError
            If ``X`` is not a non-empty 2-D array or ``Y`` does not hold one
            label per row of ``X``.
        """
        X = np.asarray(X, dtype=np.float64)
        Y = np.asarray(Y)
        if X.ndim != 2:
            raise ValueError(f"X must be 2-dimensional, got {X.ndim} dimension(s)")
        if X.shape[0] == 0:
            raise ValueError("cannot fit on an empty training set")
        if Y.shape != (X.shape[0],):
            raise ValueError(
                f"Y must have shape ({X.shape[0]},) to match X, got {Y.shape}"
            )

        self.n_samples, self.n_features = X.shape

        self._classes = np.unique(Y)
        n_classes = len(self._classes)

        self.mean = np.zeros((n_classes, self.n_features), dtype=np.float64)
        self.var = np.zeros((n_classes, self.n_features), dtype=np.float64)
        self.prior = np.zeros(n_classes, dtype=np.float64)

        for idx, cls in enumerate(self._classes):
            X_c = X[Y == cls]
            self.mean[idx, :] = X_c.mean(axis=0)
            self.var[idx, :] = X_c.var(axis=0)
            self.prior[idx] = X_c.shape[0] / float(self.n_samples)

        # Scale the smoothing term by the data's own spread so it stays
        # negligible for informative features; fall back to a unit scale when
        # every feature is constant (or there are no features at all).
        largest_var = X.var(axis=0).max() if self.n_features else 0.0
        self.epsilon = self.var_smoothing * (largest_var if largest_var > 0 else 1.0)
        self.var += self.epsilon

    def predict(self, X):
        """Return the most probable class label for every row of ``X``.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)

        Returns
        -------
        numpy.ndarray of shape (n_samples,) holding labels from ``_classes``.

        Raises
        ------
        ValueError
            If ``X`` is not 2-D or its feature count differs from training.
        """
        X = np.asarray(X, dtype=np.float64)
        if X.ndim != 2 or X.shape[1] != self.n_features:
            raise ValueError(
                f"X must have shape (n_samples, {self.n_features}), got {X.shape}"
            )
        best = np.argmax(self._joint_log_likelihood(X), axis=1)
        return self._classes[best]

    def _predict(self, x):
        """Return the most probable class label for a single sample ``x``."""
        x = np.asarray(x, dtype=np.float64)
        scores = self._joint_log_likelihood(x[np.newaxis, :])[0]
        return self._classes[np.argmax(scores)]

    def _joint_log_likelihood(self, X):
        """Unnormalised log-posterior of every class for every row of ``X``.

        Returns an array of shape (n_samples, n_classes).
        """
        scores = np.empty((X.shape[0], len(self._classes)), dtype=np.float64)
        for idx in range(len(self._classes)):
            # Priors are strictly positive: every class in _classes was
            # observed at least once during fit.
            log_prior = np.log(self.prior[idx])
            scores[:, idx] = log_prior + self._log_pdf(idx, X).sum(axis=1)
        return scores

    def _log_pdf(self, idx, x):
        """Per-feature Gaussian log-density of ``x`` under class ``idx``.

        ``x`` may be one sample (n_features,) or a batch (n_samples, n_features).
        """
        mean = self.mean[idx]
        var = self.var[idx]
        return -0.5 * (np.log(2.0 * np.pi * var) + (x - mean) ** 2 / var)

    def _pdf(self, idx, x):
        """Per-feature Gaussian density of ``x`` under class ``idx``.

        Kept for backward compatibility; prediction uses ``_log_pdf`` because
        this value underflows to 0.0 far from the class mean.
        """
        return np.exp(self._log_pdf(idx, x))
    


In [178]:
# Fit the Naive Bayes model on the training split.
nb = Naive_Bayes()
nb.fit(X_train, Y_train)

# Label the held-out samples, then score them: accuracy is the fraction of
# test labels that were predicted exactly.
predictions = nb.predict(X_test)
is_correct = predictions == Y_test
acc = np.mean(is_correct)
print(f"Accuracy: {acc}")

Accuracy: 0.8745
