In [31]:
import numpy as np


class NaiveBayes:
    """Gaussian naive Bayes classifier.

    Each feature is modelled as an independent normal distribution per
    class. Prediction picks the class with the largest log-posterior,
    ``log P(class) + sum_j log N(x_j | mean_j, var_j)``.

    Parameters
    ----------
    var_smoothing : float, default 1e-9
        Fraction of the largest per-feature variance of the training data
        that is added to every class variance. This keeps the variances
        strictly positive, so a feature that is constant within a class does
        not cause a division by zero.

    Attributes (set by ``fit``)
    ---------------------------
    classes : ndarray of shape (n_classes,)
        Sorted unique labels seen in ``y``.
    mean, var : ndarray of shape (n_classes, n_features)
        Per-class feature means and (smoothed) variances.
    priors : ndarray of shape (n_classes,)
        Relative frequency of each class in the training data.
    """

    def __init__(self, var_smoothing=1e-9):
        self.var_smoothing = var_smoothing

    def fit(self, x, y):
        """Estimate per-class means, variances and priors.

        Parameters
        ----------
        x : array-like of shape (n_samples, n_features)
        y : array-like of shape (n_samples,)

        Returns
        -------
        str
            The fixed status message ``'data has been trained carefully'``.

        Raises
        ------
        ValueError
            If ``x`` is not a non-empty 2-D array or ``y`` does not hold one
            label per row of ``x``.
        """
        x = np.asarray(x, dtype=np.float64)
        y = np.asarray(y).ravel()
        if x.ndim != 2 or x.size == 0:
            raise ValueError(
                "x must be a non-empty 2-D array of shape (n_samples, n_features)"
            )
        n_samples, n_features = x.shape
        if y.shape[0] != n_samples:
            raise ValueError(
                "x and y disagree on the number of samples: "
                f"{n_samples} != {y.shape[0]}"
            )

        self.classes = np.unique(y)
        n_classes = len(self.classes)

        self.mean = np.zeros((n_classes, n_features), dtype=np.float64)
        self.var = np.zeros((n_classes, n_features), dtype=np.float64)
        self.priors = np.zeros(n_classes, dtype=np.float64)

        # Scale the smoothing term by the data's largest variance so that it
        # is negligible for well-behaved features; fall back to the raw
        # constant when every feature is constant over the whole data set.
        largest_var = x.var(axis=0).max()
        epsilon = self.var_smoothing * (largest_var if largest_var > 0 else 1.0)

        for idx, label in enumerate(self.classes):
            rows = x[y == label]
            self.mean[idx, :] = rows.mean(axis=0)
            self.var[idx, :] = rows.var(axis=0) + epsilon
            self.priors[idx] = rows.shape[0] / float(n_samples)
        return 'data has been trained carefully'

    def predict(self, x):
        """Predict a class label for every row of ``x``.

        Parameters
        ----------
        x : array-like of shape (n_samples, n_features)

        Returns
        -------
        list
            One label (an element of ``self.classes``) per row of ``x``.

        Raises
        ------
        ValueError
            If ``x`` is not 2-D.
        """
        x = np.asarray(x, dtype=np.float64)
        if x.ndim != 2:
            raise ValueError(
                "x must be a 2-D array of shape (n_samples, n_features)"
            )
        winners = np.argmax(self._joint_log_likelihood(x), axis=1)
        return list(self.classes[winners])

    def _predict(self, x):
        """Return the most probable class label for one sample ``x``."""
        return self.classes[np.argmax(self._joint_log_likelihood(x))]

    def _joint_log_likelihood(self, x):
        """Return ``log P(class) + log P(x | class)`` for every class.

        Accepts a single sample of shape (n_features,) or a batch of shape
        (n_samples, n_features); the class axis is the last axis of the
        result. The Gaussian is evaluated directly in log space, because
        taking ``log`` of the density underflows to ``-inf`` for samples far
        from every class mean.
        """
        x = np.asarray(x, dtype=np.float64)
        # (..., 1, n_features) broadcast against (n_classes, n_features).
        diff = x[..., np.newaxis, :] - self.mean
        log_pdf = -0.5 * (np.log(2.0 * np.pi * self.var) + diff ** 2 / self.var)
        return np.log(self.priors) + log_pdf.sum(axis=-1)

    def _pdf(self, class_idx, x):
        """Return the per-feature Gaussian density of ``x`` under one class.

        Not used for prediction (see ``_joint_log_likelihood``), because the
        density can underflow to 0 far from the class mean.
        """
        mean = self.mean[class_idx]
        var = self.var[class_idx]
        numerator = np.exp(-((x - mean) ** 2) / (2 * var))
        denominator = np.sqrt(2 * np.pi * var)
        return numerator / denominator

    def score(self, y_true, y_pred):
        """Return the accuracy: the fraction of ``y_pred`` equal to ``y_true``.

        Both arguments may be arrays or plain sequences.

        Raises
        ------
        ValueError
            If the inputs are empty or differ in length.
        """
        # Coerce to arrays: comparing two Python lists with == yields a
        # single bool rather than an element-wise result.
        y_true = np.asarray(y_true).ravel()
        y_pred = np.asarray(y_pred).ravel()
        if y_true.shape != y_pred.shape:
            raise ValueError(
                "y_true and y_pred must have the same length: "
                f"{y_true.shape[0]} != {y_pred.shape[0]}"
            )
        if y_true.size == 0:
            raise ValueError("cannot compute accuracy of empty inputs")
        return np.mean(y_true == y_pred)

In [32]:
from sklearn.model_selection import train_test_split as split
from sklearn.datasets import make_classification

In [33]:
# Synthetic two-class problem: 1000 samples with 10 features each.
# The fixed random_state makes the generated data reproducible.
x, y = make_classification(
    n_samples=1000,
    n_features=10,
    n_classes=2,
    random_state=123,
)

In [34]:
# Hold out 20% of the samples for evaluation; the seed fixes the split.
x_train, x_test, y_train, y_test = split(
    x,
    y,
    test_size=0.2,
    random_state=123,
)

In [35]:
# Instantiate the NaiveBayes classifier defined earlier in this notebook.
model = NaiveBayes()

In [36]:
# Estimate per-class means, variances and priors from the training split.
# fit() returns a status string, which is echoed as this cell's output.
model.fit(x_train, y_train)

'data has been trained carefully'

In [37]:
# Predict one class label per row of the held-out test split (returned as a list).
y_pred = model.predict(x_test)

In [38]:
# Accuracy on the test split: fraction of predictions equal to the true labels.
model.score(y_test, y_pred)

0.965