In [1]:
import numpy as np

In [2]:
class NaiveBayes:
    def fit(self,X,y):
        n_samples,n_features = X.shape
        self._classes = np.unique(y)
        self.n_classes = len(self._classes)
        
        
        self._mean = np.zeros((self.n_classes,n_features),dtype = np.float64)
        self._var = np.zeros((self.n_classes,n_features),dtype = np.float64)
        self._priors = np.zeros(self.n_classes,dtype=np.float64)
        
        for idx,c in enumerate(self._classes):
            X_c = X[y==c]
            
            self._mean[idx] = X_c.mean(axis=0)
            self._var[idx] = X_c.var(axis=0)
            self._priors[idx] = len(X_c)/n_samples
            
    
    def predict(self,X):
        preds = [self._predict(x) for x in X]
        return preds
    
    def _predict(self,x):
        posteriors = []
        
        for idx,c in enumerate(self._classes):
            prior = self._priors[idx]
            
            posterior = np.sum(np.log(self._pdf(idx,x)))
            
            posterior += prior
            
            posteriors.append(posterior)
            
        return self._classes[np.argmax(posteriors)]
    
    def _pdf(self,idx,x):
        mean = self._mean[idx]
        var = self._var[idx]
        
        num = np.exp(-((x-mean)**2)/(2*var))
        den = np.sqrt(2*np.pi*var)
        
        return num/den

In [3]:
import sklearn
from sklearn import datasets

bc = datasets.load_breast_cancer()

X,y = bc.data,bc.target

from sklearn.model_selection import train_test_split
X_train,X_test,y_train,y_test = train_test_split(X,y)

In [4]:
clf = NaiveBayes()
clf.fit(X_train,y_train)
preds = clf.predict(X_test)

In [5]:
acc = np.sum(preds == y_test) /len(y_test) 
acc

0.9230769230769231