In [1]:
import numpy as np
import sklearn
from sklearn.model_selection import train_test_split
from sklearn import datasets
from sklearn import metrics 


In [2]:
# Synthetic two-class dataset: 100 samples with 10 features each.
# The fixed random_state makes the generated data reproducible.
X, y = datasets.make_classification(
    n_samples=100,
    n_features=10,
    n_classes=2,
    random_state=123,
)

In [3]:
# Hold out 20% of the samples for evaluation; the fixed random_state keeps
# the split reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=123,
)

In [4]:

class NaiveBayes:
    """Gaussian Naive Bayes classifier.

    Each feature is modelled, per class, as an independent normal
    distribution whose mean and variance are estimated from the training
    data. Prediction picks the class with the largest log-posterior
    ``log P(class) + sum_j log N(x_j | mean_cj, var_cj)``.

    Parameters
    ----------
    var_smoothing : float, default=1e-9
        Fraction of the largest per-feature variance of the training data
        that is added to every per-class variance. This keeps the variances
        strictly positive so that a feature which is constant within a class
        does not cause a division by zero.

    Attributes (set by ``fit``)
    ---------------------------
    _classes : ndarray of shape (n_classes,)
        Sorted unique labels seen in ``y``.
    _mean, _var : ndarray of shape (n_classes, n_features)
        Per-class feature means and (smoothed) variances.
    _priors : ndarray of shape (n_classes,)
        Relative frequency of each class in the training data.
    """

    def __init__(self, var_smoothing=1e-9):
        self.var_smoothing = var_smoothing

    def fit(self, X, y):
        """Estimate per-class means, variances and priors.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
        y : array-like of shape (n_samples,)

        Returns
        -------
        self : NaiveBayes
            The fitted estimator, so calls can be chained.

        Raises
        ------
        ValueError
            If ``X`` is not 2-D, is empty, or ``y`` has a different number
            of samples than ``X``.
        """
        X = np.asarray(X, dtype=np.float64)
        y = np.asarray(y)
        if X.ndim != 2:
            raise ValueError("X must be 2-D (n_samples, n_features)")
        n_sample, n_feature = X.shape
        if n_sample == 0:
            raise ValueError("cannot fit on an empty training set")
        if y.shape[0] != n_sample:
            raise ValueError("X and y must contain the same number of samples")

        self._classes = np.unique(y)
        n_classes = len(self._classes)

        # Init mean, variance, priors.
        self._mean = np.zeros((n_classes, n_feature), dtype=np.float64)
        self._var = np.zeros((n_classes, n_feature), dtype=np.float64)
        self._priors = np.zeros(n_classes, dtype=np.float64)

        # Variance floor, scaled by the data's largest feature variance.
        # If every feature is constant (largest variance is 0) fall back to
        # var_smoothing itself so the floor is still strictly positive.
        largest_var = X.var(axis=0).max(initial=0.0)
        epsilon = self.var_smoothing * (largest_var if largest_var > 0 else 1.0)

        for idx, c in enumerate(self._classes):
            X_c = X[y == c]
            self._mean[idx, :] = X_c.mean(axis=0)
            self._var[idx, :] = X_c.var(axis=0) + epsilon
            self._priors[idx] = X_c.shape[0] / float(n_sample)

        return self

    def predict(self, X):
        """Return the most probable class label for every row of ``X``.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)

        Returns
        -------
        ndarray of shape (n_samples,)
            Predicted labels, taken from the labels seen during ``fit``.

        Raises
        ------
        ValueError
            If ``X`` is non-empty and not 2-D, or its feature count differs
            from the training data.
        """
        X = np.asarray(X, dtype=np.float64)
        if X.size == 0:
            # Nothing to classify; return an empty label array.
            return self._classes[:0]
        if X.ndim != 2 or X.shape[1] != self._mean.shape[1]:
            raise ValueError(
                "X must be 2-D with %d features" % self._mean.shape[1]
            )
        scores = self._joint_log_likelihood(X)
        return self._classes[np.argmax(scores, axis=1)]

    def _predict(self, x):
        """Return the most probable class label for a single sample ``x``."""
        x = np.asarray(x, dtype=np.float64)
        scores = self._joint_log_likelihood(x[np.newaxis, :])[0]
        # Return the class with the highest log-posterior.
        return self._classes[np.argmax(scores)]

    def _joint_log_likelihood(self, X):
        """Log-posterior (up to a constant) of every class for each row of X.

        Returns an array of shape (n_samples, n_classes).
        """
        log_priors = np.log(self._priors)
        scores = np.empty((X.shape[0], len(self._classes)), dtype=np.float64)
        for idx in range(len(self._classes)):
            scores[:, idx] = log_priors[idx] + self._log_pdf(idx, X).sum(axis=1)
        return scores

    def _log_pdf(self, class_idx, x):
        """Element-wise log of the Gaussian density for class ``class_idx``.

        Computed analytically rather than as ``log(exp(...))`` so samples far
        from the class mean yield a large negative number instead of
        underflowing to ``log(0) = -inf``.
        """
        mean = self._mean[class_idx]
        var = self._var[class_idx]
        return -0.5 * (np.log(2.0 * np.pi * var) + (x - mean) ** 2 / var)

    # PDF of the Gaussian distribution.
    def _pdf(self, class_idx, x):
        """Element-wise Gaussian density of ``x`` under class ``class_idx``."""
        return np.exp(self._log_pdf(class_idx, x))


In [5]:
# Train the classifier on the training split, then label the held-out samples.
nb = NaiveBayes()
nb.fit(X_train, y_train)

final = nb.predict(X_test)
final


array([0, 1, 1, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 1])

In [6]:
# Fraction of held-out samples whose predicted label matches the true label.
accuracy = metrics.accuracy_score(y_true=y_test, y_pred=final)

In [7]:
# Bare expression: the notebook displays the accuracy value as the cell output.
accuracy

0.9