In [52]:
from collections import OrderedDict
from functools import reduce
from operator import mul
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import numpy as np
from sklearn.datasets import load_iris

In [62]:
#Getting the feature matrix and target vector from iris data set.
X = load_iris()['data']
y = load_iris()['target']

In [63]:
#Scaling data
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

In [64]:
#Splitting the data in train and test with train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.3, random_state = 42)

In [56]:
class Gauss:
    def fit(self, X: np.array, y: np.array):
        '''
            The fitting function of the gaussian naive bayes model.
        :param X: np.array
            The feature matrix.
        :param y: np.array
            The target vector.
        :return: Gauss
            This function return the object of the trained model.
        '''
        # Preparing the mean, standard deviation and priors vectors.
        self.mu = OrderedDict()
        self.std = OrderedDict()
        self.priors = OrderedDict()
        # Computing the mean, standard deviation and prior vectors for every class
        for cls in np.unique(y):
            self.priors[cls] = len(y[y == cls]) / len(y)
            self.mu[cls] = np.mean(X[ np.where(y == cls)], axis=0)
            self.std[cls] = np.std(X[ np.where(y == cls)], axis=0)
        return self

    def normal_distribution(self, x: np. array , cls : str, i : int)-> float:
        '''
            The normal distribution formula.
        :param x: np.array
            The sample for which we want to find the probability.
        :param cls: str or int
            The class for which we want to compute the probability.
        :param i: int
            The index of the feature.
        :return: float
            The probability of the sample for the normal distribution.
        '''
        exponent = np.exp(-((x[i] - self.mu[cls][i]) ** 2 / (2 * self.std[cls][i] ** 2)))
        return (1 / (np.sqrt(2 * np.pi) * self.std [cls][i] ** 2)) * exponent
    def predict_proba(self, X: np. array) -> np.array:
        '''
            This function returns the probability for every sample in the data set.
        :param X: np.array
            The feature matrix passed to make predictions on.
        :return: np.array
            An array with the probabilities for every class for every sample.
        '''
        #Creating the empty list with probabilities
        y_pred = []
        #Computing the probabilities for every class for every sample
        for x in X:
            y_pred.append ([])
            #Computing the probability for every class for this sample
            for cls in self.priors :
                prob = reduce (mul, [self.normal_distribution(x, cls, i) 
                                     for i in range(len(x))]) * self.priors[cls]
                y_pred[-1].append(prob)
            y_pred[-1] = np.array(y_pred [-1])
            # Normalizing the vector
            for i in range(len(y_pred[-1])):
                y_pred[-1] = y_pred[-1] / np.linalg.norm(y_pred[-1])
        return np.array(y_pred)
    def predict(self, X : np. array) -> np. array:
        '''
            This function returns the predicted class for every sample in the data set
        :param X: np.array
            The feature matrix passed to make predictions on 
        :return: np.array
            An array with the predicted classes for every class for every sample
        '''
        # Creating the empty list for storing the predicted classes 
        y = []
        # Getting the predicted probabilities for every passed sample 
        probas = self.predict_proba(X)
        # Getting the class with the highest probability for every sample. 
        for pr in probas:
            y.append( list(self.priors.keys())[np.argmax(pr)])
        return y

In [57]:
#Instantiating the class
gauss_scr = Gauss()

In [58]:
#Fitting function
gauss_scr.fit(X_train, y_train)

<__main__.Gauss at 0x11c7e687808>

In [59]:
#Predict probabilities function
prob = gauss_scr.predict_proba(X_test)

In [60]:
#Predict function
y_pred = gauss_scr.predict(X_test)

In [61]:
#Accuracy score
accuracy_score(y_test, y_pred)

1.0

## GaussianNB from sklearn

In [65]:
#Importing the model
from sklearn.naive_bayes import GaussianNB

In [66]:
#Creating the model object
gauss = GaussianNB()

In [67]:
#Fitting the model
gauss.fit(X_train, y_train)

GaussianNB()

In [68]:
#Making predictions on new samples
y_pred = gauss.predict(X_test)

In [69]:
#Accuracy score
accuracy_score(y_test, y_pred)

0.9777777777777777