In [1]:
from collections import OrderedDict
from functools import reduce 
from operator import mul

In [2]:
import numpy as np 
import pandas as pd

In [3]:
class Gauss:
    
    def fit(self,X : np.array, y : np.array):
        """"
             The fitting function of the gassuan naive bayes model.
        :param X: np.array
            The feature matrix.
        :param y: np.array
            The target vector.
        :return: Gausss
              This function return the oject of the trained model.
        """ 
        
        # Preparing the mean, standard deviation and priors vectors. 
        self.mu = OrderedDict()
        self.std = OrderedDict()
        self.priors = OrderedDict() 
        
        # Computing th mean, standard deviation and prior vectors for every class 
        for cls in np.unique(y):
            self.priors[cls] = len(y [ y == cls ]) / len(y) 
            self.mu[cls] = np.mean(X[np.where( y == cls)], axis = 0 )
            self.std[cls] = np.std(X [np.where( y == cls )], axis = 0) 
        return self 
    
    def normal_distribution(self, x : np.array, cls : str, i : int) -> float:
        """"
            The normal distribution formula.
        :param x : np.array
            The sample for which we want to find the probability.
        :param cls : str or int
            The class for which we want to compute the probability.
        :param i : int 
             The index of the feature
        :return float 
             The probability of the sample for the normal distribution.
        """
        
        exponent = np.exp(-((x[i] - self.mu[cls][i]) ** 2)) 
        exponent /= (2*self.std[cls][i] ** 2) 
        return (1/ (np.sqrt(2 * np.pi) * self.std[cls][i] ** 2 )) * exponent 
    
    def predict_proba(self, X : np.array ) -> np.array:
        """"
            This function return the probability for
            eevery sample in the data set.
        :param X: np.array
            The feature matrix passed to make predictions on.
        :return: np.array
            An array with the probabilities for every class for every sample 
        """
        
        # Creating the empty list with probabilities 
        y_pred = []
        
        # Computing the probabilities for every class for this sample.  
        for x in X:
            y_pred.append([]) 
            
            for cls in self.priors:
                prob = reduce(mul, [self.normal_distribution(x,cls,i)
                                    for i in range(len(x))]) * self.priors[cls] 
                y_pred[-1].append(prob)  
            
            y_pred[-1] = np.array(y_pred[-1]) 

            # Normalizing the vector 
            for i in range (len(y_pred[-1])):
                y_pred[-1] = y_pred[-1] / np.linalg.norm(y_pred[-1]) 
        return np.array(y_pred)  
    
    def predict(self, X : np.array) -> np.array:
        """"
            This function return the precited class for every 
            sample in the data set.
        :param X : np.array 
            The feature matrix passed to make predcitions on.
        :return: np.array 
            An array with the predicted classes for every class for every sample.
        """ 
        
        # Creating the empty list for storing the predicted classes. 
        y = [] 
        
        # Getting the precited probabilities fro every passed sample. 
        probas = self.predict_proba(X) 
        
        # Getting the class with the highest probability for every sample.
        for pr in probas:
            y.append( 
                list(self.priors.keys())[np.argmax(pr)]
            ) 
        return y 


In [4]:
dataset = pd.read_csv('heart.csv')

In [25]:
X = dataset.iloc[:, :-1].values
y = dataset.iloc[:, -1].values

In [26]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.25, random_state = 0)

In [27]:
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test) 

In [28]:
from sklearn.naive_bayes import GaussianNB
classifier = GaussianNB()
classifier.fit(X_train, y_train) 

GaussianNB()

In [29]:
y_pred = classifier.predict(X_test) 

#print(np.concatenate((y_pred.reshape(len(y_pred),1), y_test.reshape(len(y_test),1)),1))

# Results from sklearn 


In [30]:
from sklearn.metrics import confusion_matrix, accuracy_score
cm = confusion_matrix(y_test, y_pred)
print(cm)
accuracy_score(y_test, y_pred) 

[[24  9]
 [ 4 39]]


0.8289473684210527

In [12]:
X = dataset.iloc[:, :-1].values
y = dataset.iloc[:, -1].values

In [13]:
gaus = Gauss()

In [14]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.25, random_state = 42)

In [15]:
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test) 

In [16]:
gaus.fit(X_train, y_train)

<__main__.Gauss at 0x1bd7a3ee9c8>

In [17]:
y_pred = gaus.predict(X_test)

# Result from from-scratch


In [18]:
from sklearn.metrics import confusion_matrix, accuracy_score
cm = confusion_matrix(y_test, y_pred)
print(cm)
accuracy_score(y_test, y_pred) 

[[27  8]
 [ 3 38]]


0.8552631578947368