In [None]:
'''
Implementing a Gaussian Naive Bayes Classifier
'''

In [17]:
import math
import pandas as pd

class MyNaiveBayes():
    '''
    Gaussian Naive Bayes classifier for numeric predictors held in pandas DataFrames.

    Attributes set by fit():
        target             - name of the label column (first column of b)
        list_of_classes    - distinct labels found in the label column
        list_of_predictors - column names of the predictor frame
        pCs                - prior probability P(c), one entry per class
        means_ys           - means_ys[predictor][class]: per-class mean of each predictor
        variance_ys        - variance_ys[predictor][class]: per-class sample variance
    '''

    def fit(self, a, b):
        '''
        Learn class priors and per-class mean/variance of every predictor.

        Input: a - Pandas Dataframe of numeric predictors
               b - Pandas Dataframe whose first column holds the class labels
                   (rows are matched to a through the index)
        Output: None (the learned parameters are stored on the instance)
        '''
        self.target = list(b.columns.values)[0]
        labels = b[self.target]
        self.list_of_classes = labels.unique()
        self.list_of_predictors = a.columns.values

        # P(c) is the prior probability: the number of times a class appears
        # in the training labels divided by the number of (non-missing) labels.
        total = labels.count()
        self.pCs = [(labels == cls).sum() / total for cls in self.list_of_classes]

        # Both tables are indexed [predictor][class]; the outer dimension must
        # therefore be the predictors, otherwise any data set with a different
        # number of predictors and classes overruns the table.
        n_predictors = len(self.list_of_predictors)
        n_classes = len(self.list_of_classes)
        self.means_ys = [[None] * n_classes for _ in range(n_predictors)]
        self.variance_ys = [[None] * n_classes for _ in range(n_predictors)]

        # Inner join on the index, so only rows present in both frames are used.
        joint_dataframe = pd.merge(a, b, left_index=True, right_index=True)

        for y, cls in enumerate(self.list_of_classes):
            # Select the rows of this class once, then summarise every predictor.
            by_class = joint_dataframe.loc[joint_dataframe[self.target] == cls]
            for x, predictor in enumerate(self.list_of_predictors):
                column = by_class[predictor]
                self.means_ys[x][y] = column.mean()
                self.variance_ys[x][y] = column.var()

    def p_xi_given_y(self, xi, mean_y, variance_y):
        '''
        Gaussian density of xi for a normal distribution with the given mean
        and variance. variance_y must be non-zero.
        '''
        return (1/(math.sqrt(2*math.pi*variance_y)))*math.exp(-(((xi-mean_y)**2))/(2*variance_y))

    def _log_p_xi_given_y(self, xi, mean_y, variance_y):
        '''
        Natural logarithm of p_xi_given_y, computed directly so that values far
        from the mean do not underflow to zero.
        '''
        if variance_y == 0:
            # Same exception type the plain density raises for a zero variance.
            raise ZeroDivisionError("variance of a predictor within a class is zero")
        return -0.5 * math.log(2 * math.pi * variance_y) - ((xi - mean_y) ** 2) / (2 * variance_y)

    #returns product of elements of given list
    def list_product(self, l):
        '''
        Input: List of numbers
        Output: Product of the elements; 0 for an empty list
        '''
        if len(l) == 0:
            return 0
        result = 1
        for value in l:
            result = result * value
        return result

    def predict(self, x_to_predict):
        '''
        Input: Pandas Dataframe with the predictor columns in the same order
               as the frame passed to fit()
        Output: List (of predictions), one class label per input row
        '''
        n_predictors = len(self.list_of_predictors)
        n_classes = len(self.list_of_classes)
        if x_to_predict.shape[1] != n_predictors:
            raise ValueError(
                "expected %d predictor columns, got %d"
                % (n_predictors, x_to_predict.shape[1])
            )

        # Scores are accumulated in log space: multiplying many small densities
        # underflows to 0.0, which would make every class tie.
        log_priors = [
            math.log(p_c) if p_c > 0 else float("-inf") for p_c in self.pCs
        ]

        final_list = []
        for row in x_to_predict.itertuples(index=False, name=None):
            log_posteriors = []
            for c in range(n_classes):
                score = log_priors[c]
                for f, xi in enumerate(row):
                    score += self._log_p_xi_given_y(
                        xi, self.means_ys[f][c], self.variance_ys[f][c]
                    )
                log_posteriors.append(score)
            # The first class with the highest score wins.
            best = log_posteriors.index(max(log_posteriors))
            final_list.append(self.list_of_classes[best])

        return final_list