In [12]:
#--------------------------------------------
#Authors : Kai-Rui Hsu
#Task : Naive Bayes Classifier Implementation
#--------------------------------------------

import math
import numpy as np

class NaiveBayes:
    """Gaussian Naive Bayes classifier for any number of numeric features.

    Every class is modelled with one independent normal distribution per
    feature, i.e. a multivariate normal with a diagonal covariance matrix.

    Fitted parameters are stored in ``density_parameters``, a dict keyed by
    class label. Each value is a dict holding:

    * ``'mu1'``, ``'mu2'``, ...: ``{'mean': ..., 'std': ...}`` for each feature,
    * ``'covariance_matrix'``: diagonal matrix of the per-feature variances,
    * ``'class_prior_probability'``: share of training rows in that class.
    """

    # Entries of a per-class parameter dict that do not describe a feature.
    _NON_FEATURE_KEYS = ('covariance_matrix', 'class_prior_probability')

    # Variances are floored at this fraction of the largest overall feature
    # variance so a feature that is constant inside a class does not produce
    # a singular (non-invertible) covariance matrix.
    _VARIANCE_FLOOR_RATIO = 1e-9

    def __init__(self):
        self.density_parameters = dict()

    def estimation_parameter(self, train_x, train_label):
        """Fit the per-class Gaussian parameters and print a summary.

        Args:
            train_x: 2-D array-like of shape (samples, features).
            train_label: array-like with one class label per sample.

        Raises:
            ValueError: if ``train_x`` is not 2-D, is empty, or its number of
                rows differs from the number of labels.
        """
        # Model training
        print("======================================================")
        print("Naive Bayes Model Training:")

        train_x = np.asarray(train_x, dtype=float)
        train_label = np.asarray(train_label).ravel()
        if train_x.ndim != 2:
            raise ValueError("train_x must be a 2-D array of shape (samples, features)")
        if train_x.size == 0:
            raise ValueError("cannot train on an empty data set")
        if len(train_x) != len(train_label):
            raise ValueError("train_x and train_label must contain the same number of samples")

        variance_floor = self._VARIANCE_FLOOR_RATIO * train_x.var(axis=0).max()
        if not variance_floor > 0:
            # All training data is constant: fall back to an absolute floor.
            variance_floor = self._VARIANCE_FLOOR_RATIO

        # Build into a fresh dict so classes from an earlier fit do not linger.
        fitted = dict()
        for label in np.unique(train_label):
            # Rows that belong to the current class.
            class_rows = train_x[train_label == label]
            means = class_rows.mean(axis=0)
            stds = class_rows.std(axis=0)
            variances = np.maximum(class_rows.var(axis=0), variance_floor)

            estimate_parameters = dict()
            for index, (mean, std) in enumerate(zip(means, stds), start=1):
                estimate_parameters["mu" + str(index)] = {'mean': mean, 'std': std}

            # Due to the naive assumption the covariance matrix is diagonal;
            # its entries are variances (not standard deviations).
            estimate_parameters['covariance_matrix'] = np.diag(variances)

            # Prior probability estimation for the class.
            estimate_parameters['class_prior_probability'] = len(class_rows) / len(train_x)

            fitted[label] = estimate_parameters

        self.density_parameters = fitted

        # Display the data density parameters
        self.show_density_parameters()

    def show_density_parameters(self):
        """Print mean/std per feature and the covariance matrix of each class."""
        for label, feature_densities in self.density_parameters.items():

            print("------------------------------------------------------")
            print("Class: ", label)
            print("------------------------------------------------------")

            # Keep insertion order (mu1, mu2, ...); a lexicographic sort would
            # place mu10 before mu2.
            feature_list = [
                feature for feature in feature_densities
                if feature not in self._NON_FEATURE_KEYS
            ]

            print("1.Feature")
            print('{0:<15} {1:>15} {2:>15}'.format("", "Mean", "StdDev"))
            for feature in feature_list:
                density = feature_densities[feature]
                print('{0:<15} {1:>15.3f} {2:>15.3f}'.format(
                    feature, density['mean'], density['std']))

            print()

            print("2.Covariance Matrix:")
            # One column per feature instead of a fixed two-column layout.
            header_format = '{:<15}' + ' {:>15}' * len(feature_list)
            value_format = '{:<15}' + ' {:>15.3f}' * len(feature_list)
            print(header_format.format("", *feature_list))
            for feature, row in zip(feature_list, feature_densities['covariance_matrix']):
                print(value_format.format(feature, *row))

    def predictFunc(self, test_x):
        """Predict the most probable class label for every sample.

        Posteriors are compared in log space, so samples far away from every
        class (whose densities underflow to 0.0) are still assigned to the
        best-matching trained label.

        Args:
            test_x: 2-D array-like of shape (samples, features).

        Returns:
            numpy array with one predicted label per sample; an empty array
            when ``test_x`` is empty.

        Raises:
            ValueError: if the model has not been trained, ``test_x`` is not
                2-D, or its feature count differs from the training data.
        """
        if not self.density_parameters:
            raise ValueError("model is not trained; call estimation_parameter() first")

        samples = np.asarray(test_x, dtype=float)
        if samples.size == 0:
            return np.array([])
        if samples.ndim != 2:
            raise ValueError("test_x must be a 2-D array of shape (samples, features)")

        labels = list(self.density_parameters)
        log_posteriors = np.empty((samples.shape[0], len(labels)))

        for column, label in enumerate(labels):
            feature_densities = self.density_parameters[label]
            means = np.array([
                density['mean']
                for feature, density in feature_densities.items()
                if feature not in self._NON_FEATURE_KEYS
            ])
            if samples.shape[1] != means.shape[0]:
                raise ValueError(
                    "test_x has {} features but the model was trained with {}".format(
                        samples.shape[1], means.shape[0]))

            # log N(x | mu, cov) = -0.5 * (d*log(2*pi) + log|cov| + (x-mu)^T cov^-1 (x-mu))
            cov = feature_densities['covariance_matrix']
            deviations = samples - means
            mahalanobis = np.sum((deviations @ np.linalg.inv(cov)) * deviations, axis=1)
            _, log_det = np.linalg.slogdet(cov)
            log_likelihood = -0.5 * (
                means.shape[0] * math.log(2 * math.pi) + log_det + mahalanobis)

            # Unnormalised log posterior: log P(y) + log P(x | y)
            log_prior = np.log(feature_densities['class_prior_probability'])
            log_posteriors[:, column] = log_prior + log_likelihood

        # argmax returns the first maximum, so ties go to the earliest label.
        best = np.argmax(log_posteriors, axis=1)
        return np.array([labels[index] for index in best])
