In [71]:
class NaiveBayes(object):
    """
    Gaussian Naive Bayes classifier for datasets given as sequences of rows.

    Training rows hold numeric feature values followed by the class label in
    the last position. The label may be any hashable value (it is only used
    as a dictionary key). Rows passed to ``predict`` hold feature values in
    the same order; anything after the learned features is ignored.
    """

    # Lower bound applied to a standard deviation before it is used as a
    # divisor. A constant feature column (or a class with a single row) has a
    # standard deviation of 0, which would otherwise raise ZeroDivisionError.
    _MIN_STDEV = 1e-9

    def __init__(self):
        # dict: class value -> list of (mean, stdev, count) per feature column.
        # Stays None until fit() has been called.
        self.summaries = None

    def separate_by_class(self, dataset):
        """
        This method allows us to separate the training data by classes.
        Returns a dictionary mapping each class value (the last element of a
        row) to the list of all rows carrying that class value.
        """
        separated = dict()

        for vector in dataset:
            # setdefault creates the per-class list the first time a label is seen
            separated.setdefault(vector[-1], list()).append(vector)

        return separated

    def summarize_dataset(self, dataset):
        """
        This method returns the important statistics for each feature column
        in the dataset as a list of (mean, stdev, count) tuples.

        The last column is the class column; it is dropped *before* any
        statistics are computed so that non-numeric class labels are allowed.
        The standard deviation is the sample standard deviation (n - 1
        divisor); a column with fewer than two values gets a stdev of 0.0.
        An empty dataset yields an empty list.
        """
        from math import sqrt

        def _mean(numbers):
            return sum(numbers) / float(len(numbers))

        def _stdev(numbers):
            # the sample variance is undefined for a single observation
            if len(numbers) < 2:
                return 0.0
            avg = _mean(numbers)
            variance = sum((x - avg) ** 2 for x in numbers) / float(len(numbers) - 1)
            return sqrt(variance)

        # transpose rows into columns, then discard the trailing class column
        feature_columns = list(zip(*dataset))[:-1]

        return [(_mean(column), _stdev(column), len(column))
                for column in feature_columns]

    def fit(self, dataset):
        """
        This method splits the dataset by class and calculates statistics for
        each class. The result is stored in ``self.summaries``; nothing is
        returned.
        """
        separated = self.separate_by_class(dataset)

        self.summaries = {class_value: self.summarize_dataset(rows)
                          for class_value, rows in separated.items()}

    def calculate_probability(self, x, mean, stdev):
        """
        This method calculates the Gaussian probability density function for x
        given the distribution's mean and standard deviation.

        ``stdev`` values below ``_MIN_STDEV`` (including 0) are raised to that
        floor so the density stays finite instead of dividing by zero.
        """
        from math import exp, pi, sqrt

        stdev = max(stdev, self._MIN_STDEV)

        # calculates exponent first for simplification
        exponent = exp(-((x - mean) ** 2 / (2 * (stdev ** 2))))
        return (1 / (sqrt(2 * pi) * stdev)) * exponent

    def calculate_class_probabilities(self, summaries, row):
        """
        This method calculates the (unnormalized) probability of each class
        for a given row: the class prior multiplied by the Gaussian density of
        every feature value. Returns a dictionary class value -> score.
        """
        # every feature tuple of a class stores that class's row count, so the
        # first tuple's count is used to rebuild the size of the training set
        total_rows = sum(class_summaries[0][2]
                         for class_summaries in summaries.values())

        probabilities = dict()

        for class_value, class_summaries in summaries.items():
            # start from the prior P(class) ...
            probability = class_summaries[0][2] / float(total_rows)
            # ... and multiply in the likelihood of each feature value
            for i, (mean, stdev, _) in enumerate(class_summaries):
                probability *= self.calculate_probability(row[i], mean, stdev)
            probabilities[class_value] = probability

        return probabilities

    def _pred(self, summaries, row):
        """
        This method predicts the class for a given row using ``summaries``
        (falls back to ``self.summaries`` when ``summaries`` is None).
        Returns the class value with the highest score; ties go to the class
        seen first, and None is returned when there are no classes.
        """
        if summaries is None:
            summaries = self.summaries

        probabilities = self.calculate_class_probabilities(summaries, row)
        return max(probabilities, key=probabilities.get, default=None)

    def predict(self, test):
        """
        This method predicts the class for a whole dataset and returns the
        predictions as a list, one entry per row of ``test``.

        Raises RuntimeError when called before ``fit``.
        """
        if self.summaries is None:
            raise RuntimeError("NaiveBayes.predict() called before fit()")

        return [self._pred(self.summaries, row) for row in test]

In [25]:
# Full toy sample: ten rows of two numeric values followed by a 0/1 label.
dataset = [
    [3.393533211, 2.331273381, 0],
    [3.110073483, 1.781539638, 0],
    [1.343808831, 3.368360954, 0],
    [3.582294042, 4.67917911, 0],
    [2.280362439, 2.866990263, 0],
    [7.423436942, 4.696522875, 1],
    [5.745051997, 3.533989803, 1],
    [9.172168622, 2.511101045, 1],
    [7.792783481, 3.424088941, 1],
    [7.939820817, 0.791637231, 1],
]

In [74]:
# Training rows: two feature values followed by the class label (0 or 1).
dataset1 = [
    [3.393533211, 2.331273381, 0],
    [3.110073483, 1.781539638, 0],
    [1.343808831, 3.368360954, 0],
    [3.582294042, 4.67917911, 0],
    [7.423436942, 4.696522875, 1],
    [5.745051997, 3.533989803, 1],
    [9.172168622, 2.511101045, 1],
    [7.939820817, 0.791637231, 1],
]

# Unlabeled rows to classify: feature values only, no class column.
dataset2 = [
    [2.280362439, 2.866990263],
    [7.792783481, 3.424088941],
]

In [75]:
# Create the classifier, then learn the per-class column statistics from the
# labeled rows in dataset1 (fit stores them on model.summaries).
model = NaiveBayes()

model.fit(dataset1)

In [76]:
# Predict a class label for each unlabeled row in dataset2.
model.predict(dataset2)

[0, 1]

https://machinelearningmastery.com/naive-bayes-classifier-scratch-python/