In [0]:
import sys
import numpy as np
import pandas as pd
from collections import Counter, defaultdict

# CSV file loaded by main(): main() uses the last column as the class label
# and the last row as the sample to classify.
filename = "AllElectronic.csv"

In [0]:
class NaiveBayes(object):
    """Naive Bayes classifier for categorical (discrete-valued) features.

    The model is fitted when the object is constructed.  For every class label
    it keeps the prior ``P(label)`` and, for every feature column, a table of
    ``P(value | label)`` estimated from the training rows.

    Parameters
    ----------
    X : numpy.ndarray
        Two-dimensional array of training features (rows are samples, columns
        are features).  Values are used as dictionary keys, so they must be
        hashable.
    Y : numpy.ndarray
        One-dimensional array holding the class label of every row of ``X``.
    alpha : float, optional
        Additive (Laplace/Lidstone) smoothing strength.  With the default
        ``0.0`` the likelihoods are plain relative frequencies, so a feature
        value never observed together with a label gives that label a score of
        zero.  A positive ``alpha`` gives such values a small non-zero
        likelihood instead.

    Attributes
    ----------
    labels : numpy.ndarray
        Sorted distinct class labels found in ``Y``.
    features : int
        Number of feature columns in ``X``.
    class_probabilities : dict
        Maps each label to its relative frequency in ``Y``.
    likelihoods : dict
        ``likelihoods[label][feature_index]`` maps a feature value to
        ``P(value | label)``.

    Raises
    ------
    ValueError
        If ``alpha`` is negative.
    """

    def __init__(self, X, Y, alpha=0.0):
        if alpha < 0:
            raise ValueError('alpha must be non-negative')
        self.alpha = alpha
        self.labels = np.unique(Y)
        self.features = X.shape[1]
        self.likelihoods = self.initialize_likelihoods()
        self.class_probabilities = self.get_probability(Y)
        self.train(X, Y)

    @staticmethod
    def get_probability(outcome):
        """Return a dict mapping each distinct item of ``outcome`` to its relative frequency.

        An empty ``outcome`` yields an empty dict.
        """
        no_of_samples = len(outcome)
        return {
            value: count / no_of_samples
            for value, count in Counter(outcome).items()
        }

    @staticmethod
    def create_subset(X, Y, label):
        """Return the rows of ``X`` whose entry in ``Y`` equals ``label``."""
        row_indices = np.where(Y == label)[0]
        return X[row_indices, :]

    @staticmethod
    def get_max_value_key(dictionary):
        """Return the key with the largest value (the first such key on ties)."""
        return max(dictionary, key=dictionary.get)

    def initialize_likelihoods(self):
        """Return an empty per-label container for the likelihood tables."""
        return dict((label, defaultdict(list)) for label in self.labels)

    def train(self, X, Y):
        """(Re)build the per-label, per-feature likelihood tables from ``X`` and ``Y``.

        The tables are rebuilt from scratch on every call, so calling this
        method again refits the likelihoods instead of failing.  ``labels``,
        ``features`` and ``class_probabilities`` are not modified here.

        With smoothing strength ``alpha`` the estimate for a value ``v`` of a
        feature with ``K`` distinct training values is
        ``(count(v, label) + alpha) / (n_label + alpha * K)``.
        """
        alpha = self.alpha
        self.likelihoods = self.initialize_likelihoods()
        # Likelihood assigned to a value that is missing from the table of a
        # given (label, feature); stays 0.0 when alpha == 0.
        self._unseen_likelihoods = dict((label, {}) for label in self.labels)

        for label in self.labels:
            subset_X = self.create_subset(X, Y, label=label)
            n_label = len(subset_X)
            for feature in range(self.features):
                counts = Counter(list(subset_X[:, feature]))
                # Distinct values of this feature over the whole training set,
                # in first-seen order (no sorting, so mixed types are fine).
                categories = list(dict.fromkeys(list(X[:, feature])))
                denominator = n_label + alpha * len(categories)
                # Without smoothing only values actually seen with this label
                # are stored, exactly as plain relative frequencies would.
                self.likelihoods[label][feature] = {
                    value: (counts[value] + alpha) / denominator
                    for value in categories
                    if alpha > 0 or counts[value] > 0
                }
                self._unseen_likelihoods[label][feature] = (
                    alpha / denominator if denominator else 0.0
                )

    def classify(self, X_test):
        """Return the label with the highest score for one sample.

        ``X_test`` is an indexable sequence with one value per feature column.
        The score of a label is its prior multiplied by the likelihood of each
        feature value.  If every label scores zero, the first label in
        ``labels`` is returned.
        """
        prediction = {}
        for label in self.labels:
            class_probability = self.class_probabilities[label]
            unseen = self._unseen_likelihoods[label]
            for feature in range(self.features):
                relative_feature_values = self.likelihoods[label][feature]
                value = X_test[feature]
                if value in relative_feature_values:
                    class_probability *= relative_feature_values[value]
                else:
                    class_probability *= unseen[feature]
                if not class_probability:
                    # The product can no longer leave zero; skip the rest.
                    break
            prediction[label] = class_probability
        return self.get_max_value_key(prediction)

In [0]:
def import_data(filename):
    """Load a CSV into a DataFrame and report its size.

    Prints the number of rows and the shape of the loaded table, then returns
    a ``(column_index, dataframe)`` pair.
    """
    frame = pd.read_csv(filename)
    row_count = len(frame)
    print('DATASET LENGTH\t:\t', row_count)
    print('DATASET SHAPE\t:\t', frame.shape)
    return frame.columns, frame


def main():
    """Train on every row but the last and print the prediction for the last row.

    The table named by the module-level ``filename`` is loaded and printed.
    Its last column is used as the class label, and the features of the final
    row are classified with a model fitted on all preceding rows.
    """
    headers, frame = import_data(filename)
    print('\n{}'.format(frame))

    records = frame.to_numpy()
    training_rows = records[:-1]
    query_row = records[-1]

    X_train = np.array(training_rows[:, :-1])
    y_train = np.array(training_rows[:, -1])
    test_data = np.array(query_row[:-1])

    target_name = headers[-1]
    model = NaiveBayes(X_train, y_train)
    predicted_label = model.classify(test_data)
    print('\n{}\t:\t{}'.format(target_name, predicted_label))


# Run the demo only when this code is executed directly rather than imported.
if __name__ == '__main__':
    main()

DATASET LENGTH	:	 28
DATASET SHAPE	:	 (28, 5)

       age  income student credit_rating buys_computer
0    Youth    high      no          fair            no
1    Youth    high      no     excellent            no
2   middle    high      no          fair           yes
3   senior  medium      no          fair           yes
4   senior     low     yes          fair           yes
5   senior     low     yes     excellent            no
6   middle     low     yes     excellent           yes
7    Youth  medium      no          fair            no
8    Youth     low     yes          fair           yes
9   senior  medium     yes          fair           yes
10   Youth  medium     yes     excellent           yes
11  middle  medium      no     excellent           yes
12  middle    high     yes          fair           yes
13  senior  medium      no     excellent            no
14  middle    high     yes          fair           yes
15  senior  medium      no     excellent            no
16  middle     low