In [0]:
import sys
import numpy as np
import pandas as pd
from collections import Counter, defaultdict

datast = "DATASET.xlsx"

In [0]:
class NaiveBayes(object):

    
    def __init__(self, X, Y):
        self.labels = np.unique(Y)
        self.features =X.shape[1]
        self.likelihoods = self.initialize_likelihoods()
        self.class_probabilities = self.get_probability(Y)
        self.train(X, Y)


    @staticmethod
    def get_probability(outcome):
        no_of_samples = len(outcome)
        probability = dict(Counter(outcome))
        for key in probability.keys():
            probability[key] /= no_of_samples
        return probability


    @staticmethod
    def create_subset(X, Y, label):
        row_indices = np.where(Y == label)[0]
        return X[row_indices, :]


    @staticmethod
    def get_max_value_key(dictionary):
        return max(dictionary, key=dictionary.get)


    def initialize_likelihoods(self):
        return dict((label, defaultdict(list)) for label in self.labels)
    
    
    def train(self, X, Y):
        for label in self.labels:
            subset_X = self.create_subset(X, Y, label=label)
            for feature in range(self.features):
                self.likelihoods[label][feature] += list(subset_X[:, feature])
        for label in self.labels:
            for feature in range(self.features):
                self.likelihoods[label][feature] = self.get_probability(self.likelihoods[label][feature])


    def classify(self, X_test):
        prediction = {}
        for label in self.labels:
            class_probability = self.class_probabilities[label]
            for feature in range(self.features):
                relative_feature_values = self.likelihoods[label][feature]
                if X_test[feature] in relative_feature_values.keys():
                    class_probability *= relative_feature_values[X_test[feature]]
                else:
                    class_probability = 0
            prediction[label] = class_probability
        return self.get_max_value_key(prediction)

In [7]:
def import_data(datast):
    dataset_xlsx = pd.read_excel(datast, 'Sheet1', index_col=None)
    dataset_xlsx.to_csv('dataset.csv', encoding='utf-8')
    dataset = pd.read_csv('dataset.csv')
    headers = dataset.columns
    print('DATASET LENGTH\t:\t', len(dataset))
    print('DATASET SHAPE\t:\t', dataset.shape)
    return headers, dataset


def main():
    headers, dataset = import_data(datast)
    print('\n{}'.format(dataset))
    dataset = dataset.to_numpy()
    X_train, y_train = np.array(dataset[:-1, :-1]), np.array(dataset[:-1, -1])
    test_data = np.array(dataset[-1, :-1])
    print('\n{}\t:\t{}'.format(headers[-1], NaiveBayes(X_train, y_train).classify(test_data)))


if __name__ == '__main__':
    main()

DATASET LENGTH	:	 12
DATASET SHAPE	:	 (12, 13)

    Unnamed: 0 Example ALT Bar Fri  ... Rain Res     Type        Est Target Wait
0            0      X1   T   F   F  ...    F   T   French    0 to 10           T
1            1      X2   T   F   F  ...    F   F     Thai   30 to 60           F
2            2      X3   F   T   F  ...    F   F   Burger    0 to 10           T
3            3      X4   T   F   T  ...    F   F     Thai  10 to  30           T
4            4      X5   T   F   T  ...    F   T   French       > 60           F
5            5      X6   F   T   F  ...    T   T  Italian    0 to 10           T
6            6      X7   F   T   F  ...    T   F   Burger    0 to 10           F
7            7      X8   F   F   F  ...    T   T     Thai    0 to 10           T
8            8      X9   F   T   T  ...    T   F   Burger        >60           F
9            9     X10   T   T   T  ...    F   T  Italian   10 to 30           F
10          10     X11   F   F   F  ...    F   F     Thai    