In [81]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
import math
from collections import Counter
import numpy as np

# Load the Iris dataset and pull out the feature matrix and the class labels.
iris = load_iris()
x, y = iris.data, iris.target

# Hold out 10% of the samples for testing; the fixed random_state keeps the
# split identical on every run.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.1,
    random_state=123,
)

In [120]:

class NaiveBayes:
    """Naive Bayes classifier over features binned into equal-frequency intervals.

    Every attribute is discretized into ``interval_count`` bins; the relative
    frequency of each bin within a class is used as the likelihood
    P(bin | class).

    Attributes:
        priors: class index -> prior probability (same values as
            ``classes_likelihoods``).
        classes_likelihoods: class index -> prior probability.
        likelihoods: class index -> attribute index -> Counter mapping a bin
            index to P(bin | class).
        borders: per-attribute bin borders learned from the training data, or
            None before ``build_classifier`` has been called.
    """

    def __init__(self):
        self.priors = {}
        self.likelihoods = {}

        self.classes_likelihoods = {}
        # Bin borders learned by build_classifier; None until it has run.
        self.borders = None

    def build_classifier(self, train_features, train_classes, interval_count, classess_count):
        """Estimate class priors and per-class bin likelihoods from training data.

        Args:
            train_features: sequence of records, each a sequence of numeric
                attribute values.
            train_classes: class label of every record; labels are expected to
                be the integers ``0 .. classess_count - 1``.
            interval_count: number of bins every attribute is split into.
            classess_count: number of classes.

        Raises:
            ValueError: if the training set is empty, the features and labels
                differ in length, or ``interval_count`` is smaller than 1.
        """
        sample_count = len(train_features)
        if sample_count == 0:
            raise ValueError("train_features must not be empty")
        if sample_count != len(train_classes):
            raise ValueError("train_features and train_classes must have the same length")

        # Keep the training borders so unseen data can later be binned with
        # the same intervals: data_discretization(data, n, borders=self.borders).
        self.borders = self._interval_borders(train_features, interval_count)
        discrete_features = self.data_discretization(train_features, interval_count, self.borders)
        attr_count = len(discrete_features[0])

        # Start from a clean state so repeated fits do not keep stale classes.
        self.priors = {}
        self.classes_likelihoods = {}
        self.likelihoods = {}

        for label in range(classess_count):
            class_rows = [
                row for row, row_class in zip(discrete_features, train_classes)
                if row_class == label
            ]
            class_size = len(class_rows)

            prior = class_size / sample_count
            self.classes_likelihoods[label] = prior
            self.priors[label] = prior

            self.likelihoods[label] = {}
            for attr in range(attr_count):
                bin_counts = Counter(row[attr] for row in class_rows)
                # Normalise over every possible bin (not over the number of
                # attributes); a class without samples gets likelihood 0
                # everywhere instead of a division by zero.
                self.likelihoods[label][attr] = Counter({
                    bin_index: (bin_counts[bin_index] / class_size if class_size else 0.0)
                    for bin_index in range(interval_count)
                })

    @staticmethod
    def _interval_borders(data, interval_count):
        """Return, for every attribute, its ``interval_count - 1`` sorted bin borders.

        The borders are the values found at the equal-frequency cut positions
        of the sorted attribute column.
        """
        if interval_count < 1:
            raise ValueError("interval_count must be at least 1")
        sample_count = len(data)
        if sample_count == 0:
            raise ValueError("data must not be empty")

        borders_lists = []
        for attr in range(len(data[0])):
            column = sorted(record[attr] for record in data)
            borders_lists.append([
                column[int(sample_count * (cut + 1) / interval_count)]
                for cut in range(interval_count - 1)
            ])
        return borders_lists

    @staticmethod
    def _bin_index(value, borders):
        """Return the bin of ``value``: the number of borders strictly below it."""
        return sum(1 for border in borders if border < value)

    @staticmethod
    def data_discretization(data, interval_count, borders=None):
        """Replace every attribute value by the index of the bin it falls into.

        A value belongs to the first bin whose upper border is greater than or
        equal to it; values above the last border go to the last bin.

        Args:
            data: sequence of records, each a sequence of numeric values.
            interval_count: number of bins per attribute; only used to derive
                the borders when ``borders`` is not given.
            borders: optional per-attribute lists of sorted bin borders (for
                example ``NaiveBayes.borders`` after training). When omitted,
                equal-frequency borders are computed from ``data`` itself.

        Returns:
            A list with one list of bin indices per record; empty when ``data``
            is empty.

        Raises:
            ValueError: if borders must be derived and ``interval_count`` is
                smaller than 1.
        """
        if len(data) == 0:
            return []
        if borders is None:
            borders = NaiveBayes._interval_borders(data, interval_count)

        return [
            [
                NaiveBayes._bin_index(value, attr_borders)
                for value, attr_borders in zip(record, borders)
            ]
            for record in data
        ]

    @staticmethod
    def _log_probability(probability):
        """Return log(probability), mapping a zero probability to -inf."""
        return math.log(probability) if probability > 0 else -math.inf

    def predict(self, sample):
        """Score a discretized sample against every known class.

        Args:
            sample: sequence of bin indices, one per attribute, as produced by
                ``data_discretization``.

        Returns:
            Dict mapping each class index to its unnormalised log-posterior
            ``log P(class) + sum(log P(bin | class))``. A class for which any
            factor has zero probability scores ``-inf``, so it can never beat
            a class with a finite score. Take the key with the largest value
            as the predicted class.
        """
        scores = {}
        for label, prior in self.classes_likelihoods.items():
            # Work entirely in log space: the prior is logged as well.
            log_score = self._log_probability(prior)
            for attr, bin_index in enumerate(sample):
                log_score += self._log_probability(self.likelihoods[label][attr][bin_index])
            scores[label] = log_score
        return scores

class GaussianNaiveBayes:
    """Naive Bayes classifier that models every attribute with a normal distribution.

    Attributes:
        priors: class label -> prior probability.
        likelihoods: class label -> attribute index -> ``(mean, std)`` of that
            attribute within the class.
    """

    # Lower bound for a fitted standard deviation, so an attribute that is
    # constant within a class does not cause a division by zero.
    MIN_STD = 1e-9

    def __init__(self):
        self.priors = {}
        self.likelihoods = {}

    def build_classifier(self, train_features, train_classes):
        """Estimate class priors and per-class attribute means / standard deviations.

        Args:
            train_features: sequence of records, each a sequence of numeric
                attribute values.
            train_classes: class label of every record.

        Raises:
            ValueError: if the training set is empty or the features and
                labels differ in length.
        """
        sample_count = len(train_features)
        if sample_count == 0:
            raise ValueError("train_features must not be empty")
        if sample_count != len(train_classes):
            raise ValueError("train_features and train_classes must have the same length")

        rows_by_class = {}
        for row, label in zip(train_features, train_classes):
            rows_by_class.setdefault(label, []).append(row)

        self.priors = {}
        self.likelihoods = {}
        for label, rows in rows_by_class.items():
            class_size = len(rows)
            self.priors[label] = class_size / sample_count

            params = {}
            for attr in range(len(rows[0])):
                values = [float(row[attr]) for row in rows]
                mean = sum(values) / class_size
                # Population variance (divide by n).
                variance = sum((value - mean) ** 2 for value in values) / class_size
                params[attr] = (mean, max(math.sqrt(variance), self.MIN_STD))
            self.likelihoods[label] = params

    @staticmethod
    def _log_normal_dist(x, mean, std):
        """Return the natural log of the normal density N(mean, std**2) at ``x``."""
        if std <= 0:
            raise ValueError("std must be positive")
        z = (x - mean) / std
        return -0.5 * z * z - math.log(std) - 0.5 * math.log(2.0 * math.pi)

    @staticmethod
    def normal_dist(x, mean, std):
        """Return the density of the normal distribution N(mean, std**2) at ``x``.

        Raises:
            ValueError: if ``std`` is not positive.
        """
        return math.exp(GaussianNaiveBayes._log_normal_dist(x, mean, std))

    def predict(self, sample):
        """Score a sample against every class seen during training.

        Args:
            sample: sequence of numeric attribute values, one per attribute.

        Returns:
            Dict mapping each class label to its unnormalised log-posterior
            ``log P(class) + sum(log density(attribute | class))``. Take the
            key with the largest value as the predicted class. The dict is
            empty when the classifier has not been built yet.
        """
        scores = {}
        for label, prior in self.priors.items():
            # Densities are summed in log space to avoid underflow for
            # samples far away from a class mean.
            log_score = math.log(prior)
            for attr, value in enumerate(sample):
                mean, std = self.likelihoods[label][attr]
                log_score += self._log_normal_dist(value, mean, std)
            scores[label] = log_score
        return scores

In [123]:
# Hyper-parameters shared by training and test-time discretization; keeping
# them in one place guarantees both steps use the same number of bins.
INTERVAL_COUNT = 4  # bins per attribute
CLASS_COUNT = 3  # the classifier iterates over class labels 0, 1 and 2

naive_bayes = NaiveBayes()
naive_bayes.build_classifier(x_train, y_train, INTERVAL_COUNT, CLASS_COUNT)

print(naive_bayes.classes_likelihoods)
print(naive_bayes.likelihoods)

# NOTE(review): the test set is binned with borders derived from the test set
# itself, not with the borders used for the training data, so the same raw
# value can land in different bins at train and test time.
x_test_discrete = naive_bayes.data_discretization(x_test, INTERVAL_COUNT)
print(x_test)
print(x_test_discrete)

predictions = []
for sample, gt in zip(x_test_discrete, y_test):
    likelihoods = naive_bayes.predict(sample)
    # The predicted class is the one with the highest score.
    prediction = max(likelihoods, key=likelihoods.get)
    predictions.append(prediction)
    print(f'Prediction: {prediction}; Real class: {gt}')

# Convert explicitly so the element-wise comparison does not depend on
# list-vs-ndarray operator fallback.
accuracy = np.mean(np.asarray(predictions) == y_test)
print(f"Accuracy: {accuracy}")

{0: 0.34074074074074073, 1: 0.3333333333333333, 2: 0.32592592592592595}
{0: {0: Counter({0: 0.717391304347826, 1: 0.2826086956521739, 2: 0.0, 3: 0.0}), 1: Counter({3: 0.43478260869565216, 2: 0.41304347826086957, 1: 0.13043478260869565, 0: 0.021739130434782608}), 2: Counter({0: 0.8695652173913043, 1: 0.13043478260869565, 2: 0.0, 3: 0.0}), 3: Counter({0: 0.8043478260869565, 1: 0.1956521739130435, 2: 0.0, 3: 0.0})}, 1: {0: Counter({1: 0.37777777777777777, 2: 0.3333333333333333, 3: 0.2, 0: 0.08888888888888889}), 1: Counter({0: 0.5111111111111111, 1: 0.3111111111111111, 2: 0.17777777777777778, 3: 0.0}), 2: Counter({1: 0.5111111111111111, 2: 0.4888888888888889, 0: 0.0, 3: 0.0}), 3: Counter({1: 0.5555555555555556, 2: 0.4444444444444444, 0: 0.0, 3: 0.0})}, 2: {0: Counter({3: 0.5, 2: 0.36363636363636365, 1: 0.11363636363636363, 0: 0.022727272727272728}), 1: Counter({0: 0.4090909090909091, 2: 0.29545454545454547, 1: 0.22727272727272727, 3: 0.06818181818181818}), 2: Counter({3: 0.6818181818181818