# Naive Bayes Classifier Example Dataset : Iris
## Import Necessary Modules

In [1]:
import os
os.chdir(r"C:\Users\tahsi\OneDrive - University of Eastern Finland\Python Algorithm and Data Structure\GitHub\ml-algorithms-python-numpy\1. Codes")
import numpy as np
import random

In [2]:
import load_data_to_matrix, split_train_test, int_label_to_str_label, performance_measurement, k_means_algorithm, distance_measurement

# Load the Dataset

In [3]:
from load_data_to_matrix import load_text_data, map_label
# Read the comma-separated file "Iris.txt" (no header row). The label sits in column index 4
# and is stored as a string, so the loader also returns d_map, the label-name -> integer
# mapping that is printed a few cells below.
Iris, d_map = load_text_data(file = "Iris.txt", delimiter = ",", header = False , label_col_is_str = True , label_col = 4)

In [4]:
# First 5 rows of the data. The 5th column is the label. The labels were originally strings and the loader mapped them to
# integers; the string-to-integer label map is stored in d_map.
print(Iris[0:5 ,:])

[[5.1 3.5 1.4 0.2 0. ]
 [4.9 3.  1.4 0.2 0. ]
 [4.7 3.2 1.3 0.2 0. ]
 [4.6 3.1 1.5 0.2 0. ]
 [5.  3.6 1.4 0.2 0. ]]


In [5]:
print(d_map)

{'Iris-setosa': 0, 'Iris-versicolor': 1, 'Iris-virginica': 2}


# Split the Data to Train and Test

In [6]:
from split_train_test import split_train_test

In [7]:
# Split the Iris matrix into train and test parts; label = 4 is the index of the label column.
# NOTE(review): presumably percentage = 20 is the share of rows used for TRAINING -- the
# confusion matrix printed further down has a granularity of 1/120, which suggests 120 of the
# 150 rows end up in the test set. Confirm against split_train_test.
train_test_data = split_train_test(data = Iris, percentage = 20, label = 4)

# Pull the individual pieces out of the returned object.
train_data_feature = train_test_data.train_feature
train_data_label = train_test_data.train_label
# The test features/labels and the full training set (features + label column) are converted
# to plain arrays because the classifier code below expects 2d arrays, not matrices.
test_feature = np.asarray(train_test_data.test_feature) 
true_label = np.asarray(train_test_data.test_label)
train_data_feature_label = np.asarray(train_test_data.train)

# Training the Model

In [8]:
from naive_bayes_classifier import *

In [9]:
# Training input: the training rows including their label column.
train_data = train_data_feature_label
# Column index of the label inside train_data.
label = 4
# Per-class feature means, feature variances and prior probabilities (dictionaries keyed by
# class label), plus the array of distinct class labels.
mean, var, prior_prob, class_labels = train_naive_bayes(train_data, label)

# Testing the Model

In [10]:
# Predict one class label per row of test_feature using the parameters estimated during training.
predicted_label = naive_bayes_classification(test_feature, mean, var, prior_prob, class_labels)

# Performance Measurement

In [11]:
from performance_measurement import confusion_matrix

In [12]:
# Compare the predictions with the true test labels. Returns the accuracy and the confusion table.
# NOTE(review): with percentage = True the printed table entries sum to ~100, so they appear to be
# percentages of all test samples rather than raw counts -- confirm against performance_measurement.
acuraccy, con_table = confusion_matrix(true_label = true_label, pred_label = predicted_label, percentage = True)

In [13]:
print(acuraccy)

91.67


In [14]:
print(con_table)

[[30.83  0.    0.  ]
 [ 0.   30.83  7.5 ]
 [ 0.    0.83 30.  ]]


# Code of the Naive Bayes Classifier

In [15]:
# Training of the model.

def train_naive_bayes(train_data, label):

    """
    Estimate the parameters of a Gaussian naive Bayes model.

    Input:
    1. train_data: 2d array (not matrix) holding the training samples, one per row,
       including the label column.
    2. label: column index of the label inside train_data.

    Output:
    1. mean: dictionary {class label: vector of per-feature means}.
    2. var: dictionary {class label: vector of per-feature variances}.
    3. prior_prob: dictionary {class label: fraction of training rows in that class}.
    4. class_labels: the distinct labels found in the label column (sorted by np.unique).
    """

    import numpy as np

    label_column = train_data[:, label]
    n_samples = train_data.shape[0]
    class_labels = np.unique(label_column)

    # One entry per class in each dictionary, keyed by the class label.
    mean, var, prior_prob = {}, {}, {}

    for cls in class_labels:
        # Rows that belong to this class, with the label column stripped off.
        class_rows = train_data[label_column == cls, :]
        class_features = np.delete(class_rows, label, axis = 1)

        mean[cls] = class_features.mean(0)
        var[cls] = class_features.var(0)
        # Prior = relative frequency of the class in the training data.
        prior_prob[cls] = class_features.shape[0] / n_samples

    return mean, var, prior_prob, class_labels
    



# Testing the model

def _gaussian_log_joint(samples, mu, variance, prior):
    """
    Log of the Bayes numerator of ONE class for every row of `samples`:
    log(prior) + sum over features of the log Gaussian density.

    Input:
    1. samples: 2d float array, one sample per row.
    2. mu: per-feature means of the class.
    3. variance: per-feature variances of the class.
    4. prior: prior probability of the class.

    Output:
    1. 1d array with one log-score per row of `samples`.

    Raises ZeroDivisionError for a zero variance and ValueError for a negative one
    (the same exception types the plain-probability formula produces).
    """
    import numpy as np

    mu = np.asarray(mu, dtype=float)
    variance = np.asarray(variance, dtype=float)
    if np.any(variance == 0):
        raise ZeroDivisionError("a feature has zero variance; the Gaussian likelihood is undefined")
    if np.any(variance < 0):
        raise ValueError("variance must not be negative")

    # log N(x | mu, var) = -0.5 * (log(2*pi*var) + (x - mu)^2 / var), per feature.
    log_density = -0.5 * (np.log(2.0 * np.pi * variance) + (samples - mu) ** 2 / variance)

    # A prior of exactly 0 legitimately maps to -inf; silence the numpy warning for it.
    with np.errstate(divide="ignore"):
        log_prior = np.log(prior)

    return log_density.sum(axis=1) + log_prior


def naive_bayes_classification(test_feature, mean, var, prior_prob, class_labels):

    """
    Predict a class label for every test sample with a Gaussian naive Bayes model.

    The classes are scored in log-space. Multiplying many small likelihoods underflows
    to 0 for samples far away from every class mean (or for many features), which made
    the normalizing factor 0 and the prediction fail. The posterior normalization is
    skipped because dividing all class scores by the same factor never changes which
    class has the maximum.

    Input:
    1. test_feature: the test data in 2d array (one sample per row, features only).
    2. mean : means of each of the class in dictionary. [returned from the training step]
    3. var  : variance of each of the class in dictionary. [returned from the training step]
    4. prior_prob: prior probability of each of the class in dictionary. [returned from the training step]
    5. class_labels : the class labels of the training data. [returned from the training step]

    Output:
    1. Predicted labels in a list, one element of class_labels per test sample.
       On ties the class that comes first in class_labels wins.

    Raises:
    ValueError if test_feature is not 2-dimensional, class_labels is empty, the number
    of features does not match the trained parameters, or a variance is negative.
    ZeroDivisionError if a variance is zero.
    """

    import numpy as np

    samples = np.asarray(test_feature, dtype=float)

    # No samples -> no predictions.
    if samples.shape[0] == 0:
        return []
    if samples.ndim != 2:
        raise ValueError("test_feature must be a 2d array with one sample per row")
    if len(class_labels) == 0:
        raise ValueError("class_labels is empty; there is no class to predict")

    # Column k holds the log-score of class_labels[k] for every sample.
    log_scores = np.column_stack([
        _gaussian_log_joint(samples, mean[c], var[c], prior_prob[c]) for c in class_labels
    ])

    # argmax returns the first maximum, matching the original first-match tie-breaking.
    winners = log_scores.argmax(axis=1)

    return [class_labels[k] for k in winners]
    
    


# Helper functions used in the testing (prediction) step
# The Gaussian (normal) likelihood function.
def normal_likelihood(x, mu, var):
    """
    Density of the normal distribution with mean `mu` and variance `var` at `x`.

    Input:
    1. x: the observed value.
    2. mu: mean of the distribution.
    3. var: variance of the distribution.

    Output:
    1. The density value 1/sqrt(2*pi*var) * exp(-(x - mu)^2 / (2*var)).
    """
    import math

    # The normalizing constant is evaluated first, then the exponential part.
    normalizer = 1 / math.sqrt(var * 2 * math.pi)
    deviation = x - mu
    exponential_part = math.exp(-0.5 * pow(deviation, 2) / var)

    return normalizer * exponential_part
    

# The Bayes Numerator
def bayes_numerator(x, mu, variance, prior):
    """
    Numerator of Bayes' rule for one class: prior * product of the per-feature
    normal likelihoods of the sample.

    Input:
    1. x: feature values of one sample.
    2. mu: per-feature means of the class.
    3. variance: per-feature variances of the class.
    4. prior: prior probability of the class.

    Output:
    1. The unnormalized posterior of the class for this sample.
    """
    import numpy as np

    # Likelihood of every feature value under the class' own normal distribution.
    per_feature = []
    for idx, value in enumerate(x):
        per_feature.append(normal_likelihood(value, mu[idx], variance[idx]))

    # The "naive" assumption: features are independent, so the likelihoods multiply.
    joint_likelihood = np.prod(per_feature)

    return joint_likelihood * prior
   
