# Gaussian (Multivariate Normal) Bayes Classifier with Cost Function - Example Dataset: Iris
## Import necessary modules

In [1]:
import os
# NOTE(review): hard-coded, machine-specific working directory. The project-local
# modules imported below and the data file loaded later are presumably resolved
# relative to it - adjust this path for your own environment.
os.chdir(r"C:\Users\tahsi\OneDrive - University of Eastern Finland\Python Algorithm and Data Structure\GitHub\ml-algorithms-python-numpy\1. Codes")
import numpy as np
import random

# Project-local helper modules (not part of the standard library or NumPy).
import load_data_to_matrix, split_train_test, int_label_to_str_label, performance_measurement, k_means_algorithm, distance_measurement

# Load Data

In [2]:
from load_data_to_matrix import load_text_data, map_label
# Read the comma-separated Iris file (no header row). Column index 4 holds the label as a
# string; the loader returns the data together with d_map, the label mapping.
Iris, d_map = load_text_data(file = "Iris.txt", delimiter = ",", header = False , label_col_is_str = True , label_col = 4)

In [3]:
# Show the first 5 rows of the data. The 5th column (index 4) is the label: it was a string
# in the file and the loader mapped it to a numeric code. The mapping is stored in d_map.
print(Iris[0:5 ,:])

[[5.1 3.5 1.4 0.2 0. ]
 [4.9 3.  1.4 0.2 0. ]
 [4.7 3.2 1.3 0.2 0. ]
 [4.6 3.1 1.5 0.2 0. ]
 [5.  3.6 1.4 0.2 0. ]]


In [4]:
# Show the mapping from the original string labels to their integer codes.
print(d_map)

{'Iris-setosa': 0, 'Iris-versicolor': 1, 'Iris-virginica': 2}


# Split the Data into Training and Test Sets

In [5]:
from split_train_test import split_train_test

In [6]:
# Split the Iris data into a training part and a test part; the label is in column index 4.
# NOTE(review): percentage = 50 presumably sends half of the rows to each part - confirm
# against split_train_test.
train_test_data = split_train_test(data = Iris, percentage = 50, label = 4)

# Unpack the pieces used by the later cells. The test features/labels and the full training
# table (features + label column) are converted to plain NumPy arrays.
train_data_feature = train_test_data.train_feature
train_data_label = train_test_data.train_label
test_feature = np.asarray(train_test_data.test_feature) 
true_label = np.asarray(train_test_data.test_label)
train_data_feature_label = np.asarray(train_test_data.train)

# Training the Model

In [7]:
from gaussian_bayes_classifier_with_cost_function import *

In [8]:
# Fit the classifier: estimate, for every class, the feature mean vector, the feature
# covariance matrix and the prior probability from the training table.
train_data = train_data_feature_label
label = 4  # column index of the label inside train_data
mean, covariance, prior_prob, class_labels = train_naive_bayes(train_data, label)

# Cost Function

In [9]:
# here we generated a random cost function. Replace it with desired cost function
k = len(class_labels)
cost_function = np.random.rand(k , k)
np.fill_diagonal(cost_function, 0)
print(cost_function)

[[0.         0.14544777 0.73474227]
 [0.83772352 0.         0.03412607]
 [0.99875251 0.16591533 0.        ]]


# Testing the Model

In [10]:
# Classify every test row by choosing the class with the smallest expected cost.
predicted_label = gaussian_bayes_classification_with_cost(test_feature , mean, covariance, prior_prob, class_labels, cost_function)

In [11]:
# Predicted class codes, one per test row (see d_map for the string labels).
print(predicted_label)

[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 1.0, 1.0, 2.0, 1.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 1.0]


# Code of the Gaussian (Multivariate Normal) Bayes Classifier

In [12]:
# Training of the model.

def train_naive_bayes(train_data, label):
    
    """
    Input:
    1. train_data: the train data with the label column. The data must be in 2d array(not matrix).
    2. label: the column index of the label in the train_data.
    
    Output:
    1. mean: the mean vector of the features in dictionary.
    2. covariance: the covariance of the features in dictionary.
    3. prior_prob : the prior probability of each of the labels dictionary.
    4. class_label : label of the classes.
    
    In output dictionary the keys are the label.
    """

    import numpy as np

    # Storing mean and variances of each of the features of each of the classes in dictionary.
    
    # Train Model.

    # Storing mean and variances of each of the features of each of the classes in dictionary.
    mean = {}
    covariance  = {}
    prior_prob = {}


    
    N = train_data.shape[0]
    class_labels = np.unique(train_data[:, label])
    for i in class_labels:
        X = train_data[train_data[:, label] == i, :] # make seperate matrix for each of the classes.
        X_features = np.delete(X, label, axis = 1)       # delete the class column and take only the features.
        mean[i] = X_features.mean(0)                 # means of the features
        covariance[i]  = np.cov(X_features.T)        # covariances of the features
        prior_prob[i] = X_features.shape[0]/N


            
    return mean, covariance, prior_prob, class_labels
    


# Testing

# Normal Likelihood

def multivariate_gaussian_likelihood(x, mu, sigma):
    """
    Density of the multivariate normal distribution N(mu, sigma) evaluated at x.

        f(x) = exp(-0.5 * (x - mu)^T sigma^-1 (x - mu)) / ((2*pi)^(p/2) * sqrt(det(sigma)))

    Input:
    1. x: one observation, a vector of p feature values.
    2. mu: the mean vector (length p).
    3. sigma: the p x p covariance matrix.

    Output:
    The density value as a scalar.

    Raises numpy.linalg.LinAlgError if sigma is singular.
    """

    import numpy as np
    import math

    sigma = np.atleast_2d(np.asarray(sigma, dtype=float))
    diff = np.asarray(x, dtype=float).ravel() - np.asarray(mu, dtype=float).ravel()
    p = diff.size

    # Squared Mahalanobis distance; solving the linear system avoids forming the
    # explicit inverse and yields a plain scalar for math.exp.
    mahalanobis_sq = float(np.dot(diff, np.linalg.solve(sigma, diff)))

    # The normalising constant uses the square root of the determinant.
    norm_const = 1 / (math.pow(2 * math.pi, p / 2) * np.sqrt(np.linalg.det(sigma)))

    return norm_const * math.exp(-0.5 * mahalanobis_sq)

    

    

# Bayes Numerator

def bayes_numerator(likelihood, prior):
    """
    Numerator of Bayes' rule: the class likelihood weighted by the class prior.

    Input:
    1. likelihood: value of the class-conditional likelihood.
    2. prior: prior probability of the class.

    Output:
    The product likelihood * prior (the unnormalized posterior).
    """
    unnormalized_posterior = likelihood * prior
    return unnormalized_posterior




def gaussian_bayes_classification_with_cost(test_feature, mean, covariance, prior_prob, class_labels, cost_function):
    """
    Classify every test row with the minimum-expected-cost Bayes decision rule.

    For each row the posterior probability of every class is computed from the
    Gaussian likelihood and the class prior. The expected cost of predicting
    class i is sum_j cost_function[i, j] * posterior[j]; the class with the
    smallest expected cost is chosen (the first one in case of a tie).

    Input:
    1. test_feature: the test features in 2D array(not matrix).
    2. mean: the mean vector of the features in dictionary.(from the training step)
    3. covariance: the covariance of the features in dictionary.(from the training step)
    4. prior_prob : the prior probability of each of the labels dictionary.(from the training step)
    5. class_labels: the class labels; they are the keys of the three dictionaries.
    6. cost_function: a 2d array whose entry [i, j] is the cost of predicting
       class_labels[i] when the true class is class_labels[j].

    Output:
    The predicted label in list (labels are returned as floats).
    """

    import numpy as np

    labels = np.asarray(class_labels, dtype=float)
    cost = np.asarray(cost_function, dtype=float)
    priors = np.array([prior_prob[c] for c in class_labels], dtype=float)

    pred_label = []

    for x in test_feature:
        # Likelihood times prior for every class (numerator of Bayes' rule).
        numerators = np.array(
            [bayes_numerator(multivariate_gaussian_likelihood(x, mean[c], covariance[c]), prior_prob[c])
             for c in class_labels],
            dtype=float,
        )

        evidence = numerators.sum()
        if np.isfinite(evidence) and evidence > 0:
            posterior = numerators / evidence
        else:
            # Every likelihood underflowed to 0 (or is not finite): the data carry
            # no usable information, so fall back to the priors instead of
            # dividing by zero.
            posterior = priors

        # Expected cost of predicting each class.
        risk = np.dot(cost, posterior)

        pred_label.append(labels[np.argmin(risk)])

    return pred_label