### IRIS CLASSIFIER USING GAUSSIAN NAIVE BAYES

In [6]:
# load libraries
import numpy as np

# load dataset
# Each non-empty line of the file is one comma-separated record.
rows = []
with open('iris.data.txt', 'r') as file:
    for line in file:
        record = line.rstrip()
        # Skip blank lines (for example a trailing empty line at the end of
        # the file): they would become one-element rows and make the array
        # ragged.
        if not record:
            continue
        rows.append(record.split(','))
# All fields are still strings at this point.
data = np.array(rows)
print(data)

[['5.1' '3.5' '1.4' '0.2' 'Iris-setosa']
 ['4.9' '3.0' '1.4' '0.2' 'Iris-setosa']
 ['4.7' '3.2' '1.3' '0.2' 'Iris-setosa']
 ['4.6' '3.1' '1.5' '0.2' 'Iris-setosa']
 ['5.0' '3.6' '1.4' '0.2' 'Iris-setosa']
 ['5.4' '3.9' '1.7' '0.4' 'Iris-setosa']
 ['4.6' '3.4' '1.4' '0.3' 'Iris-setosa']
 ['5.0' '3.4' '1.5' '0.2' 'Iris-setosa']
 ['4.4' '2.9' '1.4' '0.2' 'Iris-setosa']
 ['4.9' '3.1' '1.5' '0.1' 'Iris-setosa']
 ['5.4' '3.7' '1.5' '0.2' 'Iris-setosa']
 ['4.8' '3.4' '1.6' '0.2' 'Iris-setosa']
 ['4.8' '3.0' '1.4' '0.1' 'Iris-setosa']
 ['4.3' '3.0' '1.1' '0.1' 'Iris-setosa']
 ['5.8' '4.0' '1.2' '0.2' 'Iris-setosa']
 ['5.7' '4.4' '1.5' '0.4' 'Iris-setosa']
 ['5.4' '3.9' '1.3' '0.4' 'Iris-setosa']
 ['5.1' '3.5' '1.4' '0.3' 'Iris-setosa']
 ['5.7' '3.8' '1.7' '0.3' 'Iris-setosa']
 ['5.1' '3.8' '1.5' '0.3' 'Iris-setosa']
 ['5.4' '3.4' '1.7' '0.2' 'Iris-setosa']
 ['5.1' '3.7' '1.5' '0.4' 'Iris-setosa']
 ['4.6' '3.6' '1.0' '0.2' 'Iris-setosa']
 ['5.1' '3.3' '1.7' '0.5' 'Iris-setosa']
 ['4.8' '3.4' '1

In [7]:
# split into inputs (every column but the last, as floats) and outputs (last column, as strings)
X, y = data[:, :-1].astype(float), data[:, -1].astype(str)

#### Training the model
In the following section, we will train a Gaussian Naive Bayes model on the training dataset and make predictions for the given events. To avoid numerical underflow, we work with log-probabilities: the per-feature likelihoods can be very small, and multiplying many of them together may underflow, so we add their logarithms (together with the log prior of each class) instead.

In [16]:
# compute the mean and variance of each feature (column) of the given samples
def mean_and_var(data):
    """Return the per-column means and variances of a 2-D array.

    Parameters
    ----------
    data : array indexed as ``data[:, i]``; one row per sample, one column
        per feature. Must contain at least one row, because the number of
        columns is read from ``data[0]``.

    Returns
    -------
    (mean, variance) : two lists with one entry per column, in column order.
        Variances come from ``np.var`` with its default settings.
    """
    n_features = len(data[0])
    columns = [data[:, col] for col in range(n_features)]
    mean = [np.mean(column) for column in columns]
    variance = [np.var(column) for column in columns]
    return mean, variance


# train gaussian naive bayes model
def train_gaussian_nb(X, y):
    """Estimate per-class feature statistics and log priors.

    Classes are numbered 0..m-1 in the order returned by ``np.unique(y)``;
    those integers are the keys of all three returned dictionaries.

    Parameters
    ----------
    X : array of samples, selected per class with a boolean mask ``X[y == label]``.
    y : array of class labels, one per row of ``X``.

    Returns
    -------
    means : dict mapping class index -> list of per-feature means.
    variances : dict mapping class index -> list of per-feature variances.
    priors : dict mapping class index -> log of the class's share of ``y``.
    """
    means, variances, priors = {}, {}, {}
    n_total = len(y)
    for idx, label in enumerate(np.unique(y)):
        members = X[y == label]
        means[idx], variances[idx] = mean_and_var(members)
        # stored as a log so it can be added to the log-likelihoods later
        priors[idx] = np.log(len(members) / n_total)
    return means, variances, priors
        

# compute the log of the gaussian probability density
def gauss_dist(x, mean, var, eps=1e-9):
    """Return the log-density of a Gaussian N(mean, var) evaluated at x.

    The value is ``-0.5 * (log(2*pi*var) + (x - mean)**2 / var)``, i.e. the
    logarithm of the density rather than the density itself.

    Parameters
    ----------
    x : value (or array) at which to evaluate the density.
    mean : mean of the distribution.
    var : variance of the distribution.
    eps : lower bound applied to ``var``. A variance of zero (e.g. a feature
        that is constant within a class) would otherwise cause a division by
        zero and produce inf/nan results.

    Returns
    -------
    The finite log-density as a numpy float (or array, following numpy
    broadcasting of the inputs).
    """
    # Floor the variance so the logarithm and the division stay finite.
    var = np.maximum(var, eps)
    # Written directly in log space instead of taking log(1 / sqrt(...)).
    return -0.5 * (np.log(2 * np.pi * var) + (x - mean) ** 2 / var)


# predict the class index of each new sample
def predict_gaussian_nb(X, means, variances, priors):
    """Predict a class index for every sample in X.

    For each sample and each class, the per-feature log-densities from
    ``gauss_dist`` are summed and added to the class's log prior; the class
    with the largest total is selected.

    Parameters
    ----------
    X : sequence of samples (one row per sample). A single flat sample such
        as ``[6.3, 3.3, 6.0, 2.5]`` is also accepted and treated as one row.
    means, variances : dicts keyed by class index 0..m-1, each value a
        per-feature sequence.
    priors : dict keyed by class index holding log priors.

    Returns
    -------
    list with one predicted class index per sample (empty for empty X).
    """
    # A flat sample has scalar elements; wrap it so it is handled as one row
    # instead of failing on len() of a scalar.
    if len(X) > 0 and np.ndim(X[0]) == 0:
        X = [X]

    n_classes = len(means)
    y_pred = []
    for sample in X:
        posteriors = np.zeros(n_classes)
        for j in range(n_classes):
            # Sum of log-densities == log of the product of densities.
            log_likelihood = sum(
                gauss_dist(x_k, means[j][k], variances[j][k])
                for k, x_k in enumerate(sample)
            )
            posteriors[j] = priors[j] + log_likelihood
        y_pred.append(np.argmax(posteriors))
    return y_pred


def prediction_iris(X, y, new_event):
    """Train on (X, y) and return the predicted label for ``new_event``.

    The model is fitted from scratch on every call. Predicted class indices
    are mapped back to labels through ``np.unique(y)``, and only the label of
    the first row of ``new_event`` is returned.
    """
    class_labels = np.unique(y)
    model = train_gaussian_nb(X, y)
    predicted = predict_gaussian_nb(new_event, *model)
    return class_labels[predicted][0]

#### Prediction
Once trained, the model can be used to predict the class of a new sample. Here are some examples.

In [17]:
# Sample 1: one new flower, given as a single row of four feature values
new_event = np.array([[6.3, 3.3, 6.0, 2.5]])
pred = prediction_iris(X=X, y=y, new_event=new_event)
print('Predicted Class: ', pred)

Predicted Class:  Iris-virginica


In [18]:
# Sample 2: one new flower, given as a single row of four feature values
new_event = np.array([[5.0, 2.0, 3.5, 1.0]])
pred = prediction_iris(X=X, y=y, new_event=new_event)
print('Predicted Class: ', pred)

Predicted Class:  Iris-versicolor


In [19]:
# Sample 3: one new flower, given as a single row of four feature values
new_event = np.array([[4.9, 3.1, 1.5, 0.1]])
pred = prediction_iris(X=X, y=y, new_event=new_event)
print('Predicted Class: ', pred)

Predicted Class:  Iris-setosa
