###### U20EC147
# Experiment 3
### Aim: Design a Naive Bayes classifier and train it on the Iris dataset


In [1]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
import numpy as np
import pandas as pd

In [2]:
# Load the Iris dataset; later cells read its .data, .target and
# .feature_names attributes.
iris = load_iris()

In [3]:
# Wrap the feature matrix in a DataFrame with named columns so the first rows
# can be previewed. NOTE(review): `df` is not referenced by any later cell
# shown here; the model below works on the raw arrays.
df = pd.DataFrame(data=iris.data,
                  columns=iris.feature_names)
df.head()

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm)
0,5.1,3.5,1.4,0.2
1,4.9,3.0,1.4,0.2
2,4.7,3.2,1.3,0.2
3,4.6,3.1,1.5,0.2
4,5.0,3.6,1.4,0.2


In [4]:
# Target vector: one class label per sample.
y = iris.target
y.shape

(150,)

In [5]:
# Feature matrix: one row per sample, one column per measurement.
x = iris.data
x.shape

(150, 4)

In [6]:
# Hold out 20% of the samples for evaluation; the fixed random_state makes the
# split (and therefore every result below) reproducible across runs.
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=3)

In [7]:
# Group the training rows by class label: class_data[label] is the sub-matrix
# of x_train whose rows belong to that class.
# Iterate over the distinct labels only (pd.unique keeps first-appearance
# order, so the dict order matches a per-sample loop) instead of rebuilding
# the same boolean mask once for every training sample.
class_data = {
    label: x_train[y_train == label]
    for label in pd.unique(y_train)
}

In [8]:
# Fit one Gaussian per (class, feature): store the column-wise mean and
# standard deviation of every class's training rows.
class_stats = {
    label: {
        'mean': rows.mean(axis=0),
        'std': rows.std(axis=0),
    }
    for label, rows in class_data.items()
}

In [9]:
# Inspect the fitted per-class, per-feature mean and standard deviation.
class_stats

{0: {'mean': array([5.03  , 3.4325, 1.465 , 0.2375]),
  'std': array([0.31796226, 0.36358458, 0.18103867, 0.10650704])},
 2: {'mean': array([6.5525, 2.9875, 5.5325, 2.01  ]),
  'std': array([0.62248996, 0.32108215, 0.53403535, 0.25475478])},
 1: {'mean': array([5.93  , 2.7875, 4.2675, 1.335 ]),
  'std': array([0.48487112, 0.31637596, 0.46711214, 0.20068632])}}

In [10]:
def gaussian_pdf(x, mean, std, min_std=1e-9):
    """Evaluate the normal probability density N(mean, std**2) at x.

    All arguments broadcast element-wise, so passing a sample vector together
    with per-feature mean/std vectors yields one density per feature.

    Parameters
    ----------
    x : float or array-like
        Point(s) at which to evaluate the density.
    mean : float or array-like
        Mean of the distribution(s).
    std : float or array-like
        Standard deviation of the distribution(s).
    min_std : float, optional
        Lower bound applied to ``std``. A feature that is constant within a
        class has std == 0, which would otherwise divide by zero and produce
        NaN/inf densities. Values of ``std`` above this floor are unaffected.

    Returns
    -------
    float or numpy.ndarray
        Density value(s), with the broadcast shape of the inputs.
    """
    # Clamp into a new array so the caller's std is never modified.
    std = np.maximum(np.asarray(std, dtype=float), min_std)
    exponent = np.exp(-((x - mean)**2 / (2*std**2)))
    return (1 / (std * np.sqrt(2*np.pi))) * exponent

In [11]:
def predict(sample, class_stats):
    """Return the class label whose per-feature Gaussians best explain sample.

    Features are treated as independent given the class (the naive Bayes
    assumption). No class prior is applied, so all classes are weighted
    equally.

    Parameters
    ----------
    sample : array-like
        Feature vector of a single observation.
    class_stats : dict
        Maps each class label to a dict with 'mean' and 'std' entries holding
        the per-feature mean and standard deviation for that class.

    Returns
    -------
    The best-scoring class label, or None if class_stats is empty or every
    class yields a NaN score. On ties the class that comes first in
    class_stats wins.
    """
    best_class = None
    best_score = -np.inf
    for class_label, class_info in class_stats.items():
        likelihoods = gaussian_pdf(sample, class_info['mean'], class_info['std'])
        # Sum log-densities instead of multiplying raw densities: the product
        # underflows to 0.0 for many (or very unlikely) features, which would
        # make every class tie at zero. log(0) = -inf is a legitimate score.
        with np.errstate(divide='ignore', invalid='ignore'):
            score = np.sum(np.log(likelihoods))
        if np.isnan(score):
            # An undefined likelihood can never be selected.
            continue
        if best_class is None or score > best_score:
            best_score = score
            best_class = class_label
    return best_class

In [12]:
# Classify every held-out sample independently with the fitted class statistics.
y_pred = np.array([predict(sample, class_stats) for sample in x_test])

In [13]:
# Predicted labels for the test set.
y_pred

array([0, 0, 0, 0, 0, 2, 1, 0, 2, 1, 1, 0, 1, 1, 2, 0, 1, 2, 2, 0, 2, 2,
       2, 1, 0, 2, 1, 1, 1, 1])

In [14]:
# True labels for the test set, for comparison with y_pred.
y_test

array([0, 0, 0, 0, 0, 2, 1, 0, 2, 1, 1, 0, 1, 1, 2, 0, 1, 2, 2, 0, 2, 2,
       2, 1, 0, 2, 2, 1, 1, 1])

In [15]:
# Confusion matrix: rows are the true classes, columns the predicted classes,
# so off-diagonal entries count misclassifications.
cm = confusion_matrix(y_test,y_pred)
cm

array([[10,  0,  0],
       [ 0, 10,  0],
       [ 0,  1,  9]])

In [16]:
# Overall accuracy in percent. Correct predictions lie on the confusion-matrix
# diagonal, so np.trace sums them for any number of classes rather than
# hard-coding the three diagonal cells.
accuracy = np.trace(cm) / np.sum(cm) * 100
accuracy

96.66666666666667