# Imports

In [1]:
import numpy as np
import pandas as pd
import math

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix 
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import f1_score

#  Requirements in Naive Bayesian Classifier:

    1: Separate Dataset By Class.
    2: calculate mean and std_div of given column
    3: calculate mean and std_div of each column(feature) based on which class they belong in detaset.
    4: Gaussian Probability Density calculation for each attribute in a tupple (as we will use continuous data).
    5: calculate Class Probabilities and select class with max probability as predicted class.

# Using public Dataset (continuous data)
        
        Dataset used : Iris
        Description :
                     150 rows / examples 
                     5 coloumn (4 features , 1 class label)
                     feature: Sepal-Length	Sepal-Width	Petal-Length	Petal-Width
                     3 classes : Iris-setosa , iris-versicolor , iris-virginica

In [2]:
iris = pd.read_csv('Iris.csv')
iris.head()

Unnamed: 0,Id,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,Species
0,1,5.1,3.5,1.4,0.2,Iris-setosa
1,2,4.9,3.0,1.4,0.2,Iris-setosa
2,3,4.7,3.2,1.3,0.2,Iris-setosa
3,4,4.6,3.1,1.5,0.2,Iris-setosa
4,5,5.0,3.6,1.4,0.2,Iris-setosa


In [3]:
species = {'Iris-setosa': 0,'Iris-versicolor': 1,'Iris-virginica' : 2}
iris.Species = [species[item] for item in iris.Species]
iris.head()

Unnamed: 0,Id,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,Species
0,1,5.1,3.5,1.4,0.2,0
1,2,4.9,3.0,1.4,0.2,0
2,3,4.7,3.2,1.3,0.2,0
3,4,4.6,3.1,1.5,0.2,0
4,5,5.0,3.6,1.4,0.2,0


In [4]:
iris = iris[['SepalLengthCm','SepalWidthCm','PetalLengthCm','PetalWidthCm','Species']] # dropping the id col as not needed
iris.head()

Unnamed: 0,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,Species
0,5.1,3.5,1.4,0.2,0
1,4.9,3.0,1.4,0.2,0
2,4.7,3.2,1.3,0.2,0
3,4.6,3.1,1.5,0.2,0
4,5.0,3.6,1.4,0.2,0


In [5]:
iris.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 150 entries, 0 to 149
Data columns (total 5 columns):
SepalLengthCm    150 non-null float64
SepalWidthCm     150 non-null float64
PetalLengthCm    150 non-null float64
PetalWidthCm     150 non-null float64
Species          150 non-null int64
dtypes: float64(4), int64(1)
memory usage: 5.9 KB


In [6]:
iris.describe()  # just to comapre the results we will get below

Unnamed: 0,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,Species
count,150.0,150.0,150.0,150.0,150.0
mean,5.843333,3.054,3.758667,1.198667,1.0
std,0.828066,0.433594,1.76442,0.763161,0.819232
min,4.3,2.0,1.0,0.1,0.0
25%,5.1,2.8,1.6,0.3,0.0
50%,5.8,3.0,4.35,1.3,1.0
75%,6.4,3.3,5.1,1.8,2.0
max,7.9,4.4,6.9,2.5,2.0


In [7]:
d = iris.groupby('Species').count()
d

Unnamed: 0_level_0,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm
Species,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0,50,50,50,50
1,50,50,50,50
2,50,50,50,50


# converting the Dataframe into Numpy array:

    ( easier and simpler for calculations )

In [8]:
Dataset = np.array(iris)
Dataset[0:5]

array([[5.1, 3.5, 1.4, 0.2, 0. ],
       [4.9, 3. , 1.4, 0.2, 0. ],
       [4.7, 3.2, 1.3, 0.2, 0. ],
       [4.6, 3.1, 1.5, 0.2, 0. ],
       [5. , 3.6, 1.4, 0.2, 0. ]])

# Requirement 1:

# Function Seperate the dataset according to their label and store in a dictionary

In [9]:
def DivideByLabel(dataset): # this functions requires the dataset to contain class labels
    Label_divided_data = dict()
    d = len(dataset[1]) # used later to exclude the lables
    for i in range(len(dataset)):
        row = dataset[i]
        label = row[-1]#last column of row has label
        if (label not in Label_divided_data):
            Label_divided_data[label] = list()# for each new label create a list, the label name becomes key and the list is the item 
        Label_divided_data[label].append(row[0:d-1]) # put each row in a list, whose key matches its label (exclude last column i.e class label)
    return Label_divided_data

# Testing Function DivideByLabel

In [10]:
Label_divided_data = DivideByLabel(Dataset)

In [11]:
print(len(Label_divided_data[0])) # get no of items for key value '0'
print(len(Label_divided_data[1])) # get no of items for key value '1'
print(len(Label_divided_data[2])) # get no of items for key value '2'

50
50
50


In [12]:
for label in Label_divided_data:
    print(label)
    for rows in Label_divided_data[label]:
        print(rows)
    print("")

0.0
[5.1 3.5 1.4 0.2]
[4.9 3.  1.4 0.2]
[4.7 3.2 1.3 0.2]
[4.6 3.1 1.5 0.2]
[5.  3.6 1.4 0.2]
[5.4 3.9 1.7 0.4]
[4.6 3.4 1.4 0.3]
[5.  3.4 1.5 0.2]
[4.4 2.9 1.4 0.2]
[4.9 3.1 1.5 0.1]
[5.4 3.7 1.5 0.2]
[4.8 3.4 1.6 0.2]
[4.8 3.  1.4 0.1]
[4.3 3.  1.1 0.1]
[5.8 4.  1.2 0.2]
[5.7 4.4 1.5 0.4]
[5.4 3.9 1.3 0.4]
[5.1 3.5 1.4 0.3]
[5.7 3.8 1.7 0.3]
[5.1 3.8 1.5 0.3]
[5.4 3.4 1.7 0.2]
[5.1 3.7 1.5 0.4]
[4.6 3.6 1.  0.2]
[5.1 3.3 1.7 0.5]
[4.8 3.4 1.9 0.2]
[5.  3.  1.6 0.2]
[5.  3.4 1.6 0.4]
[5.2 3.5 1.5 0.2]
[5.2 3.4 1.4 0.2]
[4.7 3.2 1.6 0.2]
[4.8 3.1 1.6 0.2]
[5.4 3.4 1.5 0.4]
[5.2 4.1 1.5 0.1]
[5.5 4.2 1.4 0.2]
[4.9 3.1 1.5 0.1]
[5.  3.2 1.2 0.2]
[5.5 3.5 1.3 0.2]
[4.9 3.1 1.5 0.1]
[4.4 3.  1.3 0.2]
[5.1 3.4 1.5 0.2]
[5.  3.5 1.3 0.3]
[4.5 2.3 1.3 0.3]
[4.4 3.2 1.3 0.2]
[5.  3.5 1.6 0.6]
[5.1 3.8 1.9 0.4]
[4.8 3.  1.4 0.3]
[5.1 3.8 1.6 0.2]
[4.6 3.2 1.4 0.2]
[5.3 3.7 1.5 0.2]
[5.  3.3 1.4 0.2]

1.0
[7.  3.2 4.7 1.4]
[6.4 3.2 4.5 1.5]
[6.9 3.1 4.9 1.5]
[5.5 2.3 4.  1.3]
[6.5 2.8 4.6 1.5]
[

# Requirement 2 :Calculation of Mean and Std_div for given column

     calculate the mean and std dev for each attribute/feature (column)

In [13]:
def cal_Mean(column):
    return sum(column)/len(column)

In [14]:
#Testing with Column 1
mean_col1 = cal_Mean(Dataset[:,0]) #[:,0] ':' all rows ; 0 : column 0(first column in dataset)
mean_col1                          #(could be verified form cell above 'iris.describe()')

5.843333333333335

 # std deviation formula:
       Std div = square root of varience
        varience = calculate the Mean of all numbers
                   Then for each number: subtract the Mean and square the Difference
                   Then add all the squared differences and divide by total numbers -1(mean of the Squared difference)
                   note :the std_div formula i saw had N-1 hence i am implementing according to formula

In [15]:
def cal_StdDiv(column):
    mean = cal_Mean(column)
    sum_sq_diff = 0
    for number in column:
        diff = number - mean
        sum_sq_diff += pow(diff,2)
    
    varience = sum_sq_diff/(len(column)-1)
    std_div = math.sqrt(varience)
    
    return std_div

In [16]:
#Testing with column 2
std_col2 = cal_StdDiv(Dataset[:,1]) #std_div for 2nd column in dataset(could be verified form cell above 'iris.describe()')
std_col2

0.4335943113621737

# Function to calculate Mean and std_deviation of a each column in dataset

        calculate mean and std_div for all columns excpect the last column (last column is class label)

In [17]:
def cal_Mean_StdDiv_Dataset(data): # here the 'data' must only contains features ; label is excluded (must be np array)
    d = len(data[1]) # get number of columns ; note col no. starts from 0; so for 4 cols: 0,1,2,3 ; len(data[1]) returns 4
    data = data[:,0:d]  # ':' means take all rows , 0:d means (d = 4); so from 0 to 4 (include col 0 exclude col 4)
    
    mean_stddiv = []
    for col in range (len(data[1])):
        mean_col = cal_Mean(data[:,col])
        std_div_col = cal_StdDiv(data[:,col])
        sublist = (mean_col,std_div_col)
        mean_stddiv.append(sublist)
        
    return mean_stddiv #return a list containing mean , std_div of each feature /column in the given dataset

# Prepare Data for Testing above Function

    as our previous data : 'Dataset' contains class label we cannot use it here 
    as the function cal_Mean_StdDiv_Dataset expects a dataset that contains only the features a parameter

In [18]:
no_of_features = len(Dataset[1])-1 # no of total columns -1
Dataset_features=Dataset[:,0:no_of_features]
Dataset_features[0:5]

array([[5.1, 3.5, 1.4, 0.2],
       [4.9, 3. , 1.4, 0.2],
       [4.7, 3.2, 1.3, 0.2],
       [4.6, 3.1, 1.5, 0.2],
       [5. , 3.6, 1.4, 0.2]])

In [19]:
Dataset_label = Dataset[:,-1] # ':' means all rows , -1 = means last column
Dataset_label

array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 2., 2.,
       2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2.,
       2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2.,
       2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2.])

In [20]:
Mean_Std_dataset = cal_Mean_StdDiv_Dataset(Dataset_features)
Mean_Std_dataset # this is a list , whose elements are the mean and std deviation of each column in the dataset
#can be verified using the results of iris.describe() in above cell

[(5.843333333333335, 0.8280661279778629),
 (3.0540000000000007, 0.4335943113621737),
 (3.7586666666666693, 1.7644204199522617),
 (1.1986666666666672, 0.7631607417008414)]

# Requirement 3:

# Calculation of Mean and std_deviation for each column in dataset based on class_labels

        here we are going to use The dataset that contains Class_labels = 'Dataset'
        using the DivideByLabel function we will divide/ group the data besed on their labels
        then using the function cal_Mean_StdDiv_Dataset , for each class label we take the training examples and 
        calculate the mean and std_deviation of each column
        
        finally a dictionary is retured
            The keys represent class label
            The items Represents the mean and std deviation of each column(features) for that class label

In [21]:
def cal_Mean_StdDiv_by_class(data): # the function expects the parameter to be a dataset containg class_labels
    divided_dataset = DivideByLabel(data)
    mean_std_byLabel = dict() # will contain; keys = class label and each key contains a list of the mean stddiv and no of example in that class
    for label, rows in divided_dataset.items(): #shortcut to itertate a dictionary; label: key ; rows : the list of example 
        mean_std_byLabel[label] = cal_Mean_StdDiv_Dataset(np.array(rows,dtype=float))
        #np.array(rows,dtype=float) to covert the list into an array
        #as the function cal_Mean_StdDiv_Dataset expects argument in form of an array not list
    return mean_std_byLabel

In [22]:
Mean_StdDiv_of_EachLabel = cal_Mean_StdDiv_by_class(Dataset)
Mean_StdDiv_of_EachLabel

{0.0: [(5.005999999999999, 0.3524896872134512),
  (3.4180000000000006, 0.38102439795469095),
  (1.464, 0.1735111594364455),
  (0.2439999999999999, 0.10720950308167837)],
 1.0: [(5.936, 0.5161711470638635),
  (2.7700000000000005, 0.3137983233784114),
  (4.26, 0.46991097723995806),
  (1.3259999999999998, 0.197752680004544)],
 2.0: [(6.587999999999998, 0.635879593274432),
  (2.9739999999999998, 0.3224966381726376),
  (5.552, 0.5518946956639835),
  (2.026, 0.27465005563666733)]}

# Requirement 4 :
# Function to Calculate The Gaussian Probability Density.

    
    Probability density function (PDF) is a statistical expression that defines a probability distribution
    (the likelihood of an outcome) for a discrete random variable.
    
    A random variable X is said to be normally distributed with mean µ and variance (stddiv) σ
    probability density function  f(x) is given by:
    f(x) = (1 / sqrt(2 * PI) * σ) * exp(-((x-µ)^2 / (2 * σ^2)))
    
    Here, we need to calculate the probabilty density of each value Xk (Xk refers to value of attribute Ak for Tupple/Row X)
    the  mean of Attribute, A_mean = µ and the stddiv of Attribute A_stddiv = σ
    

In [23]:
# calculates the probability density of a value Xk 
# where mean= mean of  all values in the column and stddiv = standard deviation of all values in the column
def cal_Prob_Density(x, mean, stddiv): 
    exponent = math.exp(-((x-mean)**2 / (2 * stddiv**2 ))) #** means power
    
    return (1 / (math.sqrt(2 * math.pi) * stddiv)) * exponent

# Requirement 5: 
# Calculating Probabilty that X(a test case(row)) belongs to a class Ci

        To predict the class label of X(entire row) 
        P(X|Ci) * P(Ci) is evaluate =f for each class Ci
        the classifier predicts the class label of tupple X to be class Ci if and only if 
        P(X|Ci)*P(Ci) > P(X|Cj)*P(Cj)  for all i<=j<=m,j!=i (m=no of classes) 
        that is the predicted class label is the class label Ci for which P(X|Ci) * P(Ci) is maximum
        
        so we need to calculate
        say X = (x1,x2,.....xn)
        classLabels = C1,C2,...,Cn
        
        so for a class Ci P(X|Ci) = P(x1|ci)*P(x2|ci)*...*P(xn|Ci)

In [24]:
def cal_Class_Prob(Mean_std_bylabel, test_case): #Mean_std_bylabel is the dict we created using 'cal_Mean_StdDiv_by_class'
    probabilities = {} 
    for label, mean_std in Mean_std_bylabel.items(): 
        probabilities[label] = 1
        for i in range(len(mean_std)): 
            mean, std_div = mean_std[i] 
            x = test_case[i] 
            probabilities[label] *= cal_Prob_Density(x, mean, std_div) 
    return probabilities 

In [25]:
# first row from dataset is taken,to check which class gets max probabilty
#(test_case will never contain label hence Dataset_features is used)
X = Dataset_features[0] # X = first row or first example of the iris dataset 
#(first example has actual class = 0)
Probabilities = cal_Class_Prob(Mean_StdDiv_of_EachLabel,X)
Probabilities

{0.0: 8.210349205762657,
 1.0: 2.4967278599904396e-17,
 2.0: 1.8025267716032966e-24}

# Predicting Class lables:

    To predict the class label just select and return the class label that has max probability

In [26]:
def predict_label(Mean_std_bylabel, test_case): #returns class labels with hightest probability
    probabilities = cal_Class_Prob(Mean_std_bylabel, test_case) 
    Best_Label, Best_Prob = None, -1
    for key, probability in probabilities.items(): 
        if Best_Label is None or probability > Best_Prob: 
            Best_Prob = probability 
            Best_Label = key 
    return Best_Label

In [27]:
X = Dataset_features[0] # same example as above
#(first rows has actual class = 0 i.e iris - setosa)
prediction = predict_label(Mean_StdDiv_of_EachLabel,X)
prediction  # 0: iris setosa ; 1 : iris_versicolor ; 2 : iris - virginica

0.0

# Combining everything to build the classifier model 

    The Model : Naive_bayesian_classifier(Train,Test)
                parameter : Train = Training set contain features and class lebels
                            Test = Test set contains only the features
                            
    workflow:
            Using the class labels training set set divide/ group the data besed on class labels.
            For each group(class) we calculate the mean and std deviation of each feature.
            Using these mean and std deviation values :
            For each test case we calculate : The probabilites of that test case belonging to different classes.
            The class that gets the highest probabilty among all other classes is selected as the class label for that
            specific test case
            This is done for each test case in test_set
            The class with highest probability for each test case is saved in a list and this list is returned as the
            "Predicted labels" for the Test set

In [28]:
def Naive_bayesian_classifier(training_set,test_set):#note training set must contain the class label, test set only feature
    mean_std_eachlabel = cal_Mean_StdDiv_by_class(training_set)
    prediction = []
    for test_case in test_set:
        pred = predict_label(mean_std_eachlabel,test_case)
        prediction.append(pred)
        
    return prediction

# Testing the model: 
# preparing the dataset according to requirement of the Model
   
       we have created 'dataset_feature' & 'dataset_labels' previously
       so using train_test_split to divide them them into Train and Test sets
       
       The training set needs to contain features and labels together 
       Thus, we simple merge training_features and training_labels which we will get from Train_Test_split
       
       The Test set contains only features so no need to merge Test Features with Test labels
       The Test labels(actual labels): used to check accuracy of model by comparing with predicted labels(output of Model)

In [29]:
Training_features,Test_features,Training_labels,Test_labels = train_test_split(Dataset_features,Dataset_label,test_size = 0.3,random_state= 42)
print(len(Training_features))
print(len(Training_labels))
print(len(Test_features))
print(len(Test_labels))

105
105
45
45


In [30]:
Training_features[0:5]

array([[5.5, 2.4, 3.7, 1. ],
       [6.3, 2.8, 5.1, 1.5],
       [6.4, 3.1, 5.5, 1.8],
       [6.6, 3. , 4.4, 1.4],
       [7.2, 3.6, 6.1, 2.5]])

In [31]:
Training_labels[0:5]

array([1., 2., 2., 1., 2.])

In [32]:
# Create Training set by merging training features and labels
Training_set = np.column_stack((Training_features,Training_labels))
print(Training_set[0:5])
print()
print(len(Training_set))

[[5.5 2.4 3.7 1.  1. ]
 [6.3 2.8 5.1 1.5 2. ]
 [6.4 3.1 5.5 1.8 2. ]
 [6.6 3.  4.4 1.4 1. ]
 [7.2 3.6 6.1 2.5 2. ]]

105


# Using the classifier  model :

    Make Predictions on the Test_features and Evaluate performance using various Performance measures such as 
        accuracy,precison,recall,f1 score and the confusion Matrix

In [33]:
predicted_labels = Naive_bayesian_classifier(Training_set,Test_features)
predicted_labels

[1.0,
 0.0,
 2.0,
 1.0,
 1.0,
 0.0,
 1.0,
 2.0,
 1.0,
 1.0,
 2.0,
 0.0,
 0.0,
 0.0,
 0.0,
 2.0,
 2.0,
 1.0,
 1.0,
 2.0,
 0.0,
 2.0,
 0.0,
 2.0,
 2.0,
 2.0,
 2.0,
 2.0,
 0.0,
 0.0,
 0.0,
 0.0,
 1.0,
 0.0,
 0.0,
 2.0,
 1.0,
 0.0,
 0.0,
 0.0,
 2.0,
 1.0,
 1.0,
 0.0,
 0.0]

In [34]:
accuracy = accuracy_score(predicted_labels,Test_labels)
print("The accuracy of the Naive Bayesian Classifier on the Test set is = %.3f" %(accuracy))

The accuracy of the Naive Bayesian Classifier on the Test set is = 0.978


# Confusion Matrix And calculating Precision Recall and F1 Score

            In the Confusion Matrix : 'Iris-setosa': 0,'Iris-versicolor': 1,'Iris-virginica' : 2 (class Labels)
            Row represents actual Species while the columns represents the predicted Species

            1st row : 1st column :Actual Species = Iris-Setosa ; Predicted Species = Iris-Setosa
            1st row : 2nd column :Actual Species = Iris-Setosa ; Predicted Species = Iris-versicolor
            1st row : 3rd column :Actual Species = Iris-Setosa ; Predicted Species = Iris-virginica

            2nd row : 1st column :Actual Species = Iris-versicolor ; Predicted Species = Iris-Setosa
            2nd row : 2nd column :Actual Species = Iris-versicolor ; Predicted Species = Iris-versicolor
            2nd row : 3rd column :Actual Species = Iris-versicolor ; Predicted Species = Iris-virginica

            3rd row : 1st column :Actual Species = Iris-virginica ; Predicted Species = Iris-Setosa
            3rd row : 2nd column :Actual Species = Iris-virginica ; Predicted Species = Iris-versicolor
            3rd row : 3rd column :Actual Species = Iris-virginica ; Predicted Species = Iris-virginica

In [35]:
cm  = confusion_matrix(predicted_labels,Test_labels)
print(cm)

[[19  0  0]
 [ 0 12  0]
 [ 0  1 13]]


# Inference 

    The acurracy of the model on test set is 97.8% 
    from the Confusion matrix we see that only one prediction is incorrect
    one example of 'Iris-Versicolor' is wrongly predicted as 'Iris- Virginica'

# Precision Recall & F1 Score:

        Precison = True Postive/(Total predicted Positive) || True Postive/(True Positive + False Positive)
                   Precision talks about how precise/accurate the model is, i.e. out of total predicted positive,
                   how many of them are actual positive.

        Recall = True Positive/(Total actual Positive) || True Postive/(True Positive + False Negetive)
                 Recall calculates how many of the Actual Positives the model captures by labeling it 
                 as Positive (True Positive)

        F1 Score = (2 * Precision * Recall) / (Precision + Recall)

In [36]:
precision = precision_score(predicted_labels,Test_labels, average='macro')
print('Precision of the Naive Bayesian Classifier Model : %.3f' % precision)
recall = recall_score(predicted_labels,Test_labels, average='macro')
print('Recall of the Naive Bayesian Classifier Model : %.3f' %recall)
f1 = f1_score(predicted_labels,Test_labels, average='macro')
print('F1 Score of the Naive Bayesian Classifier Model : %.3f' %f1)

Precision of the Naive Bayesian Classifier Model : 0.974
Recall of the Naive Bayesian Classifier Model : 0.976
F1 Score of the Naive Bayesian Classifier Model : 0.974


# Making Side by Side Comparison of Actual And Predicted labels

# Coverting the numeric labels to their original form

In [37]:
species = {0.0 : 'Iris-setosa',1.0 : 'Iris-versicolor',2.0 : 'Iris-virginica'}
Test_labels = [species[item] for item in Test_labels]
predicted_labels = [species[item] for item in predicted_labels]

# Merging Actual and Predicted Species for side by side comparison

In [38]:
merged = list(zip(Test_labels,predicted_labels)) 
comparison = pd.DataFrame(merged, columns=['Actual_Specis(Test_set)','Predicted_Species(Test_set)'])
comparison #check index 15 (the only wrong prediction , same inference as confusion matrix )

Unnamed: 0,Actual_Specis(Test_set),Predicted_Species(Test_set)
0,Iris-versicolor,Iris-versicolor
1,Iris-setosa,Iris-setosa
2,Iris-virginica,Iris-virginica
3,Iris-versicolor,Iris-versicolor
4,Iris-versicolor,Iris-versicolor
5,Iris-setosa,Iris-setosa
6,Iris-versicolor,Iris-versicolor
7,Iris-virginica,Iris-virginica
8,Iris-versicolor,Iris-versicolor
9,Iris-versicolor,Iris-versicolor
