# This Code depicts how to evaluate the Performance Metrics of the Iris Dataset - Rubab Karim

Iris: 150 samples, 4 features, 3 classes

In [1]:
import sklearn
import numpy as np

from sklearn import datasets
from sklearn.model_selection import train_test_split

from sklearn.metrics import confusion_matrix
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score, accuracy_score, f1_score

import pandas as pd
from collections import Counter

from sklearn.preprocessing import LabelEncoder

In [2]:
k = 5

def euclidean_distance(v1, v2):
    return np.sqrt(np.sum(     (v1 - v2) ** 2    ))

def predict(test_x):
    
    ## calculate distances between test_x and all data samples in X
    distances = [ euclidean_distance(test_x, x )  for x in X_train   ]
    
    ## distances is a vector of 30 distances 
    ## distances [23, 2,  145, 23  , 5,   17 , 890, ....]  =>>  []
               
    
    ## sort by distance and return the k closest neighbors
    ## argsort returns the indices of the k nearest neighbors
    k_neighbor_indeces =   np.argsort(   distances   )[:k]
    
    ## extract labels from y_train
    labels = [    y_train[i]  for i in k_neighbor_indeces   ]
    ## imagine labels = [1, 1, 1, 0, 1]
    

    ##select the most common label in labels
    most_common_label = Counter(labels).most_common(1)

    return most_common_label

### We then load the raw data for preprocessing

In [3]:
## dataset: Iris 150 samples, 4 features, 3 classes

df = pd.read_csv('data/iris.data.csv', header=None)

X = df.loc[1:, :3].values
X = X.astype(float)

y = df.loc[1:,  4 ].values

le = LabelEncoder()
y = le.fit_transform(   y   )  

In [4]:
print(df)

       0    1    2    3               4
0    5.1  3.5  1.4  0.2     Iris-setosa
1    4.9  3.0  1.4  0.2     Iris-setosa
2    4.7  3.2  1.3  0.2     Iris-setosa
3    4.6  3.1  1.5  0.2     Iris-setosa
4    5.0  3.6  1.4  0.2     Iris-setosa
..   ...  ...  ...  ...             ...
145  6.7  3.0  5.2  2.3  Iris-virginica
146  6.3  2.5  5.0  1.9  Iris-virginica
147  6.5  3.0  5.2  2.0  Iris-virginica
148  6.2  3.4  5.4  2.3  Iris-virginica
149  5.9  3.0  5.1  1.8  Iris-virginica

[150 rows x 5 columns]


In [5]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.3, random_state=42 )

print(y_test)
print(y_train)
print(  X_train   )
print(  X_test    )

[1 0 2 1 1 0 1 2 1 1 2 0 0 0 0 1 2 1 1 2 0 2 0 2 2 2 2 2 0 0 0 0 1 0 0 2 1
 0 0 0 2 1 1 0 0]
[1 2 2 1 2 1 2 1 0 2 1 0 0 0 1 2 0 0 0 1 0 1 2 0 1 2 0 1 2 1 1 2 1 0 2 2 1
 0 1 1 0 2 0 0 1 1 1 2 2 1 0 0 2 2 0 0 0 1 2 0 2 2 0 1 1 2 1 2 0 2 1 2 1 1
 1 0 1 1 0 1 2 2 0 1 2 2 0 2 0 1 2 2 2 2 1 1 2 2 0 1 2 0 1 2]
[[5.8 2.7 3.9 1.2]
 [6.3 2.8 5.1 1.5]
 [6.8 3.2 5.9 2.3]
 [6.8 2.8 4.8 1.4]
 [7.2 3.6 6.1 2.5]
 [5.7 2.9 4.2 1.3]
 [7.6 3.  6.6 2.1]
 [5.8 2.7 4.1 1. ]
 [4.9 3.  1.4 0.2]
 [5.8 2.7 5.1 1.9]
 [6.2 2.2 4.5 1.5]
 [4.7 3.2 1.6 0.2]
 [4.5 2.3 1.3 0.3]
 [4.8 3.  1.4 0.3]
 [5.9 3.  4.2 1.5]
 [6.3 2.7 4.9 1.8]
 [5.  3.  1.6 0.2]
 [5.  3.4 1.6 0.4]
 [4.8 3.4 1.9 0.2]
 [5.6 2.7 4.2 1.3]
 [5.  3.5 1.3 0.3]
 [5.7 3.  4.2 1.2]
 [6.9 3.1 5.4 2.1]
 [5.3 3.7 1.5 0.2]
 [6.2 2.9 4.3 1.3]
 [5.7 2.5 5.  2. ]
 [4.9 3.1 1.5 0.1]
 [5.7 2.8 4.1 1.3]
 [6.3 2.5 5.  1.9]
 [6.1 2.9 4.7 1.4]
 [6.  3.4 4.5 1.6]
 [6.9 3.2 5.7 2.3]
 [6.5 2.8 4.6 1.5]
 [4.6 3.4 1.4 0.3]
 [6.2 3.4 5.4 2.3]
 [6.  3.  4.8 1.8]
 [7.  3.2 4

### Defining the functions for Predicting

In [6]:
def accuracy(y_pred, y_test):
    accuracy_value = np.sum(y_pred == y_test) / len(y_test)
    return accuracy_value

In [7]:
def print_stats_percentage_train_test(algorithm_name, y_test, y_pred):    
     print("------------------------------------------------------")
     print("------------------------------------------------------")
    
     print("algorithm is: ", algorithm_name)
        
     print('Accuracy: %.2f' % accuracy_score(y_test,   y_pred) )
     
     confmat = confusion_matrix(y_true=y_test, y_pred=y_pred)
     print("confusion matrix")
     print(confmat)
     print('Precision: %.3f' % precision_score(y_true=y_test, y_pred=y_pred, average='weighted'))
     print('Recall: %.3f' % recall_score(y_true=y_test, y_pred=y_pred, average='weighted'))
     print('F1-measure: %.3f' % f1_score(y_true=y_test, y_pred=y_pred, average='weighted'))

In [8]:
list_of_pred_labels = []

for test_x in X_test:
    temp_pred = predict(test_x)   
    list_of_pred_labels.append(   temp_pred[0][0]  )
    
print(list_of_pred_labels)
print("true labels below")
print(y_test)

[1, 0, 2, 1, 2, 0, 1, 2, 1, 2, 2, 0, 0, 0, 0, 1, 2, 2, 1, 2, 0, 2, 0, 2, 2, 2, 2, 2, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 2, 1, 1, 0, 0]
true labels below
[1 0 2 1 1 0 1 2 1 1 2 0 0 0 0 1 2 1 1 2 0 2 0 2 2 2 2 2 0 0 0 0 1 0 0 2 1
 0 0 0 2 1 1 0 0]


In [9]:
y_pred = np.array(    list_of_pred_labels    )

print(y_pred)
print("true labels below")
print(y_test)

[1 0 2 1 2 0 1 2 1 2 2 0 0 0 0 1 2 2 1 2 0 2 0 2 2 2 2 2 0 0 0 0 1 0 0 1 1
 0 0 0 2 1 1 0 0]
true labels below
[1 0 2 1 1 0 1 2 1 1 2 0 0 0 0 1 2 1 1 2 0 2 0 2 2 2 2 2 0 0 0 0 1 0 0 2 1
 0 0 0 2 1 1 0 0]


### Here, we define the Regression Models that are going to be used in this analysis.
a) Linear Regression Model

In [10]:
def model_fn_linear():
    return nn.Linear(1, 1)

### b) Multi-Layer Perceptron

In [11]:
def model_fn_nn_1hidden():
    
    seq_model = nn.Sequential(
        nn.Linear(1599, 599),
        nn.ReLU(),
        
        nn.Linear(599, 1),
    
    )
    
   # optimizer = optim.SGD(
   #    model_fn_nn_1hidden.parameters(),
   #    lr=1e-3  )
    
    return seq_model

### c) Deep Neural Network with 2 hidden layers

In [12]:
def model_deep_learning_2hidden():
    seq_model = nn.Sequential(
        nn.Linear(1599, 1099),
        nn.ReLU(),
        
        nn.Linear(1099, 599),
        nn.Tanh(),
        
        nn.Linear(599, 1)
    )
    
    return seq_model

### d) Deep Neural Network with 4 hidden layers

In [13]:
def model_deep_learning_4hidden():
    seq_model = nn.Sequential(
        nn.Linear(1599, 1099),
        nn.ReLU(),
        
        nn.Linear(1099, 599),
        nn.Tanh(),
        
        nn.Linear(599, 199),
        nn.Tanh(),
        
        nn.Linear(199, 99),
        nn.Tanh(),
        
        nn.Linear(99, 1)
    )
    
    return seq_model

In [14]:
res = print_stats_percentage_train_test('model_fn_nn_1hidden', y_test, y_pred)      #knn

print(    res   )

------------------------------------------------------
------------------------------------------------------
algorithm is:  model_fn_nn_1hidden
Accuracy: 0.91
confusion matrix
[[19  0  0]
 [ 0 10  3]
 [ 0  1 12]]
Precision: 0.916
Recall: 0.911
F1-measure: 0.911
None
