# IRIS FLOWER CLASSIFICATION 

### IMPORTING THE LIBRARIES

In [1]:
from sklearn.datasets import load_iris  #SKLEARN IS FOR DATA SCIENCE APPLICATIONS. #LOAD_IRIS DATASET 
from sklearn.model_selection import train_test_split #method for splitting our dataset
from sklearn.neighbors import KNeighborsClassifier #method for classifying using the K-Nearest Neighbor approach.
import numpy as np

### IMPORTING THE DATASET FROM SKLEARN

In [2]:
# Load the Iris dataset that ships with scikit-learn. The returned object is
# indexed by string key in the cells below ('data', 'target', 'target_names',
# 'feature_names').
iris_dataset = load_iris()

In [9]:
# Echo the raw dataset object so its keys and arrays can be inspected.
# NOTE(review): this prints the entire dataset, which makes for a very long
# output; consider showing only a slice (e.g. iris_dataset['data'][:5]).
iris_dataset

{'data': array([[5.1, 3.5, 1.4, 0.2],
        [4.9, 3. , 1.4, 0.2],
        [4.7, 3.2, 1.3, 0.2],
        [4.6, 3.1, 1.5, 0.2],
        [5. , 3.6, 1.4, 0.2],
        [5.4, 3.9, 1.7, 0.4],
        [4.6, 3.4, 1.4, 0.3],
        [5. , 3.4, 1.5, 0.2],
        [4.4, 2.9, 1.4, 0.2],
        [4.9, 3.1, 1.5, 0.1],
        [5.4, 3.7, 1.5, 0.2],
        [4.8, 3.4, 1.6, 0.2],
        [4.8, 3. , 1.4, 0.1],
        [4.3, 3. , 1.1, 0.1],
        [5.8, 4. , 1.2, 0.2],
        [5.7, 4.4, 1.5, 0.4],
        [5.4, 3.9, 1.3, 0.4],
        [5.1, 3.5, 1.4, 0.3],
        [5.7, 3.8, 1.7, 0.3],
        [5.1, 3.8, 1.5, 0.3],
        [5.4, 3.4, 1.7, 0.2],
        [5.1, 3.7, 1.5, 0.4],
        [4.6, 3.6, 1. , 0.2],
        [5.1, 3.3, 1.7, 0.5],
        [4.8, 3.4, 1.9, 0.2],
        [5. , 3. , 1.6, 0.2],
        [5. , 3.4, 1.6, 0.4],
        [5.2, 3.5, 1.5, 0.2],
        [5.2, 3.4, 1.4, 0.2],
        [4.7, 3.2, 1.6, 0.2],
        [4.8, 3.1, 1.6, 0.2],
        [5.4, 3.4, 1.5, 0.4],
        [5.2, 4.1, 1.5, 0.1],
  

### INFORMATION REGARDING OUR DATASET

In [10]:
# Summarise the dataset: class labels, feature columns, container type and size.
summary_rows = (
    ("Target names: {}", iris_dataset['target_names']),    # the three iris species
    ("Feature names: {}", iris_dataset['feature_names']),  # sepal/petal length and width
    ("Type of data: {}", type(iris_dataset['data'])),
    ("Shape of data: {}", iris_dataset['data'].shape),     # (samples, features)
)
for template, value in summary_rows:
    print(template.format(value))

Target names: ['setosa' 'versicolor' 'virginica']
Feature names: ['sepal length (cm)', 'sepal width (cm)', 'petal length (cm)', 'petal width (cm)']
Type of data: <class 'numpy.ndarray'>
Shape of data: (150, 4)


### TARGET ARRAY

In [12]:
# Show the integer class label stored for every sample.
target_labels = iris_dataset['target']
print("Target:\n{}".format(target_labels))

Target:
[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2
 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
 2 2]


#### The target array contains the integers 0 to 2, where 0 represents "setosa", 1 represents "versicolor", and 2 represents "virginica".

### Dividing the dataset into training data and test data

In [14]:
# Hold out part of the samples for evaluation. The fixed random_state makes the
# shuffle (and therefore the split) reproducible from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    iris_dataset['data'],
    iris_dataset['target'],
    random_state=0,
)

### Printing the shape of the training data

In [15]:
# Report the dimensions of the training features and training labels.
for template, split in (
    ("X_train shape: {}", X_train),
    ("y_train shape: {}", y_train),
):
    print(template.format(split.shape))

X_train shape: (112, 4)
y_train shape: (112,)


### Printing the shape of the test data

In [16]:
# Report the dimensions of the held-out features and held-out labels.
for template, split in (
    ("X_test shape: {}", X_test),
    ("y_test shape: {}", y_test),
):
    print(template.format(split.shape))

X_test shape: (38, 4)
y_test shape: (38,)


### KNN Classifier for the prediction

In [19]:
# Create the k-nearest-neighbours classifier. n_neighbors=1 configures it to
# consider a single neighbour when predicting.
KNN = KNeighborsClassifier(n_neighbors=1)

In [20]:
# Fit the classifier on the training split. The call is left as the cell's last
# expression, so the fitted estimator's repr is shown as the cell output.
KNN.fit(X_train, y_train)

KNeighborsClassifier(n_neighbors=1)

### Example of the prediction

In [31]:
# One hypothetical flower to classify, in the dataset's feature order:
# sepal length, sepal width, petal length, petal width (all in cm).
# It is wrapped in an outer list so the array is 2-D with shape (1, 4),
# i.e. a batch containing a single sample.
new_data = np.array([[6.8, 2.5, 4, 1.7]])

# Fix: this line previously read `X_new.shape`, but no `X_new` is defined in
# the notebook, so it raised NameError on a fresh kernel. The array is bound to
# `new_data`, which is also the name the prediction cell uses.
print("X_new.shape: {}".format(new_data.shape))

X_new.shape: (1, 4)


In [32]:
# Classify the new sample, then translate the numeric label into its species name.
prediction = KNN.predict(new_data)
print("Prediction: {}".format(prediction))

# Indexing target_names with the prediction array looks up the name per label.
predicted_names = iris_dataset['target_names'][prediction]
print("Predicted target name: {}".format(predicted_names))

Prediction: [1]
Predicted target name: ['versicolor']


### Accuracy of the Model

In [33]:
# Predict a label for every held-out sample.
y_pred = KNN.predict(X_test)
print("Test set predictions:\n {}".format(y_pred))

# Accuracy is the fraction of predictions equal to the true labels: the mean of
# the element-wise boolean comparison.
manual_accuracy = np.mean(y_pred == y_test)
print("Test set score (np.mean): {:.2f}".format(manual_accuracy))

Test set predictions:
 [2 1 0 2 0 2 0 1 1 1 2 1 1 1 1 0 1 1 0 0 2 1 0 0 2 0 0 1 1 0 2 1 0 2 2 1 0
 2]
Test set score (np.mean): 0.97


In [35]:
# Cross-check the accuracy using the estimator's own score() method.
estimator_accuracy = KNN.score(X_test, y_test)
print("Test set score (knn.score): {:.2f}".format(estimator_accuracy))

Test set score (knn.score): 0.97


#### This means the model made correct predictions for 97% of the test set (37 of the 38 held-out samples).