## Quick look at the iris_dataset

In [7]:
from sklearn.neighbors import KNeighborsClassifier
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn import datasets

# Load the iris data, then hold part of it out for testing.
#   X / y             : every feature row (input) and its class label (output)
#   X_train / y_train : the portion the model is fitted on
#   X_test  / y_test  : the held-out portion used only for evaluation
iris_dataset = datasets.load_iris()
X, y = iris_dataset["data"], iris_dataset["target"]

# random_state=0 pins the shuffle so the same split is produced on every run.
# No test_size is passed, so scikit-learn's default split proportion applies.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=0,
)

In [11]:
# Each print below ends with "\n\n" so that sections are separated by one blank line.

# Species names; the integer labels in y_train / y_test index into this array.
print("Target names (the names of the 3 classes):{}".format(iris_dataset.target_names), end="\n\n")

# Meaning of the columns in every row of X_train / X_test.
print(
    "Feature names (4 parameters of each row of X_train, X_test data):\n{}".format(
        iris_dataset.feature_names
    ),
    end="\n\n",
)

# Training split preview: first five feature rows and their matching labels.
print("TRAINING DATA...")
print("1. X_train data:\n", X_train[:5], end="\n\n")
print(
    "2. y_train data (= 0, 1, or 2 which is associated to the above target names):\n",
    y_train[:5],
    end="\n\n",
)

# Test split preview: first five feature rows, followed by the labels.
# Note: unlike the previews above, y_test is printed in full (not sliced).
print("TESTING DATA...")
print("1. X_test data:\n", X_test[:5], end="\n\n")
print(
    "2. y_test data (= 0, 1, or 2 which is associated to the above target names):\n",
    y_test,
)

Target names (the names of the 3 classes):['setosa' 'versicolor' 'virginica']

Feature names (4 parameters of each row of X_train, X_test data):
['sepal length (cm)', 'sepal width (cm)', 'petal length (cm)', 'petal width (cm)']

TRAINING DATA...
1. X_train data:
 [[5.9 3.  4.2 1.5]
 [5.8 2.6 4.  1.2]
 [6.8 3.  5.5 2.1]
 [4.7 3.2 1.3 0.2]
 [6.9 3.1 5.1 2.3]]

2. y_train data (= 0, 1, or 2 which is associated to the above target names):
 [1 1 2 0 2]

TESTING DATA...
1. X_test data:
 [[5.8 2.8 5.1 2.4]
 [6.  2.2 4.  1. ]
 [5.5 4.2 1.4 0.2]
 [7.3 2.9 6.3 1.8]
 [5.  3.4 1.5 0.2]]

2. y_test data (= 0, 1, or 2 which is associated to the above target names):
 [2 1 0 2 0 2 0 1 1 1 2 1 1 1 1 0 1 1 0 0 2 1 0 0 2 0 0 1 1 0 2 1 0 2 2 1 0
 1]


## K-Nearest-Neighbors (KNN):
- Fit the model on the training data set
- Make a prediction for a new input
- Evaluate the model on the test data set


In [13]:
# Fit a nearest-neighbour classifier on the training split.
# n_neighbors=1: only the single closest training sample is consulted per prediction.
knn = KNeighborsClassifier(n_neighbors=1).fit(X_train, y_train)

# One unseen flower, given as a 2-D array (one row, four features) in the
# feature order of the data set: sepal length, sepal width, petal length, petal width.
X_new = np.array([[5, 2.9, 1, 0.2]])
print("X_new (an unknown iris flower) shape:{}".format(X_new.shape))

# predict returns an array of class indices; indexing target_names with it
# turns those indices into the species names.
prediction = knn.predict(X_new)
print("Prediction (class):{}".format(prediction))
print("Predicted target name:{}".format(iris_dataset["target_names"][prediction]), end="\n\n")

# Classify every flower in the held-out test split.
y_pred = knn.predict(X_test)

# Accuracy computed by hand: the fraction of predictions that equal the true label.
print("Test set score using the np.mean method:{:.2f}".format((y_pred == y_test).mean()))

# The same figure via the estimator's own score method (the commonly used route).
print("Test set score using the knn.score method:{:.2f}".format(knn.score(X_test, y_test)))

# Note: accuracy on this split is 97%. Tuning the model is the next step to try for better performance.

X_new (an unknown iris flower) shape:(1, 4)
Prediction (class):[0]
Predicted target name:['setosa']

Test set score using the np.mean method:0.97
Test set score using the knn.score method:0.97
