# Iris flower classification

In [162]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn import neighbors, datasets
from sklearn.metrics import accuracy_score

In [160]:
iris_dataset = datasets.load_iris()
iris_X = iris_dataset.data
iris_Y = iris_dataset.target
print('Number of classes: %d' %len(np.unique(iris_Y)))
print('Number of data points: %d' %len(iris_Y)) 

X0 = iris_X[iris_Y == 0,:]
print('\nSamples from class 0:\n', X0[:5,:])

X1 = iris_X[iris_Y == 1,:]
print('\nSamples from class 1:\n', X1[:5,:]) 

X2 = iris_X[iris_Y == 2,:]
print('\nSamples from class 2:\n', X2[:5,:]) 

Number of classes: 3
Number of data points: 150

Samples from class 0:
 [[5.1 3.5 1.4 0.2]
 [4.9 3.  1.4 0.2]
 [4.7 3.2 1.3 0.2]
 [4.6 3.1 1.5 0.2]
 [5.  3.6 1.4 0.2]]

Samples from class 1:
 [[7.  3.2 4.7 1.4]
 [6.4 3.2 4.5 1.5]
 [6.9 3.1 4.9 1.5]
 [5.5 2.3 4.  1.3]
 [6.5 2.8 4.6 1.5]]

Samples from class 2:
 [[6.3 3.3 6.  2.5]
 [5.8 2.7 5.1 1.9]
 [7.1 3.  5.9 2.1]
 [6.3 2.9 5.6 1.8]
 [6.5 3.  5.8 2.2]]


In [182]:
from sklearn.model_selection import train_test_split

for training in 30, 50, 70, 80, 90:
    X_train, X_test, Y_train, Y_test = train_test_split(iris_X, iris_Y, train_size=int(training*150/100))

    print("Training size: %d (%d %%)" %(len(Y_train), training))
    print("Test size    : %d (%d %%)" %(len(Y_test), 100 - training))
    print('')

    for num_NN in 1,5,10:
        KNN_model = neighbors.KNeighborsClassifier(n_neighbors = num_NN, p = 2, weights='distance')
        KNN_model.fit(X_train, Y_train)
        y_pred = KNN_model.predict(X_test[0:15])

        print("Print results for 15 test data points:")
        print("Predicted labels: ", y_pred)
        print("Ground truth    : ", Y_test[0:15])

        print("Accuracy of %d-NN: %.2f %% \n" %(num_NN,(100*accuracy_score(Y_test[0:15], y_pred))))
 
    print('')

Training size: 45 (30 %)
Test size    : 105 (70 %)

Print results for 15 test data points:
Predicted labels:  [0 0 0 1 0 0 0 0 2 2 1 2 1 2 0]
Ground truth    :  [0 0 0 1 0 0 0 0 2 1 1 2 1 2 0]
Accuracy of 1-NN: 93.33 % 

Print results for 15 test data points:
Predicted labels:  [0 0 0 1 0 0 0 0 2 2 1 2 1 2 0]
Ground truth    :  [0 0 0 1 0 0 0 0 2 1 1 2 1 2 0]
Accuracy of 5-NN: 93.33 % 

Print results for 15 test data points:
Predicted labels:  [0 0 0 1 0 0 0 0 2 2 1 2 1 2 0]
Ground truth    :  [0 0 0 1 0 0 0 0 2 1 1 2 1 2 0]
Accuracy of 10-NN: 93.33 % 


Training size: 75 (50 %)
Test size    : 75 (50 %)

Print results for 15 test data points:
Predicted labels:  [2 1 0 1 0 2 2 1 2 2 2 0 2 2 1]
Ground truth    :  [2 1 0 1 0 2 2 1 2 1 2 0 2 2 1]
Accuracy of 1-NN: 93.33 % 

Print results for 15 test data points:
Predicted labels:  [2 1 0 1 0 2 2 1 2 2 2 0 2 2 1]
Ground truth    :  [2 1 0 1 0 2 2 1 2 1 2 0 2 2 1]
Accuracy of 5-NN: 93.33 % 

Print results for 15 test data points:
Predicted l

# Hand-writting letters recognition

In [199]:
# %reset
import numpy as np 
from mnist import MNIST # require `pip install python-mnist`
# https://pypi.python.org/pypi/python-mnist/

import matplotlib.pyplot as plt
from sklearn import neighbors
from sklearn.metrics import accuracy_score
import time

# you need to download the MNIST dataset first
# at: http://yann.lecun.com/exdb/mnist/
mndata = MNIST('MNIST') # path to your MNIST folder 
train_data, train_label = mndata.load_training()
test_data, test_label = mndata.load_testing()
X_train = train_data
X_test = test_data
Y_train = np.asarray(train_label)
Y_test = np.asarray(test_label)

print('Training size: %d' %len(Y_train))
print('Testing size : %d' %len(Y_test))

Training size: 60000
Testing size : 10000


In [202]:
for num_NN in 1,5,10:
    start_time = time.time()
    KNN_model = neighbors.KNeighborsClassifier(n_neighbors = num_NN, p = 2, weights = 'distance')
    KNN_model.fit(X_train, Y_train)
    Y_pred = KNN_model.predict(X_test)
    end_time = time.time()
    print("Accuracy of %d-NN for MNIST: %.2f %%" %(num_NN,(100*accuracy_score(Y_test, Y_pred))))
    print("Running time: %.2f (s)" % (end_time - start_time))
    print('')

Accuracy of 1-NN for MNIST: 96.91 %
Running time: 37.77 (s)

Accuracy of 5-NN for MNIST: 96.91 %
Running time: 40.67 (s)

Accuracy of 10-NN for MNIST: 96.84 %
Running time: 41.54 (s)

