In [11]:
import numpy as np
import cv2 as cv
# Load the digit sheet and convert it to a single-channel grayscale image.
# cv.imread does not raise when the file is missing or unreadable; it returns
# None, so fail here with a clear message instead of deep inside cvtColor.
img = cv.imread('mnist.png')
if img is None:
    raise FileNotFoundError("could not read image 'mnist.png'")
gray = cv.cvtColor(img,cv.COLOR_BGR2GRAY)

# Split the image into 5000 cells (50 rows x 100 columns), each 20x20 pixels
cells = [np.hsplit(row,100) for row in np.vsplit(gray,50)]

# Make it into a NumPy array: its shape will be (50,100,20,20)
x = np.array(cells)
print(x.shape)

# Prepare the training and test data: the left 50 columns of cells become the
# training set and the right 50 columns the test set. Every 20x20 cell is
# flattened into a 400-element float32 feature vector.
train = x[:,:50].reshape(-1,400).astype(np.float32) # Size = (2500,400)
test = x[:,50:100].reshape(-1,400).astype(np.float32) # Size = (2500,400)

# Create labels for train and test data: each value 0..9 is repeated for 250
# consecutive samples, giving a (2500,1) column vector.
# NOTE(review): assumes the sheet lists the digits 0-9 in order, 5 rows per
# digit - confirm against mnist.png.
k = np.arange(10)
train_labels = np.repeat(k,250)[:,np.newaxis]
test_labels = train_labels.copy()

# Initiate kNN, train it on the training data, then classify the test data
# using the 5 nearest neighbours (k=5)
knn = cv.ml.KNearest_create()
knn.train(train, cv.ml.ROW_SAMPLE, train_labels)
ret,result,neighbours,dist = knn.findNearest(test,k=5)

# Check the accuracy of classification: percentage of test samples whose
# predicted label equals the expected label
matches = result==test_labels
correct = np.count_nonzero(matches)
accuracy = correct*100.0/result.size
print( accuracy )

(50, 100, 20, 20)
91.76


In [31]:
# Persist the training samples together with their labels in one .npz archive
np.savez('knn_data.npz', train=train, train_labels=train_labels)

# Re-open the archive, list the arrays it holds, and restore both from it
with np.load('knn_data.npz') as data:
    print( data.files )
    train, train_labels = data['train'], data['train_labels']

['train', 'train_labels']


In [17]:
# Shape of the left-hand 50 columns of cells (the slice used for training)
np.shape(x[:, :50])

(50, 50, 20, 20)

In [19]:
# Show the dimensions of the training label array
print(np.shape(train_labels))

(2500, 1)


In [32]:
# Query the 5 nearest training samples for every test vector again and print
# the distances findNearest reports for them (one row per test sample)
ret, result, neighbours, dist = knn.findNearest(test, k=5)
print(dist)

[[ 361955.  598018.  606136.  608480.  610345.]
 [ 790595.  959768. 1055851. 1065401. 1068785.]
 [ 534520.  641045.  681043.  732508.  751514.]
 ...
 [ 632329.  646093.  712409.  723294.  774745.]
 [ 464467.  784272.  872704.  873413.  932973.]
 [1315263. 1358024. 1463505. 1521671. 1720079.]]
