In [31]:
import tensorflow as tf
import numpy as np

In [32]:
# The MNIST loader lives in the tutorial examples bundled with TensorFlow.
from tensorflow.examples.tutorials.mnist import input_data
# read_data_sets consumes the gzip-compressed MNIST files under "mnist_data/" directly
# (the "Extracting ..." lines printed below come from this call).
# one_hot=True: each label is a one-hot vector; later cells recover the digit with np.argmax.
mnist = input_data.read_data_sets(train_dir="mnist_data/", one_hot=True)

Extracting mnist_data/train-images-idx3-ubyte.gz
Extracting mnist_data/train-labels-idx1-ubyte.gz
Extracting mnist_data/t10k-images-idx3-ubyte.gz
Extracting mnist_data/t10k-labels-idx1-ubyte.gz


In [33]:
# Keep the brute-force search cheap by working on a subset: 5000 training examples form the
# neighbour pool and 200 test examples are classified against it.
# (The full MNIST training split has 60,000 images; read_data_sets may hold some of them out
# for validation - TODO confirm the exact size of mnist.train.)
# next_batch returns an (images, labels) pair.
training_digits, training_labels = mnist.train.next_batch(batch_size=5000)
test_digits, test_labels = mnist.test.next_batch(batch_size=200)

In [34]:
# Symbolic inputs of the nearest-neighbour graph. Pixel intensities are grayscale values, so
# both placeholders are 32-bit floats ("float" and tf.float32 name the same TensorFlow dtype).
# Training side: the leading dimension is None because the number of images fed in is not
# fixed in advance; every row is one image flattened to 784 values.
training_digits_pl = tf.placeholder(tf.float32, shape=[None, 784])
# Query side: exactly ONE test image per run, as a flat vector of 784 values.
test_digits_pl = tf.placeholder(tf.float32, shape=[784])

In [35]:
# Nearest-neighbour calculation using the L1 (Manhattan) distance.
# The single test digit (784 values) is broadcast against every training row, giving the
# per-pixel absolute difference |train - test| for each training image.
l1_distance = tf.abs(training_digits_pl - test_digits_pl)



In [36]:
# Collapse the per-pixel differences of each training image into one number: summing along
# axis 1 (the pixel axis) yields the L1 distance between that training image and the test image.
distance = tf.reduce_sum(input_tensor=l1_distance, axis=1)

In [37]:
# K = 1 in the KNN algorithm: the prediction is the position of the smallest distance,
# i.e. the index of the single nearest training image.
pred = tf.argmin(distance, axis=0)  # older TensorFlow releases spell this tf.arg_min()

In [38]:
# Classify every test digit with the 1-nearest-neighbour graph and report the accuracy.
accuracy = 0.
# Count hits as an integer and divide once at the end; repeatedly adding 1/len(test_digits)
# accumulates floating-point rounding error in the reported accuracy.
correct = 0
num_test = len(test_digits)

# Initializer op for any graph variables (the cells shown only create placeholders and ops).
init = tf.global_variables_initializer()

with tf.Session() as sess:
    sess.run(init)

    # Loop over the test data, one image per run (test_digits_pl holds a single image).
    for i in range(num_test):
        # nn_index is the ROW INDEX, within training_digits, of the training image closest
        # to test image i - it is not a digit and not a one-hot vector.
        nn_index = sess.run(pred, feed_dict={training_digits_pl: training_digits,
                                             test_digits_pl: test_digits[i, :]})
        # Labels are one-hot encoded, so np.argmax recovers the digit (0-9):
        #   training_labels[nn_index] -> label of the nearest neighbour = the prediction
        #   test_labels[i]            -> true label of the test image
        predicted_digit = np.argmax(training_labels[nn_index])
        true_digit = np.argmax(test_labels[i])
        print("Test", i, "Prediction:", predicted_digit, "True Label:", true_digit)
        if predicted_digit == true_digit:
            correct += 1

    # Guard the division so an empty test set reports 0.0, as the original accumulation did.
    if num_test:
        accuracy = float(correct) / num_test
    print("Done!")
    print("Accuracy:", accuracy)

Test 0 Prediction: 4 True Label: 4
Test 1 Prediction: 5 True Label: 5
Test 2 Prediction: 2 True Label: 2
Test 3 Prediction: 3 True Label: 3
Test 4 Prediction: 4 True Label: 5
Test 5 Prediction: 3 True Label: 3
Test 6 Prediction: 2 True Label: 2
Test 7 Prediction: 2 True Label: 2
Test 8 Prediction: 0 True Label: 0
Test 9 Prediction: 5 True Label: 5
Test 10 Prediction: 0 True Label: 0
Test 11 Prediction: 2 True Label: 2
Test 12 Prediction: 1 True Label: 1
Test 13 Prediction: 9 True Label: 9
Test 14 Prediction: 5 True Label: 5
Test 15 Prediction: 5 True Label: 3
Test 16 Prediction: 2 True Label: 2
Test 17 Prediction: 3 True Label: 3
Test 18 Prediction: 4 True Label: 4
Test 19 Prediction: 8 True Label: 8
Test 20 Prediction: 1 True Label: 1
Test 21 Prediction: 4 True Label: 4
Test 22 Prediction: 2 True Label: 2
Test 23 Prediction: 7 True Label: 7
Test 24 Prediction: 1 True Label: 1
Test 25 Prediction: 2 True Label: 2
Test 26 Prediction: 3 True Label: 3
Test 27 Prediction: 7 True Label: 7
Te