In [None]:
#Supervised learning algorithm

'''
Q: What class does the new data point belong to?

Ans: 
Measure its distance to every labeled data point in R^n,
find the k closest labeled points,
then assign it the class those neighbors belong to.

If the k neighbors happen to have different classes, we can do one of two things:
    1. majority vote
    2. majority vote weighted by distance to the sample point


The distance metric is very IMPORTANT; it can make or break the algorithm.
    Euclidean distance: sqrt(sum((x - y)^2))
    Manhattan distance: sum(|x - y|)
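For discrete values: either map them to continuous values or use a metric defined on discrete data. For example, for equal-length text use the Hamming distance (the number of positions at which the two strings differ).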

'''

In [13]:
from __future__ import print_function

import numpy as np
import tensorflow as tf

# Import MNIST data
from tensorflow.examples.tutorials.mnist import input_data

# Number of nearest neighbours that vote on each prediction
# (used as `k` for tf.nn.top_k when the graph is built).
K = 4
# Load MNIST from /tmp/data/; one_hot=True makes each label a one-hot vector,
# which is why the code below recovers class ids with argmax.
mnist = input_data.read_data_sets("/tmp/data/", one_hot=True)

Extracting /tmp/data/train-images-idx3-ubyte.gz
Extracting /tmp/data/train-labels-idx1-ubyte.gz
Extracting /tmp/data/t10k-images-idx3-ubyte.gz
Extracting /tmp/data/t10k-labels-idx1-ubyte.gz


In [14]:
# Use the complete MNIST training and test splits.
# The arrays are read directly from the dataset objects rather than through
# next_batch(): next_batch is a stateful iterator, so re-running this cell would
# advance (and possibly reshuffle) it, whereas .images/.labels always give the
# same arrays.
Xtr, Ytr = mnist.train.images, mnist.train.labels  # whole training set
Xte, Yte = mnist.test.images, mnist.test.labels  # whole test set

# tf Graph Input
# xtr / ytr: all training images (784 pixels each) and their one-hot labels (10 classes)
xtr = tf.placeholder(tf.float32, [None, 784])
ytr = tf.placeholder(tf.float32, [None, 10])
# xte: a single flattened test image, classified one at a time
xte = tf.placeholder(tf.float32, [784])

In [15]:
# Negated Euclidean distance from the test sample to every training sample.
# Negation turns "smallest distance" into "largest value" so tf.nn.top_k can
# select the nearest neighbours. Operator overloads are used instead of
# tf.sub / tf.neg, which no longer exist in TensorFlow >= 1.0.
distance = -tf.sqrt(tf.reduce_sum(tf.square(xtr - xte), reduction_indices=1))

# Prediction: indices of the K nearest training samples, closest first.
# sorted=True fixes the neighbour order so that a tied vote is resolved from a
# deterministic, nearest-first ordering instead of an arbitrary one.
values, indices = tf.nn.top_k(distance, k=K, sorted=True)

# Class id of each neighbour: gather their one-hot label rows in one op and
# take the argmax along the class axis (replaces a per-neighbour Python loop).
neighbors_tensor = tf.argmax(tf.gather(ytr, indices), 1)

# Majority vote: `y` holds the distinct class ids in order of first appearance
# and `count` how often each occurs among the K neighbours.
y, idx, count = tf.unique_with_counts(neighbors_tensor)
# NOTE(review): on a tie argmax is expected to pick the first maximal count,
# i.e. the class of the nearer neighbour - confirm for the TF version in use.
pred = tf.gather(y, tf.argmax(count, 0))

accuracy = 0.

In [16]:
# Initializing the variables
# (tf.initialize_all_variables is deprecated in favour of this call; no
# tf.Variable is created in the visible graph, so running it is harmless.)
init = tf.global_variables_initializer()

# Tally of correct predictions. It is reset here, inside the evaluation cell,
# so re-running the cell never builds on the result of a previous run.
num_test = len(Xte)
num_correct = 0

# Launch the graph
with tf.Session() as sess:
    sess.run(init)

    # loop over test data
    for i in range(num_test):
        # Majority-vote class of the K nearest training samples for test sample i
        predicted_class = sess.run(pred, feed_dict={xtr: Xtr, ytr: Ytr, xte: Xte[i, :]})
        # Labels are one-hot, so argmax recovers the true class id
        true_class = np.argmax(Yte[i])
        print("Test", i, "Prediction:", predicted_class,
              "True Class:", true_class)
        if predicted_class == true_class:
            num_correct += 1

    # Divide once at the end instead of summing 1/len(Xte) per hit, which
    # accumulates floating-point error; guard against an empty test set.
    accuracy = float(num_correct) / num_test if num_test else 0.
    print("Done!")
    print("Accuracy:", accuracy)

Instructions for updating:
Use `tf.global_variables_initializer` instead.
Test 0 Prediction: 7 True Class: 7
Test 1 Prediction: 2 True Class: 2
Test 2 Prediction: 1 True Class: 1
Test 3 Prediction: 0 True Class: 0
Test 4 Prediction: 4 True Class: 4
Test 5 Prediction: 1 True Class: 1
Test 6 Prediction: 4 True Class: 4
Test 7 Prediction: 9 True Class: 9
Test 8 Prediction: 5 True Class: 5
Test 9 Prediction: 9 True Class: 9
Test 10 Prediction: 0 True Class: 0
Test 11 Prediction: 6 True Class: 6
Test 12 Prediction: 9 True Class: 9
Test 13 Prediction: 0 True Class: 0
Test 14 Prediction: 1 True Class: 1


KeyboardInterrupt: 