In [1]:
# Function: tf.nn.in_top_k can be used for different purposes; one common usage is calculating the
# accuracy on the training/dev/test sets.
# When accuracy is calculated, the typical usage is as follows: tf.nn.in_top_k(logits, Y, 1)
# where logits is simply ZL (the linear output of the output layer of the neural network)
# and Y holds the index of the correct class for each sample (not a one-hot vector).

In [2]:
import tensorflow as tf

In [3]:
# NOTE: when calculating (top-1) accuracy, the third argument (k) is 1, because we want to check whether
# the highest-scoring class is the correct one, i.e. whether the prediction is correct or not.

In [4]:
# EXAMPLE: tf.nn.in_top_k applied to ONE single training sample.
#
# There are 6 classes and the NN model is trying to estimate the right one.
# Assume softmax regression is used in the output layer, so `logit` holds ZL
# (the linear output of the last layer) for that one sample.
logit = tf.constant([[1.0, 0.7, 2.0, 2.5, 2.7, -6.3]])
# Ranking of the scores in logit[0], largest first:
#   1st -> 2.7 at index 4
#   2nd -> 2.5 at index 3
#   3rd -> 2.0 at index 2
# Sorting all class indices by score (largest to smallest) gives [4, 3, 2, 0, 1, 5].

# One-hot encoding of the correct class, i.e. what the model should predict.
label = tf.constant([[0, 0, 1, 0, 0, 0]])
# in_top_k is fed class indices here, so the one-hot row is converted to the
# position of its largest entry. For this label that is [2].
label_argmax = tf.argmax(label, 1)

# tf.nn.in_top_k(logit, label_argmax, k) asks, for each sample:
#   "Is the score of the correct class among the k largest scores of that sample?"
# Here the correct class is index 2, whose score logit[0, 2] = 2.0 is the THIRD largest.

# k = 1: is 2.0 the single largest score? No (2.7 is).
a = tf.nn.in_top_k(logit, label_argmax, k=1)  # [False]

# k = 2: is 2.0 one of the two largest scores? Still no (2.7 and 2.5 are).
b = tf.nn.in_top_k(logit, label_argmax, k=2)  # [False]

# k = 3: is 2.0 one of the three largest scores? Yes, it is exactly the third.
c = tf.nn.in_top_k(logit, label_argmax, k=3)  # [True]

# Evaluate all four tensors in one session call and print them in order.
with tf.Session() as sess:
    for evaluated in sess.run([label_argmax, a, b, c]):
        print(evaluated)

[2]
[False]
[False]
[ True]


In [5]:
# EXAMPLE: tf.nn.in_top_k applied to a batch of 3 training samples (3 classes each).

# One row of scores per sample. The index of the largest score in each row is
# the model's prediction for that sample, so the predictions are [2, 0, 1].
logits = tf.constant([[1.0, 0.7, 2.0], [9.0, 8.0, 7.0], [4.0, 6.0, 5.0]])


# One-hot rows holding the correct class of each sample.
labels = tf.constant([[0, 0, 1], [0, 0, 1], [0, 1, 0]])
# Convert every one-hot row to the index of its largest entry: [2, 2, 1].
labels_argmax = tf.argmax(labels, 1)

# With k = 1 this checks, sample by sample, whether the prediction [2, 0, 1]
# matches the correct class [2, 2, 1].
are_predictions_correct = tf.nn.in_top_k(logits, labels_argmax, k=1)  # [True, False, True]
# Accuracy = fraction of correct predictions: cast the booleans to 1.0 / 0.0 and average.
correct_as_float = tf.cast(are_predictions_correct, tf.float32)
accuracy = tf.reduce_mean(correct_as_float)

# Evaluate the three tensors in one session call and print them in order.
with tf.Session() as sess:
    for evaluated in sess.run([labels_argmax, are_predictions_correct, accuracy]):
        print(evaluated)


[2 2 1]
[ True False  True]
0.6666667
