In [1]:
import tensorflow as tf
import numpy as np

# You have a process that outputs a list of values, where bigger is better.
# You output a list of numbers, and hope that your biggest values are in
# the same positions as the true biggest. The question is: amongst your max values,
# do any match the positions of the true top values? Are any amongst the top 2? Etc.

# For example, if the true values are [1, 4, 8, 9] and your values are [1, 0, 1, 1],
# then among your top values (in positions 0, 2, 3), position 3 does correspond
# to the highest true value. Position 2 corresponds to the second highest. And position
# 0 only matches the lowest true value. So the answer to the question "are any of
# your top answers amongst the top 1?" is yes.

# If true is [0, 1, 0, 0] then your top answers are only amongst the true top 3.

# We want to run multiple tests at once. Given an array of true arrays, and a
# corresponding array of predicted arrays, and a k representing the question
# "are any of your top answers amongst the true top k?", return an array of
# answers.

In [115]:
# True scores, one row per test case; the row maximum (ties allowed) marks
# the correct position(s).
true = np.array([
    [0, 0, 0, 2],
    [1, 0, 1, 2],
    [0, 0, 1, 0],
    [0, 1, 0, 1],
])

# Predicted scores for the same test cases. The trailing note on each row is
# the smallest k at which one of the top-k predicted positions lands on a
# maximum of the matching `true` row.
pred = np.array([
    [4, 6, 2, 7],  # True for k = 1+
    [1, 4, 8, 9],  # True for k = 1+
    [8, 2, 4, 3],  # True for k = 2+
    [6, 2, 7, 1],  # True for k = 3+
])

In [137]:
def select_indices(arrays, indices):
    """Pick, row by row, the entries of a 2D array at the given column indices.

    Row ``i`` of the result holds ``arrays[i, j]`` for every ``j`` listed in
    ``indices[i]``. For example, with

        arrays  = [[True, True, False],
                   [False, False, True]]
        indices = [[0, 1],   # columns 0 and 1 of the first row
                   [1, 2]]   # columns 1 and 2 of the second row

    the result is

        [[True, True],
         [False, True]]
    """
    row_ids = np.arange(arrays.shape[0])
    # Index with the transposed column table so the row ids broadcast along
    # its last axis, then flip the result back to one row per input row.
    picked = arrays[row_ids, indices.T]
    return picked.T

# Sanity check: reproduce the worked example from select_indices' comment.
a = np.array([[True, True, False],
              [False, False, True]])
b = np.array([[0, 1],
              [1, 2]])
selected = select_indices(a, b)
assert np.array_equal(selected, np.array([[True, True], [False, True]]))

def in_top_k(true, pred, k=2):
    """Mark which of the top-k predicted positions hold a true maximum.

    Args:
        true: 2D array of true scores, one row per test case. The maximum of
            each row (ties allowed) marks the correct position(s).
        pred: 2D array of predicted scores, aligned with `true` row by row
            and position by position.
        k: How many of the highest-scoring positions of each `pred` row to
            inspect.

    Returns:
        A boolean array with one row per test case and `k` columns. An entry
        is True when the corresponding selected position of `pred` is a
        maximum of the matching row of `true`. The columns follow the order
        produced by `np.argpartition`; they are not sorted by score.

    Raises:
        ValueError: If `k` is negative or exceeds the number of columns.
    """
    # Everything below is plain NumPy, so no TensorFlow session is opened.
    # (A TensorFlow alternative for the index step would be
    # `tf.nn.top_k(tf.constant(pred), k, sorted=True)` evaluated in a session.)
    true = np.asarray(true)
    pred = np.asarray(pred)

    n_rows, n_cols = pred.shape
    if not 0 <= k <= n_cols:
        raise ValueError(
            "k must be between 0 and the number of columns (%d), got %r"
            % (n_cols, k))

    if k == 0:
        # -0 == 0, so the slice [:, -k:] would keep every column rather than
        # none; build the empty selection explicitly instead.
        top_idx = np.empty((n_rows, 0), dtype=np.intp)
    else:
        # argpartition places the indices of the k largest values of each row
        # in the last k slots (in no particular order among themselves).
        # See https://stackoverflow.com/a/23734295/5175433
        top_idx = np.argpartition(pred, -k)[:, -k:]

    # Replace every true value with a bool saying whether it equals its row
    # maximum, e.g. [0 1 2 2] -> [False False True True]. keepdims=True keeps
    # the maxima as a column so the comparison broadcasts across each row.
    is_true_max = true == true.max(axis=1, keepdims=True)

    # Keep only the flags at the top-k predicted positions; callers can then
    # check whether any of them is True.
    return select_indices(is_true_max, top_idx)

def _in_top_k(true, pred, k=2):
    """Return, per row, whether any top-k predicted position holds a true maximum.

    Delegates to `in_top_k` for the per-position flags and collapses each row
    to a single boolean.
    """
    hits = in_top_k(true, pred, k)
    # Look at no more than the first k columns of the flags, then reduce each
    # row with a logical "any".
    first_k = hits[:, :k]
    return first_k.any(axis=1)

In [138]:
# k = 1: only the first two rows have a true maximum at their single best
# predicted position.
top = _in_top_k(true, pred, k=1)
assert np.array_equal(top, [True, True, False, False])

# k = 2: the third row now also hits a true maximum.
top = _in_top_k(true, pred, k=2)
assert np.array_equal(top, [True, True, True, False])

# k = 3: every row hits a true maximum.
top = _in_top_k(true, pred, k=3)
assert np.array_equal(top, [True, True, True, True])

[ True  True False False]
