In [2]:
import numpy as np
from numba import jit

In [3]:
@jit
def jit_loop(train, test):
    """
    Pairwise Euclidean distances between test points and training points,
    computed with explicit scalar loops over test rows, training rows and
    feature columns (no vectorised arithmetic inside the loops).

    Inputs:
    - train: A numpy array of shape (num_train, D) containing training data.
    - test: A numpy array of shape (num_test, D) containing test data.

    Returns:
    - A numpy array of shape (num_test, num_train) whose [i, j] entry is the
      Euclidean distance between the ith test point and the jth training
      point. """
    n_train = train.shape[0]
    n_test = test.shape[0]
    n_features = test.shape[1]
    sq_dists = np.zeros((n_test, n_train))
    for row in range(n_test):
        query = test[row]
        for col in range(n_train):
            ref = train[col]
            # Accumulate the squared difference one feature at a time.
            total = 0
            for f in range(n_features):
                delta = ref[f] - query[f]
                total += delta * delta
            sq_dists[row, col] = total
    # The root is taken once over the whole matrix of squared distances.
    return np.sqrt(sq_dists)

def compute_distances_two_loops(data, X):
    """
    Compute the distance between each test point in X and each training point
    in data using a nested loop over both the training data and the
    test data.

    Inputs:
    - data: A numpy array of shape (num_train, D) containing training data.
    - X: A numpy array of shape (num_test, D) containing test data.

    Returns:
    - dists: A numpy array of shape (num_test, num_train) where dists[i, j]
      is the Euclidean distance between the ith test point and the jth training
      point. """
    num_test = X.shape[0]
    num_train = data.shape[0]
    dists = np.zeros((num_test, num_train))
    # range (not the Python-2-only xrange) so this also runs on Python 3,
    # matching the loop style used by jit_loop.
    for i in range(num_test):
        for j in range(num_train):
            # Squared distance for one (test, train) pair; the root is taken
            # once over the whole matrix below.
            dists[i, j] = np.sum((data[j] - X[i])**2)
    return np.sqrt(dists)

def compute_distances_one_loop(data, X):
    """
    Compute the distance between each test point in X and each training point
    in data using a single loop over the test data.

    Inputs:
    - data: A numpy array of shape (num_train, D) containing training data.
    - X: A numpy array of shape (num_test, D) containing test data.

    Returns:
    - dists: A numpy array of shape (num_test, num_train) where dists[i, j]
      is the Euclidean distance between the ith test point and the jth training
      point. """
    num_test = X.shape[0]
    num_train = data.shape[0]
    dists = np.zeros((num_test, num_train))
    # range (not the Python-2-only xrange) so this also runs on Python 3.
    for i in range(num_test):
        # Broadcast one test row against every training row, then sum the
        # squared differences over the feature axis.
        dists[i, :] = np.sum((data - X[i, :])**2, axis=-1)
    return np.sqrt(dists)

def compute_distances_no_loops(data, X):
    """
    Fully vectorised Euclidean distance between every test point in X and
    every training point in data; no explicit Python loops.

    Input / Output: Same as compute_distances_two_loops"""
    # Adding an axis after the first one lets each test row broadcast against
    # all rows of data.
    test_points = X[:, np.newaxis]
    deltas = data - test_points
    # Collapse the last (feature) axis to get squared distances per pair.
    squared = (deltas ** 2).sum(axis=-1)
    return np.sqrt(squared)

In [14]:
# Benchmark inputs: random integer-valued points drawn from [0, 256).
i = 100  # number of rows in data (the training set)
j = 30   # number of features per point
data = np.random.randint(0,256,(i,j))
# Floor division keeps the row count an int: plain `/` produces a float on
# Python 3, which randint rejects as a size. `//` gives 10 on Python 2 and 3.
X = np.random.randint(0,256,(i//10,j))

In [15]:
%%timeit
# Time the numba-decorated triple loop on the benchmark arrays (data, X).
jit_loop(data,X)

10000 loops, best of 3: 30.2 µs per loop


In [16]:
%%timeit
# Time the fully broadcast numpy version on the same (data, X) arrays.
compute_distances_no_loops(data,X)

10000 loops, best of 3: 69.9 µs per loop


In [17]:
%%timeit
# Time the version that loops over test rows and vectorises over training rows.
compute_distances_one_loop(data,X)

10000 loops, best of 3: 102 µs per loop


In [18]:
%%timeit
# Time the pure-Python double loop (one np.sum call per test/train pair).
compute_distances_two_loops(data,X)

100 loops, best of 3: 2.45 ms per loop
