In [1]:
import math
import numpy as np

# Test data

In [2]:
points = np.array([[1, 2], [3, 4], [5, 6], [5, 4]])
references = np.array([[0, 0], [3, 3], [4, 4]])
distance = lambda x, y: math.sqrt(sum((X - Y) * (X - Y) for X, Y in zip(x, y)))
[min(distance(p, r) for r in references) for p in points]

[2.23606797749979, 1.0, 2.23606797749979, 1.0]

# Bigger test data for timing

In [3]:
points = np.random.rand(10000, 2)
references = np.random.rand(100, 2)

# My version: vectorized, uses more memory than needed

In [4]:
%%timeit
diff = (np.repeat(points.reshape(1, points.shape[0], 2), references.shape[0], axis=0) -
        np.repeat(references.reshape(references.shape[0], 1, 2), points.shape[0], axis=1))

dist_sq = np.sum(diff ** 2, axis=2)

min_dist_sq = dist_sq.min(axis=0)

10 loops, best of 3: 43.5 ms per loop


In [5]:
diff = (np.repeat(points.reshape(1, points.shape[0], 2), references.shape[0], axis=0) -
        np.repeat(references.reshape(references.shape[0], 1, 2), points.shape[0], axis=1))

dist_sq = np.sum(diff ** 2, axis=2)

min_dist_sq = dist_sq.min(axis=0)

min_dist = np.sqrt(min_dist_sq)
min_dist

array([ 0.08958463,  0.04770848,  0.06358966, ...,  0.086152  ,
        0.0249977 ,  0.06052602])

# Joschi's version: clever spatial structure

In [6]:
from scipy.spatial import KDTree

tree = KDTree(references)

In [7]:
%%timeit
distances, ixs = tree.query(points)

1 loop, best of 3: 1.07 s per loop


In [8]:
distances, ixs = tree.query(points)
distances

array([ 0.08958463,  0.04770848,  0.06358966, ...,  0.086152  ,
        0.0249977 ,  0.06052602])