# Distance Functions

In [None]:
import numpy as np

### Compute L2 distance between 2 vectors

In [None]:
# Two 3x1 column vectors used by the distance examples:
#   X = [1]   Y = [6]
#       [2]       [5]
#       [3]       [4]
# Each is written as a flat list and then reshaped into a single column.
X = np.array([1, 2, 3]).reshape(3, 1)
Y = np.array([6, 5, 4]).reshape(3, 1)
print("X: \n{}".format(X), "Y: \n{}".format(Y), sep="\n")

In [None]:
# Euclidean (L2) distance: square the element-wise differences, add them up,
# then take the square root of the total.
distance = np.sqrt(np.square(X - Y).sum())
print("L2 distance: {}".format(distance))

### Compute distance between Y and each of many vectors in Xdata

In [None]:
# Xdata holds 3 data points, one per column:
#   Xdata = [1] [4] [7]
#           [2] [5] [8]
#           [3] [6] [9]
# The points are listed one per row and then transposed so that each point
# becomes a column of the 2d array.
Xdata = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]]).T.copy()
print("Xdata: \n{}".format(Xdata), "Y: \n{}".format(Y), sep="\n")

In [None]:
# Subtracting Y from Xdata broadcasts Y across the columns of Xdata, so a single
# expression gives the distance to every data point. Summing over axis 0 adds
# the squared differences within each column; keepdims keeps the result 2d.
distance = np.sqrt(np.square(Xdata - Y).sum(axis=0, keepdims=True))
print("L2 distance between columns of Xdata and Y: {}".format(distance))

### Benefits of "Vectorization"

Compare the time taken to compute the distance between Y and each column of Xdata using an explicit loop versus the vectorized approach above.

In [None]:
import time
import matplotlib.pyplot as plt

In [None]:
# Benchmark: for an increasing number of data points, time the per-column
# Python loop against the single vectorized expression and plot both curves.
# Dataset sizes go from 0 to 9500 in steps of 500.
list_ndata = [500*i for i in range(20)]
time_loop = []
time_vec = []
ndim = 2
Y = np.random.randn(ndim, 1)
for ndata in list_ndata:
    # Generate Xdata: one random data point per column
    Xdata = np.random.randn(ndim, ndata)
    # Compute L2 distance between Y and each datapoint by looping.
    # perf_counter() is a monotonic, high-resolution clock intended for
    # measuring short durations; time.time() can jump if the system clock changes.
    time_start_loop = time.perf_counter()
    distance_loop = np.zeros((1, ndata))
    for col in range(ndata):
        distance_loop[0, col] = np.sqrt(np.sum(np.square(Xdata[:, [col]] - Y)))
    time_loop.append(time.perf_counter() - time_start_loop)
    # Compute L2 distance between Y and each datapoint all at once
    time_start_vec = time.perf_counter()
    distance_vec = np.sqrt(np.sum(np.square(Xdata - Y), axis=0, keepdims=True))
    time_vec.append(time.perf_counter() - time_start_vec)
    # Sanity check: L2 norm of the difference between the two results,
    # relative to the L2 norm of Y. Both norms square before summing.
    error = np.sqrt(np.sum(np.square(distance_loop - distance_vec)))/np.sqrt(np.sum(np.square(Y)))
    if error > 1e-10:
        print("Difference between looping and vectorization: {}".format(error))
# plot timing results
plt.figure()
plt.plot(list_ndata, time_loop, "r-", label="loop")
plt.plot(list_ndata, time_vec, "b-", label="vectorization")
plt.xlabel("number of data points")
plt.ylabel("time (seconds)")
plt.legend(loc="upper left")