1. Read the code below and rewrite it using only numpy functions (without for loops
or if-else statements). Benchmark your implementation. How much faster is it than
the naive Python implementation?

```
def func1(names, db):
    """Return the positions in ``db`` of the items of ``names`` found in ``db``.

    Items of ``names`` that are absent from ``db`` are skipped, so the result
    may be shorter than ``names``. Results follow the order of ``names``.
    Assumes ``db`` offers ``index`` and ``in`` like a list -- TODO confirm.
    """
    idx = []
    for i in names:
        # Membership test first: ``db.index`` is only called for items present.
        if i in db:
            idx.append(db.index(i))
    return idx
```

In [1]:
import numpy as np
def problem1_benchmark(a, b):
    """Naive pure-Python baseline for problem 1.

    For every element of ``a`` that also occurs in ``b``, record the index of
    its first occurrence in ``b``. The loop is kept deliberately unoptimized
    because it is the reference the vectorized solutions are timed against.

    Parameters
    ----------
    a : numpy.ndarray
        1-D array of values to look up.
    b : numpy.ndarray
        1-D array that is searched.

    Returns
    -------
    numpy.ndarray
        Integer indices into ``b``, ordered by the position of the matching
        element in ``a``. Empty (but still integer-typed) when nothing matches.
    """
    idx = []
    b_list = b.tolist()
    for i in a:
        if i in b_list:
            idx.append(b_list.index(i))
    # An explicit integer dtype keeps an empty result usable as an index
    # array; np.array([]) would otherwise default to float64.
    return np.array(idx, dtype=np.intp)  # Be careful with the order of idx

# This version builds a len(a) x len(b) comparison matrix, so it only works
# for small amounts of data
def problem1_solution1(a, b):
    """Vectorized lookup of the elements of ``a`` in ``b`` via broadcasting.

    Parameters
    ----------
    a : numpy.ndarray
        1-D array of values to look up.
    b : numpy.ndarray
        1-D array that is searched.

    Returns
    -------
    numpy.ndarray
        For each element of ``a`` that occurs in ``b`` (in the order of
        ``a``), the index of its first occurrence in ``b``.
    """
    # np.nonzero walks the matrix row by row, so ``rows`` is sorted and the
    # columns within one row are ascending.
    rows, cols = np.nonzero(a[:, None] == b)
    # Keep only the first hit per row: duplicates in ``b`` must not add extra
    # indices (same semantics as ``list.index``).
    _, first_hit = np.unique(rows, return_index=True)
    return cols[first_hit]

# solution 2
def problem1_solution2(a, b):
    """Vectorized lookup of the elements of ``a`` in ``b`` via sorting.

    Unlike the broadcasting approach this never builds a len(a) x len(b)
    matrix. It also tolerates repeated values in either array.

    Parameters
    ----------
    a : numpy.ndarray
        1-D array of values to look up.
    b : numpy.ndarray
        1-D array that is searched.

    Returns
    -------
    numpy.ndarray
        For each element of ``a`` that occurs in ``b`` (in the order of
        ``a``), the index of its first occurrence in ``b``.
    """
    # Sorted distinct values of ``b`` together with where each first appears.
    uniq_b, first_idx_in_b = np.unique(b, return_index=True)
    # Drop the elements of ``a`` that are absent from ``b``; order is kept.
    a_hits = a[np.isin(a, uniq_b)]
    # Every remaining value is guaranteed to be in ``uniq_b``, so searchsorted
    # returns its exact position there.
    return first_idx_in_b[np.searchsorted(uniq_b, a_hits)]

In [2]:
# Fixed seed so the shuffles (and therefore the printed indices) are repeatable.
np.random.seed(1)
# Both arrays share the values 0..4; the remaining values are disjoint.
shared_values = np.arange(5)
a = np.concatenate((shared_values, np.arange(100, 110)))
b = np.concatenate((shared_values, np.arange(200, 210)))
np.random.shuffle(a)
np.random.shuffle(b)
# All three implementations are expected to print the same index array.
for solver in (problem1_benchmark, problem1_solution1, problem1_solution2):
    print(solver(a, b))

[ 1 12 14  6  4]
[ 1 12 14  6  4]
[ 1 12 14  6  4]


2. Use only numpy functions (without for loops or if-else statements) to find the
largest 3 distances between pairs of points where exactly one point of the pair is
inside the circle of radius r0 centered at the origin. Then return these points. E.g.

```
>>> coordinates = np.array(
[[0.968, 0.313],
 [0.692, 0.876],
 [0.895, 0.085],
 [0.039, 0.17 ],
 [0.878, 0.098]]
)
>>> cirle_r0 = 0.9
>>> print(search_pairs(coordinates, cirle_r0))

# The indices of the relevant points in the original coordinates
[[2 1]
 [3 0]
 [3 1]]
```

In [3]:
def problem2_np_max_distances_solution1(coordinates, r0, n=3):
    """Find the ``n`` most distant point pairs that straddle the circle.

    A pair qualifies when exactly one of its two points lies strictly inside
    the circle of radius ``r0`` centered at the origin. Qualifying pairs are
    extracted first and then ranked.

    Parameters
    ----------
    coordinates : numpy.ndarray
        Array of shape (N, D) holding one point per row.
    r0 : float
        Radius of the circle.
    n : int, optional
        Maximum number of pairs to return.

    Returns
    -------
    numpy.ndarray
        Integer array of shape (k, 2) with ``k = min(n, number of qualifying
        pairs)``. Each row is ``[i, j]`` with ``i > j``; rows are ordered by
        ascending distance.
    """
    pair_dists = np.linalg.norm(coordinates[:, np.newaxis, :] - coordinates, axis=2)
    points_in_r0 = np.linalg.norm(coordinates, axis=1) < r0
    # XOR keeps pairs with exactly one point inside; the strict lower triangle
    # keeps each unordered pair once and drops the diagonal.
    mask = np.tril(points_in_r0[:, np.newaxis] ^ points_in_r0, k=-1)
    rows, cols = np.nonzero(mask)
    uniq_dists = pair_dists[rows, cols]
    # Clamp so that too few qualifying pairs (or n <= 0) yields a shorter
    # result instead of an out-of-bounds partition index.
    k = max(0, min(n, uniq_dists.size))
    order = np.argsort(uniq_dists, kind="stable")
    keep = order[uniq_dists.size - k:]
    return np.column_stack((rows, cols))[keep]

def problem2_np_max_distances_solution2(coordinates, r0, n=3):
    """Find the ``n`` most distant point pairs that straddle the circle.

    A pair qualifies when exactly one of its two points lies strictly inside
    the circle of radius ``r0`` centered at the origin. This variant ranks
    the flattened distance matrix with non-qualifying entries pushed to the
    bottom.

    Parameters
    ----------
    coordinates : numpy.ndarray
        Array of shape (N, D) holding one point per row.
    r0 : float
        Radius of the circle.
    n : int, optional
        Maximum number of pairs to return.

    Returns
    -------
    numpy.ndarray
        Integer array of shape (k, 2) with ``k = min(n, number of qualifying
        pairs)``. Each row is ``[i, j]`` with ``i > j``; rows are ordered by
        ascending distance.
    """
    pair_dists = np.linalg.norm(coordinates[:, np.newaxis, :] - coordinates, axis=2)
    points_in_r0 = np.linalg.norm(coordinates, axis=1) < r0
    mask_r0 = points_in_r0[:, np.newaxis] ^ points_in_r0
    mask = np.tril(mask_r0, k=-1)
    # Excluded entries get -inf so they always rank below real distances.
    scores = np.where(mask, pair_dists, -np.inf)
    # Never select more entries than there are qualifying pairs; otherwise
    # excluded pairs would be returned as padding.
    k = max(0, min(n, int(np.count_nonzero(mask))))
    order = np.argsort(scores, axis=None, kind="stable")
    largest_indices = order[order.size - k:]
    indices = np.unravel_index(largest_indices, scores.shape)
    return np.array(indices).T

# Threshold-based variant; it makes more passes over the data than the two
# versions above
def problem2_np_max_distances_solution3(coordinates, r0, n=3):
    """Find the ``n`` most distant point pairs that straddle the circle.

    A pair qualifies when exactly one of its two points lies strictly inside
    the circle of radius ``r0`` centered at the origin. This variant finds
    the k-th largest qualifying distance and selects every qualifying pair
    at or above that threshold.

    Parameters
    ----------
    coordinates : numpy.ndarray
        Array of shape (N, D) holding one point per row.
    r0 : float
        Radius of the circle.
    n : int, optional
        Maximum number of pairs to return.

    Returns
    -------
    numpy.ndarray
        Integer array of shape (k, 2) with ``k = min(n, number of qualifying
        pairs)``. Each row is ``[i, j]`` with ``i > j``; rows are ordered by
        ascending distance.
    """
    pair_dists = np.linalg.norm(coordinates[:, np.newaxis, :] - coordinates, axis=2)
    points_in_r0 = np.linalg.norm(coordinates, axis=1) < r0
    mask_r0 = points_in_r0[:, np.newaxis] ^ points_in_r0
    mask = np.tril(mask_r0, k=-1)  # each unordered pair once, no diagonal

    candidate_dists = np.sort(pair_dists[mask])
    k = max(0, min(n, candidate_dists.size))
    # The appended +inf makes the threshold unreachable when k == 0, so
    # nothing gets selected in that case.
    threshold = np.append(candidate_dists[candidate_dists.size - k:], np.inf)[0]

    # Compare positions inside the qualifying mask only; matching on the
    # distance value alone would also pick up excluded pairs that happen to
    # have the same distance.
    pair_mask = mask & (pair_dists >= threshold)
    indices = np.argwhere(pair_mask)
    # Ties at the threshold may select more than k pairs; rank them and keep
    # the k largest.
    order = np.argsort(pair_dists[pair_mask], kind="stable")
    return indices[order[order.size - k:]]

In [4]:
# Draw five random 2-D points and show them rounded for readability.
coords = np.random.random((5, 2))
print(np.round(coords, 3))
# Each implementation is run with the same radius and printed in turn.
radius = 0.9
for solver in (
    problem2_np_max_distances_solution1,
    problem2_np_max_distances_solution2,
    problem2_np_max_distances_solution3,
):
    print(solver(coords, radius))

[[0.968 0.313]
 [0.692 0.876]
 [0.895 0.085]
 [0.039 0.17 ]
 [0.878 0.098]]
[[2 1]
 [3 0]
 [3 1]]
[[2 1]
 [3 0]
 [3 1]]
[[2 1]
 [3 0]
 [3 1]]
