# Self-contained nearest neighbour search example using annoy

In [1]:
import annoy
import numpy as np

In [2]:
# Synthetic stand-in for real embeddings: 1000 items, each a 100-dimensional
# vector drawn from a standard normal distribution. The generator is seeded so
# the matrix is identical on every fresh-kernel run.
SEED = 42
embedding_weights = np.random.default_rng(SEED).normal(0, 1, (1000, 100))

In [3]:
# Sanity check: the embeddings are held in a NumPy array.
type(embedding_weights)

numpy.ndarray

In [4]:
# Rows are items, columns are embedding dimensions.
embedding_weights.shape

(1000, 100)

In [5]:
# Build an Annoy index over the embedding rows using angular distance.
# Each row is registered under its own row number, so the item ids returned by
# queries can be used directly as row indices into embedding_weights.
t = annoy.AnnoyIndex(embedding_weights.shape[1], metric='angular')

# Tree construction is randomised; pin the seed (before adding items) so that
# rebuilding the index on a fresh kernel gives the same trees.
t.set_seed(42)

for row, vector in enumerate(embedding_weights):
    t.add_item(row, vector)

# Number of trees in the forest built over the items.
Ntrees = 100
t.build(Ntrees)

True

## Search by vector

In [6]:
# Query with a raw vector (here row 0 of the matrix) and ask for the 10
# nearest items; only their ids are returned.
neighbours = t.get_nns_by_vector(
    embedding_weights[0],
    n=10,
    search_k=10,
    include_distances=False,
)

In [7]:
# Show the item ids returned by the vector query.
print(neighbours)

[0, 616, 998, 733, 886, 930, 459, 176, 991, 894]


In [8]:
# Same vector query, but with include_distances=True the result is a pair
# (item ids, distances). A smaller distance means a closer match.
neighbours = t.get_nns_by_vector(
    embedding_weights[0],
    n=10,
    search_k=10,
    include_distances=True,
)

In [9]:
# Show the (item ids, distances) pair returned by the vector query.
print(neighbours)

([0, 616, 998, 733, 886, 930, 459, 176, 991, 894], [0.0, 1.2121661901474, 1.234415054321289, 1.249654769897461, 1.2596243619918823, 1.2633097171783447, 1.270654320716858, 1.2744632959365845, 1.2767930030822754, 1.2795202732086182])


## Search by index

In [10]:
# Query by the id of an item already in the index (item 0) and ask for the 10
# nearest items; only their ids are returned.
neighbours = t.get_nns_by_item(
    0,
    n=10,
    search_k=10,
    include_distances=False,
)

In [11]:
# Show the item ids returned by the item query.
print(neighbours)

[0, 616, 998, 733, 886, 930, 459, 176, 991, 894]


In [12]:
# Same item query, but with include_distances=True the result is a pair
# (item ids, distances).
neighbours = t.get_nns_by_item(
    0,
    n=10,
    search_k=10,
    include_distances=True,
)

In [13]:
# Show the (item ids, distances) pair returned by the item query.
print(neighbours)

([0, 616, 998, 733, 886, 930, 459, 176, 991, 894], [0.0, 1.2121661901474, 1.234415054321289, 1.249654769897461, 1.2596243619918823, 1.2633097171783447, 1.270654320716858, 1.2744632959365845, 1.2767930030822754, 1.2795202732086182])


## Note
Example of interest - you have dish names represented by vectors and you want to do nearest neighbour search to find closest matches:
1. If N dishes are represented by an N x d matrix (i.e. each dish is represented by a d-dimensional vector), create a mapping from row index to dish name. E.g. if there are only 3 dishes, each represented by a 100-dimensional vector, the matrix has shape (3 x 100), the mapping may look like `{0: 'biryani', 1: 'roti', 2: 'pasta'}` and the inverted mapping is `{'biryani': 0, 'roti': 1, 'pasta': 2}`
2. Now, when searching for a dish, use the inverted mapping (name to index) to get its index, then use `get_nns_by_item` to get the neighbouring indices, which can be mapped back to dish names using the index-to-name mapping created in step 1