In [1]:
import pickle
import faiss

Load the Stack Exchange data (question names and their vectors) from the pickle file

In [2]:
def load_data(path='stackexchange.pickle'):
    """Load and return the pickled dataset stored at ``path``.

    Args:
        path: Location of the pickle file. Defaults to
            ``'stackexchange.pickle'`` in the current working directory, so
            existing ``load_data()`` calls behave as before.

    Returns:
        Whatever object was pickled into the file, unchanged.

    Raises:
        OSError: If the file cannot be opened (e.g. it does not exist).
    """
    # NOTE(security): pickle.load can execute arbitrary code embedded in the
    # file. Only point this at files from a trusted source.
    with open(path, 'rb') as f:
        return pickle.load(f)

# Read the pickle once, then pull out the two parallel arrays used by the
# cells below: the vectors themselves and the name attached to each row.
data = load_data()
vectors, names = data["vector"], data["name"]
data

{'name': array(['question_id:0', 'question_id:1', 'question_id:2', ...,
        'question_id:11280893', 'question_id:11280894',
        'question_id:11280895'], dtype='<U20'),
 'vector': array([[ 5.9850649e-03,  8.6651370e-02,  1.6981743e-02, ...,
         -7.0949502e-02,  1.1380416e-02, -5.2825913e-02],
        [-9.0043023e-02,  9.1169104e-02, -9.2082359e-03, ...,
          2.4855049e-02,  6.4666881e-03,  9.2397273e-02],
        [ 8.6565606e-02, -8.6956300e-02,  8.6640947e-02, ...,
         -8.6726159e-02, -8.6673632e-02,  8.6663313e-02],
        ...,
        [-1.0912500e-01, -8.7910935e-02, -4.0837310e-02, ...,
         -4.0971446e-01,  3.0658851e+00,  4.9759048e-01],
        [ 2.3090811e-01, -4.8821247e-03, -2.4868276e-02, ...,
         -1.0963874e-04,  5.8465521e-04,  1.3642834e-03],
        [ 2.2516649e+00,  2.1858168e+00, -4.2523792e-01, ...,
         -3.1400056e+00, -4.5083749e-01,  7.7036595e-01]], dtype=float32)}

In [3]:
class ExhaustiveSearch():
    """Nearest-neighbour lookup over a fixed set of labelled vectors.

    Wraps a ``faiss.IndexFlatL2`` index built from ``vectors``. Row ``i`` of
    ``vectors`` is reported to callers as ``labels[i]``.
    """

    def __init__(self, vectors, labels):
        """Build the index from ``vectors`` and remember ``labels``.

        Args:
            vectors: 2-D array of shape ``(n, dim)``; it is cast to float32
                before being added to the index.
            labels: Sequence indexable by row position, parallel to
                ``vectors``.
        """
        self.vectors = vectors.astype('float32')
        self.labels = labels
        self.index = faiss.IndexFlatL2(self.vectors.shape[1])
        self.index.add(self.vectors)

    def query(self, vectors, k=10):
        """Return the labels of the nearest neighbours of the first query vector.

        Args:
            vectors: Query array of shape ``(m, dim)``, or a single 1-D vector
                of length ``dim``. Only the first row's neighbours are
                returned.
            k: Maximum number of neighbours to return.

        Returns:
            A list of at most ``k`` labels, in the order the index returned
            them. The list is shorter than ``k`` when the index holds fewer
            than ``k`` vectors, and empty when ``vectors`` has no rows.
        """
        queries = vectors.astype('float32')
        if queries.ndim == 1:
            # Accept a bare vector by promoting it to a one-row batch.
            queries = queries.reshape(1, -1)
        if queries.shape[0] == 0:
            # No query row: there is no "first result" to report.
            return []
        _, indices = self.index.search(queries, k)
        # faiss pads missing neighbours with -1; indexing labels with -1 would
        # silently return the *last* label, so drop those slots.
        return [self.labels[i] for i in indices[0] if i >= 0]

In [4]:
# Build the search index over every loaded vector; `names` supplies the label
# reported for each row.
index = ExhaustiveSearch(vectors=vectors, labels=names)

In [5]:
from random import randrange

# Pick a random row to use as the query. randrange(n) already returns a valid
# index in [0, n-1]; subtracting 1 could yield -1, which makes the slice below
# empty (vectors[-1:0]) and also means the last row is never chosen.
randIndex = randrange(len(vectors))
# Slice rather than index so the query stays a 2-D, one-row array.
search_vector = vectors[randIndex:randIndex+1]
# Despite its name, `indices` holds the labels returned by ExhaustiveSearch.query.
indices = index.query(search_vector, 10)
print(f"Top 10 similar questions to {names[randIndex]} are:\n")
print(indices)

Top 10 similar questions to question_id:3916934 are:

['question_id:3916934', 'question_id:5646988', 'question_id:311457', 'question_id:9196014', 'question_id:3056945', 'question_id:5728444', 'question_id:917655', 'question_id:9872754', 'question_id:490369', 'question_id:10224342']
