In [1]:
import faiss, uuid
import numpy as np

## Faiss examples
A couple of handy examples of how Faiss can be used to store and search for vectors. 

For the Facebook Research documentation, see: https://github.com/facebookresearch/faiss

By MGM

### Example 1: Small set of vectors

In [7]:
# Dimensionality shared by every example vector (and by the index built below).
dimension = 3

# Each example vector is a single-row float32 matrix of shape (1, dimension).
v_1 = np.array([[1, 2, 3]], dtype='float32')
v_2 = np.array([[4, 5, 6]], dtype='float32')
v_3 = np.array([[7, 8, 9]], dtype='float32')

# All example vectors, in insertion order.
vectors = [v_1, v_2, v_3]

#### Construct Faiss index 

In [6]:
# IndexIDMap2 wraps another index so that every vector can be stored under a
# caller-chosen (integer) id and later be reconstructed from that id.
# If the insertion number is good enough as an id, a plain flat index can be
# used on its own; it skips the id-mapping layer, but ids can then not be
# chosen freely.
base_index = faiss.IndexFlat(dimension)
index = faiss.IndexIDMap2(base_index)

#### Insert vectors into Faiss

In [8]:
# Faiss ids are 64-bit integers. NumPy's default integer dtype is
# platform-dependent (it can be int32), so the dtype is pinned explicitly.
vector_ids = np.array([10, 20, 30], dtype='int64')

# Insert all example vectors in one call: np.vstack turns the list of (1, d)
# rows into a single (n, d) matrix, and row i is stored under vector_ids[i].
index.add_with_ids(np.vstack(vectors), vector_ids)

In [11]:
# Sanity check: ntotal is the number of vectors currently held by the index.
print("Total number of vectors in Faiss index", index.ntotal, sep="\n")

Total number of vectors in Faiss index
3


#### Reconstructing a vector based on id

In [14]:
# Fetch the stored vector back from the index using the id it was added under.
lookup_id = 30

print("Reconstructed vector")
v_test = index.reconstruct(lookup_id)
print(v_test)

Reconstructed vector
[7. 8. 9.]


#### Search in vector space
Query vector: `v_1`; number of nearest neighbours to return: 3

In [18]:
# Ask for the 3 nearest neighbours of v_1. The result holds one row per query
# vector: D contains the distances and I the ids of the matches, best first.
# Only one query vector is passed, so row 0 is the full answer.
D, I = index.search(v_1, 3)
print("Distances", D[0], "", "Indexes", I[0], sep="\n")

Distances
[  0.  27. 108.]

Indexes
[10 20 30]


#### Changing a vector in the index

In [20]:
# Replacement value for the vector that is currently stored under Id: 30.
v_new = np.array([[70, 80, 90]], dtype='float32')

In [21]:
# Replace the vector stored under an existing id: remove the old entry, then
# add the new vector under the same id.
target_id = 30
# Faiss ids are 64-bit integers; pin the dtype instead of relying on NumPy's
# platform-dependent default integer type.
target_ids = np.array([target_id], dtype='int64')

# Step 1) Check if the vector exists in the index.
# Only the lookup is guarded, so that a failure while removing or adding is
# not misreported as "does not exist".
try:
    v_test = index.reconstruct(target_id)
except RuntimeError:
    # RuntimeError is thrown if the vector does not exist in the index
    print("Vector with Id: " + str(target_id) + " does not exist")
else:
    # Step 2) Delete vector with Id: 30
    index.remove_ids(target_ids)

    # We can check that the vector has been removed
    print("ntotal after deleting vector")
    print(index.ntotal)
    print()

    # Step 3) Add new vector and assign to the same Id: 30
    index.add_with_ids(v_new, target_ids)

# We can check that the vector has been added
print("ntotal after adding the new vector")
print(index.ntotal)
print()

ntotal after deleting vector
2

ntotal after adding the new vector
3



In [23]:
# Repeat the search from before: the vector with Id: 30 should now be
# significantly farther away from v_1.
print("Search after changing vector")
D, I = index.search(v_1, 3)
# This time the full 2-D result arrays are printed (one row per query vector).
print("Distances", D, "", "Indexes", I, sep="\n")

Search after changing vector
Distances
[[    0.    27. 18414.]]

Indexes
[[10 20 30]]


#### Search in index based on an average representation

In [24]:
# Search with the average of two or more stored vectors (embeddings) as query.

# Ids of the stored vectors to average; here the vectors with id 10 and 30.
to_reconstruct_ids = [10, 30]

# Fetch the stored vectors back from the index, one per id.
reconstructed_vectors = [
    index.reconstruct(stored_id) for stored_id in to_reconstruct_ids
]

# Element-wise mean over the reconstructed vectors, wrapped in an outer list so
# that the query is a single-row matrix, as in the other searches.
mean_vector = np.average(reconstructed_vectors, axis=0)
average_input_vector = np.array([mean_vector])

print("Searching with average input vector representation")
D, I = index.search(average_input_vector, 3)
# The two averaged vectors are equally far from their own midpoint, so ids 10
# and 30 are expected to show the same distance.
print("Distances", D[0], "", "Indexes", I[0], sep="\n")

Searching with average input vector representation
Distances
[3928.5 4603.5 4603.5]

Indexes
[20 30 10]


#### Saving the index

In [None]:
# Uncomment the line below to write the index to the file "./index".
# NOTE(review): a saved index should be loadable again with
# faiss.read_index("./index") -- confirm against the installed Faiss version.
# faiss.write_index(index, "./index")