# In [1]:
import faiss
import numpy as np
import pickle

class SingleIndexManager:
    """Pair one FAISS index with a mapping from index positions to document IDs.

    Attributes:
        embedding_dim: Dimensionality of the vectors held by the index.
        index: The underlying FAISS index (an ``IndexFlatL2`` when freshly
            constructed; whatever ``faiss.read_index`` returns after
            ``load_index``).
        index_to_doc_id: Dict mapping an integer position in the index to the
            document ID supplied by the caller.
    """

    def __init__(self, embedding_dim):
        """Create an empty L2 index for vectors of size ``embedding_dim``."""
        self.embedding_dim = embedding_dim
        self.index = faiss.IndexFlatL2(embedding_dim)
        self.index_to_doc_id = {}

    def add_documents(self, embeddings, doc_ids):
        """Append embeddings to the index and record their document IDs.

        Args:
            embeddings: 2-D array-like with one row per document. It is
                converted to a C-contiguous float32 array before being handed
                to FAISS, so lists and float64 arrays are accepted.
            doc_ids: Iterable of document IDs, one per row of ``embeddings``.

        Raises:
            ValueError: If the number of rows and IDs differ, or if the rows do
                not have the dimensionality of the index.
        """
        doc_ids = list(doc_ids)
        # The FAISS Python bindings expect contiguous float32 input.
        vectors = np.ascontiguousarray(embeddings, dtype=np.float32)

        if len(vectors) != len(doc_ids):
            raise ValueError("Number of embeddings and document IDs must be the same.")
        if not doc_ids:
            # Nothing to add; avoid passing an empty batch to FAISS.
            return
        if vectors.ndim != 2 or vectors.shape[1] != self.index.d:
            raise ValueError(
                f"Embeddings must have shape (n, {self.index.d}), got {vectors.shape}."
            )

        # Ask the index itself where the new rows will land rather than
        # trusting the mapping size, so the two cannot silently drift apart.
        start_pos = self.index.ntotal
        self.index.add(vectors)

        for position, doc_id in enumerate(doc_ids, start=start_pos):
            self.index_to_doc_id[position] = doc_id

    def search(self, query_embedding, top_k=5):
        """Return the nearest stored documents to ``query_embedding``.

        Args:
            query_embedding: A single query vector (array-like); converted to a
                float32 row of shape ``(1, dim)`` before searching.
            top_k: Maximum number of neighbours to return. Must be >= 1.

        Returns:
            A tuple ``(distances, doc_ids)`` where ``distances`` is a 1-D NumPy
            array and ``doc_ids`` is a list of the matching document IDs, in the
            order returned by the index. Fewer than ``top_k`` results are
            returned when the index reports fewer valid hits.

        Raises:
            ValueError: If ``top_k`` is less than 1 or the query does not match
                the index dimensionality.
            KeyError: If the index returns a position that has no entry in
                ``index_to_doc_id``.
        """
        if top_k < 1:
            raise ValueError("top_k must be a positive integer.")

        query = np.ascontiguousarray(query_embedding, dtype=np.float32).reshape(1, -1)
        if query.shape[1] != self.index.d:
            raise ValueError(
                f"Query must have {self.index.d} dimensions, got {query.shape[1]}."
            )

        distances, positions = self.index.search(query, top_k)

        # Negative positions mark "no result"; mask them out of both arrays so
        # distances and IDs stay aligned regardless of where they appear.
        found = positions[0] >= 0
        retrieved_doc_ids = [self.index_to_doc_id[int(pos)] for pos in positions[0][found]]

        return distances[0][found], retrieved_doc_ids

    def save_index(self, index_filepath, mapping_filepath):
        """Write the FAISS index and the pickled position->ID mapping to disk."""
        faiss.write_index(self.index, index_filepath)
        with open(mapping_filepath, "wb") as f:
            pickle.dump(self.index_to_doc_id, f)

    def load_index(self, index_filepath, mapping_filepath):
        """Replace the current index and mapping with the ones stored on disk.

        Both files are read before any attribute is reassigned, so a failure
        while reading leaves the manager in its previous state.

        SECURITY NOTE: the mapping is read with ``pickle.load``, which can
        execute arbitrary code. Only load mapping files from trusted sources.
        """
        loaded_index = faiss.read_index(index_filepath)
        with open(mapping_filepath, "rb") as f:
            loaded_mapping = pickle.load(f)

        self.index = loaded_index
        self.index_to_doc_id = loaded_mapping
        # Keep the advertised dimensionality in sync with the loaded index.
        self.embedding_dim = loaded_index.d

# Demo: populate two of three independent indexes, query them, then round-trip
# all three through disk and repeat the first query.
if __name__ == "__main__":
    embedding_dim = 128

    def random_vectors(*shape):
        """Return random float32 values of the given shape."""
        return np.random.random(shape).astype('float32')

    # Three separate managers; only the first two receive documents below.
    manager1, manager2, manager3 = (
        SingleIndexManager(embedding_dim) for _ in range(3)
    )

    # Populate the first manager.
    doc_ids_1 = ["doc1", "doc2", "doc3"]
    manager1.add_documents(random_vectors(len(doc_ids_1), embedding_dim), doc_ids_1)

    # Populate the second manager.
    doc_ids_2 = ["doc4", "doc5", "doc6"]
    manager2.add_documents(random_vectors(len(doc_ids_2), embedding_dim), doc_ids_2)

    # Query the first manager with a random vector.
    query_embedding_1 = random_vectors(embedding_dim)
    found_distances, found_ids = manager1.search(query_embedding_1)
    print("Distances from manager1:", found_distances)
    print("Retrieved Document IDs from manager1:", found_ids)

    # Query the second manager with its own random vector.
    query_embedding_2 = random_vectors(embedding_dim)
    found_distances, found_ids = manager2.search(query_embedding_2)
    print("Distances from manager2:", found_distances)
    print("Retrieved Document IDs from manager2:", found_ids)

    # Each manager is persisted to, and restored from, its own pair of files.
    storage = [
        (manager1, "index1.index", "mapping1.pkl"),
        (manager2, "index2.index", "mapping2.pkl"),
        (manager3, "index3.index", "mapping3.pkl"),
    ]

    # Write everything out first ...
    for manager, index_path, mapping_path in storage:
        manager.save_index(index_path, mapping_path)

    # ... then read everything back in.
    for manager, index_path, mapping_path in storage:
        manager.load_index(index_path, mapping_path)

    # Re-run the first query against the reloaded index to verify the round trip.
    found_distances, found_ids = manager1.search(query_embedding_1)
    print("Distances from manager1 after loading:", found_distances)
    print("Retrieved Document IDs from manager1 after loading:", found_ids)

# Sample output (values vary between runs because the embeddings are random):
# Distances from manager1: [19.903706 22.064589 23.42381 ]
# Retrieved Document IDs from manager1: ['doc2', 'doc3', 'doc1']
# Distances from manager2: [18.92828  22.156542 22.338604]
# Retrieved Document IDs from manager2: ['doc6', 'doc5', 'doc4']
# Distances from manager1 after loading: [19.903706 22.064589 23.42381 ]
# Retrieved Document IDs from manager1 after loading: ['doc2', 'doc3', 'doc1']
