In [8]:
import numpy as np
from dotenv import load_dotenv
from openai import OpenAI
import pickle
import faiss

In [9]:
load_dotenv()
client = OpenAI()

In [10]:
def create_vector_embedding_from_query(query, model="text-embedding-ada-002"):
    """Embed a query with the OpenAI embeddings endpoint.

    Args:
        query: The text to embed; forwarded as ``input`` to the API.
        model: Name of the embedding model. Defaults to the model this
            notebook originally hard-coded.

    Returns:
        A ``numpy.ndarray`` of shape ``(1, d)`` and dtype ``float32``, where
        ``d`` is the length of the embedding returned by the API. The single
        leading row lets the result be passed directly as a one-query batch.
    """
    response = client.embeddings.create(model=model, input=query)
    # FAISS works on float32 vectors. Python's ``float`` would produce a
    # float64 array, which older FAISS wrappers reject and newer ones have to
    # copy, so build the array as float32 from the start.
    return np.array(response.data[0].embedding, dtype=np.float32).reshape(1, -1)

In [11]:
def load_index(index_path: str):
    """Read a pickled object from ``index_path`` and return it.

    The file is opened in binary mode and closed before returning.

    NOTE(review): ``pickle.load`` can execute arbitrary code while
    unpickling, so this should only be pointed at files from a trusted
    source.
    """
    with open(index_path, mode="rb") as pickle_file:
        return pickle.load(pickle_file)

In [12]:
def create_flat_index(embeddings: np.ndarray):
    """Build a ``faiss.IndexFlatL2`` containing ``embeddings``.

    Args:
        embeddings: A 2-D array-like of shape ``(n, d)``, one vector per row.
            It is converted to a C-contiguous ``float32`` array before being
            added, so float64 arrays and nested lists are accepted too.

    Returns:
        The populated ``faiss.IndexFlatL2`` of dimension ``d``.

    Raises:
        ValueError: If ``embeddings`` is not two-dimensional.
    """
    # FAISS expects contiguous float32 rows; normalise once here instead of
    # relying on every caller (or the FAISS wrapper version) to do so.
    vectors = np.ascontiguousarray(embeddings, dtype=np.float32)
    if vectors.ndim != 2:
        raise ValueError(
            f"embeddings must be a 2-D (n, d) array, got shape {vectors.shape}"
        )
    # Read the dimension from the shape rather than from the first row so an
    # array with zero rows still yields a valid (empty) index.
    index = faiss.IndexFlatL2(vectors.shape[1])
    index.add(vectors)
    return index

def search_index(domain, index_list, query_embedding, k=10):
    """Search one domain's embeddings for the nearest neighbours of a query.

    Args:
        domain: Domain name ending in its 1-based number, e.g. ``"domain1"``.
        index_list: Sequence of per-domain objects; entry ``n - 1`` belongs to
            domain ``n`` and must provide an ``"embeddings"`` item.
        query_embedding: Query vectors to search with, one per row.
        k: Number of neighbours to return per query (default 10).

    Returns:
        Whatever ``index.search`` returns for the freshly built flat index;
        the notebook unpacks it as ``(distances, ids)``.

    Raises:
        ValueError: If ``domain`` does not end in a number.
        IndexError: If the domain number is outside ``1..len(index_list)``.
    """
    # Use every trailing digit, not just the last character, so that
    # "domain12" resolves to 12 rather than 2.
    prefix = domain.rstrip("0123456789")
    digits = domain[len(prefix):]
    if not digits:
        raise ValueError(f"domain {domain!r} does not end in a number")
    domain_number = int(digits)
    # Guard the lookup explicitly: without this, domain number 0 would turn
    # into index -1 and silently select the last domain.
    if not 1 <= domain_number <= len(index_list):
        raise IndexError(
            f"domain number {domain_number} is outside 1..{len(index_list)}"
        )
    index_object = index_list[domain_number - 1]
    # The FAISS index is rebuilt from the stored embeddings on every call.
    index = create_flat_index(embeddings=index_object["embeddings"])
    return index.search(query_embedding, k)


In [13]:
# Load the pickled index object of each of the five domains, in domain order,
# so that index_list[n - 1] belongs to "domain<n>".
index_list = [load_index(f"db/indexes/domain{n}.pickle") for n in range(1, 6)]
# Keep the individual names bound for any cell that refers to them directly.
index1, index2, index3, index4, index5 = index_list

In [14]:
# Which domain to search and the question to ask.
domain = "domain1"
query = "Sample question"

# Embed the question so it can be compared with the stored embeddings.
query_embedding = create_vector_embedding_from_query(query)

# The last character of the domain name is its 1-based number; use it to pick
# the matching entry and keep its sentences for displaying the hits.
domain_entry = index_list[int(domain[-1]) - 1]
sentences = domain_entry["sentences"]

In [36]:
# D holds the distances and I the matching positions in `sentences`; both have
# one row per query, and there is a single query here.
D, I = search_index(domain=domain, index_list=index_list, query_embedding=query_embedding)
for rank, (distance, sentence_id) in enumerate(zip(D[0], I[0]), start=1):
    # FAISS fills missing neighbours with id -1 when the index holds fewer
    # vectors than requested; sentences[-1] would wrongly print the last
    # sentence, so skip those slots.
    if sentence_id < 0:
        continue
    print(f"{rank}. Distance:{distance} || Sentence:{sentences[sentence_id]}\n")

1. Distance:0.4229208827018738 || Sentence:Before a measure is requested, the AI Office may initiate a structured dialogue with the provider of the  general-pur pose AI model.

2. Distance:0.4235139489173889 || Sentence:A summar y of the main charact eristics of the plan for testing in real world conditions; 5.

3. Distance:0.42375752329826355 || Sentence:Such sandbo xes may include testing in real world  conditions super vised therein.

4. Distance:0.4407006502151489 || Sentence:Before sending the request for information, the AI Offi ce may initiate a structured dialogue with the provid er of the  general-pur pose AI model.

5. Distance:0.4447948932647705 || Sentence:Any subjects of the testing in real world conditions, or their legally designated representative, as appropr iate, may,  without any resulting detr iment and without having to provide any justifi cation, withdra w from the testing at any time by  revok ing their informed consent and may request the immediate and permanent