In [2]:
import numpy as np
from dotenv import load_dotenv
from openai import OpenAI
import pickle
import faiss

In [3]:
# Load environment variables from a local .env file, if present (python-dotenv).
load_dotenv()
# Shared OpenAI client used by the embedding helper defined in the next cell.
# NOTE(review): no credentials are passed explicitly, so the API key is presumably
# taken from the environment (e.g. OPENAI_API_KEY) — confirm.
client = OpenAI()

In [4]:
def create_vector_embedding_from_query(query, model="text-embedding-ada-002"):
    """Embed ``query`` with the OpenAI embeddings endpoint.

    Args:
        query: Text to embed; forwarded unchanged as the ``input`` argument.
        model: Name of the embedding model. Defaults to the model this
            notebook has always used, so existing calls behave as before.

    Returns:
        A ``(1, dim)`` ``float32`` array holding the first embedding of the
        response. ``float32`` is the dtype FAISS operates on, so the result
        can be handed to ``index.search`` without an implicit conversion.
    """
    response = client.embeddings.create(model=model, input=query)
    vector = response.data[0].embedding
    # A single row, because FAISS expects a 2-D batch of query vectors.
    return np.asarray(vector, dtype=np.float32).reshape(1, -1)

In [5]:
def load_index(index_path: str):
    """Return the object unpickled from the file at ``index_path``.

    The file is opened in binary mode and closed again before returning.

    NOTE: ``pickle`` can execute arbitrary code while loading, so this must
    only be pointed at files from a trusted source.
    """
    with open(index_path, "rb") as handle:
        return pickle.load(handle)

In [6]:
def create_flat_index(embeddings: np.ndarray):
    """Build a FAISS inner-product index over L2-normalized ``embeddings``.

    Each row is scaled to unit length before being added, so an inner-product
    search against a unit-length query yields cosine similarities.

    Args:
        embeddings: 2-D array-like with one vector per row.

    Returns:
        A ``faiss.IndexFlatIP`` containing the normalized rows.

    Raises:
        ValueError: If ``embeddings`` is not 2-D or contains no rows.
    """
    vectors = np.asarray(embeddings, dtype=float)
    if vectors.ndim != 2 or vectors.shape[0] == 0:
        raise ValueError(
            f"embeddings must be a non-empty 2-D array, got shape {vectors.shape}"
        )

    norms = np.linalg.norm(vectors, axis=1, keepdims=True)
    # An all-zero row would divide by zero and put NaN into the index;
    # dividing by 1 instead keeps it as a zero vector (similarity 0).
    norms[norms == 0] = 1.0

    # FAISS consumes C-contiguous float32 matrices.
    normalized_embeddings = np.ascontiguousarray(vectors / norms, dtype=np.float32)

    index = faiss.IndexFlatIP(vectors.shape[1])
    index.add(normalized_embeddings)
    return index

def search_index(domain, index_list, query_embedding, k=10):
    """Search the embeddings of one domain for the nearest neighbours of a query.

    Args:
        domain: Domain name ending in its 1-based number, e.g. ``"domain3"``.
            All trailing digits are used, so ``"domain12"`` selects entry 12.
        index_list: Sequence of index objects; entry ``n - 1`` belongs to
            domain ``n`` and must provide an ``"embeddings"`` item.
        query_embedding: Query vectors, passed unchanged to ``index.search``.
            The stored vectors are normalized by ``create_flat_index`` but the
            query is not, so scores are cosine similarities only when the
            query is already unit-length.
        k: Number of neighbours to request. Defaults to 10.

    Returns:
        Whatever ``index.search(query_embedding, k)`` returns.

    Raises:
        ValueError: If ``domain`` does not end in a number.
        IndexError: If the domain number is outside ``1..len(index_list)``.
    """
    name = str(domain)
    # Everything after the non-digit prefix is the domain number.
    number_text = name[len(name.rstrip("0123456789")):]
    if not number_text:
        raise ValueError(f"domain must end with its number, got {domain!r}")

    domain_number = int(number_text)
    # Reject 0 explicitly: index_list[-1] would silently pick the last domain.
    if not 1 <= domain_number <= len(index_list):
        raise IndexError(
            f"domain number {domain_number} is outside 1..{len(index_list)}"
        )

    index_object = index_list[domain_number - 1]
    # NOTE: the flat index is rebuilt from the raw embeddings on every call.
    index = create_flat_index(embeddings=index_object["embeddings"])
    return index.search(query_embedding, k)


In [7]:
# Load the pickled index object of every domain (domain1 .. domain5), in order,
# so that index_list[n - 1] belongs to "domain<n>".
index_list = [load_index(f"db/indexes/domain{n}.pickle") for n in range(1, 6)]
# Keep the individual names available for any cell that still refers to them.
index1, index2, index3, index4, index5 = index_list

In [10]:
# Domain to search; its trailing digit selects the matching entry of index_list.
domain = "domain3"
query = "What are the model selection steps"
# Embed the query text; the helper returns a single-row 2-D array.
query_embedding = create_vector_embedding_from_query(query)
# Sentences stored with the selected domain's index object. Only the LAST
# character of `domain` is parsed here, so this lookup is correct for
# single-digit domain numbers only.
sentences = index_list[int(domain[-1]) - 1]["sentences"]

In [11]:
# Run the search and print each hit with its score and the matching sentence.
# D holds the scores and I the row positions, one row per query vector.
D, I = search_index(domain=domain, index_list=index_list, query_embedding=query_embedding)
for rank, (score, sentence_idx) in enumerate(zip(D[0], I[0]), start=1):
    if sentence_idx < 0:
        # FAISS fills missing results with label -1 when fewer neighbours exist
        # than requested; sentences[-1] would wrongly show the last sentence.
        continue
    print(f"{rank}. Similarity:{score} || Sentence:{sentences[sentence_idx]}\n")

1. Distance:0.8685411214828491 || Sentence:Mode  Selection Strategy

2. Distance:0.8293463587760925 || Sentence:In our  model , this s tep is  divid ed into two sub-steps .

3. Distance:0.8226112127304077 || Sentence:Model  and actual result s on the sample

4. Distance:0.8123400211334229 || Sentence:The first step is defining hybrid and  electri c mode selection  and the secon d step i s ICE and EM  power calcul ation  which will further b e used as the inputs for  fuel consumption and remaining SoC calculations .

5. Distance:0.7996418476104736 || Sentence:Residency of the mode selection in the comple te data        Accordin g to the  selected mo de in the previous step, the  necessary  power outpu ts will be obtained .

6. Distance:0.7987512350082397 || Sentence:Like it is mentioned in the CS  mode behavior se ction , the model run is repea ted three times  to  see the e ffect of the r andomness.

7. Distance:0.7949002385139465 || Sentence:Section 3 explains the vehicle modeling  ap