In [1]:
from dotenv import load_dotenv
import openai
import os
from llama_index.core import (
    VectorStoreIndex, 
    SimpleDirectoryReader, 
    StorageContext, 
    load_index_from_storage
)
# Load variables from a local .env file into the process environment.
# Presumably this is where OPENAI_API_KEY comes from — TODO confirm.
load_dotenv()
# Kept disabled: explicit assignment of the OpenAI key read from the environment.
# openai.api_key = os.getenv("OPENAI_API_KEY")

True

In [2]:
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.core import Settings

# Embedding model used for both indexing and querying. Registering it on the
# global Settings object makes it the model LlamaIndex components pick up when
# none is passed explicitly.
EMBED_MODEL_NAME = "BAAI/bge-small-en-v1.5"

embed_model = HuggingFaceEmbedding(model_name=EMBED_MODEL_NAME)
Settings.embed_model = embed_model

In [3]:
PERSIST_DIR = "storage_local_embeddings"

# Load-or-build cache for the vector index, keyed on the persist directory.
if os.path.exists(PERSIST_DIR):
    # A previous run already persisted the index: restore it from disk.
    storage_context = StorageContext.from_defaults(persist_dir=PERSIST_DIR)
    index = load_index_from_storage(storage_context)
else:
    # First run: read everything under ./data, build the index (with progress
    # bars), and persist it so later runs can take the branch above.
    documents = SimpleDirectoryReader("data").load_data()
    index = VectorStoreIndex.from_documents(documents, show_progress=True)
    index.storage_context.persist(persist_dir=PERSIST_DIR)

Parsing nodes:   0%|          | 0/104 [00:00<?, ?it/s]

Generating embeddings:   0%|          | 0/115 [00:00<?, ?it/s]

In [4]:
# Build a query engine on top of the index using its defaults (no arguments
# are passed). NOTE(review): presumably answers are generated by whatever LLM
# is configured in Settings, since none is set explicitly in this notebook —
# confirm.
query_engine = index.as_query_engine()

In [5]:
# Question (in Spanish) to answer from the indexed documents.
question = "Cuantos casos de embarazo adolescente se presentaron en 2022 en Bolivia?"
response = query_engine.query(question)


In [6]:
# Bare expression: the notebook displays the full Response repr, i.e. the
# answer text followed by the retrieved source nodes and their metadata.
# NOTE(review): that metadata includes absolute local file paths — consider
# showing only `response.response` before sharing the notebook.
response


Response(response='En 2022, se registraron un total de 35,250 casos de embarazo adolescente en Bolivia.', source_nodes=[NodeWithScore(node=TextNode(id_='0fef4907-67e8-4874-8e25-689285e32e87', embedding=None, metadata={'page_label': '33', 'file_name': 'informe_avance_bolivia.pdf', 'file_path': '/Users/pepe/dev/upb/topicos/ai-topics-2-2024/4.nlp/4.4.rag/data/informe_avance_bolivia.pdf', 'file_type': 'application/pdf', 'file_size': 5326934, 'creation_date': '2024-10-09', 'last_modified_date': '2024-10-09'}, excluded_embed_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], excluded_llm_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], relationships={<NodeRelationship.SOURCE: '1'>: RelatedNodeInfo(node_id='22895737-880e-4356-972f-28ed0e1f8a26', node_type=<ObjectType.DOCUMENT: '4'>, metadata={'page_label': '33', 'file_name': 'informe_avance_bolivia.pdf', 'file_pa

In [7]:
# Embed a short sample string directly with the local embedding model so the
# resulting vector can be inspected in the following cell.
embeddings = embed_model.get_text_embedding("hola bola")

In [10]:
# Show the embedding's dimensionality and a preview of its first components.
# Both are printed explicitly: a notebook cell only auto-displays its *last*
# bare expression, so a bare `len(embeddings)` on the first line would be
# evaluated and silently discarded.
print(len(embeddings))
print(embeddings[:5])

[-0.046661898493766785, 0.047238241881132126, 0.001964316936209798, -0.05784512311220169, 0.010918747633695602]


In [11]:
# Fetch the prompt templates the query engine uses, as a dict keyed by prompt
# name.
prompts_dict = query_engine.get_prompts()

In [12]:
# Bare expression: displays the raw dict repr (prompt name -> template
# object), e.g. the 'response_synthesizer:text_qa_template' entry.
prompts_dict

{'response_synthesizer:text_qa_template': SelectorPromptTemplate(metadata={'prompt_type': <PromptType.QUESTION_ANSWER: 'text_qa'>}, template_vars=['context_str', 'query_str'], kwargs={}, output_parser=None, template_var_mappings={}, function_mappings={}, default_template=PromptTemplate(metadata={'prompt_type': <PromptType.QUESTION_ANSWER: 'text_qa'>}, template_vars=['context_str', 'query_str'], kwargs={}, output_parser=None, template_var_mappings=None, function_mappings=None, template='Context information is below.\n---------------------\n{context_str}\n---------------------\nGiven the context information and not prior knowledge, answer the query.\nQuery: {query_str}\nAnswer: '), conditionals=[(<function is_chat_model at 0x151f256c0>, ChatPromptTemplate(metadata={'prompt_type': <PromptType.CUSTOM: 'custom'>}, template_vars=['context_str', 'query_str'], kwargs={}, output_parser=None, template_var_mappings=None, function_mappings=None, message_templates=[ChatMessage(role=<MessageRole.SYS

In [13]:
from IPython.display import Markdown, display

In [14]:
# Render every prompt: the key as a Markdown header, then the raw template via
# print() so the {placeholders} and line breaks appear literally, then a
# Markdown spacer between entries.
for prompt_key, prompt in prompts_dict.items():
    header_md = f"**Prompt Key**: {prompt_key}<br>**Text:** <br>"
    display(Markdown(header_md))
    print(prompt.get_template())
    display(Markdown("<br><br>"))

**Prompt Key**: response_synthesizer:text_qa_template<br>**Text:** <br>

Context information is below.
---------------------
{context_str}
---------------------
Given the context information and not prior knowledge, answer the query.
Query: {query_str}
Answer: 


<br><br>

**Prompt Key**: response_synthesizer:refine_template<br>**Text:** <br>

The original query is as follows: {query_str}
We have provided an existing answer: {existing_answer}
We have the opportunity to refine the existing answer (only if needed) with some more context below.
------------
{context_msg}
------------
Given the new context, refine the original answer to better answer the query. If the context isn't useful, return the original answer.
Refined Answer: 


<br><br>

In [None]:
"Context information is below.\n"
"---------------------\n"
"{context_str}\n"
"---------------------\n"
"Given the context information and not prior knowledge, "
"answer the query in the style of a Shakespeare play.\n"
"Query: {query_str}\n"
"Answer: "