# **Iteration 3 Demo**: RAG using LlamaIndex SubQuestionQueryEngine

Installs:\
%pip install llama-index\
%pip install llama-index-llms-groq\
%pip install llama-index-vector-stores-docarray\
%pip install llama-index-embeddings-huggingface\
%pip install docx2txt

# API Keys

In [2]:
import os
from google.colab import userdata

# Read both secrets from the Colab secret store, LlamaCloud first.
LLAMA_CLOUD_API_KEY = userdata.get('LLAMA_CLOUD_API_KEY')
GROQ_API_KEY = userdata.get('GROQ_API_KEY')

# Export them as environment variables under the same names so that
# later cells can look them up with os.getenv.
os.environ.update(
    LLAMA_CLOUD_API_KEY=LLAMA_CLOUD_API_KEY,
    GROQ_API_KEY=GROQ_API_KEY,
)

# LlamaIndex Settings
Global configurations

In [3]:
from llama_index.core import Settings
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.llms.groq import Groq

In [4]:
# Register the global LLM and embedding model on Settings.
# The key is read from GROQ_API_KEY, the variable exported in the API Keys
# cell. The name used previously here, GROQ_OPENAI_API_KEY, is never set in
# this notebook, so os.getenv returned None for it.
# NOTE(review): confirm "llama-3.1-70b-versatile" is still a model id served by Groq.
Settings.llm = Groq(model="llama-3.1-70b-versatile", api_key=os.getenv("GROQ_API_KEY"), temperature=0.4)
Settings.embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")

# Load documents

In [5]:
from llama_index.core.node_parser import SentenceSplitter

# Reuse the embedding model already registered on Settings rather than
# instantiating a second HuggingFaceEmbedding of the same
# "BAAI/bge-small-en-v1.5" model; the embed_model name is kept as an alias.
embed_model = Settings.embed_model

# Sentence splitter shared by the explicit chunking cell below and, through
# Settings.text_splitter, registered as the global splitter.
# chunk_overlap is half of chunk_size, so neighbouring chunks overlap heavily.
splitter = SentenceSplitter(chunk_size=200, chunk_overlap=100, separator="\n")
Settings.text_splitter = splitter

In [7]:
from llama_index.core import SimpleDirectoryReader

# Load the outdated guide (PDF) first, then the reference material (DOCX).
# Each file gets its own reader so the two document sets stay separate.
outdated_reader = SimpleDirectoryReader(input_files=["demo_guide.pdf"])
outdated_docs = outdated_reader.load_data()

reference_reader = SimpleDirectoryReader(input_files=["demo_reference.docx"])
reference_docs = reference_reader.load_data()

# Chunking


In [8]:
# Chunk both document sets with the shared splitter, guide first.
split_into_nodes = splitter.get_nodes_from_documents

outdated_nodes = split_into_nodes(outdated_docs)
reference_nodes = split_into_nodes(reference_docs)

In [9]:
# Report how many reference chunks were produced, then dump each chunk's
# text followed by blank lines for visual separation.
reference_node_count = len(reference_nodes)
print(reference_node_count)

for chunk_text in (node.get_content() for node in reference_nodes):
  print(chunk_text + "\n\n")

In [10]:
# Number of loaded documents per source: outdated guide, then reference.
for loaded_docs in (outdated_docs, reference_docs):
    print(len(loaded_docs))

# Index

In [11]:
from llama_index.core import StorageContext
from llama_index.core import VectorStoreIndex
from llama_index.vector_stores.docarray import DocArrayInMemoryVectorStore

In [12]:
# Build a vector index over the documents loaded from the outdated guide.
# NOTE(review): the index is built from the raw documents, not from
# outdated_nodes; chunking presumably follows the global Settings — confirm.
outdated_index = VectorStoreIndex.from_documents(outdated_docs)

In [13]:
# Build a vector index over the documents loaded from the reference material.
# NOTE(review): the index is built from the raw documents, not from
# reference_nodes; chunking presumably follows the global Settings — confirm.
reference_index = VectorStoreIndex.from_documents(reference_docs)

Create query engines over each index.

In [14]:
# Query engine over the outdated-guide index, configured with similarity_top_k=2.
outdated_engine = outdated_index.as_query_engine(similarity_top_k=2)

In [15]:
# Query engine over the reference-material index, configured with similarity_top_k=2.
reference_engine = reference_index.as_query_engine(similarity_top_k=2)

In [16]:
# demo: retrieval over the outdated_index
# The query is a sentence about the old portal URL, used to check what the
# outdated-guide engine retrieves for it.
# NOTE(review): a plain string is passed to the query engine's retrieve();
# confirm the installed llama-index version accepts str here.
demo_query = "Access the System: Open your web browser and navigate to www.oldcompanyportal.com to access the internal project management system."
response = outdated_engine.retrieve(demo_query)

print(response)

Create the QueryEngineTools for the SubQuestionQueryEngine agent.\
https://docs.llamaindex.ai/en/stable/module_guides/deploying/agents/tools/#queryenginetool \
https://docs.llamaindex.ai/en/stable/examples/query_engine/sub_question_query_engine/

In [17]:
from llama_index.core.tools import QueryEngineTool, ToolMetadata
from llama_index.core.query_engine import SubQuestionQueryEngine

# One (query engine, tool name, tool description) entry per source.
tool_specs = [
    (
        outdated_engine,
        "Outdated Guide",
        "Contains the material from an Outdated User Guide",
    ),
    (
        reference_engine,
        "Reference Material",
        "Contains the material for updating the Outdated User Guide",
    ),
]

# Wrap each query engine in a QueryEngineTool carrying its name and description.
query_engine_tools = [
    QueryEngineTool(
        query_engine=engine,
        metadata=ToolMetadata(name=tool_name, description=tool_description),
    )
    for engine, tool_name, tool_description in tool_specs
]

# Sub-question engine built with library defaults over the two tools.
s_engine = SubQuestionQueryEngine.from_defaults(query_engine_tools=query_engine_tools)

In [None]:
import nest_asyncio

# Apply the nest_asyncio patch before querying.
# NOTE(review): presumably required because the notebook kernel already runs
# an event loop and the sub-question engine issues async calls — confirm.
nest_asyncio.apply()

The Agent generates sub-queries from the initial query using the provided QueryEngineTools. The final prompt will include both the context from the sub-queries and the initial query.

In [26]:
# Top-level instruction handed to the sub-question engine; it refers to the
# two tools by the names given in their ToolMetadata.
query="Update the sections of the Outdated Guide using the content in the Reference Material as reference"

# Run the query through the sub-question engine and keep the result.
# This rebinds `response`, which earlier held the retrieval demo output.
response = s_engine.query(query)

Response to the initial query + context

In [27]:
# Print the result returned by s_engine.query for the initial query.
print(response)