In [4]:
import os
from dotenv import load_dotenv
import nest_asyncio
nest_asyncio.apply()
load_dotenv()

True

In [16]:
OPENAI_API_KEY = os.getenv('OPENAI_API_KEY')


In [20]:
from llama_index.core import SimpleDirectoryReader
from llama_index.core.node_parser import SentenceSplitter
# load lora_paper.pdf documents
documents = SimpleDirectoryReader(input_files=["./1_Agentic_Rag/basics/data/Lora.pdf"]).load_data()
# chunk_size of 1024 is a good default value
splitter = SentenceSplitter(chunk_size=1024)
# Create nodes from documents
nodes = splitter.get_nodes_from_documents(documents)

### Creating LLM and Embedding Model

In [13]:
from llama_index.core import Settings
from llama_index.llms.openai import OpenAI
from llama_index.embeddings.openai import OpenAIEmbedding
# LLM model
Settings.llm = OpenAI(model="gpt-3.5-turbo")
# embedding model
Settings.embed_model = OpenAIEmbedding(model="text-embedding-ada-002")

### Creating Summary Index and Vector Store


In [21]:
from llama_index.core import SummaryIndex, VectorStoreIndex

# summary index
summary_index = SummaryIndex(nodes)
# vector store index
vector_index = VectorStoreIndex(nodes)

### Turning Vector Indexes To Query Engines
Once that we now have the vector indexes created and stored, we’ll now need to move ahead to creating the query engines that we’ll convert to tools aka query tools that our agents can use later on.

In [22]:
# summary query engine
summary_query_engine = summary_index.as_query_engine(
    response_mode="tree_summarize",
    use_async=True,
)

# vector query engine
vector_query_engine = vector_index.as_query_engine()

### Query Tools
A query tool is simply a query engine with metadata, specifically a description of what the query tool can be used for or is for. This helps the router query engine to then be able to decide what query engine tool to route to depending on the query it receives.

In [23]:
from llama_index.core.tools import QueryEngineTool


summary_tool = QueryEngineTool.from_defaults(
query_engine=summary_query_engine,
description=(
    "Useful for summarization questions related to the Lora paper."
),
)

vector_tool = QueryEngineTool.from_defaults(
query_engine=vector_query_engine,
description=(
    "Useful for retrieving specific context from the the Lora paper."
),
)

### Router Query Engine
Finally, we can go on ahead to creating the router query engine tool. This will enable us to use all the query tools we created from the query engines we defined above, specifically the summary_tool and the vector_tool

In [24]:
from llama_index.core.query_engine.router_query_engine import RouterQueryEngine
from llama_index.core.selectors import LLMSingleSelector

query_engine = RouterQueryEngine(
    selector=LLMSingleSelector.from_defaults(),
    query_engine_tools=[
        summary_tool,
        vector_tool,
    ],
    verbose=True
)

### Testing

In [25]:
response = query_engine.query("What is the summary of the document?")
print(str(response))

[1;3;38;5;200mSelecting query engine 0: The question is asking for a summary of the document, which is typically related to summarization questions..
[0mThe document introduces LoRA, a novel approach for adapting large language models to downstream tasks efficiently. LoRA involves freezing pre-trained model weights and incorporating trainable rank decomposition matrices into each layer of the Transformer architecture, reducing the number of trainable parameters. The method maintains high model quality without adding inference latency or limiting input sequence length. Empirical investigations demonstrate LoRA's effectiveness across tasks and models like RoBERTa, DeBERTa, GPT-2, and GPT-3. The study explores optimal rank selection for LoRA, subspace similarity between ranks, and the relationship between adaptation matrices and pre-trained weights. Overall, the document suggests that LoRA presents a competitive alternative to full fine-tuning, offering valuable insights into adapting p

In [26]:
print(len(response.source_nodes))

39


In [27]:
#Let’s ask another question that does not involve the use of the summary tool.
response = query_engine.query("What is the long from of Lora?")
print(str(response))

[1;3;38;5;200mSelecting query engine 1: The question is asking for the long form of Lora, which is specific context related to the Lora paper..
[0mThe long form of LoRA is Local Representation Adaptation.


### Putting It All Together
Now that we have understood this basic pipeline, let’s move ahead into converting this into a pipeline function that we call utilize later.

In [28]:
async def create_router_query_engine(
    document_fp: str,
    verbose: bool = True,
) -> RouterQueryEngine:
    # load lora_paper.pdf documents
    documents = SimpleDirectoryReader(input_files=[document_fp]).load_data()
    
    # chunk_size of 1024 is a good default value
    splitter = SentenceSplitter(chunk_size=1024)
    # Create nodes from documents
    nodes = splitter.get_nodes_from_documents(documents)
    
    # LLM model
    Settings.llm = OpenAI(model="gpt-3.5-turbo")
    # embedding model
    Settings.embed_model = OpenAIEmbedding(model="text-embedding-ada-002")
    
    # summary index
    summary_index = SummaryIndex(nodes)
    # vector store index
    vector_index = VectorStoreIndex(nodes)
    
    # summary query engine
    summary_query_engine = summary_index.as_query_engine(
        response_mode="tree_summarize",
        use_async=True,
    )

    # vector query engine
    vector_query_engine = vector_index.as_query_engine()
    
    summary_tool = QueryEngineTool.from_defaults(
        query_engine=summary_query_engine,
        description=(
            "Useful for summarization questions related to the Lora paper."
        ),
    )

    vector_tool = QueryEngineTool.from_defaults(
        query_engine=vector_query_engine,
        description=(
            "Useful for retrieving specific context from the the Lora paper."
        ),
    )
    
    
    query_engine = RouterQueryEngine(
        selector=LLMSingleSelector.from_defaults(),
        query_engine_tools=[
            summary_tool,
            vector_tool,
        ],
        verbose=verbose
    )
    
    
    return query_engine

In [29]:
query_engine = await create_router_query_engine("./1_Agentic_Rag/basics/data/Lora.pdf")
response = query_engine.query("What is the summary of the document?")
print(str(response))

[1;3;38;5;200mSelecting query engine 0: The question is asking for a summary of the document, which is typically related to summarization questions..
[0mThe document introduces a method called LoRA (Low-Rank Adaptation) that efficiently adapts large language models for specific tasks by incorporating trainable rank decomposition matrices into each layer of the Transformer architecture. This method outperforms or matches traditional fine-tuning approaches on models like RoBERTa, DeBERTa, GPT-2, and GPT-3, while reducing GPU memory requirements and training throughput. The document also explores empirical investigations on rank-deficiency in language model adaptation, providing insights into low-rank matrices, model adaptation, subspace similarity in neural networks, correlation between layers, varying rank parameters, and task-specific directions in model adaptation. Experiments conducted on datasets like E2E NLG Challenge and MNLI under low-data regimes shed light on the performance 

Let’s move on ahead and create a utils.py file and have the following inside of it:



In [33]:
import sys
print(sys.path)

['/teamspace/studios/this_studio', '/home/zeus/miniconda3/envs/cloudspace/lib/python310.zip', '/home/zeus/miniconda3/envs/cloudspace/lib/python3.10', '/home/zeus/miniconda3/envs/cloudspace/lib/python3.10/lib-dynload', '', '/home/zeus/miniconda3/envs/cloudspace/lib/python3.10/site-packages']


In [38]:
import os
os.getcwd() 
# from utils import create_router_query_engine
from 1_Agentic_Rag.utils import create_router_query_engine

SyntaxError: invalid decimal literal (2560404057.py, line 4)