In [13]:
import os
from getpass import getpass

# The bare `%set_env` magic requires a `var=val` argument and raised UsageError.
# NOTE(review): presumably the intent was to provide the OpenAI key that the
# `OpenAI(...)` LLM created later in this notebook needs — confirm the variable name.
# Prompt only when the variable is not already set, so the secret is never
# written into the notebook source or its saved output.
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass("OPENAI_API_KEY: ")


In [1]:
from llama_index.core import SimpleDirectoryReader, VectorStoreIndex, SummaryIndex
from llama_index.core.node_parser import SentenceSplitter
from llama_index.core.tools import FunctionTool, QueryEngineTool
from llama_index.core.vector_stores import MetadataFilters, FilterCondition
from typing import List, Optional

In [2]:
def get_doc_tools(
    file_path: str,
    name: str,
) -> "tuple[FunctionTool, QueryEngineTool]":
    """Build a vector-search tool and a summarization tool for one document.

    The file at ``file_path`` is loaded, split into nodes with a
    ``SentenceSplitter(chunk_size=1024)``, and those nodes are indexed twice:
    once in a ``VectorStoreIndex`` (for targeted questions, optionally
    restricted to specific pages) and once in a ``SummaryIndex`` (for
    summarization with ``response_mode="tree_summarize"``).

    Args:
        file_path: Path of the document to load.
        name: Suffix used to build the tool names ``vector_tool_{name}`` and
            ``summary_tool_{name}``; also interpolated into the summary
            tool's description.

    Returns:
        A ``(vector_query_tool, summary_tool)`` pair.
    """
    # The return annotation above is quoted so that it is not evaluated when the
    # function is defined; it documents the actual (tool, tool) return value.
    documents = SimpleDirectoryReader(input_files=[file_path]).load_data()
    splitter = SentenceSplitter(chunk_size=1024)
    nodes = splitter.get_nodes_from_documents(documents)
    vector_index = VectorStoreIndex(nodes)

    # NOTE(review): the signature and docstring of `vector_query` are presumably
    # what FunctionTool exposes to the LLM as the tool description, so they are
    # kept exactly as they were — confirm before rewording them.
    def vector_query(
        query: str,
        page_numbers: Optional[List[str]] = None
    ) -> str:
        """Use to answer questions over a given paper.
    
        Useful if you have specific questions over the paper.
        Always leave page_numbers as None UNLESS there is a specific page you want to search for.
    
        Args:
            query (str): the string query to be embedded.
            page_numbers (Optional[List[str]]): Filter by set of pages. Leave as NONE 
                if we want to perform a vector search
                over all pages. Otherwise, filter by the set of specified pages.
        
        """
        page_numbers = page_numbers or []
        # One filter per requested page, matched against the "page_label"
        # metadata key; the filters are combined with OR below.
        metadata_dicts = [
            {"key": "page_label", "value": p} for p in page_numbers
        ]
        query_engine = vector_index.as_query_engine(
            similarity_top_k=2,
            filters=MetadataFilters.from_dicts(
                metadata_dicts,
                condition=FilterCondition.OR
            )
        )
        # Run the query through the engine's .query() method; the previous code
        # called the engine object itself (`query_engine(query)`).
        response = query_engine.query(query)
        return response

    vector_query_tool = FunctionTool.from_defaults(
        name=f"vector_tool_{name}",
        fn=vector_query
    )

    summary_index = SummaryIndex(nodes)
    summary_query_engine = summary_index.as_query_engine(
        response_mode="tree_summarize",
        use_async=True,
    )
    summary_tool = QueryEngineTool.from_defaults(
        name=f"summary_tool_{name}",
        query_engine=summary_query_engine,
        description=(
            f"Useful for summarization questions related to {name}"
        ),
    )

    return vector_query_tool, summary_tool

In [4]:
# Budget PDFs to index; later cells iterate over this list in order.
papers = [
    "UnionBudget2023-24.pdf",
    "UnionBudget2021-22.pdf",
    "UnionBudget2022-23.pdf",
]

# Filled in by the next cell: maps each filename to its list of tools.
paper_to_tools_dict = dict()

In [5]:
for paper in papers:
    # The tool-name suffix is the filename up to its first hyphen,
    # e.g. "UnionBudget2023-24.pdf" -> "UnionBudget2023".
    short_name = paper.split('-')[0]
    vector_tool, summary_tool = get_doc_tools(paper, short_name)
    paper_to_tools_dict[paper] = [vector_tool, summary_tool]

In [6]:
# Inspect the mapping: each filename should map to a [FunctionTool, QueryEngineTool] pair.
paper_to_tools_dict

{'UnionBudget2023-24.pdf': [<llama_index.core.tools.function_tool.FunctionTool at 0x308b27090>,
  <llama_index.core.tools.query_engine.QueryEngineTool at 0x3077ac290>],
 'UnionBudget2021-22.pdf': [<llama_index.core.tools.function_tool.FunctionTool at 0x308ba7410>,
  <llama_index.core.tools.query_engine.QueryEngineTool at 0x173064d10>],
 'UnionBudget2022-23.pdf': [<llama_index.core.tools.function_tool.FunctionTool at 0x172f94450>,
  <llama_index.core.tools.query_engine.QueryEngineTool at 0x172f8ee90>]}

In [7]:
# Flatten the per-paper tool lists into one list, keeping the order of `papers`.
initial_tools = [
    tool
    for tools_for_paper in (paper_to_tools_dict[name] for name in papers)
    for tool in tools_for_paper
]

In [8]:
from llama_index.llms.openai import OpenAI
# LLM instance passed to the agent worker in the next cell.
llm = OpenAI(model="gpt-3.5-turbo")

In [9]:
from llama_index.core.agent import AgentRunner, FunctionCallingAgentWorker

# The worker is given every per-paper tool plus the LLM; the runner wraps the
# worker and is the object the chat cells below talk to.
agent_worker = FunctionCallingAgentWorker.from_tools(initial_tools, llm=llm)
agent = AgentRunner(agent_worker)

In [10]:
# First question: names the source document explicitly in the prompt.
pli_question = "what is AatmaNirbhar Bharat-Production Linked Incentive (PLI) Scheme from UnionBudget2021-22?"
response = agent.chat(pli_question)
print(response)

  return ToolOutput(
  return ToolOutput(


assistant: The AatmaNirbhar Bharat-Production Linked Incentive (PLI) Scheme is a government initiative aimed at boosting domestic manufacturing in India by providing incentives to companies to increase their production in certain sectors.


In [12]:
# Follow-up sent to the same agent object; "those" refers back to the previous answer.
follow_up_question = "what are those certain sectors ?"
response = agent.chat(follow_up_question)
print(response)

assistant: The sectors covered under the AatmaNirbhar Bharat-Production Linked Incentive (PLI) Scheme include electronics, pharmaceuticals, automobiles and auto components, telecom and networking products, textile products, food products, high-efficiency solar PV modules, white goods like air conditioners and LED lights, specialty steel, and advanced chemistry cell (ACC) battery.
