In [55]:
from llama_index.core import StorageContext, load_index_from_storage
from constants import embed_model

# Rebuild the storage context from the files persisted under "index/" and
# load the index from it, handing the project's embedding model to the loader.
storage_context = StorageContext.from_defaults(persist_dir="index/")
index = load_index_from_storage(storage_context, embed_model=embed_model)



Loading llama_index.core.storage.kvstore.simple_kvstore from index/docstore.json.
Loading llama_index.core.storage.kvstore.simple_kvstore from index/index_store.json.


In [56]:
from constants import llm_model
from llama_index.core.tools import QueryEngineTool

# Query engine over the loaded index (project LLM, similarity_top_k=5),
# wrapped as a named tool so it can be passed to the agent's tool list.
query_engine = index.as_query_engine(similarity_top_k=5, llm=llm_model)
rag_tool = QueryEngineTool.from_defaults(
    name="research_paper_query_engine_tool",
    description="A RAG engine with recent research papers",
    query_engine=query_engine,
)

In [57]:
from IPython.display import display, Markdown

def display_prompt_dictionary(prompt_dict):
    """Show every prompt in ``prompt_dict``.

    For each ``(key, prompt)`` pair, in mapping order, the key is rendered as
    a bold "Prompt key" Markdown heading and the text returned by
    ``prompt.get_template()`` is printed underneath it.

    Args:
        prompt_dict: Mapping whose values expose a ``get_template()`` method.

    Returns:
        None. Output goes to the notebook display and stdout.
    """
    for prompt_name, prompt_obj in prompt_dict.items():
        heading = Markdown(f"**Prompt key** {prompt_name}")
        display(heading)
        template_text = prompt_obj.get_template()
        print(template_text)

In [58]:
# Fetch the prompt templates exposed by the query engine and show each one
# (key as a Markdown heading, template text printed below it).
prompt_dict = query_engine.get_prompts()
display_prompt_dictionary(prompt_dict)

**Prompt key** response_synthesizer:text_qa_template

Context information is below.
---------------------
{context_str}
---------------------
Given the context information and not prior knowledge, answer the query.
Query: {query_str}
Answer: 


**Prompt key** response_synthesizer:refine_template

The original query is as follows: {query_str}
We have provided an existing answer: {existing_answer}
We have the opportunity to refine the existing answer (only if needed) with some more context below.
------------
{context_msg}
------------
Given the new context, refine the original answer to better answer the query. If the context isn't useful, return the original answer.
Refined Answer: 


In [59]:
from llama_index.core.tools import FunctionTool
from tools import download_pdf, fetch_arxiv_papers

# Wrap the helper functions imported from `tools` as named tools; the name and
# description strings are the metadata passed to FunctionTool.from_defaults.
download_pdf_tool = FunctionTool.from_defaults(
    download_pdf,
    description="python function that downloads a pdf file from a url link",
    name="download_pdf_file_tool",
)

# NOTE(review): the description below is a plain string, not an f-string, so
# "{max_results}" and "{title}" remain literal text. Presumably they refer to
# the parameter names of fetch_arxiv_papers -- confirm against tools.py.
fetch_arxiv_tool = FunctionTool.from_defaults(
    fetch_arxiv_papers,
    description="downloads the {max_results} recent papers regarding the topic {title} from arXiv",
    name="fetch_arxiv_papers_tool",
)


In [60]:
from llama_index.core.agent import ReActAgent 

# ReAct agent with access to the RAG tool plus the two function tools.
# max_iterations=6 is the iteration cap whose "max iterations" error the
# download cell further down checks for.
agent = ReActAgent.from_tools(
    llm=llm_model,
    tools=[rag_tool, download_pdf_tool, fetch_arxiv_tool],
    max_iterations=6,
    verbose=True,
)

In [61]:
# Prompt for a topic search. The single placeholder `{title}` (used twice) is
# filled in with str.format before the text is sent to the agent.
# Wording fixes relative to the earlier version: "knowlege" -> "knowledge",
# and the stray duplicated word in "summary, title, summary authors" removed
# so the field list requested from the RAG tool reads unambiguously.
# Re-run the cells below to refresh their recorded output.
query_template = """
I am interested in the topic {title} 
Find papers in your knowledge base relating to this topic.
Use the following template to query research_paper_query_engine_tool tool: 'Provide the summary, title, authors, and link to download papers related to {title}.'
If there are no corresponding papers, fetch the recent ones from arXiv.
IMPORTANT: Do not download papers unless the user explicitly asks for it.
"""

In [62]:
# Fill the topic into the prompt template and send it to the agent; the
# returned object's `.response` attribute is rendered in the next cell.
answer = agent.chat(query_template.format(title="NLP"))

> Running step 08d72ac4-af15-4687-95a6-3bf2d0691395. Step input: 
I am interested in the topic NLP 
Find papers in your knowlege base relating to this topic.
Use the following template to query research_paper_query_engine_tool tool: 'Provide the summary, title, summary authors, and link to download papers related to NLP.'
If there are no corresponding papers, fetch the recent ones from arXiv.
IMPORTANT: Do not download papers unless the user explicitly asks for it.

[1;3;38;5;200mThought: The current language of the user is English. I need to use a tool to help me find papers related to NLP in my knowledge base.
Action: research_paper_query_engine_tool
Action Input: {'input': 'Provide the summary, title, summary authors, and link to download papers related to NLP.'}
[0m[1;3;34mObservation: Empty Response
[0m> Running step 1298b3f3-de41-4b87-b078-7140c5bd4d4e. Step input: None
[1;3;38;5;200mThought: Since there was no response from the research paper query engine tool, I will fetch

In [63]:
# Render the agent's response text as Markdown; as the cell's last expression
# it becomes the cell output.
Markdown(answer.response)

1. **Title**: Reassessing Code Authorship Attribution in the Era of Language Models
   - **Authors**: Atish Kumar Dipongkor, Ziyu Yao, Kevin Moran
   - **Summary**: This paper explores Code Authorship Attribution (CAA) using transformer-based Language Models (LMs) on diverse datasets. It highlights the challenges and effectiveness of LMs in understanding stylometric code patterns.
   - **Link**: [PDF](http://arxiv.org/pdf/2506.17120v1)

2. **Title**: eSapiens: A Real-World NLP Framework for Multimodal Document Understanding and Enterprise Knowledge Processing
   - **Authors**: Isaac Shi, Zeyuan Li, Wenli Wang, Lewei He, Yang Yang, Tianyu Shi
   - **Summary**: eSapiens is a question-answering system for enterprise settings, integrating structured databases and unstructured textual corpora. It outperforms baselines in contextual relevance and generation quality.
   - **Link**: [PDF](http://arxiv.org/pdf/2506.16768v1)

3. **Title**: Initial Investigation of LLM-Assisted Development of Rule-Based Clinical NLP System
   - **Authors**: Jianlin Shi, Brian T. Bucher
   - **Summary**: This study proposes using LLMs in developing rule-based NLP systems for clinical settings, demonstrating high recall in identifying relevant text snippets and extracting key terms.
   - **Link**: [PDF](http://arxiv.org/pdf/2506.16628v1)

4. **Title**: Modeling Public Perceptions of Science in Media
   - **Authors**: Jiaxin Pei, Dustin Wright, Isabelle Augenstin, David Jurgens
   - **Summary**: The paper introduces a framework for modeling public perception of science in media, using NLP models to predict public engagement with scientific content.
   - **Link**: [PDF](http://arxiv.org/pdf/2506.16622v1)

5. **Title**: Do We Talk to Robots Like Therapists, and Do They Respond Accordingly? Language Alignment in AI Emotional Support
   - **Authors**: Sophie Chiang, Guy Laban, Hatice Gunes
   - **Summary**: This study examines the thematic alignment between robot and human therapist interactions, finding strong semantic overlap in subjects' disclosures and responses.
   - **Link**: [PDF](http://arxiv.org/pdf/2506.16473v1)

If you need more information or wish to download any of these papers, please let me know!

In [None]:
# Follow-up prompt asking the agent to download papers one at a time.
# The prompt itself lists no papers -- presumably it relies on the agent's
# chat history from the previous query; TODO confirm.
# Wording fix relative to the earlier version: "Explicitely" -> "Explicitly".
download_query_template = """Download the following papers:
for each paper:
1. Process one paper at a time
2. State which paper number you are processing out of the total number of papers
3. Complete a full download cycle before moving to the next paper
4. Explicitly state when moving to the next paper
5. Provide a final summary only when all papers are downloaded
"""

In [None]:
# Ask the agent to download the papers. If the failure message mentions
# "max iterations" (case-insensitive), report it with a short notice instead
# of a traceback; any other exception is re-raised unchanged.
try:
    agent.chat(download_query_template)
except Exception as exc:
    hit_iteration_cap = "max iterations" in str(exc).lower()
    if not hit_iteration_cap:
        raise
    print("Error: Maximum number of iterations reached.")