In [9]:
import nest_asyncio
# Applied before any agent call; presumably this lets the `await agent.run(...)`
# calls further down run inside the notebook's already-running event loop
# (NOTE(review): confirm against the nest_asyncio documentation).
nest_asyncio.apply()

In [10]:
from llama_index.core import StorageContext, load_index_from_storage
from constants import embed_model 

# 1. Restore the previously persisted index from disk.
# The StorageContext locates the folder that holds the saved data ("index/").
storage_context = StorageContext.from_defaults(persist_dir="index/")

# 2. Load the index into memory.
# The embedding model passed here must be the same one that was used when the
# index was built (assumed to be the one exported by constants.py — confirm).
index = load_index_from_storage(storage_context, embed_model=embed_model)

In [11]:
from llama_index.core.tools import QueryEngineTool 
from constants import llm_model

# 3. Build the query engine.
# It looks up relevant content in the index and uses the LLM to synthesize an answer.
query_engine = index.as_query_engine(
    similarity_top_k=5,  # number of most relevant text chunks retrieved per question
    llm=llm_model,       # LLM imported from constants.py, used for answer synthesis
)

# 4. Wrap the query engine as a tool for the agent.
# The agent calls this tool when it needs to look up research papers; the name
# identifies the tool and the description tells the agent what it is for.
rag_engine = QueryEngineTool.from_defaults(
    query_engine=query_engine,
    description="A RAG engine with recent research papers.",
    name="research_paper_query_engine_tool",
)

In [12]:
from IPython.display import display, Markdown   

# Display helper: shows every prompt in a dictionary, one after another.
def display_prompt_dict(prompt_dict):
    """Show each prompt's key as a Markdown heading, then print its template.

    Args:
        prompt_dict: Mapping of prompt key -> prompt object. Each value must
            provide a ``get_template()`` method.

    Returns:
        None. The key is rendered with ``display(Markdown(...))`` and the
        template text is written with ``print``.
    """
    for prompt_key, prompt_obj in prompt_dict.items():
        heading = Markdown(f"**Prompt key:** {prompt_key}")  # prompt type name
        display(heading)
        template_text = prompt_obj.get_template()             # raw template content
        print(template_text)

In [13]:
# 5. Inspect the default prompts.
# Fetches the prompts exposed by the query engine via get_prompts() and shows
# each one, so you can check what text is used to talk to the LLM.
prompts_dict = query_engine.get_prompts()
display_prompt_dict(prompts_dict)

**Prompt key:** response_synthesizer:text_qa_template

Context information is below.
---------------------
{context_str}
---------------------
Given the context information and not prior knowledge, answer the query.
Query: {query_str}
Answer: 


**Prompt key:** response_synthesizer:refine_template

The original query is as follows: {query_str}
We have provided an existing answer: {existing_answer}
We have the opportunity to refine the existing answer (only if needed) with some more context below.
------------
{context_msg}
------------
Given the new context, refine the original answer to better answer the query. If the context isn't useful, return the original answer.
Refined Answer: 


In [14]:
from tools import download_pdf, fetch_arxiv_papers
from llama_index.core.tools import FunctionTool

# 6. Turn plain Python functions into agent tools.
# FunctionTool wraps the hand-written helpers from tools.py so that the agent
# can discover and call them.

# Tool that downloads a PDF file from a link.
download_pdf_tool = FunctionTool.from_defaults(
    fn=download_pdf,
    description="python function that downloads a pdf file by link",
    name="download_pdf_file_tool",
)

# Tool that searches arXiv for recent papers.
# The braces in the description are literal text (this is not an f-string).
fetch_arxiv_papers_tool = FunctionTool.from_defaults(
    fn=fetch_arxiv_papers,
    description="download the {max_results} recent research papers regarding the topic {title} from arXiv",
    name="fetch_from_arxiv",
)

In [15]:
from llama_index.core.agent import ReActAgent
from llama_index.core.memory import ChatMemoryBuffer
from llama_index.llms.gemini import Gemini
from constants import GOOGLE_API_KEY

# 7. Conversation memory.
# Per the original note, the agent workflow is stateless by default, so a buffer
# is created explicitly to keep the chat history and let the agent remember
# earlier questions.
memory = ChatMemoryBuffer.from_defaults(token_limit=20000)

# Re-create the LLM with a larger output-token limit to avoid the
# "Response terminated early" error.
# NOTE: transport="rest" is deliberately not set; it was dropped to work around
# an await error.
llm_model = Gemini(
    model_name="models/gemini-2.5-flash",
    api_key=GOOGLE_API_KEY,
    max_tokens=8192,  # raised so the agent can produce long answers
)

# 8. Create the ReAct (Reasoning + Acting) agent.
# This is the main "brain": it reasons and decides which tool to use.
agent = ReActAgent(
    llm=llm_model,  # language model used for reasoning
    tools=[download_pdf_tool, rag_engine, fetch_arxiv_papers_tool],  # tools available to the agent
    streaming=False,  # off to avoid "IndexError: list index out of range" with Gemini tool calls
    verbose=True,     # print the reasoning process to make debugging easier
)

  llm_model = Gemini(


In [16]:
# 9. CHẠY THỬ CHẾ ĐỘ CHATBOT (Fix lỗi Loop)
# Để chatbot hoạt động ổn định trong Notebook, ta khởi tạo lại LLM/Agent ngay tại thời điểm chạy
# giúp nó bám vào đúng Event Loop hiện tại.

# Tạo lại LLM & Agent cục bộ
local_llm = Gemini(
    api_key=GOOGLE_API_KEY,
    model_name="models/gemini-2.5-flash",
    max_tokens=8192
)

local_agent = ReActAgent(
    tools=[download_pdf_tool, rag_engine, fetch_arxiv_papers_tool],
    llm=local_llm,
    verbose=True,
    memory=memory # Vẫn dùng chung bộ nhớ global để nhớ lịch sử
)

# Chạy chat
response = await local_agent.run(
    user_msg="Summarize the paper about GLM-130B", 
    max_iterations=10
)
print(response)

  local_llm = Gemini(


The paper "Evaluating Accounting Reasoning Capabilities of Large Language Models" discusses the integration of large language models into professional domains like accounting. It defines vertical domain accounting reasoning and proposes evaluation criteria. The paper evaluates several models, including GLM-130B (along with GLM-6B, GLM-4, and OpenAI GPT-4), on accounting reasoning tasks. The findings indicate that prompt design significantly impacts performance, with GPT-4 showing the strongest capabilities. However, the paper concludes that current models, including GLM-130B, are not yet sufficient for real-world enterprise accounting and require further optimization.


In [17]:
# 10. Prompt template for a more involved request.
# It instructs the agent to search the local knowledge base first and to fall
# back to arXiv only when nothing is found there. {topic} is filled in with
# str.format() by the cells that use this template.
query_template = """ I am interested in {topic}.
 Find papers in your knowledge database related to this topic.
 Use the following template to query research_paper_query_engine_tool tool: 'Provide title,summary,authors and link to download for paper related to {topic}'.
 If there are none, could you fetch the recent one from arXiv?
 IMPORTANT: do not download papers unless the user asks for it explicitly.
 """

In [18]:
# 11. Thử nghiệm với chủ đề "Visual Generation..." (Fix lỗi Loop bằng cách tạo lại agent)
local_llm_visual = Gemini(
    api_key=GOOGLE_API_KEY,
    model_name="models/gemini-2.5-flash",
    max_tokens=8192
)

# Tạo Agent mới cho mỗi lần chạy để đảm bảo loop sạch
local_agent_visual = ReActAgent(
    tools=[download_pdf_tool, rag_engine, fetch_arxiv_papers_tool],
    llm=local_llm_visual,
    verbose=True,
    memory=memory
)

answer = await local_agent_visual.run(
    user_msg=query_template.format(topic="Visual Generation Unlocks Human-Like Reasoning through Multimodal World Models"),
    max_iterations=10
)
print(answer)

  local_llm_visual = Gemini(


Title: Visual Generation Unlocks Human-Like Reasoning through Multimodal World Models
Authors: Jialong Wu, Xiaoying Zhang, Hongyi Yuan, Xiangcheng Zhang, Tianhao Huang, Changjing He, Chaoyi Deng, Renrui Zhang, Youbin Wu, Mingsheng Long
Summary: Humans construct internal world models and reason by manipulating the concepts within these models. Recent advances in AI, particularly chain-of-thought (CoT) reasoning, approximate such human cognitive abilities, where world models are believed to be embedded within large language models. Expert-level performance in formal and abstract domains such as mathematics and programming has been achieved in current systems by relying predominantly on verbal reasoning. However, they still lag far behind humans in domains like physical and spatial intelligence, which require richer representations and prior knowledge. The emergence of unified multimodal models (UMMs) capable of both verbal and visual generation has therefore sparked interest in more huma

In [19]:
from IPython.display import display, Markdown

# Render the answer as Markdown so it is easier to read than the raw print above.
display(Markdown(str(answer)))

Title: Visual Generation Unlocks Human-Like Reasoning through Multimodal World Models
Authors: Jialong Wu, Xiaoying Zhang, Hongyi Yuan, Xiangcheng Zhang, Tianhao Huang, Changjing He, Chaoyi Deng, Renrui Zhang, Youbin Wu, Mingsheng Long
Summary: Humans construct internal world models and reason by manipulating the concepts within these models. Recent advances in AI, particularly chain-of-thought (CoT) reasoning, approximate such human cognitive abilities, where world models are believed to be embedded within large language models. Expert-level performance in formal and abstract domains such as mathematics and programming has been achieved in current systems by relying predominantly on verbal reasoning. However, they still lag far behind humans in domains like physical and spatial intelligence, which require richer representations and prior knowledge. The emergence of unified multimodal models (UMMs) capable of both verbal and visual generation has therefore sparked interest in more human-like reasoning grounded in complementary multimodal pathways, though their benefits remain unclear. From a world-model perspective, this paper presents the first principled study of when and how visual generation benefits reasoning. Our key position is the visual superiority hypothesis: for certain tasks--particularly those grounded in the physical world--visual generation more naturally serves as world models, whereas purely verbal world models encounter bottlenecks arising from representational limitations or insufficient prior knowledge. Theoretically, we formalize internal world modeling as a core component of CoT reasoning and analyze distinctions among different forms of world models. Empirically, we identify tasks that necessitate interleaved visual-verbal CoT reasoning, constructing a new evaluation suite, VisWorld-Eval. Controlled experiments on a state-of-the-art UMM show that interleaved CoT significantly outperforms purely verbal CoT on tasks that favor visual world modeling, but offers no clear advantage otherwise. Together, this work clarifies the potential of multimodal world modeling for more powerful, human-like multimodal AI.
Link to download: https://arxiv.org/pdf/2601.19834v1

In [20]:
# 12. Thử nghiệm tiếp với chủ đề "EgoHandICL..."
# Tiếp tục quy trình tìm kiếm và tổng hợp thông tin
answer = await agent.run(
    user_msg=query_template.format(topic="EgoHandICL: Egocentric 3D Hand Reconstruction with In-Context Learning"),
    max_iterations=10,
    memory=memory
)
print(answer)

I found one paper related to "EgoHandICL: Egocentric 3D Hand Reconstruction with In-Context Learning" from arXiv:

**Title:** EgoHandICL: Egocentric 3D Hand Reconstruction with In-Context Learning
**Authors:** Binzhu Xie, Shi Qiu, Sicheng Zhang, Yinqiao Wang, Hao Xu, Muzammal Naseer, Chi-Wing Fu, Pheng-Ann Heng
**Summary:** Robust 3D hand reconstruction in egocentric vision is challenging due to depth ambiguity, self-occlusion, and complex hand-object interactions. Prior methods mitigate these issues by scaling training data or adding auxiliary cues, but they often struggle in unseen contexts. We present EgoHandICL, the first in-context learning (ICL) framework for 3D hand reconstruction that improves semantic alignment, visual consistency, and robustness under challenging egocentric conditions. EgoHandICL introduces complementary exemplar retrieval guided by vision-language models (VLMs), an ICL-tailored tokenizer for multimodal context, and a masked autoencoder (MAE)-based architectu

In [21]:
# Show the second result, rendered as Markdown.
display(Markdown(str(answer)))

I found one paper related to "EgoHandICL: Egocentric 3D Hand Reconstruction with In-Context Learning" from arXiv:

**Title:** EgoHandICL: Egocentric 3D Hand Reconstruction with In-Context Learning
**Authors:** Binzhu Xie, Shi Qiu, Sicheng Zhang, Yinqiao Wang, Hao Xu, Muzammal Naseer, Chi-Wing Fu, Pheng-Ann Heng
**Summary:** Robust 3D hand reconstruction in egocentric vision is challenging due to depth ambiguity, self-occlusion, and complex hand-object interactions. Prior methods mitigate these issues by scaling training data or adding auxiliary cues, but they often struggle in unseen contexts. We present EgoHandICL, the first in-context learning (ICL) framework for 3D hand reconstruction that improves semantic alignment, visual consistency, and robustness under challenging egocentric conditions. EgoHandICL introduces complementary exemplar retrieval guided by vision-language models (VLMs), an ICL-tailored tokenizer for multimodal context, and a masked autoencoder (MAE)-based architecture trained with hand-guided geometric and perceptual objectives. Experiments on ARCTIC and EgoExo4D show consistent gains over state-of-the-art methods. We also demonstrate real-world generalization and improve EgoVLM hand-object interaction reasoning by using reconstructed hands as visual prompts.
**Link to arXiv:** http://arxiv.org/abs/2601.19850v1

In [22]:
# 13. Kiểm tra khả năng ghi nhớ và hành động
# Yêu cầu tải tất cả các file PDF đã tìm được ở các bước trước.
# Nhờ có tham số `memory=memory`, Agent sẽ nhớ được các bài báo đã thảo luận.
answer = await agent.run(
    user_msg="Download all the papers you mentioned in previous turns.",
    max_iterations=10,
    memory=memory
)
print(answer)

The paper "EgoHandICL: Egocentric 3D Hand Reconstruction with In-Context Learning" has been successfully downloaded and saved as `papers\EgoHandICL_Egocentric_3D_Hand_Reconstruction_with_In-Context_Learning.pdf`.


In [23]:
# 14. Thử nghiệm thêm chủ đề "Quantum Computing"
# Agent tiếp tục quy trình: Search -> Fetch (nếu cần) -> Answer
answer = await agent.run(
    user_msg=query_template.format(topic="Quantum Computing"),
    max_iterations=10,
    memory=memory
)
print(answer)

I found the following recent papers related to "Quantum Computing" from arXiv:

**1. Title:** Quantum Memory and Autonomous Computation in Two Dimensions
**Authors:** Gesa Dünnweber, Georgios Styliaris, Rahul Trivedi
**Summary:** Standard approaches to quantum error correction (QEC) require active maintenance using measurements and classical processing. The possibility of passive QEC has so far only been established in an unphysical number of spatial dimensions. In this work, we present a simple method for autonomous QEC in two spatial dimensions, formulated as a quantum cellular automaton with a fixed, local and translation-invariant update rule. The construction uses hierarchical, self-simulating control elements based on the classical schemes from the seminal results of Gács (1986, 1989) together with a measurement-free concatenated code. We analyze the system under a local noise model and prove a noise threshold below which the logical errors are suppressed arbitrarily with increas

In [24]:
# 15. Show the search result for the Quantum Computing topic as Markdown.
display(Markdown(str(answer)))

I found the following recent papers related to "Quantum Computing" from arXiv:

**1. Title:** Quantum Memory and Autonomous Computation in Two Dimensions
**Authors:** Gesa Dünnweber, Georgios Styliaris, Rahul Trivedi
**Summary:** Standard approaches to quantum error correction (QEC) require active maintenance using measurements and classical processing. The possibility of passive QEC has so far only been established in an unphysical number of spatial dimensions. In this work, we present a simple method for autonomous QEC in two spatial dimensions, formulated as a quantum cellular automaton with a fixed, local and translation-invariant update rule. The construction uses hierarchical, self-simulating control elements based on the classical schemes from the seminal results of Gács (1986, 1989) together with a measurement-free concatenated code. We analyze the system under a local noise model and prove a noise threshold below which the logical errors are suppressed arbitrarily with increasing system size and the memory lifetime diverges in the thermodynamic limit. The scheme admits a continuous-time implementation as a time-independent, translation-invariant local Lindbladian with engineered dissipative jump operators. Further, the recursive nature of our protocol allows for the fault-tolerant encoding of arbitrary quantum circuits and thus constitutes a self-correcting universal quantum computer.
**Link to download:** https://arxiv.org/pdf/2601.20818v1

**2. Title:** Echo Cross Resonance gate error budgeting on a superconducting quantum processor
**Authors:** Travers Ward, Russell P. Rundle, Richard Bounds, Norbert Deak, Gavin Dold, Apoorva Hegde, William Howard, Ailsa Keyser, George B. Long, Benjamin Rogers, Jonathan J. Burnett, Bryn A. Bell
**Summary:** High fidelity quantum operations are key to enabling fault-tolerant quantum computation. Superconducting quantum processors have demonstrated high-fidelity operations, but on larger devices there is commonly a broad distribution of qualities, with the low-performing tail affecting near-term performance and applications. Here we present an error budgeting procedure for the native two-qubit operation on a 32-qubit superconducting-qubit-based quantum computer, the OQC Toshiko gen-1 system. We estimate the prevalence of different forms of error such as coherent error and control qubit leakage, then apply error suppression strategies based on the most significant sources of error, making use of pulse-shaping and additional compensating gates. These techniques require no additional hardware overhead and little additional calibration, making them suitable for routine adoption. An average reduction of 3.7x in error rate for two qubit operations is shown across a chain of 16 qubits, with the median error rate improving from 4.6$\%$ to 1.2$\%$ as measured by interleaved randomized benchmarking. The largest improvements are seen on previously under-performing qubit pairs, demonstrating the importance of practical error suppression in reducing the low-performing tail of gate qualities and achieving consistently good performance across a device.
**Link to download:** https://arxiv.org/pdf/2601.20458v1

**3. Title:** Scalable Multi-QPU Circuit Design for Dicke State Preparation: Optimizing Communication Complexity and Local Circuit Costs
**Authors:** Ziheng Chen, Junhong Nie, Xiaoming Sun, Jialin Zhang, Jiadong Zhu
**Summary:** Preparing large-qubit Dicke states is of broad interest in quantum computing and quantum metrology. However, the number of qubits available on a single quantum processing unit (QPU) is limited -- motivating the distributed preparation of such states across multiple QPUs as a practical approach to scalability. In this article, we investigate the distributed preparation of $n$-qubit $k$-excitation Dicke states $D(n,k)$ across a general number $p$ of QPUs, presenting a distributed quantum circuit (each QPU hosting approximately $\\lceil n/p \\rceil$ qubits) that prepares the state with communication complexity $O(p \\log k)$, circuit size $O(nk)$, and circuit depth $O\\left(p^2 k + \\log k \\log (n/k)\\right)$. To the best of our knowledge, this is the first construction to simultaneously achieve logarithmic communication complexity and polynomial circuit size and depth. We also establish a lower bound on the communication complexity of $p$-QPU distributed state preparation for a general target state. This lower bound is formulated in terms of the canonical polyadic rank (CP-rank) of a tensor associated with the target state. For the special case $p = 2$, we explicitly compute the CP-rank corresponding to the Dicke state $D(n,k)$ and derive a lower bound of $\\lceil\\log (k + 1)\\rceil$, which shows that the communication complexity of our construction matches this fundamental limit.
**Link to download:** https://arxiv.org/pdf/2601.20393v1

**4. Title:** A Quantum Photonic Approach to Graph Coloring
**Authors:** Jesua Epequin, Pascale Bendotti, Joseph Mikael
**Summary:** Gaussian Boson Sampling (GBS) is a quantum computational model that leverages linear optics to solve sampling problems believed to be classically intractable. Recent experimental breakthroughs have demonstrated quantum advantage using GBS, motivating its application to real-world combinatorial optimization problems. In this work, we reformulate the graph coloring problem as an integer programming problem using the independent set formulation. This enables the use of GBS to identify cliques in the complement graph, which correspond to independent sets in the original graph. Our method is benchmarked against classical heuristics and exact algorithms on two sets of instances: Erdős-Rényi random graphs and graphs derived from a smart-charging use case. The results demonstrate that GBS can provide competitive solutions, highlighting its potential as a quantum-enhanced heuristic for graph-based optimization.
**Link to download:** https://arxiv.org/pdf/2601.20263v1

**5. Title:** Quantum capacitance and parity switching of a quantum-dot-based Kitaev chain
**Authors:** Chun-Xiao Liu
**Summary:** An array of quantum dots coupled via superconductivity provides a new platform for creating Kitaev chains with Majorana zero modes, offering a promising avenue toward topological quantum computing. In this work, we theoretically study the quantum capacitance of a minimal Kitaev chain weakly coupled to an external normal lead. We find that in the open regime, charge stability diagrams of quantum capcaitance can help to identify the sweet spot of a Kitaev chain, consistent with tunnel spectroscopy. Moreover, the quantum capacitance of a single quantum dot coupled to Andreev bound states reveals the interplay between two distinct parity switching mechanisms: coupling to an external normal lead and intrinsic quasiparticle poisoning. Our work provides useful physical insights into the quantum capacitance and parity dynamics in a quantum-dot-based Kitaev chain device.
**Link to download:** https://arxiv.org/pdf/2601.20252v1

In [25]:
# 16. Yêu cầu tải hàng loạt với hướng dẫn chi tiết (Chain of Thought)
# Prompt này yêu cầu Agent xử lý từng bước (step-by-step) để tránh bị quá tải hoặc bỏ sót file
answer = await agent.run(
    user_msg="""Download the following papers:
    For each paper: 
    1. Process one paper at a time 
    2. State which paper number you are processing out of the total 
    3. Complete a full download cycle before moving to the next paper 
    4. Explicitly state when moving to the next paper 
    5. Provide a final summary only after all papers are download """,
    max_iterations=10,
    memory=memory
)
print(answer)

All 5 papers have been successfully downloaded. Here is a summary of the downloaded papers:

1.  **Quantum Memory and Autonomous Computation in Two Dimensions**
    *   **File Name:** `papers\Quantum_Memory_and_Autonomous_Computation_in_Two_Dimensions.pdf`
    *   **Summary:** This paper presents a method for autonomous quantum error correction in two spatial dimensions using a quantum cellular automaton. It aims to achieve passive QEC, unlike standard approaches requiring active maintenance, and proves a noise threshold for suppressing logical errors.

2.  **Echo Cross Resonance gate error budgeting on a superconducting quantum processor**
    *   **File Name:** `papers\Echo_Cross_Resonance_gate_error_budgeting_on_a_superconducting_quantum_processor.pdf`
    *   **Summary:** This work details an error budgeting procedure for two-qubit operations on a 32-qubit superconducting quantum computer. It identifies error sources and applies suppression strategies, resulting in a significant re