In [1]:
import os
import sys
from dotenv import load_dotenv
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from langchain.agents import create_agent
from pathlib import Path

# Run load_dotenv() first so that any values it provides are in the process
# environment before the key lookup and the model constructors below.
load_dotenv()

# The key is read here but is not referenced again in the visible cells.
api_key = os.getenv("GOOGLE_API_KEY")

# Chat model and embedding model shared by the rest of the notebook.
model = ChatGoogleGenerativeAI(
    model="gemini-2.5-flash",
)
embeddings = GoogleGenerativeAIEmbeddings(
    model="models/text-embedding-004",
)

In [2]:
# Make the parent of the current working directory importable. The membership
# check keeps sys.path free of duplicates when the cell is re-run.
root_path = str(Path.cwd().resolve().parent)

if root_path not in sys.path:
    sys.path.append(root_path)

In [3]:
# Build the retrieval agent.
#
# This import stays in this cell instead of the top import cell because it is
# resolved through the directory appended to sys.path in the previous cell.
# NOTE(review): assumes the `tools` package lives in that parent directory — confirm.
from tools.retrieve_and_reply import fast_search_engine

# `create_agent` is already imported in the first cell, so it is not imported
# a second time here.

# Tools the agent may call; reused later when the memory-enabled agent is built.
tools = [fast_search_engine]

# System prompt telling the model to answer through the retrieval tool.
prompt = (
    "You have access to a tool that retrieves context from the financial report. "
    "Use the tool to help answer user queries."
)
agent = create_agent(model, tools, system_prompt=prompt)

In [4]:
# Question sent to the agent; it also names the statement to look in.
query = (
    "What is the net income of GOOG in three months ending 2025 Q3? Find it in the income statement."
)

# Single user turn wrapped in the message format passed to the agent.
user_turn = {"role": "user", "content": query}

# Stream the run and pretty-print the last message of every streamed event.
for state in agent.stream({"messages": [user_turn]}, stream_mode="values"):
    latest_message = state["messages"][-1]
    latest_message.pretty_print()


What is the net income of GOOG in three months ending 2025 Q3? Find it in the income statement.
Tool Calls:
  fast_search_engine (5310cd28-a306-4d8b-8dcc-c4c73cff5e57)
 Call ID: 5310cd28-a306-4d8b-8dcc-c4c73cff5e57
  Args:
    query: net income of GOOG in three months ending 2025 Q3
Name: fast_search_engine

The specific net income value for GOOG in the three months ending September 30, 2025, is not available in the provided context. The context indicates that a condensed consolidated statement of operations exists which details net income for that period, but the actual financial figure is not presented.

[{'type': 'text', 'text': "I'm sorry, but I couldn't find the specific net income value for GOOG in the three months ending 2025 Q3 within the financial report. While the report mentions a condensed consolidated statement of operations that includes net income for that period, the actual financial figure is not presented in the information I have access to.", 'extras': {'signature':

In [None]:
import json
from pathlib import Path

# Directory that holds the persisted store.
persist_dir = "../chroma_db"
docstore_path = Path(persist_dir, "docstore.json")

# Read the whole docstore file as JSON.
with docstore_path.open(encoding="utf-8") as handle:
    data = json.load(handle)

# A LlamaIndex docstore is usually laid out as data['docstore/data'][node_id];
# fall back to an empty mapping when that key is absent.
nodes_data = data.get("docstore/data", {})

print(f"总共有 {len(nodes_data)} 个节点。正在寻找表格节点...\n")



In [None]:
# Metadata keys that mark a node as a table node. The same keys are left out of
# the metadata listing below because their values are the large table payloads.
table_keys = ("table_df", "table_output")

for node_id, node_dict in nodes_data.items():
    # The node's actual content is stored under '__obj__'.
    metadata = node_dict.get("__obj__", {}).get("metadata", {})

    # Move on unless the node carries table_df or table_output.
    if not any(key in metadata for key in table_keys):
        continue

    print(f"--- 发现表格节点 (ID: {node_id}) ---")
    print(f"1. 自定义元数据 (你的公司名等):")
    # List every metadata entry except the table payloads, for easier reading.
    for meta_key, meta_value in metadata.items():
        if meta_key in table_keys:
            continue
        print(f"   - {meta_key}: {meta_value}")

    print(f"\n2. 表格内容预览 (table_output):")
    # Preview at most the first 200 characters of the table text.
    preview = metadata.get("table_output", "无文本源码")[:200]
    print(preview + "...")

    print(f"\n3. 结构化对象状态 (table_df):")
    if "table_df" in metadata:
        print("   - 状态: 已存在 (通常在 JSON 中会序列化为一种特殊格式)")

    # Only the first matching node is inspected.
    break

In [None]:
# Reset the result before scanning so that `found_df` is always defined once this
# cell has run, and a value left over from an earlier run is never mistaken for
# a fresh hit. It stays None when no source node carries a table_df.
found_df = None

# NOTE(review): `response` is not assigned in any visible cell; it presumably
# comes from a query call that has to run before this cell — confirm.
for node_with_score in response.source_nodes:
    metadata = node_with_score.node.metadata

    # Check whether the node metadata holds a table_df (per the original note,
    # one generated by Docling/MarkdownElementParser).
    if "table_df" in metadata:
        found_df = metadata["table_df"]
        print("Found the table in metadata!")
        # Stop at the first node that carries a table_df.
        break
    elif "table_output" in metadata:
        print("Find the table output")
    else:
        print("Find nothing")
    

Now that we have a basic Agent, we are shifting focus to two pillars: Memory Management and Context Engineering. The former ensures the Agent remembers the user's conversation history, while the latter refines the input context, providing the Agent with the necessary data to deliver precise and informed responses.

In [16]:
from langchain.agents import create_agent
from langchain.agents.middleware import SummarizationMiddleware
from langgraph.checkpoint.memory import InMemorySaver
from langchain_core.runnables import RunnableConfig

# In-memory checkpointer handed to the agent below.
checkpointer = InMemorySaver()

# Summarization middleware, configured with a ("tokens", 4000) trigger and a
# ("messages", 20) keep setting.
summarizer = SummarizationMiddleware(
    model,
    trigger=("tokens", 4000),
    keep=("messages", 20),
)

# Agent built from the same model and tools, plus the middleware and checkpointer.
agent_memory = create_agent(
    model,
    tools,
    middleware=[summarizer],
    checkpointer=checkpointer,
)

# Run configuration carrying the thread id passed along with each invoke call.
config: RunnableConfig = {
    "configurable": {"thread_id": "2"},
}

In [17]:
# User turns sent one after another with the same run configuration.
queries = [
    "hi, my name is bob",
    "What is the R&D expense of Alphabet in three months ended 2025Q3?",
    "How about the share repurchase?",
    "What did I asked about Google in the last 2 conversations?",
]

# Collects the full result of every invoke call, in query order.
responses = []

for question in queries:
    print(f"\n--- The question is: {question} ---")
    result = agent_memory.invoke({"messages": question}, config)
    responses.append(result)

    # Show only the final message of the returned result.
    final_message = result["messages"][-1]
    final_message.pretty_print()


--- The question is: hi, my name is bob ---

Hello Bob, how can I assist you today?

--- The question is: What is the R&D expense of Alphabet in three months ended 2025Q3? ---

The R&D expense of Alphabet for the three months ended 2025Q3 was $15,151 million.

--- The question is: How about the share repurchase? ---

Alphabet repurchased $11.6 billion of its Class A and Class C shares in the three months ended September 30, 2025.

--- The question is: What did I asked about Google in the last 2 conversations? ---

[{'type': 'text', 'text': "In the last two conversations, you asked about the R&D expense and the share repurchase of Alphabet (Google's parent company) for the three months ended 2025Q3.", 'extras': {'signature': 'Cv0HAXLI2nzUrp6aNBOOBSRAFF20HeyrIQzgubws4qhiIzjS5/swJP8uNkBqTHnC27iqIuQ2262nGhA9BbGv/pkNGWx4L2Nyn5Tw8rrl2l4sLoExLWmBmbGvyznUY5J/IWcsy2iAFFrPTKhgslL9tD64dzI7b/oX64cfElwa2LPR0hremplCP/vz87qjx2+efktiItLPDv8iHNcVscNLsF+2Dz1XvAlh9VT04/twyYbmc0rt5Mft6g2L8kLbYCUQXbd0zWXV