In [None]:
import nest_asyncio

# Patch asyncio so event loops can be nested: the Jupyter kernel already runs
# a loop, and a later cell calls asyncio.run() from inside it.
nest_asyncio.apply()

In [None]:
from app.enums.embedding import EmbeddingDriverEnum
from app.rag.embedding.embeeding_model import EmbeddingFactory

# Configure the project's embedding factory once, up front; duck_search later
# obtains the embedding model through EmbeddingFactory.get().
# NOTE(review): driver=MAC presumably selects Apple-silicon acceleration — confirm.
EmbeddingFactory.init(
    {
        "provider": "huggingface",
        "model": "BAAI/bge-m3",
        "driver": EmbeddingDriverEnum.MAC,
    }
)

In [None]:
import re
from typing import Literal

import orjson
import rich
from chromadb.config import Settings as ChromaSettings
from langchain_community.document_loaders import PlaywrightURLLoader, WebBaseLoader
from langchain_community.tools.ddg_search.tool import DuckDuckGoSearchResults
from langchain_community.vectorstores import Chroma
from langchain_core.documents import Document
from langchain_core.tools import tool
from langchain_text_splitters import RecursiveCharacterTextSplitter

from app.rag.llm.tokenizers import TokenCounter

# Notebook-level setting: which page loader duck_search hands to get_web_loader
# when fetching the pages behind the search results.
web_loader_type: Literal["playwright", "webbase"] = "webbase"


def get_web_loader(web_loader_type: Literal["playwright", "webbase"], urls: list[str]) -> WebBaseLoader:
    match web_loader_type:
        case "playwright":
            return PlaywrightURLLoader(urls, headless=True)
        case "webbase":
            return WebBaseLoader(urls)
        case _:
            raise ValueError(f"Invalid web loader type: {web_loader_type}")


def get_num_tokens(docs: list[Document]) -> int:
    """Add up TokenCounter's token estimate for every document's page content."""
    total = 0
    for document in docs:
        total += TokenCounter.estimate_tokens(document.page_content)
    return total


def clean_text(text: str) -> str:
    """Normalise whitespace in scraped text.

    Leading/trailing whitespace is stripped, any whitespace run that contains
    two or more newlines is collapsed to a single newline, and runs of spaces
    or tabs are collapsed to a single space.
    """
    stripped = text.strip()
    # Mixed blank lines such as "\n \n \n" become one "\n".
    without_blank_lines = re.sub(r"(?:\s*\n\s*){2,}", "\n", stripped)
    # Merge consecutive spaces and tabs.
    return re.sub(r"[ \t]+", " ", without_blank_lines)


# Web-search tool: DuckDuckGo search -> load the result pages -> clean and chunk
# the text -> embed the chunks into a throwaway Chroma collection -> return the
# chunks most similar to the query as one string.
#
# The docstring is left verbatim because @tool publishes it as the tool
# description; the detailed documentation therefore lives in these comments.
@tool
async def duck_search(query: str) -> str:
    """Search the web for information."""
    import uuid  # local import: only needed for the per-call collection name

    no_results = "No search results found"

    raw_results = await DuckDuckGoSearchResults(output_format="json", max_results=10).arun(query)
    if not raw_results:
        return no_results
    search_results = orjson.loads(raw_results)
    # The JSON payload can be a non-empty string that decodes to an empty list.
    if not search_results:
        return no_results

    web_loader = get_web_loader(web_loader_type, [x["link"] for x in search_results])
    docs = [
        Document(page_content=clean_text(doc.page_content), metadata=doc.metadata)
        async for doc in web_loader.alazy_load()
    ]
    docs = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200).split_documents(docs)
    # Nothing could be loaded/chunked: bail out instead of handing Chroma an empty batch.
    if not docs:
        return no_results

    # A unique collection per call keeps chunks from earlier queries out of this
    # query's similarity search (the default collection name is shared).
    vectorstore = Chroma.from_documents(
        docs,
        embedding=EmbeddingFactory.get(),
        collection_name=f"duck_search_{uuid.uuid4().hex}",
        client_settings=ChromaSettings(anonymized_telemetry=False),
    )
    try:
        retrieved_docs = vectorstore.similarity_search(query, k=10)
    finally:
        # The collection is single-use; drop it so repeated calls do not pile up.
        vectorstore.delete_collection()

    rich.print(retrieved_docs)
    # Return text, as the signature promises, built from the retrieved chunks.
    return "\n\n".join(doc.page_content for doc in retrieved_docs)


# Smoke-test the tool with a sample query; as the cell's last expression, the
# awaited result is shown as the cell output.
await duck_search.ainvoke("What is the capital of France?")

In [None]:
import asyncio

import aiohttp
from langchain_core.tools import tool
from langgraph.graph import END, StateGraph

# --------------------------
# Step 1: Define the async tool
# --------------------------


# Fetches the caller's public IP address from the ipify JSON endpoint and
# returns it embedded in a short sentence.
# The docstring ("get the public IP") is left verbatim because @tool publishes
# it as the tool description.
@tool
async def get_ip() -> str:
    """获取公网 IP"""
    # Bound the whole request so a stalled connection cannot hang the graph run.
    timeout = aiohttp.ClientTimeout(total=10)
    async with aiohttp.ClientSession(timeout=timeout) as session:
        async with session.get("https://api.ipify.org?format=json") as resp:
            # Surface HTTP failures as such instead of a JSON-decoding or KeyError.
            resp.raise_for_status()
            data = await resp.json()
            return f"Your IP is {data['ip']}"


# --------------------------
# Step 2: Call the async tool from a node function
# --------------------------


async def tool_node(state: dict) -> dict:
    """Graph node: run the ``get_ip`` tool and record its output in the state.

    Args:
        state: The graph state; it is updated in place.

    Returns:
        The same ``state`` dict with the tool output stored under ``"ip_result"``.
    """
    print("[tool_node] Running async tool...")
    # get_ip declares no parameters, so invoke it with an empty argument
    # mapping rather than a placeholder string it has no field to bind to.
    result = await get_ip.ainvoke({})
    print(f"[tool_node] Tool result: {result}")
    state["ip_result"] = result
    return state


# --------------------------
# Step 3: Build the async LangGraph
# --------------------------


def build_graph():
    """Assemble and compile a single-node graph: entry point -> "get_ip" -> END."""
    node_name = "get_ip"
    workflow = StateGraph(dict)
    workflow.add_node(node_name, tool_node)
    workflow.set_entry_point(node_name)
    workflow.add_edge(node_name, END)
    compiled = workflow.compile()
    return compiled


# --------------------------
# Step 4: Run the graph asynchronously
# --------------------------


async def main():
    """Build the graph, run it once from an empty state, and print the final state."""
    final_state = await build_graph().ainvoke({})
    print("[main] Final state:", final_state)


# Runs the demo. Inside a notebook the kernel's loop is already running, so this
# call depends on the nest_asyncio.apply() patch from the first cell.
asyncio.run(main())