In [28]:
#!/usr/bin/env python
# coding: utf-8

# ## Step 1: 环境准备与配置

import os
from dotenv import load_dotenv
load_dotenv()  # 自动加载 .env 文件中配置的环境变量

# Verify that the expected environment variables were loaded (optional).
# Notebook outputs are saved together with the file, so secret values must
# never be echoed verbatim: only report whether they are set.
def mask_env_value(name, value):
    """Return a printable form of an environment variable's value.

    Args:
        name: Name of the environment variable.
        value: Its value as returned by ``os.getenv`` (``None`` when unset).

    Returns:
        ``value`` unchanged when it is unset/empty or when ``name`` does not
        look like a credential; otherwise the placeholder ``"<redacted>"``.
    """
    if not value:
        # Keep None / "" visible so a missing variable is still easy to spot.
        return value
    if name.upper().endswith(("_KEY", "_TOKEN", "_SECRET", "_PASSWORD")):
        return "<redacted>"
    return value


for _env_name in (
    "OPENAI_API_KEY",
    "LANGCHAIN_TRACING_V2",
    "LANGCHAIN_ENDPOINT",
    "LANGCHAIN_API_KEY",
    "LANGCHAIN_PROJECT",
):
    print(f"{_env_name}:", mask_env_value(_env_name, os.getenv(_env_name)))

from langchain_openai import ChatOpenAI
from langchain.embeddings import OpenAIEmbeddings
from langchain_core.vectorstores import InMemoryVectorStore
from langchain_core.documents import Document
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain import hub
from typing_extensions import List, TypedDict
from langgraph.graph import START, StateGraph

OPENAI_API_KEY: <redacted>
LANGCHAIN_TRACING_V2: true
LANGCHAIN_ENDPOINT: https://api.smith.langchain.com
LANGCHAIN_API_KEY: <redacted>
LANGCHAIN_PROJECT: 20241209_Lang_Travel_V2


In [29]:
# ## Step 2: Initialise the LLM, the embeddings and the vector database

# Chat model used by every LLM call in this notebook (temperature fixed at 0).
llm = ChatOpenAI(
    temperature=0,
)

# Embedding model handed to the vector store.
# NOTE: a later cell assigns `embeddings` again with a different model name.
embeddings = OpenAIEmbeddings(
    model="text-embedding-ada-002",
)


In [30]:
def intent_classification(question: str) -> bool:
    """
    Ask the LLM whether the user's question is about finding a hotel.

    A yes/no prompt (written in Chinese) is sent to the module-level ``llm``
    and the reply is parsed for an affirmative "是" or a negative "否"/"不是".
    This is a deliberately simple example; a production system needs a richer
    prompt design and testing.

    Args:
        question: The raw user question.

    Returns:
        True when the reply is affirmative, False when it is negative or
        contains no recognisable answer.
    """
    prompt_text = f"用户的问题：{question}\n这个问题是否与寻找酒店相关？回答是或否。"
    result = llm.invoke(prompt_text)
    answer = result.content.strip()

    # The prompt itself contains "是否"; drop any echo of it so that it is not
    # mistaken for either a "yes" or a "no".
    answer = answer.replace("是否", "")

    # Negative markers take precedence: "不是" contains "是", so a plain
    # substring test for "是" would classify a "no" as hotel-related.
    if "否" in answer or "不是" in answer:
        return False
    return "是" in answer

In [31]:
import getpass

# Optional: prompt for the API key interactively instead of reading it from .env
# os.environ["OPENAI_API_KEY"] = getpass.getpass()

from langchain_openai import OpenAIEmbeddings

# Embedding model used by the vector store created in the next cell;
# this assignment replaces the `embeddings` object created in Step 2.
embeddings = OpenAIEmbeddings(
    model="text-embedding-3-large",
)

In [32]:
from langchain_core.vectorstores import InMemoryVectorStore

# Vector store that uses `embeddings` to embed the documents added to it.
vector_store = InMemoryVectorStore(embedding=embeddings)

In [None]:
import bs4
from langchain import hub
from langchain_community.document_loaders import WebBaseLoader
from langchain_core.documents import Document
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langgraph.graph import START, StateGraph
from typing_extensions import List, TypedDict

# Load the web page, restricting HTML parsing to the listed CSS classes.
# NOTE: the class names must match the target site's markup; update them if
# the page structure changes.
loader = WebBaseLoader(
    web_paths=("https://travel.usnews.com/Washington_DC/Things_To_Do/",),
    bs_kwargs={
        "parse_only": bs4.SoupStrainer(
            class_=("content-container", "title", "header"),
        ),
    },
)
docs = loader.load()

# Split the loaded documents into chunks of 1000 with an overlap of 200.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
)
all_splits = text_splitter.split_documents(docs)

# Add the chunks to the vector store (return value is intentionally discarded).
_ = vector_store.add_documents(documents=all_splits)

# Pull the question-answering prompt from the LangChain hub.
prompt = hub.pull("rlm/rag-prompt")


# State shared by the steps of the application
class State(TypedDict):
    """Typed dictionary read and updated by `retrieve` and `generate`."""

    # Question text: searched for by `retrieve`, passed to the prompt by `generate`.
    question: str
    # Documents stored by `retrieve` and consumed by `generate`.
    context: List[Document]
    # Answer text stored by `generate`.
    answer: str


# Define application steps
def retrieve(state: State):
    """Run a similarity search for the question in the state.

    Args:
        state: Mapping that provides the ``"question"`` key.

    Returns:
        A dict with the single key ``"context"`` holding whatever
        ``vector_store.similarity_search`` returned for the question.
    """
    query = state["question"]
    matches = vector_store.similarity_search(query)
    return {"context": matches}


def generate(state: State):
    """Build the prompt from the retrieved context and ask the LLM.

    Args:
        state: Mapping that provides ``"question"`` and ``"context"``; each
            context item must expose a ``page_content`` attribute.

    Returns:
        A dict with the single key ``"answer"`` holding the ``content`` of
        the LLM response.
    """
    # Concatenate the page contents, separated by a blank line.
    context_text = "\n\n".join([doc.page_content for doc in state["context"]])
    prompt_inputs = {"question": state["question"], "context": context_text}
    llm_reply = llm.invoke(prompt.invoke(prompt_inputs))
    return {"answer": llm_reply.content}


# Compile application and test
# Register `retrieve` and `generate` as a sequence and make `retrieve` the
# entry point of the graph.
graph_builder = StateGraph(State).add_sequence([retrieve, generate])
graph_builder.add_edge(START, "retrieve")
# The builder itself is not runnable: compile it so that the following cell
# can call `graph.invoke(...)`. Without this line `graph` is undefined on a
# fresh kernel.
graph = graph_builder.compile()

OPENAI_API_KEY: <redacted>
LANGCHAIN_TRACING_V2: true
LANGCHAIN_ENDPOINT: https://api.smith.langchain.com
LANGCHAIN_API_KEY: <redacted>
LANGCHAIN_PROJECT: 20241209_Lang_Travel_V2


In [None]:
# Run the pipeline for a single question and print the generated answer.
# NOTE(review): `graph` has to be the compiled graph (the result of
# `graph_builder.compile()`); confirm it is defined before this cell runs.
# NOTE(review): the page indexed above is a Washington, DC travel guide, so
# this question looks left over from the original tutorial - confirm.
response = graph.invoke({"question": "What is Task Decomposition?"})
print(response["answer"])

Task decomposition is a technique used to break down complex tasks into smaller and simpler steps. It can be done through prompting techniques like Chain of Thought or Tree of Thoughts, which help models decompose hard tasks into manageable steps. Task decomposition can also be achieved through simple prompting, task-specific instructions, or human inputs.


用户输入
    ↓
意图识别与分类
    ↓
┌───────────────┐
│  是酒店相关？  │
└─────┬─────────┘
      │
   是 ├───── 否 ──────┐
      │               │
      ▼               ▼
酒店数据库搜索  景点数据库搜索
      │               │
      ▼               ▼
获取匹配信息        获取相关内容
      │               │
      └─────┬─────────┘
            │
            ▼
结合上下文生成回答
            │
            ▼
        返回用户


## Ref:
https://lilianweng.github.io/posts/2023-06-23-agent/
https://python.langchain.com/docs/tutorials/rag/
