In [1]:
from dotenv import load_dotenv


load_dotenv()

True

In [2]:
from langchain_core.prompts import PromptTemplate
from langchain_openai import ChatOpenAI

# 查询重写

In [3]:
re_write_llm = ChatOpenAI(model="qwen-max", temperature=0, max_tokens=4000)

# Create a prompt template for query rewriting
query_rewrite_template = """You are an AI assistant tasked with reformulating user queries to improve retrieval in a RAG system. 
Given the original query, rewrite it to be more specific, detailed, and likely to retrieve relevant information.

Original query: {original_query}

Rewritten query:"""

query_rewrite_prompt = PromptTemplate(
    input_variables=["original_query"], template=query_rewrite_template
)

# Create an LLMChain for query rewriting
query_rewriter = query_rewrite_prompt | re_write_llm


def rewrite_query(original_query):
    """
    Rewrite the original query to improve retrieval.

    Args:
    original_query (str): The original user query

    Returns:
    str: The rewritten query
    """
    response = query_rewriter.invoke(original_query)
    return response.content

In [5]:
# example query over the understanding climate change dataset
original_query = "气候变化对环境的影响是什么？"
rewritten_query = rewrite_query(original_query)
print("原始查询: ", original_query)
print("\n查询重写: ", rewritten_query)

原始查询:  气候变化对环境的影响是什么？

查询重写:  气候变化对环境的具体影响有哪些，包括但不限于极端天气事件、海平面上升以及生态系统的变化？


# 回溯提示

In [9]:
step_back_llm = ChatOpenAI(model="qwen-max", temperature=0, max_tokens=4000)


# Create a prompt template for step-back prompting
step_back_template = """You are an AI assistant tasked with generating broader, more general queries to improve context retrieval in a RAG system.
Given the original query, generate a step-back query that is more general and can help retrieve relevant background information.

Original query: {original_query}

Step-back query:"""

step_back_prompt = PromptTemplate(
    input_variables=["original_query"], template=step_back_template
)

# Create an LLMChain for step-back prompting
step_back_chain = step_back_prompt | step_back_llm


def generate_step_back_query(original_query):
    """
    Generate a step-back query to retrieve broader context.

    Args:
    original_query (str): The original user query

    Returns:
    str: The step-back query
    """
    response = step_back_chain.invoke(original_query)
    return response.content

In [10]:
original_query = "气候变化对环境的影响是什么？"
rewritten_query = generate_step_back_query(original_query)
print("原始查询: ", original_query)
print("\n回溯提示: ", rewritten_query)

原始查询:  气候变化对环境的影响是什么？

回溯提示:  气候变化对地球有哪些主要影响？


# 子查询分解

In [11]:
sub_query_llm = ChatOpenAI(model="qwen-max", temperature=0, max_tokens=4000)

# Create a prompt template for sub-query decomposition
subquery_decomposition_template = """You are an AI assistant tasked with breaking down complex queries into simpler sub-queries for a RAG system.
Given the original query, decompose it into 2-4 simpler sub-queries that, when answered together, would provide a comprehensive response to the original query.

Original query: {original_query}

example: What are the impacts of climate change on the environment?

Sub-queries:
1. What are the impacts of climate change on biodiversity?
2. How does climate change affect the oceans?
3. What are the effects of climate change on agriculture?
4. What are the impacts of climate change on human health?"""


subquery_decomposition_prompt = PromptTemplate(
    input_variables=["original_query"], template=subquery_decomposition_template
)

# Create an LLMChain for sub-query decomposition
subquery_decomposer_chain = subquery_decomposition_prompt | sub_query_llm


def decompose_query(original_query: str):
    """
    Decompose the original query into simpler sub-queries.

    Args:
    original_query (str): The original complex query

    Returns:
    List[str]: A list of simpler sub-queries
    """
    response = subquery_decomposer_chain.invoke(original_query).content
    sub_queries = [
        q.strip()
        for q in response.split("\n")
        if q.strip() and not q.strip().startswith("Sub-queries:")
    ]
    return sub_queries

In [12]:
# example query over the understanding climate change dataset
original_query = "气候变化对环境的影响是什么？"
sub_queries = decompose_query(original_query)
print("\n子查询:")
for i, sub_query in enumerate(sub_queries, 1):
    print(sub_query)


子查询:
Original query: 气候变化对环境的影响是什么？
1. 气候变化如何影响生物多样性？
2. 气候变化对海洋生态系统有什么影响？
3. 气候变化对农业生产有何影响？
4. 气候变化对自然栖息地和森林有哪些影响？
