In [11]:
import os
import sys
from dotenv import load_dotenv
from typing import List, Dict, Any, Optional, Tuple
import json

# Load variables from a .env file into the process environment (python-dotenv).
# NOTE(review): this cell shows execution count 11 while the os.chdir cell
# below shows 2 — presumably it was re-run after the directory change so the
# .env file could be found; confirm this still works under Restart & Run All.
load_dotenv()

True

In [2]:
# Working directory for this notebook. Set the RAG_PROJECT_DIR environment
# variable to point at your own checkout instead of editing this cell; the
# fallback is the original author's local path.
PROJECT_DIR = os.environ.get(
    "RAG_PROJECT_DIR", r"C:\Users\TempAccess\Documents\Dhruv\RAG"
)

# Fail early with an actionable message rather than a bare OS error.
if not os.path.isdir(PROJECT_DIR):
    raise FileNotFoundError(
        f"Project directory not found: {PROJECT_DIR!r}. "
        "Set the RAG_PROJECT_DIR environment variable to your local RAG folder."
    )

os.chdir(PROJECT_DIR)
print(os.getcwd())

C:\Users\TempAccess\Documents\Dhruv\RAG


In [3]:
from helper_function_openai import OpenAIChat

# Generation settings shared by every query transformation in this notebook.
LLM_MODEL_NAME = "gpt-4o"
LLM_TEMPERATURE = 0.0  # presumably chosen for repeatable output — confirm
LLM_MAX_TOKENS = 4000

# Single LLM instance for all transformations
llm = OpenAIChat(
    model_name=LLM_MODEL_NAME,
    temperature=LLM_TEMPERATURE,
    max_tokens=LLM_MAX_TOKENS,
)

# Query Rewriting

In [6]:
def rewrite_query(original_query: str) -> str:
    """
    Ask the shared ``llm`` to reformulate a query for retrieval.

    Args:
        original_query: The user's query text, inserted verbatim into the
            user message.

    Returns:
        Whatever ``llm.chat`` returns for the two-message conversation
        (annotated as ``str``).
    """
    system_prompt = (
        "You are an AI assistant tasked with reformulating user queries "
        "to improve retrieval in a RAG system. Given the original query, "
        "rewrite it to be more specific, detailed, and likely to retrieve "
        "relevant information."
    )
    # The trailing label cues the model to answer with the rewrite only.
    user_prompt = f"Original query: {original_query}\n\nRewritten query:"

    conversation = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_prompt},
    ]
    return llm.chat(conversation)

In [7]:
# Demo: rewrite a sample query and show it next to the original.
original_query = "What are the impacts of climate change on the environment?"
rewritten = rewrite_query(original_query)

print("Original query:", original_query)
print()  # blank separator line
print("Rewritten query:", rewritten)

Original query: What are the impacts of climate change on the environment?

Rewritten query: How does climate change specifically affect ecosystems, biodiversity, and natural resources, including changes in temperature, precipitation patterns, and sea levels?


# Step-back Prompting

In [8]:
def generate_step_back_query(original_query: str) -> str:
    """
    Ask the shared ``llm`` for a broader, more general version of a query.

    Args:
        original_query: The user's query text, inserted verbatim into the
            user message.

    Returns:
        Whatever ``llm.chat`` returns for the two-message conversation
        (annotated as ``str``).
    """
    system_prompt = (
        "You are an AI assistant tasked with generating broader, more general "
        "queries to improve context retrieval in a RAG system. Given the original "
        "query, generate a step back query that is more general and can help "
        "retrieve relevant background information."
    )
    user_prompt = f"Original query: {original_query} \n\n Step-Back query:"

    conversation = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_prompt},
    ]
    return llm.chat(messages=conversation)

In [9]:
# Demo: generate a step-back query for the sample question.
original_query = "What are the impacts of climate change on the environment?"
step_back = generate_step_back_query(original_query=original_query)

print(
    f"original query: {original_query}",
    f"step back query: {step_back}",
    sep="\n",
)

original query: What are the impacts of climate change on the environment?
step back query: What are the general effects of climate change on natural systems and human societies?


# Sub-Query Decomposition

In [22]:
def decompose_query(original_query:str)-> List[str]:
    """
    Decompose a complex query into 2-4 simpler sub-queries.

    The shared ``llm`` is asked (via ``chat_json``) to reply with a JSON
    object of the form ``{"sub_queries": ["query1", "query2", ...]}``.

    Args:
        original_query: The user's query text, inserted verbatim into the
            user message.

    Returns:
        The sub-queries as a list of non-blank, whitespace-stripped strings.
        An empty list is returned when the reply is not a dict, when the
        ``"sub_queries"`` key is missing, or when its value is not usable
        (e.g. ``null``), so callers can always iterate over the result.
    """
    messages = [
        {
            "role": "system",
            "content": (
                "You are an AI assistant tasked with breaking down complex queries into "
                "simpler sub-queries for a RAG system. Given the original query, decompose "
                "it into 2-4 simpler sub-queries that, when answered together, would provide "
                "a comprehensive response to the original query.\n\n"
                "Example:\n"
                "Original: What are the impacts of climate change on the environment?\n"
                '{"sub_queries": ['
                '"What are the impacts of climate change on biodiversity?", '
                '"How does climate change affect the oceans?", '
                '"What are the effects of climate change on agriculture?", '
                '"What are the impacts of climate change on human health?"]}\n\n'
                'Respond with JSON: {"sub_queries": ["query1", "query2", ...]}'
            )
        },
        {
            "role": "user",
            "content": f"Original query: {original_query}"
        }
    ]

    result = llm.chat_json(messages)

    # Model output is untrusted: tolerate anything that is not the expected dict.
    if not isinstance(result, dict):
        return []

    sub_queries = result.get("sub_queries")

    # A lone string would otherwise be iterated character by character.
    if isinstance(sub_queries, str):
        sub_queries = [sub_queries]
    if not isinstance(sub_queries, list):
        return []

    # Keep only real, non-blank query strings.
    return [q.strip() for q in sub_queries if isinstance(q, str) and q.strip()]

In [23]:
# Demo: break the sample question into sub-queries and list them, numbered from 1.
original_query = "What are the impacts of climate change on the environment?"
sub_queries = decompose_query(original_query)

print("Original query:", original_query)
print("\nSub-queries:")
for position, sub_query in enumerate(sub_queries, start=1):
    print(f"  {position}. {sub_query}")

Original query: What are the impacts of climate change on the environment?

Sub-queries:
  1. What are the impacts of climate change on biodiversity?
  2. How does climate change affect the oceans?
  3. What are the effects of climate change on agriculture?
  4. What are the impacts of climate change on human health?


In [24]:
# Display the raw list returned by decompose_query in the previous cell.
sub_queries

['What are the impacts of climate change on biodiversity?',
 'How does climate change affect the oceans?',
 'What are the effects of climate change on agriculture?',
 'What are the impacts of climate change on human health?']

# Combined Transformer


In [25]:
def transform_query(original_query:str)->Dict:
    """
    Run the rewrite, step-back, and decomposition transformations on a query.

    The three helpers are called in that order, each with the same
    ``original_query``.

    Args:
        original_query: The user's query text.

    Returns:
        Dict with keys ``"original"``, ``"rewritten"``, ``"step_back"``,
        and ``"sub_queries"``.
    """
    rewritten = rewrite_query(original_query=original_query)
    step_back = generate_step_back_query(original_query=original_query)
    sub_queries = decompose_query(original_query=original_query)

    transformed = {"original": original_query}
    transformed["rewritten"] = rewritten
    transformed["step_back"] = step_back
    transformed["sub_queries"] = sub_queries
    return transformed


In [28]:
# Demo: apply every transformation to one question and print each result.
result = transform_query("How does deforestation contribute to global warming?")

# Labels are padded so the values line up; each entry is followed by a blank line.
for label, key in (
    ("Original: ", "original"),
    ("Rewritten:", "rewritten"),
    ("Step-back:", "step_back"),
):
    print(f"{label} {result[key]}\n")

print("Sub-queries:")
for position, sub_query in enumerate(result["sub_queries"], start=1):
    print(f"  {position}. {sub_query}")

Original:  How does deforestation contribute to global warming?

Rewritten: In what ways does deforestation impact the increase of greenhouse gases and contribute to the acceleration of global warming?

Step-back: What are the environmental impacts of deforestation?

Sub-queries:
  1. What is deforestation?
  2. How does deforestation affect carbon dioxide levels in the atmosphere?
  3. What role do trees play in regulating the Earth's temperature?
  4. How does the loss of forests impact global warming?
