In [36]:
from dotenv import load_dotenv
import os

# load_dotenv() comes from python-dotenv; it is expected to merge variables
# from a local .env file into the process environment before the lookup below.
load_dotenv()

# Guard clause: stop right away when the credential is absent (or empty),
# so later cells do not fail with a less obvious authentication error.
openai_api_key = os.getenv("OPENAI_API_KEY")
if not openai_api_key:
    raise ValueError("OPENAI_API_KEY is not set in the environment variables.")

# Confirm presence only; the key value itself is never printed.
print("OPENAI_API_KEY is set.")

OPENAI_API_KEY is set.


In [37]:
# Few-shot demonstrations that get spliced into the system prompt.
# Each record is (label, input query, expected JSON output). The records are
# rendered in a fixed five-line layout and separated by one blank line, with
# no leading or trailing whitespace in the final text.
examples = "\n\n".join(
    f"Example {number} {label}\nInput query:\n{query}\nOutput:\n{expected}"
    for number, (label, query, expected) in enumerate(
        (
            (
                "single information need",
                "How does dropout prevent overfitting in neural networks",
                '{"sub_queries":["dropout regularization reducing neural network overfitting"]}',
            ),
            (
                "two distinct information needs",
                "effects of microplastics on marine food webs and human health risks",
                '{"sub_queries":["microplastics impacts on marine food webs","human health risks from microplastic exposure"]}',
            ),
            (
                "avoid over splitting broad impact",
                "What are the latest advancements in natural language processing and how do they impact machine learning models",
                '{"sub_queries":["recent advancements in natural language processing","impact of recent NLP advances on machine learning models"]}',
            ),
            (
                "three distinct information needs",
                "role of gut microbiome in obesity and type 2 diabetes and dietary interventions",
                '{"sub_queries":["gut microbiome links to obesity","gut microbiome links to type 2 diabetes","dietary interventions modulating gut microbiome in metabolic disease"]}',
            ),
        ),
        start=1,
    )
)


# System prompt for the query-decomposition agent.
# The few-shot block held in `examples` is interpolated in place; it is the
# only substitution in the template. The text keeps its leading and trailing
# newline because the literal is not stripped.
PROMPT = f"""
You are an academic query decomposition and refinement agent for vector retrieval.
You will receive a query from a user which may contain single or multiple distinct information needs.

Here are the examples which are done well:
{examples}

Goal:
Return a list of refined sub-queries for retrieving relevant academic context from a vector database.

Output (strict):
- Return ONLY an object with key "sub_queries" containing a list of strings.
- Each string must be a refined retrieval query.
- Do NOT add any other keys or any extra text.

Decomposition rules:
- If the input expresses ONE coherent information need, return exactly 1 refined query.
- If the input contains multiple distinct information needs different questions topics or aspects return 2 to 5 sub-queries.
- Each sub-query must be atomic one concept or aspect only.
- Avoid overlap no near-duplicate sub-queries.

Refinement rules apply to every sub-query:
- Preserve the user intent exactly do not change meaning.
- Do NOT add new concepts not present or clearly implied.
- Use precise academic or technical terminology.
- Prefer noun phrases avoid questions and full sentences.
- Remove filler words stopwords and conversational phrasing.
- Length 5 to 12 words per sub-query max 12.
- Do NOT use punctuation quotes Boolean operators or special syntax.
- Do NOT include years author names or datasets unless explicitly present in the input.
- Properly identify distinct aspects if the input is broad or vague.
- Do NOT over-split sub-queries unnecessarily.
- Try to keep related concepts together.

Return the structured output now.
"""


In [38]:
from typing_extensions import TypedDict
from typing import List

# Typed schema handed to the agent as its `response_format`, so the agent's
# result carries a 'structured_response' entry shaped like this dict.
# NOTE(review): the class docstring may be surfaced to the model as the schema
# description — confirm before rewording it.
class SubQueries(TypedDict):
    """Subqueries generated from a main query"""
    # Refined retrieval queries produced for one input query, as plain strings.
    sub_queries: List[str]

In [39]:
from langchain.agents import create_agent
from langchain_openai import ChatOpenAI

In [40]:
# Chat model backing the decomposition agent.
# NOTE(review): temperature=0.0 is requested for repeatable output; confirm
# that the selected model actually honors a non-default temperature.
model = ChatOpenAI(model="gpt-5-nano", temperature=0.0)

# Agent wired with the decomposition system prompt and the SubQueries schema
# as its structured response format. No tools are passed here.
agent = create_agent(
    model,
    system_prompt=PROMPT,
    response_format=SubQueries,
)

In [41]:
from langchain_core.prompts import PromptTemplate

In [42]:
# Sample user query covering several related attention topics; used as the
# input for the decomposition agent in the cells that follow.
QUESTION = (
    "Explain me the concept of Scaled Dot-Product Attention and Multi-Head Attention "
    "and how they are used in the Model Architecture of Transformers"
)

In [43]:
# Run the agent on the sample question, sent as a single user-role message.
result = agent.invoke(
    {"messages": [dict(role="user", content=QUESTION)]}
)

In [44]:
# Display only the list of refined sub-queries from the structured response.
result["structured_response"]["sub_queries"]

['concept of scaled dot-product attention in transformers',
 'concept of multi-head attention in transformers',
 'role of attention in transformer architecture']