# Research API: Query Refinement

Refine vague research queries through interactive clarification before executing research.

**What you'll learn:**

- Use an LLM to identify underspecified queries
- Build an interactive clarification loop
- Generate refined, detailed research prompts


## Setup


In [None]:
%pip install -U tavily-python langchain-openai --quiet

In [None]:
import os
import getpass
import time

# Ask for any API key that is missing (or empty) in the environment, using a
# hidden prompt so the secret is never echoed into the notebook output.
_API_KEY_PROMPTS = (
    ("TAVILY_API_KEY", "TAVILY_API_KEY:\n"),
    ("OPENAI_API_KEY", "OPENAI_API_KEY:\n"),
)

for _env_name, _prompt_text in _API_KEY_PROMPTS:
    if os.environ.get(_env_name):
        continue  # already configured; do not prompt again
    os.environ[_env_name] = getpass.getpass(_prompt_text)

In [None]:
from tavily import TavilyClient
from langchain_openai import ChatOpenAI
from pydantic import BaseModel, Field
from IPython.display import display, Markdown

# Shared clients reused by the cells below.
# NOTE(review): no API keys are passed explicitly; both constructors presumably
# read TAVILY_API_KEY / OPENAI_API_KEY from the environment — confirm.
client = TavilyClient()
# Chat model that drives the clarification dialogue.
llm = ChatOpenAI(model="gpt-5.1")

## Define Clarification Logic


In [None]:
# Schema the LLM is asked to fill in on each clarification turn. The single
# `message` field carries either follow-up questions or the refined query, and
# `needs_clarification` tells the caller which of the two it is.
# NOTE(review): the docstring and Field descriptions are presumably forwarded
# to the model as part of the structured-output schema — treat any rewording
# as a prompt change and confirm before editing.
class ClarificationResponse(BaseModel):
    """Structured response for query clarification."""
    needs_clarification: bool = Field(description="True if more info needed")
    message: str = Field(description="Follow-up questions OR refined query")

# Prompt template consumed by `clarify`. The `{query}` and `{conversation}`
# placeholders are filled with `str.format`, so any literal braces added to
# this text must be doubled (`{{` / `}}`).
PROMPT = """You are a research assistant refining a query through conversation.

Original topic: {query}
Conversation: {conversation}

If you need more details, set needs_clarification=True and ask 2-3 questions.
If you have enough context, set needs_clarification=False and provide the refined query.
"""

def clarify(query: str, conversation: list, force_final: bool = False) -> ClarificationResponse:
    """Ask the LLM for follow-up questions or a finished refined query.

    Args:
        query: The research topic as originally entered.
        conversation: Prior turns; each item is indexed by ``"role"`` and
            ``"content"``.
        force_final: When true, an extra instruction is appended asking the
            model to produce the refined query now.

    Returns:
        Whatever the structured-output LLM call returns for the
        ``ClarificationResponse`` schema.
    """
    # Render the history as "Role: text" lines; an empty history is shown as
    # an explicit placeholder so the prompt never has a blank section.
    turn_lines = [f"{turn['role'].title()}: {turn['content']}" for turn in conversation]
    transcript = "\n".join(turn_lines)
    if not transcript:
        transcript = "(none)"

    closing_instruction = "\nProvide the best possible refined query now." if force_final else ""
    full_prompt = PROMPT.format(query=query, conversation=transcript) + closing_instruction

    structured_llm = llm.with_structured_output(ClarificationResponse)
    return structured_llm.invoke(full_prompt)

## Interactive Query Refinement

> **Note:** The cell below uses `input()` for interactive prompts. If your environment doesn't support stdin, replace the `input()` calls with hardcoded strings.


In [None]:
# Maximum number of question/answer rounds before a refined query is forced.
max_iterations = 3

initial_query = input("What would you like to research?\n> ")
conversation = []

# Each round asks the model whether it still needs details. The loop stops
# early as soon as the model returns a refined query instead of questions.
for _ in range(max_iterations):
    response = clarify(initial_query, conversation)
    if not response.needs_clarification:
        break

    print(f"\n🤖 Assistant:\n{response.message}")
    conversation.append({"role": "assistant", "content": response.message})

    user_input = input("\n> ")
    conversation.append({"role": "user", "content": user_input})
else:
    # All rounds were used without a final answer: make one more call that
    # explicitly asks for the best refined query given what was collected.
    response = clarify(initial_query, conversation, force_final=True)

# On both paths `response.message` now holds the refined query.
refined_query = response.message
print(f"\n✅ Refined query:\n{refined_query}")

## Execute Research


In [None]:
# Seconds to wait between status checks; used for both the sleep and the
# progress message so the two cannot drift apart.
poll_interval_seconds = 10

# Start the research task and keep its id for polling.
result = client.research(input=refined_query, model="mini")
request_id = result["request_id"]

# NOTE: `response` is rebound here from the clarification result to the
# research status payload; the sources cell after this one reads it.
response = client.get_research(request_id)

# Poll until the task reports one of the two terminal statuses.
while response["status"] not in ("completed", "failed"):
    print(f"Status: {response['status']}... polling again in {poll_interval_seconds} seconds")
    time.sleep(poll_interval_seconds)
    response = client.get_research(request_id)

if response["status"] == "failed":
    raise RuntimeError(f"Research failed: {response.get('error', 'Unknown error')}")

print("\n✅ Research Complete!")
display(Markdown(response["content"]))

In [None]:
# Last expression in the cell, so the notebook renders its value: the
# "sources" entry of the research response, or an empty list if it is absent.
response.get("sources", [])

## Next Steps

- See [Hybrid Research](./hybrid_research.ipynb) to combine with internal data
- See [Structured Output](./structured_output.ipynb) for custom response schemas
