# Deep Research Pipeline with Web Search Integration

This notebook implements an end-to-end autonomous research system that:
1. **Plans**: Generates a search strategy using structured outputs
2. **Searches**: Executes parallel web searches via OpenAI's WebSearchTool
3. **Synthesizes**: Writes a comprehensive markdown report
4. **Delivers**: Sends the final report via email

This pattern is applicable to competitive intelligence, market research, and technical due diligence workflows.

In [None]:
# Import dependencies
from agents import Agent, WebSearchTool, trace, Runner, function_tool
from agents.model_settings import ModelSettings
from pydantic import BaseModel, Field
from dotenv import load_dotenv
import asyncio
import sendgrid
import os
from sendgrid.helpers.mail import Mail, Email, To, Content
from typing import Dict
from IPython.display import display, Markdown

In [None]:
# Initialize Environment
# Load variables from a local .env file into the process environment; the
# email tool in this notebook reads SENDGRID_API_KEY from os.environ.
# override=True makes values from the .env file take precedence over
# variables that are already set (python-dotenv behavior).
load_dotenv(override=True)

## Phase 1: Define Search Agent

This agent performs individual web searches and returns concise summaries.

In [None]:
# Configure Search Agent
# Prompt for the per-query search agent. It asks for short, dense summaries
# because the output is meant to be consumed by a downstream report writer.
# Fix: the dash after "succinctly" was mojibake (a UTF-8 em dash decoded with
# the wrong codec) and was being sent to the model verbatim.
SEARCH_INSTRUCTIONS = """You are a research assistant. Given a search term, search the web and produce a concise 2-3 paragraph summary (under 300 words). 
Capture the essence and key facts. Write succinctly—this will be consumed by a report synthesizer. Return only the summary."""

search_agent = Agent(
    name="Search Agent",
    instructions=SEARCH_INSTRUCTIONS,
    # Web search is the agent's only tool; "low" selects the smallest search
    # context size (presumably to limit cost/latency — confirm against SDK docs).
    tools=[WebSearchTool(search_context_size="low")],
    model="gpt-4o-mini",
    # tool_choice="required" makes the model call a tool instead of answering
    # without searching.
    model_settings=ModelSettings(tool_choice="required"),
)

In [None]:
# Test Search Agent
async def test_search():
    message = "Latest AI Agent frameworks in 2025"
    with trace("Single Search Test"):
        result = await Runner.run(search_agent, message)
        display(Markdown(result.final_output))

await test_search()

## Phase 2: Define Planning Agent with Structured Outputs

Uses Pydantic schemas to enforce structured, parseable outputs.

In [None]:
# Define Schema for Search Plan
# NOTE: documentation is kept in `#` comments on purpose. These models are used
# as structured-output schemas, and a class docstring would presumably become
# part of the generated schema description — verify before adding one.

# One planned web search: the term to search for plus the planner's
# justification. `reason` is declared before `query`, so it comes first in the
# model's field order.
class WebSearchItem(BaseModel):
    reason: str = Field(description="Reasoning for why this search is important")
    query: str = Field(description="Search term")

# Structured output of the planner agent (it is that agent's `output_type`):
# the complete list of searches to run.
class WebSearchPlan(BaseModel):
    searches: list[WebSearchItem] = Field(description="List of web searches to perform")

# Planner Agent
# Number of search terms the planner is instructed to produce. The value is
# baked into the prompt below when this cell runs.
HOW_MANY_SEARCHES = 3

PLANNER_INSTRUCTIONS = f"""You are a research planner. Given a query, generate {HOW_MANY_SEARCHES} distinct search terms to comprehensively answer the query."""

# The planner uses no tools; its reply is returned as a WebSearchPlan.
planner_agent = Agent(
    name="Planner Agent",
    model="gpt-4o-mini",
    output_type=WebSearchPlan,
    instructions=PLANNER_INSTRUCTIONS,
)

In [None]:
# Test Planner
async def test_planner():
    message = "Latest AI Agent frameworks in 2025"
    with trace("Planner Test"):
        result = await Runner.run(planner_agent, message)
        print(result.final_output)

await test_planner()

## Phase 3: Define Report Writer

Synthesizes search results into a comprehensive markdown report.

In [None]:
# Define Report Schema
# Structured output of the writer agent (it is that agent's `output_type`).
# Documented with `#` comments rather than a docstring so the schema handed to
# the model is left exactly as declared.
class ReportData(BaseModel):
    # Brief executive summary of the findings.
    short_summary: str = Field(description="2-3 sentence executive summary")
    # The full report body; this is the text passed to the email agent.
    markdown_report: str = Field(description="Full markdown report (1000+ words)")
    # Topics the writer suggests researching next.
    follow_up_questions: list[str] = Field(description="Suggested further research topics")

# Writer Agent
# Prompt for the synthesis step: outline first, then a long-form markdown report.
WRITER_INSTRUCTIONS = """You are a senior researcher. Given a query and initial research, create a cohesive, detailed report.
First outline the structure, then generate a 5-10 page markdown report (1000+ words). Be thorough and analytical."""

# The writer uses no tools; its reply is returned as a ReportData instance.
writer_agent = Agent(
    name="Writer Agent",
    model="gpt-4o-mini",
    output_type=ReportData,
    instructions=WRITER_INSTRUCTIONS,
)

## Phase 4: Define Email Delivery Agent

In [None]:
# Email Sending Tool
# NOTE: this function is registered as an agent tool via @function_tool. The
# docstring is presumably what the SDK exposes to the model as the tool and
# argument descriptions, so keep it short and accurate.
@function_tool
def send_email(subject: str, html_body: str) -> Dict[str, str]:
    """Send an HTML email via SendGrid.

    Args:
        subject: Subject line of the email.
        html_body: Body of the email, as HTML.

    Returns:
        A dict whose "status" is "success", "simulated" (no API key is
        configured, so nothing was sent) or "error" (with a "message").
    """
    api_key = os.environ.get('SENDGRID_API_KEY')
    if not api_key:
        # No credentials: report a dry run rather than failing the pipeline.
        return {"status": "simulated"}

    # Sender/recipient can be overridden through the environment; the
    # defaults are the original hard-coded placeholder addresses.
    sender = os.environ.get("SENDGRID_FROM_EMAIL") or "ed@edwarddonner.com"
    recipient = os.environ.get("SENDGRID_TO_EMAIL") or "ed.donner@gmail.com"

    # This is a tool boundary: any failure while building or sending the
    # message is reported back to the calling agent as data, not raised.
    try:
        sg = sendgrid.SendGridAPIClient(api_key=api_key)
        content = Content("text/html", html_body)
        mail = Mail(Email(sender), To(recipient), subject, content).get()
        response = sg.client.mail.send.post(request_body=mail)
    except Exception as e:
        return {"status": "error", "message": str(e)}

    # Only report success when SendGrid actually accepted the request.
    status_code = response.status_code
    if not 200 <= status_code < 300:
        return {"status": "error", "message": f"SendGrid responded with HTTP {status_code}"}
    return {"status": "success"}

# Email Agent
# Prompt for the delivery step: the agent receives the markdown report text,
# converts it to HTML and calls the send_email tool.
EMAIL_INSTRUCTIONS = """Convert a markdown report to clean HTML and send it via email with an appropriate subject line."""

email_agent = Agent(
    name="Email Agent",
    model="gpt-4o-mini",
    tools=[send_email],
    instructions=EMAIL_INSTRUCTIONS,
)

## Phase 5: Orchestration Functions

In [None]:
# Orchestration Logic

async def plan_searches(query: str):
    """Ask the planner agent which searches to run for `query`.

    Returns the planner run's final output, whose `searches` list is counted
    and reported before returning.
    """
    print("Planning searches...")
    planner_run = await Runner.run(planner_agent, f"Query: {query}")
    plan = planner_run.final_output
    print(f"Will perform {len(plan.searches)} searches")
    return plan

async def perform_searches(search_plan: WebSearchPlan):
    """Run every search in the plan concurrently and collect the summaries.

    A single failing search does not abort the batch: failures are reported
    and skipped so the report can still be written from the searches that
    succeeded.

    Args:
        search_plan: Plan whose `searches` are each passed to `search`.

    Returns:
        List of results from the successful searches, in plan order.

    Raises:
        RuntimeError: If the plan was non-empty and every search failed.
    """
    print("Executing searches...")
    tasks = [asyncio.create_task(search(item)) for item in search_plan.searches]
    # return_exceptions=True hands failures back as values instead of raising
    # on the first one, so the remaining results are not thrown away.
    outcomes = await asyncio.gather(*tasks, return_exceptions=True)

    results = []
    failures = []
    for item, outcome in zip(search_plan.searches, outcomes):
        if isinstance(outcome, BaseException):
            failures.append(outcome)
            print(f"Search failed for {item.query!r}: {outcome}")
        else:
            results.append(outcome)

    # With nothing to synthesize, fail loudly rather than let the writer
    # produce a report from an empty set of findings.
    if failures and not results:
        raise RuntimeError("All searches failed") from failures[0]

    print("Searches complete")
    return results

async def search(item: WebSearchItem):
    """Run the search agent for one planned item and return the run's final output.

    Both the search term and the planner's reason are included in the prompt.
    """
    prompt = f"Search term: {item.query}\nReason: {item.reason}"
    search_run = await Runner.run(search_agent, prompt)
    return search_run.final_output

async def write_report(query: str, search_results: list[str]):
    """Have the writer agent turn the collected search results into a report.

    The prompt contains the original query followed by `search_results`
    formatted with the list's default string form. Returns the writer run's
    final output.
    """
    print("Writing report...")
    prompt = f"Original query: {query}\nSummarized search results: {search_results}"
    writer_run = await Runner.run(writer_agent, prompt)
    report = writer_run.final_output
    print("Report complete")
    return report

async def deliver_report(report: ReportData) -> ReportData:
    """Hand the report's markdown to the email agent and return the report unchanged.

    Args:
        report: Report whose `markdown_report` text is sent to the email agent.

    Returns:
        The same `report` object, so callers can keep chaining on it.
    """
    print("Sending email...")
    # The run result is not needed here, so it is no longer bound to a local.
    await Runner.run(email_agent, report.markdown_report)
    # NOTE: printed once the agent run finishes; it does not by itself confirm
    # that the email tool reported a successful delivery.
    print("Email sent")
    return report

## Execute Full Pipeline

In [None]:
# Full Research Workflow
async def run_research_pipeline(query: str):
    with trace("Deep Research Pipeline"):
        print(f"Starting research: {query}")
        
        # Step 1: Plan
        search_plan = await plan_searches(query)
        
        # Step 2: Search
        search_results = await perform_searches(search_plan)
        
        # Step 3: Write
        report = await write_report(query, search_results)
        
        # Step 4: Deliver
        await deliver_report(report)
        
        print("Research complete!")
        return report

# Execute
query = "Latest AI Agent frameworks in 2025"
final_report = await run_research_pipeline(query)