# Research API: Structured Output

Return research results in a custom JSON schema.

**What you'll learn:**
- Define `output_schema` with typed properties
- Use nested objects and arrays in schemas
- Parse structured JSON responses

## Setup

In [None]:
%pip install -U tavily-python --quiet

In [None]:
import os
import getpass
import time

# Ask for the key interactively only when the environment does not already
# hold a non-empty TAVILY_API_KEY; getpass avoids echoing the typed value.
key_already_set = bool(os.environ.get("TAVILY_API_KEY"))
if not key_already_set:
    os.environ["TAVILY_API_KEY"] = getpass.getpass("TAVILY_API_KEY:\n")

In [None]:
from tavily import TavilyClient
from IPython.display import JSON

# No API key is passed explicitly; presumably the client picks up the
# TAVILY_API_KEY environment variable set in the setup cell — confirm
# against the tavily-python documentation.
client = TavilyClient()

## Define Output Schema

Each property requires `type` and `description`. Supported types: `string`, `integer`, `number`, `object`, `array`.

In [None]:
def _field(kind, description):
    """Build a leaf property entry holding its `type` and `description`."""
    return {"type": kind, "description": description}


def _yearly_gpu_sales(year, required):
    """Build the nested object schema describing GPU sales for one year."""
    return {
        "type": "object",
        "description": f"GPU sales for {year}",
        "properties": {
            "overview": _field("string", "2-3 sentence overview"),
            "units_sold": _field("integer", "Approximate GPUs sold"),
            "revenue": _field("number", "Revenue (USD billions)"),
        },
        "required": list(required),
    }


# Schema passed to the research call: flat string fields for the company
# profile plus one nested object holding a per-year GPU sales breakdown.
# NOTE(review): the two years list different required fields (2024 requires
# units_sold, 2025 requires overview) — confirm this asymmetry is intended.
output_schema = {
    "properties": {
        "company_name": _field("string", "Company name"),
        "ceo": _field("string", "Chief Executive Officer"),
        "cto": _field("string", "Chief Technology Officer"),
        "compute_and_cloud_offerings": _field(
            "string", "Compute platforms, AI chips, and key cloud partners"
        ),
        "gpu_sales_report": {
            "type": "object",
            "description": "GPU sales performance for the last two years",
            "properties": {
                "2024": _yearly_gpu_sales("2024", ["units_sold", "revenue"]),
                "2025": _yearly_gpu_sales("2025", ["revenue", "overview"]),
                "comparative_summary": _field(
                    "string", "Comparison of 2024 vs 2025, growth and outlook"
                ),
            },
            "required": ["2024", "2025", "comparative_summary"],
        },
    },
    "required": [
        "company_name",
        "ceo",
        "cto",
        "gpu_sales_report",
        "compute_and_cloud_offerings",
    ],
}

## Run Research with Schema

In [None]:
# Research brief sent to the API; the schema defined earlier is passed along
# with it so the returned content follows that structure.
query = (
    "Create a detailed company research report about Nvidia. "
    "Focus on leadership, financials, AI initiatives, infrastructure, "
    "and include a deep two-year GPU sales analysis."
)

# Submit the request and keep its id so the result can be fetched by polling.
result = client.research(input=query, model="pro", output_schema=output_schema)
request_id = result["request_id"]

# Poll every 10 seconds until the reported status is one of the two end states.
terminal_states = ("completed", "failed")
response = client.get_research(request_id)
while True:
    status = response["status"]
    if status in terminal_states:
        break
    print(f"Status: {status}... polling again in 10 seconds")
    time.sleep(10)
    response = client.get_research(request_id)

# Surface a failed run as an exception instead of displaying partial output.
if status == "failed":
    raise RuntimeError(f"Research failed: {response.get('error', 'Unknown error')}")

print("Research Complete!")
# Last expression of the cell: renders the structured content as a JSON widget.
JSON(response["content"])

## Next Steps

- See [Hybrid Research](./hybrid_research.ipynb) to combine research results with internal data