diff --git a/DeepResearchAgent/Dockerfile b/DeepResearchAgent/Dockerfile
index e84b5e34ae..cbec24bf5a 100644
--- a/DeepResearchAgent/Dockerfile
+++ b/DeepResearchAgent/Dockerfile
@@ -5,9 +5,9 @@ ARG IMAGE_REPO=opea
 ARG BASE_TAG=latest
 FROM opea/comps-base:$BASE_TAG
 
-COPY ./deep_researcher.yaml $HOME/deep_researcher.yaml
-COPY ./utils.py $HOME/utils.py
+COPY ./research_agents $HOME/research_agents
 COPY ./requirements.txt $HOME/requirements.txt
+COPY ./agent_factory.py $HOME/agent_factory.py
 COPY ./research_agent.py $HOME/research_agent.py
 
 USER root
diff --git a/DeepResearchAgent/README.md b/DeepResearchAgent/README.md
index 7e88f310f1..38ec10f72f 100644
--- a/DeepResearchAgent/README.md
+++ b/DeepResearchAgent/README.md
@@ -4,17 +4,14 @@ Deep Research Agents are a new class of autonomous AI systems designed to perfor
 
 ## Overview
 
-In this application, we leverage the deep research agent implementation of [langchain-ai/open_deep_research](https://github.com/langchain-ai/open_deep_research), and deploy it on the Intel platform with opea microserice.
-
-![Architecture Overview](assets/img/opea-deep-research-agent.png)
+In this application, we leverage the deep research agent implementation of [langchain-ai/deepagents](https://github.com/langchain-ai/deepagents) and deploy it on the Intel platform with OPEA microservices.
 
 ## Setup Deployment Environment
 
-```
-# Configure deep_researcher.yaml with your llm model served by the vllm
-
+```shell
 # get your TAVILY_API_KEY from https://app.tavily.com/
 export TAVILY_API_KEY=""
+
 # get your HuggingFace Access Token from https://huggingface.co/docs/transformers.js/en/guides/private#step-1-generating-a-user-access-token
 export HF_TOKEN=""
 
@@ -31,9 +28,8 @@ source ./set_env.sh
 
 To deploy the Deep Research Agent services, execute the docker compose up command with the appropriate arguments.
 For a default deployment, execute:
 
-```
+```shell
 docker compose -f docker_compose/intel/hpu/gaudi/compose.yaml up -d
-
 ```
 
 ## Validate Microservice
diff --git a/DeepResearchAgent/agent_factory.py b/DeepResearchAgent/agent_factory.py
new file mode 100644
index 0000000000..1361e58179
--- /dev/null
+++ b/DeepResearchAgent/agent_factory.py
@@ -0,0 +1,75 @@
+# Copyright (C) 2025 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+import os
+from datetime import datetime
+from typing import Any
+
+from langchain_openai import ChatOpenAI
+
+
+def create_deepagents_research_agent() -> Any:
+    from deepagents import create_deep_agent
+    from research_agents.deepagents.prompts import (
+        RESEARCH_WORKFLOW_INSTRUCTIONS,
+        RESEARCHER_INSTRUCTIONS,
+        SUBAGENT_DELEGATION_INSTRUCTIONS,
+    )
+    from research_agents.deepagents.tools import tavily_search, think_tool
+
+    # Limits (cast to int so malformed values fail fast)
+    max_concurrent_research_units = int(os.environ.get("MAX_CONCURRENT_RESEARCH_UNITS", 3))
+    max_researcher_iterations = int(os.environ.get("MAX_RESEARCHER_ITERATIONS", 3))
+
+    # Custom instructions (fall back to the defaults when unset or empty, since the
+    # compose file and set_env.sh export these variables as empty strings by default)
+    instructions_researcher = os.environ.get("RESEARCHER_INSTRUCTIONS") or RESEARCHER_INSTRUCTIONS
+    instructions_research_workflow = os.environ.get("RESEARCH_WORKFLOW_INSTRUCTIONS") or RESEARCH_WORKFLOW_INSTRUCTIONS
+    instructions_subagent_delegation = (
+        os.environ.get("SUBAGENT_DELEGATION_INSTRUCTIONS") or SUBAGENT_DELEGATION_INSTRUCTIONS
+    )
+
+    # Combine orchestrator instructions (RESEARCHER_INSTRUCTIONS is only for sub-agents)
+    INSTRUCTIONS = (
+        instructions_research_workflow
+        + "\n\n"
+        + "=" * 80
+        + "\n\n"
+        + instructions_subagent_delegation.format(
+            max_concurrent_research_units=max_concurrent_research_units,
+            max_researcher_iterations=max_researcher_iterations,
+        )
+    )
+
+    # Get current date
+    current_date = datetime.now().strftime("%Y-%m-%d")
+
+    # Research sub-agent definition
+    research_sub_agent = {
+        "name": "research-agent",
+        "description": "Delegate research to the sub-agent researcher. Only give this researcher one topic at a time.",
+        "system_prompt": instructions_researcher.format(date=current_date),
+        "tools": [tavily_search, think_tool],
+    }
+
+    # LLM serving endpoint
+    model = ChatOpenAI(
+        openai_api_base=os.environ.get("OPENAI_BASE_URL", "http://0.0.0.0:8000/v1/"),
+        openai_api_key=os.environ.get("OPENAI_API_KEY", "empty-api-key"),
+        model_name=os.environ.get("LLM_MODEL_ID", "meta-llama/Llama-3.3-70B-Instruct"),
+        temperature=0.0,
+    )
+
+    # Create the agent
+    return create_deep_agent(
+        model=model,
+        tools=[tavily_search, think_tool],
+        system_prompt=INSTRUCTIONS,
+        subagents=[research_sub_agent],
+    )
+
+
+def create_agent(impl="DeepAgents") -> Any:
+    if impl == "DeepAgents":
+        return create_deepagents_research_agent()
+    else:
+        raise ValueError(f"Unknown agent implementation: {impl}")
diff --git a/DeepResearchAgent/assets/img/opea-deep-research-agent.png b/DeepResearchAgent/assets/img/opea-deep-research-agent.png
deleted file mode 100644
index 318f461f46..0000000000
Binary files a/DeepResearchAgent/assets/img/opea-deep-research-agent.png and /dev/null differ
diff --git a/DeepResearchAgent/deep_researcher.yaml b/DeepResearchAgent/deep_researcher.yaml
deleted file mode 100644
index eb5b9c1640..0000000000
--- a/DeepResearchAgent/deep_researcher.yaml
+++ /dev/null
@@ -1,11 +0,0 @@
-# Copyright (C) 2025 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-agent:
-  type: langchain_deep_researcher
-  search_api: "tavily"
-  planner_provider: "openai"
-  planner_model: "meta-llama/Llama-3.3-70B-Instruct"
-  writer_provider: "openai"
-  writer_model: "meta-llama/Llama-3.3-70B-Instruct"
-  max_search_depth: 2
diff --git a/DeepResearchAgent/docker_compose/intel/hpu/gaudi/compose.yaml b/DeepResearchAgent/docker_compose/intel/hpu/gaudi/compose.yaml
index d49af13a94..77ef688502 100644
--- a/DeepResearchAgent/docker_compose/intel/hpu/gaudi/compose.yaml
+++ b/DeepResearchAgent/docker_compose/intel/hpu/gaudi/compose.yaml
@@ -8,22 +8,12 @@ x-common-environment:
   http_proxy: ${http_proxy}
   https_proxy: ${https_proxy}
 
-x-common-agent-environment:
-  &common-agent-env
-  <<: *common-env
-  HF_TOKEN: ${HF_TOKEN}
-  model: ${LLM_MODEL_ID}
-  TAVILY_API_KEY: ${TAVILY_API_KEY}
-  OPENAI_API_KEY: ${OPENAI_API_KEY}
-  OPENAI_BASE_URL: ${OPENAI_BASE_URL}
-
 services:
   vllm-service:
     image: opea/vllm-gaudi:1.22.0
     container_name: vllm-gaudi-server
     ports:
-      - "8000:8000"
+      - ${VLLM_PORT:-8000}:8000
     volumes:
       - ${HF_CACHE_DIR:-./data}:/data
     environment:
@@ -34,10 +24,10 @@ services:
       OMPI_MCA_btl_vader_single_copy_mechanism: none
       LLM_MODEL_ID: ${LLM_MODEL_ID}
       VLLM_TORCH_PROFILER_DIR: "/mnt"
-      VLLM_SKIP_WARMUP: true
+      VLLM_SKIP_WARMUP: ${VLLM_SKIP_WARMUP:-true}
       PT_HPU_ENABLE_LAZY_COLLECTIVES: true
     healthcheck:
-      test: ["CMD-SHELL", "curl -f http://$HOST_IP:8000/health || exit 1"]
+      test: ["CMD-SHELL", "curl -f http://${HOST_IP}:${VLLM_PORT:-8000}/health || exit 1"]
       interval: 10s
       timeout: 10s
       retries: 100
@@ -45,7 +35,7 @@ services:
     cap_add:
       - SYS_NICE
     ipc: host
-    command: --model ${LLM_MODEL_ID} --tensor-parallel-size ${NUM_CARDS} --host 0.0.0.0 --port 8000 --max-seq-len-to-capture $MAX_LEN
+    command: --model ${LLM_MODEL_ID} --tensor-parallel-size ${NUM_CARDS} --enable-auto-tool-choice --tool-call-parser ${TOOL_CALL_PARSER} --host 0.0.0.0 --port 8000 --max-seq-len-to-capture ${MAX_LEN}
 
   deep-research-agent-server:
     image: ${REGISTRY:-opea}/deep-research-agent:${TAG:-latest}
     container_name: deep-research-agent-server
     depends_on:
       - vllm-service
     ports:
       - "8022:8022"
     ipc: host
     environment:
-      <<: *common-agent-env
+      <<: *common-env
+      HF_TOKEN: ${HF_TOKEN}
+      model: ${LLM_MODEL_ID}
+      TAVILY_API_KEY: ${TAVILY_API_KEY}
+      OPENAI_API_KEY: ${OPENAI_API_KEY}
+      OPENAI_BASE_URL: ${OPENAI_BASE_URL}
+      MAX_CONCURRENT_RESEARCH_UNITS: ${MAX_CONCURRENT_RESEARCH_UNITS:-3}
+      MAX_RESEARCHER_ITERATIONS: ${MAX_RESEARCHER_ITERATIONS:-3}
+      RESEARCHER_INSTRUCTIONS: ${RESEARCHER_INSTRUCTIONS:-}
+      RESEARCH_WORKFLOW_INSTRUCTIONS: ${RESEARCH_WORKFLOW_INSTRUCTIONS:-}
+      SUBAGENT_DELEGATION_INSTRUCTIONS: ${SUBAGENT_DELEGATION_INSTRUCTIONS:-}
diff --git a/DeepResearchAgent/docker_compose/intel/hpu/gaudi/set_env.sh b/DeepResearchAgent/docker_compose/intel/hpu/gaudi/set_env.sh
index 9df0330f46..facb46f085 100644
--- a/DeepResearchAgent/docker_compose/intel/hpu/gaudi/set_env.sh
+++ b/DeepResearchAgent/docker_compose/intel/hpu/gaudi/set_env.sh
@@ -3,45 +3,102 @@
 # Copyright (C) 2024 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
 
-# Navigate to the parent directory and source the environment
+# ==============================================================================
+# Environment Configuration for DeepResearchAgent on Intel Gaudi HPU
+# ==============================================================================
+
+# Get the directory where this script is located
 SCRIPT_DIR="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" &> /dev/null && pwd)"
+
+# Source the parent environment configuration file
 pushd "$SCRIPT_DIR/../../../../../" > /dev/null
 source .set_env.sh
 popd > /dev/null
 
-# Function to check if a variable is set
+# ------------------------------------------------------------------------------
+# Helper Functions
+# ------------------------------------------------------------------------------
+
+# Validates that a required environment variable is set
 check_var() {
     local var_name="$1"
     local var_value="${!var_name}"
     if [ -z "${var_value}" ]; then
         echo "Error: ${var_name} is not set. Please set ${var_name}."
-        return 1 # Return an error code but do not exit the script
+        return 1 # Return error but don't exit to allow other checks to run
     fi
 }
 
-# Check critical variables
-check_var "HF_TOKEN"
+# ------------------------------------------------------------------------------
+# Validate Required API Keys
+# ------------------------------------------------------------------------------
+
+check_var "HF_TOKEN"        # HuggingFace token for model access
+check_var "TAVILY_API_KEY"  # Tavily API key for web search functionality
+
+# ------------------------------------------------------------------------------
+# Network Configuration
+# ------------------------------------------------------------------------------
+
+# Detect the primary IP address of the host machine
 export ip_address=$(hostname -I | awk '{print $1}')
+export HOST_IP=${ip_address}
 
-# VLLM configuration
+# Update proxy settings to include the host IP
+export no_proxy=${no_proxy},${ip_address}
+export http_proxy=${http_proxy}
+export https_proxy=${https_proxy}
+
+# ------------------------------------------------------------------------------
+# vLLM Service Configuration
+# ------------------------------------------------------------------------------
+
+# Port where vLLM service will be accessible
 export VLLM_PORT="${VLLM_PORT:-8000}"
-export VLLM_VOLUME="${VLLM_VOLUME:-/data2/huggingface}"
-export VLLM_IMAGE="${VLLM_IMAGE:-opea/vllm-gaudi:latest}"
+
+# ------------------------------------------------------------------------------
+# Language Model Configuration
+# ------------------------------------------------------------------------------
+
+# LLM model to use for the Deep Research Agent
+# See supported models and tool call parsers at:
+# https://docs.vllm.ai/en/stable/features/tool_calling/#automatic-function-calling
 export LLM_MODEL_ID="${LLM_MODEL_ID:-meta-llama/Llama-3.3-70B-Instruct}"
+
+# Parser for handling function/tool calls (must match the model)
+export TOOL_CALL_PARSER="${TOOL_CALL_PARSER:-llama3_json}"
+
+# Maximum sequence length for model context (131072 = ~128K tokens)
 export MAX_LEN="${MAX_LEN:-131072}"
+
+# Number of Gaudi accelerator cards to use
 export NUM_CARDS="${NUM_CARDS:-4}"
+
+# Directory for caching HuggingFace models
 export HF_CACHE_DIR="${HF_CACHE_DIR:-"./data"}"
 
-export OPENAI_BASE_URL="http://${ip_address}:8000/v1"
-export OPENAI_API_KEY="empty"
-export no_proxy=${no_proxy}
-export http_proxy=${http_proxy}
-export https_proxy=${https_proxy}
+# OpenAI-compatible API endpoint URL for vLLM
+export OPENAI_BASE_URL="http://${ip_address}:${VLLM_PORT}/v1"
+
+# ------------------------------------------------------------------------------
+# API Keys and Authentication
+# ------------------------------------------------------------------------------
+
+export HF_TOKEN="${HF_TOKEN}"            # HuggingFace authentication token
+export OPENAI_API_KEY="empty-api-key"    # Placeholder for vLLM compatibility
+export TAVILY_API_KEY="${TAVILY_API_KEY}"  # Tavily search API key
+
+# ------------------------------------------------------------------------------
+# Deep Research Agent Configuration
+# ------------------------------------------------------------------------------
+
+# Maximum number of research units that can run concurrently
+export MAX_CONCURRENT_RESEARCH_UNITS="${MAX_CONCURRENT_RESEARCH_UNITS:-3}"
 
-# Hugging Face API token
-export HF_TOKEN="${HF_TOKEN}"
+# Maximum iterations per researcher before stopping
+export MAX_RESEARCHER_ITERATIONS="${MAX_RESEARCHER_ITERATIONS:-3}"
 
-# API keys
-check_var "TAVILY_API_KEY"
-export TAVILY_API_KEY="${TAVILY_API_KEY}" +# Custom instructions for agent behavior (leave empty for defaults) +export RESEARCHER_INSTRUCTIONS="" # Instructions for individual researchers +export RESEARCH_WORKFLOW_INSTRUCTIONS="" # Instructions for overall research workflow +export SUBAGENT_DELEGATION_INSTRUCTIONS="" # Instructions for task delegation between agents diff --git a/DeepResearchAgent/requirements.in b/DeepResearchAgent/requirements.in new file mode 100644 index 0000000000..8042efea5e --- /dev/null +++ b/DeepResearchAgent/requirements.in @@ -0,0 +1,8 @@ +deepagents +httpx +langchain_openai +langchain-tavily +langgraph-cli[inmem] +markdownify +rich +tavily-python \ No newline at end of file diff --git a/DeepResearchAgent/requirements.txt b/DeepResearchAgent/requirements.txt index ac50164672..7d11b0bde6 100644 --- a/DeepResearchAgent/requirements.txt +++ b/DeepResearchAgent/requirements.txt @@ -1 +1,394 @@ -open-deep-research==0.0.16 +# This file was autogenerated by uv via the following command: +# uv pip compile ./DeepResearchAgent/requirements.in --universal -o ./DeepResearchAgent/requirements.txt +aiofiles==24.1.0 + # via daytona +aiohappyeyeballs==2.6.1 + # via aiohttp +aiohttp==3.13.2 + # via + # aiohttp-retry + # daytona-api-client-async + # daytona-toolbox-api-client-async + # langchain-tavily + # tavily +aiohttp-retry==2.9.1 + # via + # daytona-api-client-async + # daytona-toolbox-api-client-async +aiosignal==1.4.0 + # via aiohttp +annotated-types==0.7.0 + # via pydantic +anthropic==0.74.0 + # via langchain-anthropic +anyio==4.11.0 + # via + # anthropic + # httpx + # openai + # runloop-api-client + # sse-starlette + # starlette + # watchfiles +attrs==25.4.0 + # via aiohttp +beautifulsoup4==4.14.2 + # via markdownify +blockbuster==1.5.25 + # via langgraph-runtime-inmem +bracex==2.6 + # via wcmatch +certifi==2025.11.12 + # via + # httpcore + # httpx + # requests +cffi==2.0.0 ; platform_python_implementation != 'PyPy' + # via cryptography +charset-normalizer==3.4.4 + # via requests +click==8.3.1 + # via + # langgraph-cli + # uvicorn +cloudpickle==3.1.2 + # via langgraph-api +colorama==0.4.6 ; sys_platform == 'win32' + # via + # click + # tqdm +cryptography==44.0.3 + # via langgraph-api +daytona==0.115.0 + # via deepagents +daytona-api-client==0.115.0 + # via daytona +daytona-api-client-async==0.115.0 + # via daytona +daytona-toolbox-api-client==0.115.0 + # via daytona +daytona-toolbox-api-client-async==0.115.0 + # via daytona +deepagents==0.2.7 + # via -r ./DeepResearchAgent/requirements.in +deprecated==1.3.1 + # via daytona +distro==1.9.0 + # via + # anthropic + # openai + # runloop-api-client +docstring-parser==0.17.0 + # via anthropic +environs==14.5.0 + # via daytona +forbiddenfruit==0.1.4 ; implementation_name == 'cpython' + # via blockbuster +frozenlist==1.8.0 + # via + # aiohttp + # aiosignal +googleapis-common-protos==1.72.0 + # via opentelemetry-exporter-otlp-proto-http +grpcio==1.76.0 + # via + # grpcio-tools + # langgraph-api +grpcio-tools==1.75.1 + # via langgraph-api +h11==0.16.0 + # via + # httpcore + # uvicorn +httpcore==1.0.9 + # via httpx +httpx==0.28.1 + # via + # -r ./DeepResearchAgent/requirements.in + # anthropic + # daytona + # langgraph-api + # langgraph-sdk + # langsmith + # openai + # runloop-api-client + # tavily-python +idna==3.11 + # via + # anyio + # httpx + # requests + # yarl +importlib-metadata==8.7.0 + # via opentelemetry-api +jiter==0.12.0 + # via + # anthropic + # openai +jsonpatch==1.33 + # via langchain-core +jsonpointer==3.0.0 + # via 
jsonpatch +jsonschema-rs==0.29.1 + # via langgraph-api +langchain==1.0.7 + # via + # deepagents + # langchain-tavily +langchain-anthropic==1.1.0 + # via deepagents +langchain-core==1.0.5 + # via + # deepagents + # langchain + # langchain-anthropic + # langchain-openai + # langchain-tavily + # langgraph + # langgraph-api + # langgraph-checkpoint + # langgraph-prebuilt +langchain-openai==1.0.3 + # via -r ./DeepResearchAgent/requirements.in +langchain-tavily==0.2.13 + # via -r ./DeepResearchAgent/requirements.in +langgraph==1.0.3 + # via + # langchain + # langgraph-api + # langgraph-runtime-inmem +langgraph-api==0.5.16 + # via langgraph-cli +langgraph-checkpoint==3.0.1 + # via + # langgraph + # langgraph-api + # langgraph-prebuilt + # langgraph-runtime-inmem +langgraph-cli==0.4.7 + # via -r ./DeepResearchAgent/requirements.in +langgraph-prebuilt==1.0.4 + # via langgraph +langgraph-runtime-inmem==0.18.0 + # via + # langgraph-api + # langgraph-cli +langgraph-sdk==0.2.9 + # via + # langgraph + # langgraph-api + # langgraph-cli +langsmith==0.4.43 + # via + # langchain-core + # langgraph-api +markdown-it-py==4.0.0 + # via rich +markdownify==1.2.2 + # via -r ./DeepResearchAgent/requirements.in +marshmallow==4.1.0 + # via environs +mdurl==0.1.2 + # via markdown-it-py +multidict==6.7.0 + # via + # aiohttp + # yarl +multipart==1.3.0 + # via daytona +obstore==0.7.3 + # via daytona +openai==2.8.1 + # via langchain-openai +opentelemetry-api==1.38.0 + # via + # langgraph-api + # opentelemetry-exporter-otlp-proto-http + # opentelemetry-sdk + # opentelemetry-semantic-conventions +opentelemetry-exporter-otlp-proto-common==1.38.0 + # via opentelemetry-exporter-otlp-proto-http +opentelemetry-exporter-otlp-proto-http==1.38.0 + # via langgraph-api +opentelemetry-proto==1.38.0 + # via + # opentelemetry-exporter-otlp-proto-common + # opentelemetry-exporter-otlp-proto-http +opentelemetry-sdk==1.38.0 + # via + # langgraph-api + # opentelemetry-exporter-otlp-proto-http +opentelemetry-semantic-conventions==0.59b0 + # via opentelemetry-sdk +orjson==3.11.4 + # via + # langgraph-api + # langgraph-sdk + # langsmith +ormsgpack==1.12.0 + # via langgraph-checkpoint +packaging==25.0 + # via + # langchain-core + # langsmith +propcache==0.4.1 + # via + # aiohttp + # yarl +protobuf==6.33.1 + # via + # googleapis-common-protos + # grpcio-tools + # langgraph-api + # opentelemetry-proto +pycparser==2.23 ; implementation_name != 'PyPy' and platform_python_implementation != 'PyPy' + # via cffi +pydantic==2.12.4 + # via + # anthropic + # daytona + # daytona-api-client + # daytona-api-client-async + # daytona-toolbox-api-client + # daytona-toolbox-api-client-async + # langchain + # langchain-anthropic + # langchain-core + # langgraph + # langsmith + # openai + # runloop-api-client +pydantic-core==2.41.5 + # via pydantic +pygments==2.19.2 + # via rich +pyjwt==2.10.1 + # via langgraph-api +python-dateutil==2.9.0.post0 + # via + # daytona-api-client + # daytona-api-client-async + # daytona-toolbox-api-client + # daytona-toolbox-api-client-async +python-dotenv==1.2.1 + # via + # environs + # langgraph-cli +pyyaml==6.0.3 + # via langchain-core +regex==2025.11.3 + # via tiktoken +requests==2.32.5 + # via + # langchain-tavily + # langsmith + # opentelemetry-exporter-otlp-proto-http + # requests-toolbelt + # tavily + # tavily-python + # tiktoken +requests-toolbelt==1.0.0 + # via langsmith +rich==14.2.0 + # via -r ./DeepResearchAgent/requirements.in +runloop-api-client==0.68.0 + # via deepagents +setuptools==80.9.0 + # via grpcio-tools 
+six==1.17.0 + # via + # markdownify + # python-dateutil +sniffio==1.3.1 + # via + # anthropic + # anyio + # openai + # runloop-api-client +soupsieve==2.8 + # via beautifulsoup4 +sse-starlette==2.1.3 + # via + # langgraph-api + # langgraph-runtime-inmem +starlette==0.50.0 + # via + # langgraph-api + # langgraph-runtime-inmem + # sse-starlette +structlog==25.5.0 + # via + # langgraph-api + # langgraph-runtime-inmem +tavily==1.1.0 + # via deepagents +tavily-python==0.7.13 + # via -r ./DeepResearchAgent/requirements.in +tenacity==9.1.2 + # via + # langchain-core + # langgraph-api +tiktoken==0.12.0 + # via + # langchain-openai + # tavily-python +toml==0.10.2 + # via daytona +tqdm==4.67.1 + # via openai +truststore==0.10.4 + # via langgraph-api +typing-extensions==4.15.0 + # via + # aiosignal + # anthropic + # anyio + # beautifulsoup4 + # daytona-api-client + # daytona-api-client-async + # daytona-toolbox-api-client + # daytona-toolbox-api-client-async + # grpcio + # langchain-core + # obstore + # openai + # opentelemetry-api + # opentelemetry-exporter-otlp-proto-http + # opentelemetry-sdk + # opentelemetry-semantic-conventions + # pydantic + # pydantic-core + # runloop-api-client + # starlette + # typing-inspection +typing-inspection==0.4.2 + # via pydantic +urllib3==2.5.0 + # via + # daytona-api-client + # daytona-api-client-async + # daytona-toolbox-api-client + # daytona-toolbox-api-client-async + # requests +uuid-utils==0.11.1 + # via runloop-api-client +uvicorn==0.38.0 + # via + # langgraph-api + # sse-starlette +watchfiles==1.1.1 + # via langgraph-api +wcmatch==10.1 + # via deepagents +websockets==15.0.1 + # via daytona +wrapt==2.0.1 + # via deprecated +xxhash==3.6.0 + # via langgraph +yarl==1.22.0 + # via aiohttp +zipp==3.23.0 + # via importlib-metadata +zstandard==0.25.0 + # via langsmith diff --git a/DeepResearchAgent/research_agent.py b/DeepResearchAgent/research_agent.py index 5964b82853..beb0e77b7f 100644 --- a/DeepResearchAgent/research_agent.py +++ b/DeepResearchAgent/research_agent.py @@ -1,19 +1,19 @@ # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 -import argparse -import json +import logging import os -import re from typing import List, Union -from comps import opea_microservices, register_microservice +from agent_factory import create_agent +from comps import CustomLogger, opea_microservices, register_microservice from comps.cores.telemetry.opea_telemetry import opea_telemetry from pydantic import BaseModel -from utils import create_agent +from research_agents.deepagents.utils import format_message -config_path = os.path.join(os.path.dirname(__file__), "deep_researcher.yaml") -agent = create_agent(config_path) +logger = CustomLogger(__name__) +log_level = logging.DEBUG if os.getenv("LOGFLAG", "").lower() == "true" else logging.INFO +logging.basicConfig(level=log_level, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s") class SimpleRequest(BaseModel): @@ -29,11 +29,27 @@ class SimpleRequest(BaseModel): @opea_telemetry async def run(request: SimpleRequest): - question = f"Question: {request.question}" - - result = await agent(question) - - return {"answer": result} + logger.debug(f"Received question: {request.question}") + + logger.debug("Creating DeepAgents research agent...") + agent = create_agent(impl="DeepAgents") + + logger.debug("Invoking agent with the provided question...") + result = agent.invoke( + { + "messages": [ + { + "role": "user", + "content": f"Question: {request.question}", + } + ], + }, + ) + logger.debug("Agent 
invocation completed.") + if os.getenv("LOGFLAG", "").lower() == "true": + format_message(result["messages"]) + + return {"answer": result["messages"][-1].content} if __name__ == "__main__": diff --git a/DeepResearchAgent/research_agents/deepagents/README.md b/DeepResearchAgent/research_agents/deepagents/README.md new file mode 100644 index 0000000000..5b0c279d94 --- /dev/null +++ b/DeepResearchAgent/research_agents/deepagents/README.md @@ -0,0 +1,3 @@ +# Deep Research Agent of DeepAgents + +The code is from LangChain [DeepAgents](https://github.com/langchain-ai/deepagents-quickstarts/tree/main/deep_research). diff --git a/DeepResearchAgent/research_agents/deepagents/prompts.py b/DeepResearchAgent/research_agents/deepagents/prompts.py new file mode 100644 index 0000000000..10451a8265 --- /dev/null +++ b/DeepResearchAgent/research_agents/deepagents/prompts.py @@ -0,0 +1,175 @@ +# Copyright (C) 2025 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +"""Prompt templates and tool descriptions for the research deepagent.""" + +RESEARCH_WORKFLOW_INSTRUCTIONS = """# Research Workflow + +Follow this workflow for all research requests: + +1. **Plan**: Create a todo list with write_todos to break down the research into focused tasks +2. **Save the request**: Use write_file() to save the user's research question to `/research_request.md` +3. **Research**: Delegate research tasks to sub-agents using the task() tool - ALWAYS use sub-agents for research, never conduct research yourself +4. **Synthesize**: Review all sub-agent findings and consolidate citations (each unique URL gets one number across all findings) +5. **Write Report**: Write a comprehensive final report to `/final_report.md` (see Report Writing Guidelines below) +6. **Verify**: Read `/research_request.md` and confirm you've addressed all aspects with proper citations and structure + +## Research Planning Guidelines +- Batch similar research tasks into a single TODO to minimize overhead +- For simple fact-finding questions, use 1 sub-agent +- For comparisons or multi-faceted topics, delegate to multiple parallel sub-agents +- Each sub-agent should research one specific aspect and return findings + +## Report Writing Guidelines + +When writing the final report to `/final_report.md`, follow these structure patterns: + +**For comparisons:** +1. Introduction +2. Overview of topic A +3. Overview of topic B +4. Detailed comparison +5. Conclusion + +**For lists/rankings:** +Simply list items with details - no introduction needed: +1. Item 1 with explanation +2. Item 2 with explanation +3. Item 3 with explanation + +**For summaries/overviews:** +1. Overview of topic +2. Key concept 1 +3. Key concept 2 +4. Key concept 3 +5. Conclusion + +**General guidelines:** +- Use clear section headings (## for sections, ### for subsections) +- Write in paragraph form by default - be text-heavy, not just bullet points +- Do NOT use self-referential language ("I found...", "I researched...") +- Write as a professional report without meta-commentary +- Each section should be comprehensive and detailed +- Use bullet points only when listing is more appropriate than prose + +**Citation format:** +- Cite sources inline using [1], [2], [3] format +- Assign each unique URL a single citation number across ALL sub-agent findings +- End report with ### Sources section listing each numbered source +- Number sources sequentially without gaps (1,2,3,4...) 
+- Format: [1] Source Title: URL (each on separate line for proper list rendering) +- Example: + + Some important finding [1]. Another key insight [2]. + + ### Sources + [1] AI Research Paper: https://example.com/paper + [2] Industry Analysis: https://example.com/analysis +""" + +RESEARCHER_INSTRUCTIONS = """You are a research assistant conducting research on the user's input topic. For context, today's date is {date}. + + +Your job is to use tools to gather information about the user's input topic. +You can use any of the research tools provided to you to find resources that can help answer the research question. +You can call these tools in series or in parallel, your research is conducted in a tool-calling loop. + + + +You have access to two specific research tools: +1. **tavily_search**: For conducting web searches to gather information +2. **think_tool**: For reflection and strategic planning during research +**CRITICAL: Use think_tool after each search to reflect on results and plan next steps** + + + +Think like a human researcher with limited time. Follow these steps: + +1. **Read the question carefully** - What specific information does the user need? +2. **Start with broader searches** - Use broad, comprehensive queries first +3. **After each search, pause and assess** - Do I have enough to answer? What's still missing? +4. **Execute narrower searches as you gather information** - Fill in the gaps +5. **Stop when you can answer confidently** - Don't keep searching for perfection + + + +**Tool Call Budgets** (Prevent excessive searching): +- **Simple queries**: Use 2-3 search tool calls maximum +- **Complex queries**: Use up to 5 search tool calls maximum +- **Always stop**: After 5 search tool calls if you cannot find the right sources + +**Stop Immediately When**: +- You can answer the user's question comprehensively +- You have 3+ relevant examples/sources for the question +- Your last 2 searches returned similar information + + + +After each search tool call, use think_tool to analyze the results: +- What key information did I find? +- What's missing? +- Do I have enough to answer the question comprehensively? +- Should I search more or provide my answer? + + + +When providing your findings back to the orchestrator: + +1. **Structure your response**: Organize findings with clear headings and detailed explanations +2. **Cite sources inline**: Use [1], [2], [3] format when referencing information from your searches +3. **Include Sources section**: End with ### Sources listing each numbered source with title and URL + +Example: +``` +## Key Findings + +Context engineering is a critical technique for AI agents [1]. Studies show that proper context management can improve performance by 40% [2]. + +### Sources +[1] Context Engineering Guide: https://example.com/context-guide +[2] AI Performance Study: https://example.com/study +``` + +The orchestrator will consolidate citations from all sub-agents into the final report. + +""" + +TASK_DESCRIPTION_PREFIX = """Delegate a task to a specialized sub-agent with isolated context. Available agents for delegation are: +{other_agents} +""" + +SUBAGENT_DELEGATION_INSTRUCTIONS = """# Sub-Agent Research Coordination + +Your role is to coordinate research by delegating tasks from your TODO list to specialized research sub-agents. + +## Delegation Strategy + +**DEFAULT: Start with 1 sub-agent** for most queries: +- "What is quantum computing?" 
→ 1 sub-agent (general overview) +- "List the top 10 coffee shops in San Francisco" → 1 sub-agent +- "Summarize the history of the internet" → 1 sub-agent +- "Research context engineering for AI agents" → 1 sub-agent (covers all aspects) + +**ONLY parallelize when the query EXPLICITLY requires comparison or has clearly independent aspects:** + +**Explicit comparisons** → 1 sub-agent per element: +- "Compare OpenAI vs Anthropic vs DeepMind AI safety approaches" → 3 parallel sub-agents +- "Compare Python vs JavaScript for web development" → 2 parallel sub-agents + +**Clearly separated aspects** → 1 sub-agent per aspect (use sparingly): +- "Research renewable energy adoption in Europe, Asia, and North America" → 3 parallel sub-agents (geographic separation) +- Only use this pattern when aspects cannot be covered efficiently by a single comprehensive search + +## Key Principles +- **Bias towards single sub-agent**: One comprehensive research task is more token-efficient than multiple narrow ones +- **Avoid premature decomposition**: Don't break "research X" into "research X overview", "research X techniques", "research X applications" - just use 1 sub-agent for all of X +- **Parallelize only for clear comparisons**: Use multiple sub-agents when comparing distinct entities or geographically separated data + +## Parallel Execution Limits +- Use at most {max_concurrent_research_units} parallel sub-agents per iteration +- Make multiple task() calls in a single response to enable parallel execution +- Each sub-agent returns findings independently + +## Research Limits +- Stop after {max_researcher_iterations} delegation rounds if you haven't found adequate sources +- Stop when you have sufficient information to answer comprehensively +- Bias towards focused research over exhaustive exploration""" diff --git a/DeepResearchAgent/research_agents/deepagents/tools.py b/DeepResearchAgent/research_agents/deepagents/tools.py new file mode 100644 index 0000000000..6a6ca25c05 --- /dev/null +++ b/DeepResearchAgent/research_agents/deepagents/tools.py @@ -0,0 +1,116 @@ +# Copyright (C) 2025 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +"""Research Tools. + +This module provides search and content processing utilities for the research agent, +using Tavily for URL discovery and fetching full webpage content. +""" + +import httpx +from langchain_core.tools import InjectedToolArg, tool +from markdownify import markdownify +from tavily import TavilyClient +from typing_extensions import Annotated, Literal + +tavily_client = TavilyClient() + + +def fetch_webpage_content(url: str, timeout: float = 10.0) -> str: + """Fetch and convert webpage content to markdown. + + Args: + url: URL to fetch + timeout: Request timeout in seconds + + Returns: + Webpage content as markdown + """ + headers = { + "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36" + } + + try: + response = httpx.get(url, headers=headers, timeout=timeout) + response.raise_for_status() + return markdownify(response.text) + except Exception as e: + return f"Error fetching content from {url}: {str(e)}" + + +@tool(parse_docstring=True) +def tavily_search( + query: str, + max_results: Annotated[int, InjectedToolArg] = 1, + topic: Annotated[Literal["general", "news", "finance"], InjectedToolArg] = "general", +) -> str: + """Search the web for information on a given query. + + Uses Tavily to discover relevant URLs, then fetches and returns full webpage content as markdown. 
+ + Args: + query: Search query to execute + max_results: Maximum number of results to return (default: 1) + topic: Topic filter - 'general', 'news', or 'finance' (default: 'general') + + Returns: + Formatted search results with full webpage content + """ + # Use Tavily to discover URLs + search_results = tavily_client.search( + query, + max_results=max_results, + topic=topic, + ) + + # Fetch full content for each URL + result_texts = [] + for result in search_results.get("results", []): + url = result["url"] + title = result["title"] + + # Fetch webpage content + content = fetch_webpage_content(url) + + result_text = f"""## {title} +**URL:** {url} + +{content} + +--- +""" + result_texts.append(result_text) + + # Format final response + response = f"""🔍 Found {len(result_texts)} result(s) for '{query}': + +{chr(10).join(result_texts)}""" + + return response + + +@tool(parse_docstring=True) +def think_tool(reflection: str) -> str: + """Tool for strategic reflection on research progress and decision-making. + + Use this tool after each search to analyze results and plan next steps systematically. + This creates a deliberate pause in the research workflow for quality decision-making. + + When to use: + - After receiving search results: What key information did I find? + - Before deciding next steps: Do I have enough to answer comprehensively? + - When assessing research gaps: What specific information am I still missing? + - Before concluding research: Can I provide a complete answer now? + + Reflection should address: + 1. Analysis of current findings - What concrete information have I gathered? + 2. Gap assessment - What crucial information is still missing? + 3. Quality evaluation - Do I have sufficient evidence/examples for a good answer? + 4. Strategic decision - Should I continue searching or provide my answer? 
+ + Args: + reflection: Your detailed reflection on research progress, findings, gaps, and next steps + + Returns: + Confirmation that reflection was recorded for decision-making + """ + return f"Reflection recorded: {reflection}" diff --git a/DeepResearchAgent/research_agents/deepagents/utils.py b/DeepResearchAgent/research_agents/deepagents/utils.py new file mode 100644 index 0000000000..3a103e244f --- /dev/null +++ b/DeepResearchAgent/research_agents/deepagents/utils.py @@ -0,0 +1,88 @@ +# Copyright (C) 2025 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +"""Utility functions for displaying messages and prompts in Jupyter notebooks.""" + +import json + +from rich.console import Console +from rich.panel import Panel +from rich.text import Text + +console = Console() + + +def format_message_content(message): + """Convert message content to displayable string.""" + parts = [] + tool_calls_processed = False + + # Handle main content + if isinstance(message.content, str): + parts.append(message.content) + elif isinstance(message.content, list): + # Handle complex content like tool calls (Anthropic format) + for item in message.content: + if item.get("type") == "text": + parts.append(item["text"]) + elif item.get("type") == "tool_use": + parts.append(f"\n🔧 Tool Call: {item['name']}") + parts.append(f" Args: {json.dumps(item['input'], indent=2)}") + parts.append(f" ID: {item.get('id', 'N/A')}") + tool_calls_processed = True + else: + parts.append(str(message.content)) + + # Handle tool calls attached to the message (OpenAI format) - only if not already processed + if not tool_calls_processed and hasattr(message, "tool_calls") and message.tool_calls: + for tool_call in message.tool_calls: + parts.append(f"\n🔧 Tool Call: {tool_call['name']}") + parts.append(f" Args: {json.dumps(tool_call['args'], indent=2)}") + parts.append(f" ID: {tool_call['id']}") + + return "\n".join(parts) + + +def format_messages(messages): + """Format and display a list of messages with Rich formatting.""" + for m in messages: + msg_type = m.__class__.__name__.replace("Message", "") + content = format_message_content(m) + + if msg_type == "Human": + console.print(Panel(content, title="🧑 Human", border_style="blue")) + elif msg_type == "Ai": + console.print(Panel(content, title="🤖 Assistant", border_style="green")) + elif msg_type == "Tool": + console.print(Panel(content, title="🔧 Tool Output", border_style="yellow")) + else: + console.print(Panel(content, title=f"📝 {msg_type}", border_style="white")) + + +def format_message(messages): + """Alias for format_messages for backward compatibility.""" + return format_messages(messages) + + +def show_prompt(prompt_text: str, title: str = "Prompt", border_style: str = "blue"): + """Display a prompt with rich formatting and XML tag highlighting. 
+ + Args: + prompt_text: The prompt string to display + title: Title for the panel (default: "Prompt") + border_style: Border color style (default: "blue") + """ + # Create a formatted display of the prompt + formatted_text = Text(prompt_text) + formatted_text.highlight_regex(r"<[^>]+>", style="bold blue") # Highlight XML tags + formatted_text.highlight_regex(r"##[^#\n]+", style="bold magenta") # Highlight headers + formatted_text.highlight_regex(r"###[^#\n]+", style="bold cyan") # Highlight sub-headers + + # Display in a panel for better presentation + console.print( + Panel( + formatted_text, + title=f"[bold green]{title}[/bold green]", + border_style=border_style, + padding=(1, 2), + ) + ) diff --git a/DeepResearchAgent/tests/test_compose_on_gaudi.sh b/DeepResearchAgent/tests/test_compose_on_gaudi.sh index e76a66b9cc..2d5d48e702 100644 --- a/DeepResearchAgent/tests/test_compose_on_gaudi.sh +++ b/DeepResearchAgent/tests/test_compose_on_gaudi.sh @@ -64,7 +64,7 @@ function validate_service() { local CONTENT=$(curl -s -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL" | tee ${LOG_PATH}/${SERVICE_NAME}.log) - if echo "$CONTENT" | grep -q "$EXPECTED_RESULT"; then + if echo "$CONTENT" | grep -iq "$EXPECTED_RESULT"; then echo "[ $SERVICE_NAME ] Content is as expected." else echo "[ $SERVICE_NAME ] Content does not match the expected result: $CONTENT" @@ -84,7 +84,7 @@ function validate_microservices() { validate_service \ "${ip_address}:8022/v1/deep_research_agent" \ - "deep" \ + "deep" \ "deep-research-agent" \ "deep-research-agent-server" \ '{"question": "what is the deep learning?"}' diff --git a/DeepResearchAgent/utils.py b/DeepResearchAgent/utils.py deleted file mode 100644 index 036747bbeb..0000000000 --- a/DeepResearchAgent/utils.py +++ /dev/null @@ -1,90 +0,0 @@ -# Copyright (C) 2025 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -from typing import Any - -import yaml - - -def load_config(config_path: str): - with open(config_path, "r") as file: - return yaml.safe_load(file) - - -def create_agent(config: str) -> Any: - - config_dict = load_config(config) - - agent_config = config_dict.get("agent") - agent_type = agent_config.pop("type") - - try: - import uuid - - from langgraph.checkpoint.memory import MemorySaver - from langgraph.types import Command - - # from open_deep_research.graph import builder - # TODO - from legacy.graph import builder - except ImportError as e: - raise ImportError( - f"Failed to import required modules for langchain deep researcher: {e}. Make sure langgraph and open_deep_research are installed. Also make sure that the benchmark directory is in your path. Also, you might need to install the with-open-deep-research extra dependencies (see README.md)." - ) - - memory = MemorySaver() - graph = builder.compile(checkpointer=memory) - - REPORT_STRUCTURE = """Use this structure to create a report on the user-provided topic: - - 1. Introduction (no research needed) - - Brief overview of the topic area - - 2. Main Body Sections: - - Each section should focus on a sub-topic of the user-provided topic - - 3. 
Conclusion - - Aim for 1 structural element (either a list of table) that distills the main body sections - - Provide a concise summary of the report""" - - # Extract configuration parameters - search_api = agent_config.get("search_api", "tavily") - planner_provider = agent_config.get("planner_provider") - planner_model = agent_config.get("planner_model") - planner_endpoint = agent_config.get("planner_endpoint") - writer_provider = agent_config.get("writer_provider") - writer_model = agent_config.get("writer_model") - writer_endpoint = agent_config.get("writer_endpoint") - max_search_depth = agent_config.get("max_search_depth", 3) - - async def langchain_wrapper(goal: str): - thread = { - "configurable": { - "thread_id": str(uuid.uuid4()), - "search_api": search_api, - "planner_provider": planner_provider, - "planner_model": planner_model, - "writer_provider": writer_provider, - "writer_model": writer_model, - "max_search_depth": max_search_depth, - "report_structure": REPORT_STRUCTURE, - } - } - - # NOTE: add research prompt to the goal for robust benchmarking purposes - goal = goal + " You must perform in-depth research to answer the question." - - results = [] - - async for event in graph.astream({"topic": goal}, thread, stream_mode="updates"): - results.append(event) - - async for event in graph.astream(Command(resume=True), thread, stream_mode="updates"): - results.append(event) - - final_state = graph.get_state(thread) - report = final_state.values.get("final_report") - - return report - - return langchain_wrapper
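
Once the compose stack is up, the "Validate Microservice" step from the README can be scripted. The sketch below mirrors `tests/test_compose_on_gaudi.sh`: it posts a question to the `deep-research-agent-server` endpoint on port 8022 and prints the `answer` field that `research_agent.py` returns. The host address and the timeout value are assumptions; `httpx` is already pinned in `requirements.txt`.

```python
# Minimal validation client for the deployed Deep Research Agent microservice.
# Assumptions: the compose stack is up and port 8022 is reachable on HOST_IP;
# the endpoint and payload follow tests/test_compose_on_gaudi.sh.
import os

import httpx

host_ip = os.environ.get("HOST_IP", "localhost")  # exported by set_env.sh; localhost is an assumption

response = httpx.post(
    f"http://{host_ip}:8022/v1/deep_research_agent",
    json={"question": "what is the deep learning?"},  # same payload the compose test sends
    timeout=1800.0,  # deep research runs are long; this timeout value is an assumption
)
response.raise_for_status()
print(response.json()["answer"])  # research_agent.py returns {"answer": ...}
```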
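For debugging outside the container, the agent can also be driven directly through `agent_factory`, mirroring the handler in `research_agent.py`. This is a minimal sketch assuming a vLLM server is already serving `LLM_MODEL_ID` at `OPENAI_BASE_URL` and `TAVILY_API_KEY` is exported; the question text is a placeholder.

```python
# Sketch of invoking the DeepAgents graph directly, as research_agent.py does.
# Assumptions: OPENAI_BASE_URL points at a running vLLM endpoint and
# TAVILY_API_KEY is set in the environment.
from agent_factory import create_agent
from research_agents.deepagents.utils import format_message

agent = create_agent(impl="DeepAgents")
result = agent.invoke(
    {"messages": [{"role": "user", "content": "Question: what is the deep learning?"}]}
)

format_message(result["messages"])  # Rich-formatted trace of the whole run
print(result["messages"][-1].content)  # the final report text
```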