From 0e9830087a8ebf93b7cc06cfaa677b1b12225819 Mon Sep 17 00:00:00 2001
From: minmin-intel
Date: Tue, 18 Feb 2025 19:46:23 +0000
Subject: [PATCH 1/4] allow passing k and top_n to retrieval megaservice

Signed-off-by: minmin-intel
---
 .../intel/cpu/xeon/compose.yaml     |  6 ++
 DocIndexRetriever/retrieval_tool.py | 56 ++++++++++++-------
 DocIndexRetriever/tests/test.py     | 42 ++++++++++++++
 3 files changed, 85 insertions(+), 19 deletions(-)
 create mode 100644 DocIndexRetriever/tests/test.py

diff --git a/DocIndexRetriever/docker_compose/intel/cpu/xeon/compose.yaml b/DocIndexRetriever/docker_compose/intel/cpu/xeon/compose.yaml
index d4bfe0446f..9624df7300 100644
--- a/DocIndexRetriever/docker_compose/intel/cpu/xeon/compose.yaml
+++ b/DocIndexRetriever/docker_compose/intel/cpu/xeon/compose.yaml
@@ -13,6 +13,8 @@ services:
   dataprep-redis-service:
     image: ${REGISTRY:-opea}/dataprep:${TAG:-latest}
     container_name: dataprep-redis-server
+    # volumes:
+    #   - $WORKDIR/GenAIExamples/DocIndexRetriever/docker_image_build/GenAIComps/comps:/home/user/comps
     depends_on:
       - redis-vector-db
     ports:
@@ -52,6 +54,8 @@ services:
   embedding:
     image: ${REGISTRY:-opea}/embedding:${TAG:-latest}
     container_name: embedding-server
+    # volumes:
+    #   - $WORKDIR/GenAIExamples/DocIndexRetriever/docker_image_build/GenAIComps/comps:/home/comps
     ports:
       - "6000:6000"
     ipc: host
@@ -110,6 +114,8 @@ services:
   reranking:
     image: ${REGISTRY:-opea}/reranking:${TAG:-latest}
     container_name: reranking-tei-xeon-server
+    # volumes:
+    #   - $WORKDIR/GenAIExamples/DocIndexRetriever/docker_image_build/GenAIComps/comps:/home/user/comps
     depends_on:
       tei-reranking-service:
         condition: service_healthy
diff --git a/DocIndexRetriever/retrieval_tool.py b/DocIndexRetriever/retrieval_tool.py
index b627f45537..26a7759251 100644
--- a/DocIndexRetriever/retrieval_tool.py
+++ b/DocIndexRetriever/retrieval_tool.py
@@ -22,16 +22,38 @@ def align_inputs(self, inputs, cur_node, runtime_graph, llm_parameters_dict, **kwargs):
-        if self.services[cur_node].service_type == ServiceType.EMBEDDING:
-            inputs["input"] = inputs["text"]
-            del inputs["text"]
+        print(f"Inputs to {cur_node}: {inputs}")
+        for key, value in kwargs.items():
+            print(f"{key}: {value}")
         return inputs

     def align_outputs(self, data, cur_node, inputs, runtime_graph, llm_parameters_dict, **kwargs):
         next_data = {}
         if self.services[cur_node].service_type == ServiceType.EMBEDDING:
-            next_data = {"text": inputs["input"], "embedding": [item["embedding"] for item in data["data"]]}
+            # turn into chat completion request
+            # next_data = {"text": inputs["input"], "embedding": [item["embedding"] for item in data["data"]]}
+            print("Assembling output from Embedding for next node...")
+            print("Inputs to Embedding: ", inputs)
+            print("Keyword arguments: ")
+            for key, value in kwargs.items():
+                print(f"{key}: {value}")
+
+            next_data = {
+                "input": inputs["input"],
+                "messages": inputs["input"],
+                "embedding": data, #[item["embedding"] for item in data["data"]],
+                "k": kwargs["k"] if "k" in kwargs else 4,
+                "search_type": kwargs["search_type"] if "search_type" in kwargs else "similarity",
+                "distance_threshold": kwargs["distance_threshold"] if "distance_threshold" in kwargs else None,
+                "fetch_k": kwargs["fetch_k"] if "fetch_k" in kwargs else 20,
+                "lambda_mult": kwargs["lambda_mult"] if "lambda_mult" in kwargs else 0.5,
+                "score_threshold": kwargs["score_threshold"] if "score_threshold" in kwargs else 0.2,
+                "top_n": kwargs["top_n"] if "top_n" in kwargs else 1,
+            }
+
+            print("Output from Embedding for next node:\n", next_data)
+
         else:
             next_data = data
@@ -99,18 +121,6 @@ def parser_input(data, TypeClass, key):
             raise ValueError(f"Unknown request type: {data}")

         if isinstance(chat_request, ChatCompletionRequest):
-            retriever_parameters = RetrieverParms(
-                search_type=chat_request.search_type if chat_request.search_type else "similarity",
-                k=chat_request.k if chat_request.k else 4,
-                distance_threshold=chat_request.distance_threshold if chat_request.distance_threshold else None,
-                fetch_k=chat_request.fetch_k if chat_request.fetch_k else 20,
-                lambda_mult=chat_request.lambda_mult if chat_request.lambda_mult else 0.5,
-                score_threshold=chat_request.score_threshold if chat_request.score_threshold else 0.2,
-            )
-            reranker_parameters = RerankerParms(
-                top_n=chat_request.top_n if chat_request.top_n else 1,
-            )
-
             initial_inputs = {
                 "messages": query,
                 "input": query,  # has to be input due to embedding expects either input or text
@@ -123,13 +133,21 @@ def parser_input(data, TypeClass, key):
                 "top_n": chat_request.top_n if chat_request.top_n else 1,
             }

+            kwargs = {
+                "search_type": chat_request.search_type if chat_request.search_type else "similarity",
+                "k": chat_request.k if chat_request.k else 4,
+                "distance_threshold": chat_request.distance_threshold if chat_request.distance_threshold else None,
+                "fetch_k": chat_request.fetch_k if chat_request.fetch_k else 20,
+                "lambda_mult": chat_request.lambda_mult if chat_request.lambda_mult else 0.5,
+                "score_threshold": chat_request.score_threshold if chat_request.score_threshold else 0.2,
+                "top_n": chat_request.top_n if chat_request.top_n else 1,
+            }
             result_dict, runtime_graph = await self.megaservice.schedule(
                 initial_inputs=initial_inputs,
-                retriever_parameters=retriever_parameters,
-                reranker_parameters=reranker_parameters,
+                **kwargs,
             )
         else:
-            result_dict, runtime_graph = await self.megaservice.schedule(initial_inputs={"text": query})
+            result_dict, runtime_graph = await self.megaservice.schedule(initial_inputs={"input": query})

         last_node = runtime_graph.all_leaves()[-1]
         response = result_dict[last_node]
diff --git a/DocIndexRetriever/tests/test.py b/DocIndexRetriever/tests/test.py
new file mode 100644
index 0000000000..e655073ddb
--- /dev/null
+++ b/DocIndexRetriever/tests/test.py
@@ -0,0 +1,42 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+import os
+
+import requests
+
+
+def search_knowledge_base(query: str) -> str:
+    """Search the knowledge base for a specific query."""
+    url = os.environ.get("RETRIEVAL_TOOL_URL")
+    print(url)
+    proxies = {"http": ""}
+    payload = {
+        "messages": query,
+        "k":5,
+        "top_n": 2
+    }
+    response = requests.post(url, json=payload, proxies=proxies)
+    print(response)
+    if "documents" in response.json():
+        docs = response.json()["documents"]
+        context = ""
+        for i, doc in enumerate(docs):
+            context += f"Doc[{i+1}]:\n{doc}\n"
+        return context
+    elif "text" in response.json():
+        return response.json()["text"]
+    elif "reranked_docs" in response.json():
+        docs = response.json()["reranked_docs"]
+        context = ""
+        for i, doc in enumerate(docs):
+            context += f"Doc[{i+1}]:\n{doc}\n"
+        return context
+    else:
+        return "Error parsing response from the knowledge base."
+
+
+if __name__ == "__main__":
+    resp = search_knowledge_base("What is OPEA?")
+    # resp = search_knowledge_base("Thriller")
+    print(resp)
\ No newline at end of file

From 2e0ee2cf5f570cb0b6358c7582557997f4c43c98 Mon Sep 17 00:00:00 2001
From: minmin-intel
Date: Tue, 18 Feb 2025 22:28:15 +0000
Subject: [PATCH 2/4] update agent example

Signed-off-by: minmin-intel
---
 AgentQnA/README.md                            | 44 ++++-------
 .../intel/cpu/xeon/compose_openai.yaml        | 11 ++-
 .../cpu/xeon/launch_agent_service_openai.sh   |  4 +-
 .../intel/hpu/gaudi/compose.yaml              |  5 +-
 .../hpu/gaudi/launch_agent_service_gaudi.sh   |  2 +-
 ...step4_launch_and_validate_agent_gaudi.sh}  | 56 ++++---------
 AgentQnA/tests/test.py                        | 79 ++++++++++++-------
 AgentQnA/tests/test_compose_on_gaudi.sh       |  2 +-
 8 files changed, 96 insertions(+), 107 deletions(-)
 rename AgentQnA/tests/{step4_launch_and_validate_agent_tgi.sh => step4_launch_and_validate_agent_gaudi.sh} (84%)

diff --git a/AgentQnA/README.md b/AgentQnA/README.md
index d45b14ef55..8e77f2f1a6 100644
--- a/AgentQnA/README.md
+++ b/AgentQnA/README.md
@@ -84,7 +84,7 @@ flowchart LR
 3. Hierarchical multi-agents can improve performance. Expert worker agents, such as RAG agent and SQL agent, can provide high-quality output for different aspects of a complex query, and the supervisor agent can aggregate the information together to provide a comprehensive answer. If we only use one agent and provide all the tools to this single agent, it may get overwhelmed and not able to provide accurate answers.

-## Deployment with docker
+## Deploy with docker

 1. Build agent docker image [Optional]
@@ -217,13 +217,19 @@ docker build -t opea/agent:latest --build-arg https_proxy=$https_proxy --build-a
 :::
 ::::

+## Deploy AgentQnA UI
+
+The AgentQnA UI can be deployed locally or using Docker.
+
+For detailed instructions on deploying AgentQnA UI, refer to the [AgentQnA UI Guide](./ui/svelte/README.md).
+
 ## Deploy using Helm Chart

 Refer to the [AgentQnA helm chart](./kubernetes/helm/README.md) for instructions on deploying AgentQnA on Kubernetes.

 ## Validate services

-First look at logs of the agent docker containers:
+1. First look at logs of the agent docker containers:

 ```
 # worker RAG agent
@@ -240,35 +246,17 @@ docker logs react-agent-endpoint
 ```

 You should see something like "HTTP server setup successful" if the docker containers are started successfully.

-Second, validate worker RAG agent:
-
-```
-curl http://${host_ip}:9095/v1/chat/completions -X POST -H "Content-Type: application/json" -d '{
-    "messages": "Michael Jackson song Thriller"
-    }'
-```
-
-Third, validate worker SQL agent:
+2. You can use Python to validate the agent system:

+```bash
+# RAG worker agent
+python tests/test.py --prompt "Tell me about Michael Jackson song Thriller" --agent_role "worker" --ext_port 9095

-```
-curl http://${host_ip}:9096/v1/chat/completions -X POST -H "Content-Type: application/json" -d '{
-    "messages": "How many employees are in the company"
-    }'
-```
-
-Finally, validate supervisor agent:
+# SQL agent
+python tests/test.py --prompt "How many employees are in the company?" --agent_role "worker" --ext_port 9096

+# supervisor agent: this will test a two-turn conversation
+python tests/test.py --agent_role "supervisor" --ext_port 9090
 ```
-curl http://${host_ip}:9090/v1/chat/completions -X POST -H "Content-Type: application/json" -d '{
-    "messages": "How many albums does Iron Maiden have?"
-    }'
-```
-
-## Deploy AgentQnA UI
-
-The AgentQnA UI can be deployed locally or using Docker.
-
-For detailed instructions on deploying AgentQnA UI, refer to the [AgentQnA UI Guide](./ui/svelte/README.md).

 ## How to register your own tools with agent
diff --git a/AgentQnA/docker_compose/intel/cpu/xeon/compose_openai.yaml b/AgentQnA/docker_compose/intel/cpu/xeon/compose_openai.yaml
index 09bde26bde..bbd64ceb30 100644
--- a/AgentQnA/docker_compose/intel/cpu/xeon/compose_openai.yaml
+++ b/AgentQnA/docker_compose/intel/cpu/xeon/compose_openai.yaml
@@ -13,6 +13,7 @@ services:
     environment:
       ip_address: ${ip_address}
       strategy: rag_agent
+      with_memory: false
       recursion_limit: ${recursion_limit_worker}
       llm_engine: openai
      OPENAI_API_KEY: ${OPENAI_API_KEY}
@@ -35,17 +36,17 @@
     image: opea/agent:latest
     container_name: sql-agent-endpoint
     volumes:
-      - ${WORKDIR}/TAG-Bench/:/home/user/TAG-Bench # SQL database
+      - ${WORKDIR}/GenAIExamples/AgentQnA/tests:/home/user/chinook-db # SQL database
     ports:
       - "9096:9096"
     ipc: host
     environment:
       ip_address: ${ip_address}
       strategy: sql_agent
+      with_memory: false
       db_name: ${db_name}
       db_path: ${db_path}
       use_hints: false
-      hints_file: /home/user/TAG-Bench/${db_name}_hints.csv
       recursion_limit: ${recursion_limit_worker}
       llm_engine: openai
       OPENAI_API_KEY: ${OPENAI_API_KEY}
@@ -64,6 +65,7 @@
     container_name: react-agent-endpoint
     depends_on:
       - worker-rag-agent
+      - worker-sql-agent
     volumes:
       - ${TOOLSET_PATH}:/home/user/tools/
     ports:
@@ -71,14 +73,15 @@
     ipc: host
     environment:
       ip_address: ${ip_address}
-      strategy: react_langgraph
+      strategy: react_llama
+      with_memory: true
       recursion_limit: ${recursion_limit_supervisor}
       llm_engine: openai
       OPENAI_API_KEY: ${OPENAI_API_KEY}
       model: ${model}
       temperature: ${temperature}
       max_new_tokens: ${max_new_tokens}
-      stream: false
+      stream: true
       tools: /home/user/tools/supervisor_agent_tools.yaml
       require_human_feedback: false
       no_proxy: ${no_proxy}
diff --git a/AgentQnA/docker_compose/intel/cpu/xeon/launch_agent_service_openai.sh b/AgentQnA/docker_compose/intel/cpu/xeon/launch_agent_service_openai.sh
index 7b4e86a781..2455865f27 100644
--- a/AgentQnA/docker_compose/intel/cpu/xeon/launch_agent_service_openai.sh
+++ b/AgentQnA/docker_compose/intel/cpu/xeon/launch_agent_service_openai.sh
@@ -16,7 +16,7 @@ export WORKER_AGENT_URL="http://${ip_address}:9095/v1/chat/completions"
 export SQL_AGENT_URL="http://${ip_address}:9096/v1/chat/completions"
 export RETRIEVAL_TOOL_URL="http://${ip_address}:8889/v1/retrievaltool"
 export CRAG_SERVER=http://${ip_address}:8080
-export db_name=california_schools
-export db_path="sqlite:////home/user/TAG-Bench/dev_folder/dev_databases/${db_name}/${db_name}.sqlite"
+export db_name=Chinook
+export db_path="sqlite:////home/user/chinook-db/Chinook_Sqlite.sqlite"

 docker compose -f compose_openai.yaml up -d
diff --git a/AgentQnA/docker_compose/intel/hpu/gaudi/compose.yaml b/AgentQnA/docker_compose/intel/hpu/gaudi/compose.yaml
index 4895722c93..c14d58c10b 100644
--- a/AgentQnA/docker_compose/intel/hpu/gaudi/compose.yaml
+++ b/AgentQnA/docker_compose/intel/hpu/gaudi/compose.yaml
@@ -13,6 +13,7 @@ services:
     environment:
       ip_address: ${ip_address}
       strategy: rag_agent_llama
+      with_memory: false
       recursion_limit: ${recursion_limit_worker}
       llm_engine: vllm
       HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
@@ -43,6 +44,7 @@
     environment:
       ip_address: ${ip_address}
       strategy: sql_agent_llama
+      with_memory: false
       db_name: ${db_name}
       db_path: ${db_path}
       use_hints: false
@@ -74,6 +76,7 @@
     environment:
       ip_address: ${ip_address}
       strategy: react_llama
+      with_memory: true
       recursion_limit: ${recursion_limit_supervisor}
       llm_engine: vllm
       HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
@@ -81,7 +84,7 @@
       model: ${LLM_MODEL_ID}
       temperature: ${temperature}
       max_new_tokens: ${max_new_tokens}
-      stream: false
+      stream: true
       tools: /home/user/tools/supervisor_agent_tools.yaml
       require_human_feedback: false
       no_proxy: ${no_proxy}
diff --git a/AgentQnA/docker_compose/intel/hpu/gaudi/launch_agent_service_gaudi.sh b/AgentQnA/docker_compose/intel/hpu/gaudi/launch_agent_service_gaudi.sh
index fff5d53f8d..298feee3fd 100644
--- a/AgentQnA/docker_compose/intel/hpu/gaudi/launch_agent_service_gaudi.sh
+++ b/AgentQnA/docker_compose/intel/hpu/gaudi/launch_agent_service_gaudi.sh
@@ -14,7 +14,7 @@ export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
 export HF_CACHE_DIR=${HF_CACHE_DIR}
 ls $HF_CACHE_DIR
 export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
-export LLM_MODEL_ID="meta-llama/Meta-Llama-3.1-70B-Instruct"
+export LLM_MODEL_ID="meta-llama/Llama-3.3-70B-Instruct" #"meta-llama/Meta-Llama-3.1-70B-Instruct"
 export NUM_SHARDS=4
 export LLM_ENDPOINT_URL="http://${ip_address}:8086"
 export temperature=0
diff --git a/AgentQnA/tests/step4_launch_and_validate_agent_tgi.sh b/AgentQnA/tests/step4_launch_and_validate_agent_gaudi.sh
similarity index 84%
rename from AgentQnA/tests/step4_launch_and_validate_agent_tgi.sh
rename to AgentQnA/tests/step4_launch_and_validate_agent_gaudi.sh
index 824f7aa855..7c3800ba78 100644
--- a/AgentQnA/tests/step4_launch_and_validate_agent_tgi.sh
+++ b/AgentQnA/tests/step4_launch_and_validate_agent_gaudi.sh
@@ -11,15 +11,15 @@ export ip_address=$(hostname -I | awk '{print $1}')
 export TOOLSET_PATH=$WORKPATH/tools/
 export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
 HF_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
-model="meta-llama/Meta-Llama-3.1-70B-Instruct"
+model="meta-llama/Llama-3.3-70B-Instruct" #"meta-llama/Meta-Llama-3.1-70B-Instruct"

-export HF_CACHE_DIR=/data2/huggingface
-if [ ! -d "$HF_CACHE_DIR" ]; then
-    HF_CACHE_DIR=$WORKDIR/hf_cache
-    mkdir -p "$HF_CACHE_DIR"
-fi
-echo "HF_CACHE_DIR=$HF_CACHE_DIR"
-ls $HF_CACHE_DIR
+# export HF_CACHE_DIR=/data2/huggingface
+# if [ ! -d "$HF_CACHE_DIR" ]; then
+# HF_CACHE_DIR=$WORKDIR/hf_cache
+# mkdir -p "$HF_CACHE_DIR"
+# fi
+# echo "HF_CACHE_DIR=$HF_CACHE_DIR"
+ls $HF_CACHE_DIR/hub

 vllm_port=8086
 vllm_volume=${HF_CACHE_DIR}
@@ -60,23 +60,6 @@ function start_vllm_service_70B() {
     echo "Service started successfully"
 }

-
-function prepare_data() {
-    cd $WORKDIR
-
-    echo "Downloading data..."
-    git clone https://github.com/TAG-Research/TAG-Bench.git
-    cd TAG-Bench/setup
-    chmod +x get_dbs.sh
-    ./get_dbs.sh
-
-    echo "Split data..."
-    cd $WORKPATH/tests/sql_agent_test
-    bash run_data_split.sh
-
-    echo "Data preparation done!"
-}
-
 function download_chinook_data(){
     echo "Downloading chinook data..."
     cd $WORKDIR
@@ -113,7 +96,7 @@ function validate_agent_service() {
     echo "======================Testing worker rag agent======================"
     export agent_port="9095"
     prompt="Tell me about Michael Jackson song Thriller"
-    local CONTENT=$(python3 $WORKDIR/GenAIExamples/AgentQnA/tests/test.py --prompt "$prompt")
+    local CONTENT=$(python3 $WORKDIR/GenAIExamples/AgentQnA/tests/test.py --prompt "$prompt" --agent_role "worker" --ext_port $agent_port)
     # echo $CONTENT
     local EXIT_CODE=$(validate "$CONTENT" "Thriller" "rag-agent-endpoint")
     echo $EXIT_CODE
@@ -127,7 +110,7 @@
     echo "======================Testing worker sql agent======================"
     export agent_port="9096"
     prompt="How many employees are there in the company?"
-    local CONTENT=$(python3 $WORKDIR/GenAIExamples/AgentQnA/tests/test.py --prompt "$prompt")
+    local CONTENT=$(python3 $WORKDIR/GenAIExamples/AgentQnA/tests/test.py --prompt "$prompt" --agent_role "worker" --ext_port $agent_port)
     local EXIT_CODE=$(validate "$CONTENT" "8" "sql-agent-endpoint")
     echo $CONTENT
     # echo $EXIT_CODE
@@ -140,9 +123,8 @@
     # test supervisor react agent
     echo "======================Testing supervisor react agent======================"
     export agent_port="9090"
-    prompt="How many albums does Iron Maiden have?"
-    local CONTENT=$(python3 $WORKDIR/GenAIExamples/AgentQnA/tests/test.py --prompt "$prompt")
-    local EXIT_CODE=$(validate "$CONTENT" "21" "react-agent-endpoint")
+    local CONTENT=$(python3 $WORKDIR/GenAIExamples/AgentQnA/tests/test.py --agent_role "supervisor" --ext_port $agent_port --stream)
+    local EXIT_CODE=$(validate "$CONTENT" "Iron" "react-agent-endpoint")
     # echo $CONTENT
     echo $EXIT_CODE
     local EXIT_CODE="${EXIT_CODE:0-1}"
@@ -153,15 +135,6 @@
 }

-function remove_data() {
-    echo "Removing data..."
-    cd $WORKDIR
-    if [ -d "TAG-Bench" ]; then
-        rm -rf TAG-Bench
-    fi
-    echo "Data removed!"
-}
-
 function remove_chinook_data(){
     echo "Removing chinook data..."
     cd $WORKDIR
@@ -189,8 +162,9 @@ function main() {
     echo "==================== Agent service validated ===================="
 }

-remove_data
+
 remove_chinook_data
+
 main
-remove_data
+
 remove_chinook_data
diff --git a/AgentQnA/tests/test.py b/AgentQnA/tests/test.py
index 400684ffd6..046fcd9209 100644
--- a/AgentQnA/tests/test.py
+++ b/AgentQnA/tests/test.py
@@ -1,34 +1,20 @@
-# Copyright (C) 2024 Intel Corporation
+# Copyright (C) 2025 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0

 import argparse
-import os
+import json
+import uuid

 import requests


-def generate_answer_agent_api(url, prompt):
-    proxies = {"http": ""}
-    payload = {
-        "messages": prompt,
-    }
-    response = requests.post(url, json=payload, proxies=proxies)
-    answer = response.json()["text"]
-    return answer
-
-
 def process_request(url, query, is_stream=False):
     proxies = {"http": ""}
-
-    payload = {
-        "messages": query,
-    }
-
+    content = json.dumps(query) if query is not None else None
     try:
-        resp = requests.post(url=url, json=payload, proxies=proxies, stream=is_stream)
+        resp = requests.post(url=url, data=content, proxies=proxies, stream=is_stream)
         if not is_stream:
             ret = resp.json()["text"]
-            print(ret)
         else:
             for line in resp.iter_lines(decode_unicode=True):
                 print(line)
@@ -38,19 +24,54 @@ def process_request(url, query, is_stream=False):
         return ret
     except requests.exceptions.RequestException as e:
         ret = f"An error occurred:{e}"
-        print(ret)
-        return False
+        return None
+
+def test_worker_agent(args):
+    url = f"http://{args.ip_addr}:{args.ext_port}/v1/chat/completions"
+    query = {"role": "user", "messages": args.prompt, "stream": "false"}
+    ret = process_request(url, query)
+    print("Response: ", ret)
+
+
+def add_message_and_run(url, user_message, thread_id, stream=False):
+    print("User message: ", user_message)
+    query = {"role": "user", "messages": user_message, "thread_id": thread_id, "stream": stream}
+    ret = process_request(url, query, is_stream=stream)
+    print("Response: ", ret)
+
+
+def test_chat_completion_multi_turn(args):
+    url = f"http://{args.ip_addr}:{args.ext_port}/v1/chat/completions"
+    thread_id = f"{uuid.uuid4()}"
+
+    # first turn
+    print("===============First turn==================")
+    user_message = "Which artist has the most albums in the database?"
+    add_message_and_run(url, user_message, thread_id, stream=args.stream)
+    print("===============End of first turn==================")
+
+    # second turn
+    print("===============Second turn==================")
+    user_message = "Give me a few examples of the artist's albumns?"
+    add_message_and_run(url, user_message, thread_id, stream=args.stream)
+    print("===============End of second turn==================")
+

 if __name__ == "__main__":
     parser = argparse.ArgumentParser()
-    parser.add_argument("--prompt", type=str)
-    parser.add_argument("--stream", action="store_true")
-    args = parser.parse_args()
+    parser.add_argument("--ip_addr", type=str, default="127.0.0.1", help="endpoint ip address")
+    parser.add_argument("--ext_port", type=str, default="9090", help="endpoint port")
+    parser.add_argument("--stream", action="store_true", help="streaming mode")
+    parser.add_argument("--prompt", type=str, help="prompt message")
+    parser.add_argument("--agent_role", type=str, default="supervisor", help="supervisor or worker")
+    args, _ = parser.parse_known_args()

-    ip_address = os.getenv("ip_address", "localhost")
-    agent_port = os.getenv("agent_port", "9090")
-    url = f"http://{ip_address}:{agent_port}/v1/chat/completions"
-    prompt = args.prompt
+    print(args)

-    process_request(url, prompt, args.stream)
+    if args.agent_role == "supervisor":
+        test_chat_completion_multi_turn(args)
+    elif args.agent_role == "worker":
+        test_worker_agent(args)
+    else:
+        raise ValueError("Invalid agent role")
\ No newline at end of file
diff --git a/AgentQnA/tests/test_compose_on_gaudi.sh b/AgentQnA/tests/test_compose_on_gaudi.sh
index de70514ba6..ab0ce295cb 100644
--- a/AgentQnA/tests/test_compose_on_gaudi.sh
+++ b/AgentQnA/tests/test_compose_on_gaudi.sh
@@ -78,7 +78,7 @@ bash step3_ingest_data_and_validate_retrieval.sh
 echo "=================== #3 Data ingestion and validation completed===================="

 echo "=================== #4 Start agent and API server===================="
-bash step4_launch_and_validate_agent_tgi.sh
+bash step4_launch_and_validate_agent_gaudi.sh
 echo "=================== #4 Agent test passed ===================="

 echo "=================== #5 Stop agent and API server===================="

From d219e028daa4363f7ade4078e49e7e90bb35b9df Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Tue, 18 Feb 2025 22:29:57 +0000
Subject: [PATCH 3/4] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 AgentQnA/README.md                  | 1 +
 AgentQnA/tests/test.py              | 6 +++---
 DocIndexRetriever/retrieval_tool.py | 2 +-
 DocIndexRetriever/tests/test.py     | 8 ++------
 4 files changed, 7 insertions(+), 10 deletions(-)

diff --git a/AgentQnA/README.md b/AgentQnA/README.md
index 8e77f2f1a6..397bd0c775 100644
--- a/AgentQnA/README.md
+++ b/AgentQnA/README.md
@@ -247,6 +247,7 @@ docker logs react-agent-endpoint
 You should see something like "HTTP server setup successful" if the docker containers are started successfully.

 2. You can use Python to validate the agent system:
+
 ```bash
 # RAG worker agent
 python tests/test.py --prompt "Tell me about Michael Jackson song Thriller" --agent_role "worker" --ext_port 9095
diff --git a/AgentQnA/tests/test.py b/AgentQnA/tests/test.py
index 046fcd9209..18254f16c5 100644
--- a/AgentQnA/tests/test.py
+++ b/AgentQnA/tests/test.py
@@ -26,6 +26,7 @@ def process_request(url, query, is_stream=False):
         ret = f"An error occurred:{e}"
         return None

+
 def test_worker_agent(args):
     url = f"http://{args.ip_addr}:{args.ext_port}/v1/chat/completions"
     query = {"role": "user", "messages": args.prompt, "stream": "false"}
@@ -52,12 +53,11 @@ def test_chat_completion_multi_turn(args):

     # second turn
     print("===============Second turn==================")
-    user_message = "Give me a few examples of the artist's albumns?"
+    user_message = "Give me a few examples of the artist's albums?"
     add_message_and_run(url, user_message, thread_id, stream=args.stream)
     print("===============End of second turn==================")

-
 if __name__ == "__main__":
     parser = argparse.ArgumentParser()
     parser.add_argument("--ip_addr", type=str, default="127.0.0.1", help="endpoint ip address")
@@ -74,4 +74,4 @@
     elif args.agent_role == "worker":
         test_worker_agent(args)
     else:
-        raise ValueError("Invalid agent role")
\ No newline at end of file
+        raise ValueError("Invalid agent role")
diff --git a/DocIndexRetriever/retrieval_tool.py b/DocIndexRetriever/retrieval_tool.py
index 26a7759251..99fab7b1b5 100644
--- a/DocIndexRetriever/retrieval_tool.py
+++ b/DocIndexRetriever/retrieval_tool.py
@@ -42,7 +42,7 @@ def align_outputs(self, data, cur_node, inputs, runtime_graph, llm_parameters_di
             next_data = {
                 "input": inputs["input"],
                 "messages": inputs["input"],
-                "embedding": data, #[item["embedding"] for item in data["data"]],
+                "embedding": data,  # [item["embedding"] for item in data["data"]],
                 "k": kwargs["k"] if "k" in kwargs else 4,
                 "search_type": kwargs["search_type"] if "search_type" in kwargs else "similarity",
                 "distance_threshold": kwargs["distance_threshold"] if "distance_threshold" in kwargs else None,
diff --git a/DocIndexRetriever/tests/test.py b/DocIndexRetriever/tests/test.py
index e655073ddb..ba74827fa6 100644
--- a/DocIndexRetriever/tests/test.py
+++ b/DocIndexRetriever/tests/test.py
@@ -11,11 +11,7 @@ def search_knowledge_base(query: str) -> str:
     url = os.environ.get("RETRIEVAL_TOOL_URL")
     print(url)
     proxies = {"http": ""}
-    payload = {
-        "messages": query,
-        "k":5,
-        "top_n": 2
-    }
+    payload = {"messages": query, "k": 5, "top_n": 2}
     response = requests.post(url, json=payload, proxies=proxies)
     print(response)
     if "documents" in response.json():
@@ -39,4 +35,4 @@ def search_knowledge_base(query: str) -> str:
 if __name__ == "__main__":
     resp = search_knowledge_base("What is OPEA?")
     # resp = search_knowledge_base("Thriller")
-    print(resp)
\ No newline at end of file
+    print(resp)

From 7d10a4d8a8dbb94c9aff3cf3890dd9aee4e0a8a3 Mon Sep 17 00:00:00 2001
From: minmin-intel
Date: Tue, 18 Feb 2025 23:05:14 +0000
Subject: [PATCH 4/4] fix hf cache dir

Signed-off-by: minmin-intel
---
 .../tests/step4_launch_and_validate_agent_gaudi.sh | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/AgentQnA/tests/step4_launch_and_validate_agent_gaudi.sh b/AgentQnA/tests/step4_launch_and_validate_agent_gaudi.sh
index 7c3800ba78..56f017239b 100644
--- a/AgentQnA/tests/step4_launch_and_validate_agent_gaudi.sh
+++ b/AgentQnA/tests/step4_launch_and_validate_agent_gaudi.sh
@@ -13,13 +13,13 @@ export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
 HF_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
 model="meta-llama/Llama-3.3-70B-Instruct" #"meta-llama/Meta-Llama-3.1-70B-Instruct"

-# export HF_CACHE_DIR=/data2/huggingface
-# if [ ! -d "$HF_CACHE_DIR" ]; then
-# HF_CACHE_DIR=$WORKDIR/hf_cache
-# mkdir -p "$HF_CACHE_DIR"
-# fi
-# echo "HF_CACHE_DIR=$HF_CACHE_DIR"
-ls $HF_CACHE_DIR/hub
+export HF_CACHE_DIR=/data2/huggingface
+if [ ! -d "$HF_CACHE_DIR" ]; then
+    HF_CACHE_DIR=$WORKDIR/hf_cache
+    mkdir -p "$HF_CACHE_DIR"
+fi
+echo "HF_CACHE_DIR=$HF_CACHE_DIR"
+ls $HF_CACHE_DIR

 vllm_port=8086
 vllm_volume=${HF_CACHE_DIR}
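
A minimal client sketch for the `k`/`top_n` plumbing added in patch 1, for anyone who wants to exercise it without the full test harness. It assumes the DocIndexRetriever megaservice is up at the `RETRIEVAL_TOOL_URL` exported by the launch script (port 8889) and that documents have already been ingested; the payload shape and response keys mirror `DocIndexRetriever/tests/test.py`, and the query string is only illustrative.

```python
# Sketch: pass k and top_n through the retrieval megaservice (patch 1).
# Assumes RETRIEVAL_TOOL_URL points at a running DocIndexRetriever stack
# with documents already ingested; adjust the URL and query as needed.
import os

import requests

url = os.environ.get("RETRIEVAL_TOOL_URL", "http://localhost:8889/v1/retrievaltool")

# "k" bounds how many documents the retriever fetches;
# "top_n" bounds how many the reranker keeps.
payload = {"messages": "What is OPEA?", "k": 5, "top_n": 2}
resp = requests.post(url, json=payload, proxies={"http": ""})
resp.raise_for_status()
body = resp.json()

# Depending on which node ends up as the leaf of the runtime graph,
# the answer arrives under "reranked_docs", "documents", or "text".
docs = body.get("reranked_docs") or body.get("documents") or []
if docs:
    for i, doc in enumerate(docs):
        print(f"Doc[{i + 1}]:\n{doc}\n")
else:
    print(body.get("text", "No documents returned."))
```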
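Likewise, a hedged sketch of the two-turn supervisor check that `AgentQnA/tests/test.py` automates in patch 2. It assumes the react-agent-endpoint container is listening on port 9090 and honors a shared `thread_id` because `with_memory` is `true` in the compose files; the request shape is taken from the test script, while the host and prompts are illustrative.

```python
# Sketch: multi-turn supervisor validation (patch 2), two turns on one thread.
# Assumes react-agent-endpoint is up on port 9090 with with_memory: true.
import json
import uuid

import requests

url = "http://127.0.0.1:9090/v1/chat/completions"  # adjust host/port for your deployment
thread_id = str(uuid.uuid4())  # a single thread id ties the two turns together


def ask(message: str) -> str:
    query = {"role": "user", "messages": message, "thread_id": thread_id, "stream": "false"}
    resp = requests.post(url, data=json.dumps(query), proxies={"http": ""})
    return resp.json()["text"]


# Turn 1 establishes context; turn 2 only makes sense if the agent remembered it.
print(ask("Which artist has the most albums in the database?"))
print(ask("Give me a few examples of the artist's albums?"))
```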