From 0e9830087a8ebf93b7cc06cfaa677b1b12225819 Mon Sep 17 00:00:00 2001
From: minmin-intel
Date: Tue, 18 Feb 2025 19:46:23 +0000
Subject: [PATCH 1/4] allow passing k and top_n to retrieval megaservice
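This makes the retriever and reranker parameters (k, top_n, search_type,
fetch_k, lambda_mult, score_threshold, distance_threshold) tunable per
request instead of fixed at schedule time. A minimal sketch of a request
using the new knobs, assuming the Xeon compose defaults for host and port
(adjust for your deployment):

    import requests

    payload = {
        "messages": "What is OPEA?",
        "k": 5,      # retriever: fetch 5 candidates (service default: 4)
        "top_n": 2,  # reranker: keep the top 2 (service default: 1)
    }
    resp = requests.post(
        "http://localhost:8889/v1/retrievaltool",
        json=payload,
        proxies={"http": ""},
    )
    print(resp.json())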
Signed-off-by: minmin-intel
---
.../intel/cpu/xeon/compose.yaml | 6 ++
DocIndexRetriever/retrieval_tool.py | 56 ++++++++++++-------
DocIndexRetriever/tests/test.py | 42 ++++++++++++++
3 files changed, 85 insertions(+), 19 deletions(-)
create mode 100644 DocIndexRetriever/tests/test.py
diff --git a/DocIndexRetriever/docker_compose/intel/cpu/xeon/compose.yaml b/DocIndexRetriever/docker_compose/intel/cpu/xeon/compose.yaml
index d4bfe0446f..9624df7300 100644
--- a/DocIndexRetriever/docker_compose/intel/cpu/xeon/compose.yaml
+++ b/DocIndexRetriever/docker_compose/intel/cpu/xeon/compose.yaml
@@ -13,6 +13,8 @@ services:
dataprep-redis-service:
image: ${REGISTRY:-opea}/dataprep:${TAG:-latest}
container_name: dataprep-redis-server
+ # volumes:
+ # - $WORKDIR/GenAIExamples/DocIndexRetriever/docker_image_build/GenAIComps/comps:/home/user/comps
depends_on:
- redis-vector-db
ports:
@@ -52,6 +54,8 @@ services:
embedding:
image: ${REGISTRY:-opea}/embedding:${TAG:-latest}
container_name: embedding-server
+ # volumes:
+ # - $WORKDIR/GenAIExamples/DocIndexRetriever/docker_image_build/GenAIComps/comps:/home/comps
ports:
- "6000:6000"
ipc: host
@@ -110,6 +114,8 @@ services:
reranking:
image: ${REGISTRY:-opea}/reranking:${TAG:-latest}
container_name: reranking-tei-xeon-server
+ # volumes:
+ # - $WORKDIR/GenAIExamples/DocIndexRetriever/docker_image_build/GenAIComps/comps:/home/user/comps
depends_on:
tei-reranking-service:
condition: service_healthy
diff --git a/DocIndexRetriever/retrieval_tool.py b/DocIndexRetriever/retrieval_tool.py
index b627f45537..26a7759251 100644
--- a/DocIndexRetriever/retrieval_tool.py
+++ b/DocIndexRetriever/retrieval_tool.py
@@ -22,16 +22,38 @@
def align_inputs(self, inputs, cur_node, runtime_graph, llm_parameters_dict, **kwargs):
- if self.services[cur_node].service_type == ServiceType.EMBEDDING:
- inputs["input"] = inputs["text"]
- del inputs["text"]
+ print(f"Inputs to {cur_node}: {inputs}")
+ for key, value in kwargs.items():
+ print(f"{key}: {value}")
return inputs
def align_outputs(self, data, cur_node, inputs, runtime_graph, llm_parameters_dict, **kwargs):
next_data = {}
if self.services[cur_node].service_type == ServiceType.EMBEDDING:
- next_data = {"text": inputs["input"], "embedding": [item["embedding"] for item in data["data"]]}
+ # turn into chat completion request
+ # next_data = {"text": inputs["input"], "embedding": [item["embedding"] for item in data["data"]]}
+ print("Assembing output from Embedding for next node...")
+ print("Inputs to Embedding: ", inputs)
+ print("Keyword arguments: ")
+ for key, value in kwargs.items():
+ print(f"{key}: {value}")
+
+ next_data = {
+ "input": inputs["input"],
+ "messages": inputs["input"],
+ "embedding": data, #[item["embedding"] for item in data["data"]],
+ "k": kwargs["k"] if "k" in kwargs else 4,
+ "search_type": kwargs["search_type"] if "search_type" in kwargs else "similarity",
+ "distance_threshold": kwargs["distance_threshold"] if "distance_threshold" in kwargs else None,
+ "fetch_k": kwargs["fetch_k"] if "fetch_k" in kwargs else 20,
+ "lambda_mult": kwargs["lambda_mult"] if "lambda_mult" in kwargs else 0.5,
+ "score_threshold": kwargs["score_threshold"] if "score_threshold" in kwargs else 0.2,
+ "top_n": kwargs["top_n"] if "top_n" in kwargs else 1,
+ }
+
+ print("Output from Embedding for next node:\n", next_data)
+
else:
next_data = data
@@ -99,18 +121,6 @@ def parser_input(data, TypeClass, key):
raise ValueError(f"Unknown request type: {data}")
if isinstance(chat_request, ChatCompletionRequest):
- retriever_parameters = RetrieverParms(
- search_type=chat_request.search_type if chat_request.search_type else "similarity",
- k=chat_request.k if chat_request.k else 4,
- distance_threshold=chat_request.distance_threshold if chat_request.distance_threshold else None,
- fetch_k=chat_request.fetch_k if chat_request.fetch_k else 20,
- lambda_mult=chat_request.lambda_mult if chat_request.lambda_mult else 0.5,
- score_threshold=chat_request.score_threshold if chat_request.score_threshold else 0.2,
- )
- reranker_parameters = RerankerParms(
- top_n=chat_request.top_n if chat_request.top_n else 1,
- )
-
initial_inputs = {
"messages": query,
"input": query, # has to be input due to embedding expects either input or text
@@ -123,13 +133,21 @@ def parser_input(data, TypeClass, key):
"top_n": chat_request.top_n if chat_request.top_n else 1,
}
+ kwargs = {
+ "search_type": chat_request.search_type if chat_request.search_type else "similarity",
+ "k": chat_request.k if chat_request.k else 4,
+ "distance_threshold": chat_request.distance_threshold if chat_request.distance_threshold else None,
+ "fetch_k": chat_request.fetch_k if chat_request.fetch_k else 20,
+ "lambda_mult": chat_request.lambda_mult if chat_request.lambda_mult else 0.5,
+ "score_threshold": chat_request.score_threshold if chat_request.score_threshold else 0.2,
+ "top_n": chat_request.top_n if chat_request.top_n else 1,
+ }
result_dict, runtime_graph = await self.megaservice.schedule(
initial_inputs=initial_inputs,
- retriever_parameters=retriever_parameters,
- reranker_parameters=reranker_parameters,
+ **kwargs,
)
else:
- result_dict, runtime_graph = await self.megaservice.schedule(initial_inputs={"text": query})
+ result_dict, runtime_graph = await self.megaservice.schedule(initial_inputs={"input": query})
last_node = runtime_graph.all_leaves()[-1]
response = result_dict[last_node]
diff --git a/DocIndexRetriever/tests/test.py b/DocIndexRetriever/tests/test.py
new file mode 100644
index 0000000000..e655073ddb
--- /dev/null
+++ b/DocIndexRetriever/tests/test.py
@@ -0,0 +1,42 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+import os
+
+import requests
+
+
+def search_knowledge_base(query: str) -> str:
+ """Search the knowledge base for a specific query."""
+ url = os.environ.get("RETRIEVAL_TOOL_URL")
+ print(url)
+ proxies = {"http": ""}
+ payload = {
+ "messages": query,
+ "k":5,
+ "top_n": 2
+ }
+ response = requests.post(url, json=payload, proxies=proxies)
+ print(response)
+ if "documents" in response.json():
+ docs = response.json()["documents"]
+ context = ""
+ for i, doc in enumerate(docs):
+ context += f"Doc[{i+1}]:\n{doc}\n"
+ return context
+ elif "text" in response.json():
+ return response.json()["text"]
+ elif "reranked_docs" in response.json():
+ docs = response.json()["reranked_docs"]
+ context = ""
+ for i, doc in enumerate(docs):
+ context += f"Doc[{i+1}]:\n{doc}\n"
+ return context
+ else:
+ return "Error parsing response from the knowledge base."
+
+
+if __name__ == "__main__":
+ resp = search_knowledge_base("What is OPEA?")
+ # resp = search_knowledge_base("Thriller")
+ print(resp)
\ No newline at end of file
From 2e0ee2cf5f570cb0b6358c7582557997f4c43c98 Mon Sep 17 00:00:00 2001
From: minmin-intel
Date: Tue, 18 Feb 2025 22:28:15 +0000
Subject: [PATCH 2/4] update agent example
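Switch the SQL agent to the Chinook sample database, enable memory on the
supervisor (strategy react_llama, stream on), and replace the curl-based
validation with tests/test.py. A minimal sketch of the two-turn supervisor
check the new test performs, assuming the default endpoint from the compose
files (thread_id ties both turns to one conversation):

    import uuid

    import requests

    url = "http://127.0.0.1:9090/v1/chat/completions"
    thread_id = str(uuid.uuid4())  # one id per conversation
    for msg in [
        "Which artist has the most albums in the database?",
        "Give me a few examples of the artist's albums?",
    ]:
        resp = requests.post(
            url,
            json={"role": "user", "messages": msg, "thread_id": thread_id, "stream": False},
        )
        print(resp.json()["text"])  # non-streaming responses carry a "text" field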
Signed-off-by: minmin-intel
---
AgentQnA/README.md | 44 ++++-------
.../intel/cpu/xeon/compose_openai.yaml | 11 ++-
.../cpu/xeon/launch_agent_service_openai.sh | 4 +-
.../intel/hpu/gaudi/compose.yaml | 5 +-
.../hpu/gaudi/launch_agent_service_gaudi.sh | 2 +-
... step4_launch_and_validate_agent_gaudi.sh} | 56 ++++---------
AgentQnA/tests/test.py | 79 ++++++++++++-------
AgentQnA/tests/test_compose_on_gaudi.sh | 2 +-
8 files changed, 96 insertions(+), 107 deletions(-)
rename AgentQnA/tests/{step4_launch_and_validate_agent_tgi.sh => step4_launch_and_validate_agent_gaudi.sh} (84%)
diff --git a/AgentQnA/README.md b/AgentQnA/README.md
index d45b14ef55..8e77f2f1a6 100644
--- a/AgentQnA/README.md
+++ b/AgentQnA/README.md
@@ -84,7 +84,7 @@ flowchart LR
3. Hierarchical multi-agents can improve performance.
Expert worker agents, such as the RAG agent and SQL agent, can provide high-quality output for different aspects of a complex query, and the supervisor agent can aggregate the information to provide a comprehensive answer. If we only use one agent and provide all the tools to this single agent, it may get overwhelmed and not be able to provide accurate answers.
-## Deployment with docker
+## Deploy with docker
1. Build agent docker image [Optional]
@@ -217,13 +217,19 @@ docker build -t opea/agent:latest --build-arg https_proxy=$https_proxy --build-a
:::
::::
+## Deploy AgentQnA UI
+
+The AgentQnA UI can be deployed locally or using Docker.
+
+For detailed instructions on deploying AgentQnA UI, refer to the [AgentQnA UI Guide](./ui/svelte/README.md).
+
## Deploy using Helm Chart
Refer to the [AgentQnA helm chart](./kubernetes/helm/README.md) for instructions on deploying AgentQnA on Kubernetes.
## Validate services
-First look at logs of the agent docker containers:
+1. First, look at the logs of the agent docker containers:
```
# worker RAG agent
@@ -240,35 +246,17 @@ docker logs react-agent-endpoint
You should see something like "HTTP server setup successful" if the docker containers are started successfully.
-Second, validate worker RAG agent:
-
-```
-curl http://${host_ip}:9095/v1/chat/completions -X POST -H "Content-Type: application/json" -d '{
- "messages": "Michael Jackson song Thriller"
- }'
-```
-
-Third, validate worker SQL agent:
+2. You can use Python to validate the agent system:
+```bash
+# RAG worker agent
+python tests/test.py --prompt "Tell me about Michael Jackson song Thriller" --agent_role "worker" --ext_port 9095
-```
-curl http://${host_ip}:9096/v1/chat/completions -X POST -H "Content-Type: application/json" -d '{
- "messages": "How many employees are in the company"
- }'
-```
-
-Finally, validate supervisor agent:
+# SQL agent
+python tests/test.py --prompt "How many employees in company" --agent_role "worker" --ext_port 9096
+# supervisor agent: this will test a two-turn conversation
+python tests/test.py --agent_role "supervisor" --ext_port 9090
```
-curl http://${host_ip}:9090/v1/chat/completions -X POST -H "Content-Type: application/json" -d '{
- "messages": "How many albums does Iron Maiden have?"
- }'
-```
-
-## Deploy AgentQnA UI
-
-The AgentQnA UI can be deployed locally or using Docker.
-
-For detailed instructions on deploying AgentQnA UI, refer to the [AgentQnA UI Guide](./ui/svelte/README.md).
## How to register your own tools with agent
diff --git a/AgentQnA/docker_compose/intel/cpu/xeon/compose_openai.yaml b/AgentQnA/docker_compose/intel/cpu/xeon/compose_openai.yaml
index 09bde26bde..bbd64ceb30 100644
--- a/AgentQnA/docker_compose/intel/cpu/xeon/compose_openai.yaml
+++ b/AgentQnA/docker_compose/intel/cpu/xeon/compose_openai.yaml
@@ -13,6 +13,7 @@ services:
environment:
ip_address: ${ip_address}
strategy: rag_agent
+ with_memory: false
recursion_limit: ${recursion_limit_worker}
llm_engine: openai
OPENAI_API_KEY: ${OPENAI_API_KEY}
@@ -35,17 +36,17 @@ services:
image: opea/agent:latest
container_name: sql-agent-endpoint
volumes:
- - ${WORKDIR}/TAG-Bench/:/home/user/TAG-Bench # SQL database
+ - ${WORKDIR}/GenAIExamples/AgentQnA/tests:/home/user/chinook-db # SQL database
ports:
- "9096:9096"
ipc: host
environment:
ip_address: ${ip_address}
strategy: sql_agent
+ with_memory: false
db_name: ${db_name}
db_path: ${db_path}
use_hints: false
- hints_file: /home/user/TAG-Bench/${db_name}_hints.csv
recursion_limit: ${recursion_limit_worker}
llm_engine: openai
OPENAI_API_KEY: ${OPENAI_API_KEY}
@@ -64,6 +65,7 @@ services:
container_name: react-agent-endpoint
depends_on:
- worker-rag-agent
+ - worker-sql-agent
volumes:
- ${TOOLSET_PATH}:/home/user/tools/
ports:
@@ -71,14 +73,15 @@ services:
ipc: host
environment:
ip_address: ${ip_address}
- strategy: react_langgraph
+ strategy: react_llama
+ with_memory: true
recursion_limit: ${recursion_limit_supervisor}
llm_engine: openai
OPENAI_API_KEY: ${OPENAI_API_KEY}
model: ${model}
temperature: ${temperature}
max_new_tokens: ${max_new_tokens}
- stream: false
+ stream: true
tools: /home/user/tools/supervisor_agent_tools.yaml
require_human_feedback: false
no_proxy: ${no_proxy}
diff --git a/AgentQnA/docker_compose/intel/cpu/xeon/launch_agent_service_openai.sh b/AgentQnA/docker_compose/intel/cpu/xeon/launch_agent_service_openai.sh
index 7b4e86a781..2455865f27 100644
--- a/AgentQnA/docker_compose/intel/cpu/xeon/launch_agent_service_openai.sh
+++ b/AgentQnA/docker_compose/intel/cpu/xeon/launch_agent_service_openai.sh
@@ -16,7 +16,7 @@ export WORKER_AGENT_URL="http://${ip_address}:9095/v1/chat/completions"
export SQL_AGENT_URL="http://${ip_address}:9096/v1/chat/completions"
export RETRIEVAL_TOOL_URL="http://${ip_address}:8889/v1/retrievaltool"
export CRAG_SERVER=http://${ip_address}:8080
-export db_name=california_schools
-export db_path="sqlite:////home/user/TAG-Bench/dev_folder/dev_databases/${db_name}/${db_name}.sqlite"
+export db_name=Chinook
+export db_path="sqlite:////home/user/chinook-db/Chinook_Sqlite.sqlite"
docker compose -f compose_openai.yaml up -d
diff --git a/AgentQnA/docker_compose/intel/hpu/gaudi/compose.yaml b/AgentQnA/docker_compose/intel/hpu/gaudi/compose.yaml
index 4895722c93..c14d58c10b 100644
--- a/AgentQnA/docker_compose/intel/hpu/gaudi/compose.yaml
+++ b/AgentQnA/docker_compose/intel/hpu/gaudi/compose.yaml
@@ -13,6 +13,7 @@ services:
environment:
ip_address: ${ip_address}
strategy: rag_agent_llama
+ with_memory: false
recursion_limit: ${recursion_limit_worker}
llm_engine: vllm
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
@@ -43,6 +44,7 @@ services:
environment:
ip_address: ${ip_address}
strategy: sql_agent_llama
+ with_memory: false
db_name: ${db_name}
db_path: ${db_path}
use_hints: false
@@ -74,6 +76,7 @@ services:
environment:
ip_address: ${ip_address}
strategy: react_llama
+ with_memory: true
recursion_limit: ${recursion_limit_supervisor}
llm_engine: vllm
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
@@ -81,7 +84,7 @@ services:
model: ${LLM_MODEL_ID}
temperature: ${temperature}
max_new_tokens: ${max_new_tokens}
- stream: false
+ stream: true
tools: /home/user/tools/supervisor_agent_tools.yaml
require_human_feedback: false
no_proxy: ${no_proxy}
diff --git a/AgentQnA/docker_compose/intel/hpu/gaudi/launch_agent_service_gaudi.sh b/AgentQnA/docker_compose/intel/hpu/gaudi/launch_agent_service_gaudi.sh
index fff5d53f8d..298feee3fd 100644
--- a/AgentQnA/docker_compose/intel/hpu/gaudi/launch_agent_service_gaudi.sh
+++ b/AgentQnA/docker_compose/intel/hpu/gaudi/launch_agent_service_gaudi.sh
@@ -14,7 +14,7 @@ export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
export HF_CACHE_DIR=${HF_CACHE_DIR}
ls $HF_CACHE_DIR
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
-export LLM_MODEL_ID="meta-llama/Meta-Llama-3.1-70B-Instruct"
+export LLM_MODEL_ID="meta-llama/Llama-3.3-70B-Instruct" #"meta-llama/Meta-Llama-3.1-70B-Instruct"
export NUM_SHARDS=4
export LLM_ENDPOINT_URL="http://${ip_address}:8086"
export temperature=0
diff --git a/AgentQnA/tests/step4_launch_and_validate_agent_tgi.sh b/AgentQnA/tests/step4_launch_and_validate_agent_gaudi.sh
similarity index 84%
rename from AgentQnA/tests/step4_launch_and_validate_agent_tgi.sh
rename to AgentQnA/tests/step4_launch_and_validate_agent_gaudi.sh
index 824f7aa855..7c3800ba78 100644
--- a/AgentQnA/tests/step4_launch_and_validate_agent_tgi.sh
+++ b/AgentQnA/tests/step4_launch_and_validate_agent_gaudi.sh
@@ -11,15 +11,15 @@ export ip_address=$(hostname -I | awk '{print $1}')
export TOOLSET_PATH=$WORKPATH/tools/
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
HF_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
-model="meta-llama/Meta-Llama-3.1-70B-Instruct"
+model="meta-llama/Llama-3.3-70B-Instruct" #"meta-llama/Meta-Llama-3.1-70B-Instruct"
-export HF_CACHE_DIR=/data2/huggingface
-if [ ! -d "$HF_CACHE_DIR" ]; then
- HF_CACHE_DIR=$WORKDIR/hf_cache
- mkdir -p "$HF_CACHE_DIR"
-fi
-echo "HF_CACHE_DIR=$HF_CACHE_DIR"
-ls $HF_CACHE_DIR
+# export HF_CACHE_DIR=/data2/huggingface
+# if [ ! -d "$HF_CACHE_DIR" ]; then
+# HF_CACHE_DIR=$WORKDIR/hf_cache
+# mkdir -p "$HF_CACHE_DIR"
+# fi
+# echo "HF_CACHE_DIR=$HF_CACHE_DIR"
+ls $HF_CACHE_DIR/hub
vllm_port=8086
vllm_volume=${HF_CACHE_DIR}
@@ -60,23 +60,6 @@ function start_vllm_service_70B() {
echo "Service started successfully"
}
-
-function prepare_data() {
- cd $WORKDIR
-
- echo "Downloading data..."
- git clone https://github.com/TAG-Research/TAG-Bench.git
- cd TAG-Bench/setup
- chmod +x get_dbs.sh
- ./get_dbs.sh
-
- echo "Split data..."
- cd $WORKPATH/tests/sql_agent_test
- bash run_data_split.sh
-
- echo "Data preparation done!"
-}
-
function download_chinook_data(){
echo "Downloading chinook data..."
cd $WORKDIR
@@ -113,7 +96,7 @@ function validate_agent_service() {
echo "======================Testing worker rag agent======================"
export agent_port="9095"
prompt="Tell me about Michael Jackson song Thriller"
- local CONTENT=$(python3 $WORKDIR/GenAIExamples/AgentQnA/tests/test.py --prompt "$prompt")
+ local CONTENT=$(python3 $WORKDIR/GenAIExamples/AgentQnA/tests/test.py --prompt "$prompt" --agent_role "worker" --ext_port $agent_port)
# echo $CONTENT
local EXIT_CODE=$(validate "$CONTENT" "Thriller" "rag-agent-endpoint")
echo $EXIT_CODE
@@ -127,7 +110,7 @@ function validate_agent_service() {
echo "======================Testing worker sql agent======================"
export agent_port="9096"
prompt="How many employees are there in the company?"
- local CONTENT=$(python3 $WORKDIR/GenAIExamples/AgentQnA/tests/test.py --prompt "$prompt")
+ local CONTENT=$(python3 $WORKDIR/GenAIExamples/AgentQnA/tests/test.py --prompt "$prompt" --agent_role "worker" --ext_port $agent_port)
local EXIT_CODE=$(validate "$CONTENT" "8" "sql-agent-endpoint")
echo $CONTENT
# echo $EXIT_CODE
@@ -140,9 +123,8 @@ function validate_agent_service() {
# test supervisor react agent
echo "======================Testing supervisor react agent======================"
export agent_port="9090"
- prompt="How many albums does Iron Maiden have?"
- local CONTENT=$(python3 $WORKDIR/GenAIExamples/AgentQnA/tests/test.py --prompt "$prompt")
- local EXIT_CODE=$(validate "$CONTENT" "21" "react-agent-endpoint")
+ local CONTENT=$(python3 $WORKDIR/GenAIExamples/AgentQnA/tests/test.py --agent_role "supervisor" --ext_port $agent_port --stream)
+ local EXIT_CODE=$(validate "$CONTENT" "Iron" "react-agent-endpoint")
# echo $CONTENT
echo $EXIT_CODE
local EXIT_CODE="${EXIT_CODE:0-1}"
@@ -153,15 +135,6 @@ function validate_agent_service() {
}
-function remove_data() {
- echo "Removing data..."
- cd $WORKDIR
- if [ -d "TAG-Bench" ]; then
- rm -rf TAG-Bench
- fi
- echo "Data removed!"
-}
-
function remove_chinook_data(){
echo "Removing chinook data..."
cd $WORKDIR
@@ -189,8 +162,9 @@ function main() {
echo "==================== Agent service validated ===================="
}
-remove_data
+
remove_chinook_data
+
main
-remove_data
+
remove_chinook_data
diff --git a/AgentQnA/tests/test.py b/AgentQnA/tests/test.py
index 400684ffd6..046fcd9209 100644
--- a/AgentQnA/tests/test.py
+++ b/AgentQnA/tests/test.py
@@ -1,34 +1,20 @@
-# Copyright (C) 2024 Intel Corporation
+# Copyright (C) 2025 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
import argparse
-import os
+import json
+import uuid
import requests
-def generate_answer_agent_api(url, prompt):
- proxies = {"http": ""}
- payload = {
- "messages": prompt,
- }
- response = requests.post(url, json=payload, proxies=proxies)
- answer = response.json()["text"]
- return answer
-
-
def process_request(url, query, is_stream=False):
proxies = {"http": ""}
-
- payload = {
- "messages": query,
- }
-
+ content = json.dumps(query) if query is not None else None
try:
- resp = requests.post(url=url, json=payload, proxies=proxies, stream=is_stream)
+ resp = requests.post(url=url, data=content, proxies=proxies, stream=is_stream)
if not is_stream:
ret = resp.json()["text"]
- print(ret)
else:
for line in resp.iter_lines(decode_unicode=True):
print(line)
@@ -38,19 +24,54 @@ def process_request(url, query, is_stream=False):
return ret
except requests.exceptions.RequestException as e:
ret = f"An error occurred:{e}"
- print(ret)
- return False
+ return None
+
+def test_worker_agent(args):
+ url = f"http://{args.ip_addr}:{args.ext_port}/v1/chat/completions"
+ query = {"role": "user", "messages": args.prompt, "stream": "false"}
+ ret = process_request(url, query)
+ print("Response: ", ret)
+
+
+def add_message_and_run(url, user_message, thread_id, stream=False):
+ print("User message: ", user_message)
+ query = {"role": "user", "messages": user_message, "thread_id": thread_id, "stream": stream}
+ ret = process_request(url, query, is_stream=stream)
+ print("Response: ", ret)
+
+
+def test_chat_completion_multi_turn(args):
+ url = f"http://{args.ip_addr}:{args.ext_port}/v1/chat/completions"
+ thread_id = f"{uuid.uuid4()}"
+
+ # first turn
+ print("===============First turn==================")
+ user_message = "Which artist has the most albums in the database?"
+ add_message_and_run(url, user_message, thread_id, stream=args.stream)
+ print("===============End of first turn==================")
+
+ # second turn
+ print("===============Second turn==================")
+ user_message = "Give me a few examples of the artist's albumns?"
+ add_message_and_run(url, user_message, thread_id, stream=args.stream)
+ print("===============End of second turn==================")
+
if __name__ == "__main__":
parser = argparse.ArgumentParser()
- parser.add_argument("--prompt", type=str)
- parser.add_argument("--stream", action="store_true")
- args = parser.parse_args()
+ parser.add_argument("--ip_addr", type=str, default="127.0.0.1", help="endpoint ip address")
+ parser.add_argument("--ext_port", type=str, default="9090", help="endpoint port")
+ parser.add_argument("--stream", action="store_true", help="streaming mode")
+ parser.add_argument("--prompt", type=str, help="prompt message")
+ parser.add_argument("--agent_role", type=str, default="supervisor", help="supervisor or worker")
+ args, _ = parser.parse_known_args()
- ip_address = os.getenv("ip_address", "localhost")
- agent_port = os.getenv("agent_port", "9090")
- url = f"http://{ip_address}:{agent_port}/v1/chat/completions"
- prompt = args.prompt
+ print(args)
- process_request(url, prompt, args.stream)
+ if args.agent_role == "supervisor":
+ test_chat_completion_multi_turn(args)
+ elif args.agent_role == "worker":
+ test_worker_agent(args)
+ else:
+ raise ValueError("Invalid agent role")
\ No newline at end of file
diff --git a/AgentQnA/tests/test_compose_on_gaudi.sh b/AgentQnA/tests/test_compose_on_gaudi.sh
index de70514ba6..ab0ce295cb 100644
--- a/AgentQnA/tests/test_compose_on_gaudi.sh
+++ b/AgentQnA/tests/test_compose_on_gaudi.sh
@@ -78,7 +78,7 @@ bash step3_ingest_data_and_validate_retrieval.sh
echo "=================== #3 Data ingestion and validation completed===================="
echo "=================== #4 Start agent and API server===================="
-bash step4_launch_and_validate_agent_tgi.sh
+bash step4_launch_and_validate_agent_gaudi.sh
echo "=================== #4 Agent test passed ===================="
echo "=================== #5 Stop agent and API server===================="
From d219e028daa4363f7ade4078e49e7e90bb35b9df Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
<66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Tue, 18 Feb 2025 22:29:57 +0000
Subject: [PATCH 3/4] [pre-commit.ci] auto fixes from pre-commit.com hooks
for more information, see https://pre-commit.ci
---
AgentQnA/README.md | 1 +
AgentQnA/tests/test.py | 6 +++---
DocIndexRetriever/retrieval_tool.py | 2 +-
DocIndexRetriever/tests/test.py | 8 ++------
4 files changed, 7 insertions(+), 10 deletions(-)
diff --git a/AgentQnA/README.md b/AgentQnA/README.md
index 8e77f2f1a6..397bd0c775 100644
--- a/AgentQnA/README.md
+++ b/AgentQnA/README.md
@@ -247,6 +247,7 @@ docker logs react-agent-endpoint
You should see something like "HTTP server setup successful" if the docker containers are started successfully.
2. You can use Python to validate the agent system:
+
```bash
# RAG worker agent
python tests/test.py --prompt "Tell me about Michael Jackson song Thriller" --agent_role "worker" --ext_port 9095
diff --git a/AgentQnA/tests/test.py b/AgentQnA/tests/test.py
index 046fcd9209..18254f16c5 100644
--- a/AgentQnA/tests/test.py
+++ b/AgentQnA/tests/test.py
@@ -26,6 +26,7 @@ def process_request(url, query, is_stream=False):
ret = f"An error occurred:{e}"
return None
+
def test_worker_agent(args):
url = f"http://{args.ip_addr}:{args.ext_port}/v1/chat/completions"
query = {"role": "user", "messages": args.prompt, "stream": "false"}
@@ -52,12 +53,11 @@ def test_chat_completion_multi_turn(args):
# second turn
print("===============Second turn==================")
- user_message = "Give me a few examples of the artist's albumns?"
+ user_message = "Give me a few examples of the artist's albums?"
add_message_and_run(url, user_message, thread_id, stream=args.stream)
print("===============End of second turn==================")
-
if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument("--ip_addr", type=str, default="127.0.0.1", help="endpoint ip address")
@@ -74,4 +74,4 @@ def test_chat_completion_multi_turn(args):
elif args.agent_role == "worker":
test_worker_agent(args)
else:
- raise ValueError("Invalid agent role")
\ No newline at end of file
+ raise ValueError("Invalid agent role")
diff --git a/DocIndexRetriever/retrieval_tool.py b/DocIndexRetriever/retrieval_tool.py
index 26a7759251..99fab7b1b5 100644
--- a/DocIndexRetriever/retrieval_tool.py
+++ b/DocIndexRetriever/retrieval_tool.py
@@ -42,7 +42,7 @@ def align_outputs(self, data, cur_node, inputs, runtime_graph, llm_parameters_di
next_data = {
"input": inputs["input"],
"messages": inputs["input"],
- "embedding": data, #[item["embedding"] for item in data["data"]],
+ "embedding": data, # [item["embedding"] for item in data["data"]],
"k": kwargs["k"] if "k" in kwargs else 4,
"search_type": kwargs["search_type"] if "search_type" in kwargs else "similarity",
"distance_threshold": kwargs["distance_threshold"] if "distance_threshold" in kwargs else None,
diff --git a/DocIndexRetriever/tests/test.py b/DocIndexRetriever/tests/test.py
index e655073ddb..ba74827fa6 100644
--- a/DocIndexRetriever/tests/test.py
+++ b/DocIndexRetriever/tests/test.py
@@ -11,11 +11,7 @@ def search_knowledge_base(query: str) -> str:
url = os.environ.get("RETRIEVAL_TOOL_URL")
print(url)
proxies = {"http": ""}
- payload = {
- "messages": query,
- "k":5,
- "top_n": 2
- }
+ payload = {"messages": query, "k": 5, "top_n": 2}
response = requests.post(url, json=payload, proxies=proxies)
print(response)
if "documents" in response.json():
@@ -39,4 +35,4 @@ def search_knowledge_base(query: str) -> str:
if __name__ == "__main__":
resp = search_knowledge_base("What is OPEA?")
# resp = search_knowledge_base("Thriller")
- print(resp)
\ No newline at end of file
+ print(resp)
From 7d10a4d8a8dbb94c9aff3cf3890dd9aee4e0a8a3 Mon Sep 17 00:00:00 2001
From: minmin-intel
Date: Tue, 18 Feb 2025 23:05:14 +0000
Subject: [PATCH 4/4] fix hf cache dir
Signed-off-by: minmin-intel
---
.../tests/step4_launch_and_validate_agent_gaudi.sh | 14 +++++++-------
1 file changed, 7 insertions(+), 7 deletions(-)
diff --git a/AgentQnA/tests/step4_launch_and_validate_agent_gaudi.sh b/AgentQnA/tests/step4_launch_and_validate_agent_gaudi.sh
index 7c3800ba78..56f017239b 100644
--- a/AgentQnA/tests/step4_launch_and_validate_agent_gaudi.sh
+++ b/AgentQnA/tests/step4_launch_and_validate_agent_gaudi.sh
@@ -13,13 +13,13 @@ export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
HF_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
model="meta-llama/Llama-3.3-70B-Instruct" #"meta-llama/Meta-Llama-3.1-70B-Instruct"
-# export HF_CACHE_DIR=/data2/huggingface
-# if [ ! -d "$HF_CACHE_DIR" ]; then
-# HF_CACHE_DIR=$WORKDIR/hf_cache
-# mkdir -p "$HF_CACHE_DIR"
-# fi
-# echo "HF_CACHE_DIR=$HF_CACHE_DIR"
-ls $HF_CACHE_DIR/hub
+export HF_CACHE_DIR=/data2/huggingface
+if [ ! -d "$HF_CACHE_DIR" ]; then
+ HF_CACHE_DIR=$WORKDIR/hf_cache
+ mkdir -p "$HF_CACHE_DIR"
+fi
+echo "HF_CACHE_DIR=$HF_CACHE_DIR"
+ls $HF_CACHE_DIR
vllm_port=8086
vllm_volume=${HF_CACHE_DIR}