From d70ae5bc7b93a3d1b5999d50f96232fc12346c03 Mon Sep 17 00:00:00 2001
From: "Yeoh, Hoong Tee"
Date: Mon, 4 Nov 2024 11:32:22 +0530
Subject: [PATCH 1/4] ProductivitySuite: Update TGI CPU image version to 2.4.0

Update the TGI CPU image to 2.4.0-intel-cpu so that it stays aligned with
the image versions used by the other services.

Signed-off-by: Yeoh, Hoong Tee
---
 ProductivitySuite/docker_compose/intel/cpu/xeon/compose.yaml | 4 ++--
 ProductivitySuite/tests/test_compose_on_xeon.sh              | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/ProductivitySuite/docker_compose/intel/cpu/xeon/compose.yaml b/ProductivitySuite/docker_compose/intel/cpu/xeon/compose.yaml
index 2e56d65844..8c3ffa477b 100644
--- a/ProductivitySuite/docker_compose/intel/cpu/xeon/compose.yaml
+++ b/ProductivitySuite/docker_compose/intel/cpu/xeon/compose.yaml
@@ -111,7 +111,7 @@ services:
       LANGCHAIN_PROJECT: "opea-reranking-service"
     restart: unless-stopped
   tgi_service:
-    image: ghcr.io/huggingface/text-generation-inference:2.1.0
+    image: ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
     container_name: tgi-service
     ports:
       - "9009:80"
@@ -172,7 +172,7 @@ services:
     ipc: host
     restart: always
   tgi_service_codegen:
-    image: ghcr.io/huggingface/text-generation-inference:2.1.0
+    image: ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
     container_name: tgi_service_codegen
     ports:
       - "8028:80"
diff --git a/ProductivitySuite/tests/test_compose_on_xeon.sh b/ProductivitySuite/tests/test_compose_on_xeon.sh
index 7c18f35d42..b351680dfa 100755
--- a/ProductivitySuite/tests/test_compose_on_xeon.sh
+++ b/ProductivitySuite/tests/test_compose_on_xeon.sh
@@ -22,7 +22,7 @@ function build_docker_images() {
     docker compose -f build.yaml build --no-cache > ${LOG_PATH}/docker_image_build.log

     docker pull ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
-    docker pull ghcr.io/huggingface/text-generation-inference:2.1.0
+    docker pull ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
     docker images && sleep 1s
 }

From b95d93eab7c2b0bc16800f28449b87666cff9dce Mon Sep 17 00:00:00 2001
From: "Yeoh, Hoong Tee"
Date: Thu, 7 Nov 2024 13:41:58 +0530
Subject: [PATCH 2/4] ProductivitySuite: Update compose.yaml and test

Update the ChatQnA compose.yaml environment variables to match the recent
changes in the ChatQnA megaservice.

Signed-off-by: Yeoh, Hoong Tee
---
 .../docker_compose/intel/cpu/xeon/README.md |  3 +
 .../intel/cpu/xeon/compose.yaml             | 78 ++++----------------
 .../tests/test_compose_on_xeon.sh           |  3 +
 3 files changed, 20 insertions(+), 64 deletions(-)

diff --git a/ProductivitySuite/docker_compose/intel/cpu/xeon/README.md b/ProductivitySuite/docker_compose/intel/cpu/xeon/README.md
index c5463ad103..5533f0750b 100644
--- a/ProductivitySuite/docker_compose/intel/cpu/xeon/README.md
+++ b/ProductivitySuite/docker_compose/intel/cpu/xeon/README.md
@@ -175,6 +175,9 @@ export LLM_SERVICE_HOST_PORT_FAQGEN=9002
 export LLM_SERVICE_HOST_PORT_CODEGEN=9001
 export LLM_SERVICE_HOST_PORT_DOCSUM=9003
 export PROMPT_COLLECTION_NAME="prompt"
+export RERANK_SERVER_PORT=8808
+export EMBEDDING_SERVER_PORT=6006
+export LLM_SERVER_PORT=9009
 ```

 Note: Please replace `host_ip` with your external IP address; do not use localhost.
diff --git a/ProductivitySuite/docker_compose/intel/cpu/xeon/compose.yaml b/ProductivitySuite/docker_compose/intel/cpu/xeon/compose.yaml
index 8c3ffa477b..18b6031120 100644
--- a/ProductivitySuite/docker_compose/intel/cpu/xeon/compose.yaml
+++ b/ProductivitySuite/docker_compose/intel/cpu/xeon/compose.yaml
@@ -26,7 +26,10 @@ services:
       http_proxy: ${http_proxy}
       https_proxy: ${https_proxy}
       REDIS_URL: ${REDIS_URL}
+      REDIS_HOST: redis-vector-db
       INDEX_NAME: ${INDEX_NAME}
+      TEI_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
+      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
   tei-embedding-service:
     image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
     container_name: tei-embedding-server
@@ -40,23 +43,6 @@ services:
       http_proxy: ${http_proxy}
       https_proxy: ${https_proxy}
     command: --model-id ${EMBEDDING_MODEL_ID} --auto-truncate
-  embedding:
-    image: ${REGISTRY:-opea}/embedding-tei:${TAG:-latest}
-    container_name: embedding-tei-server
-    depends_on:
-      - tei-embedding-service
-    ports:
-      - "6000:6000"
-    ipc: host
-    environment:
-      no_proxy: ${no_proxy}
-      http_proxy: ${http_proxy}
-      https_proxy: ${https_proxy}
-      TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
-      LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY}
-      LANGCHAIN_TRACING_V2: ${LANGCHAIN_TRACING_V2}
-      LANGCHAIN_PROJECT: "opea-embedding-service"
-    restart: unless-stopped
   retriever:
     image: ${REGISTRY:-opea}/retriever-redis:${TAG:-latest}
     container_name: retriever-redis-server
@@ -70,6 +56,7 @@ services:
       http_proxy: ${http_proxy}
       https_proxy: ${https_proxy}
       REDIS_URL: ${REDIS_URL}
+      REDIS_HOST: redis-vector-db
       INDEX_NAME: ${INDEX_NAME}
       TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
       HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
@@ -90,26 +77,6 @@ services:
       HF_HUB_DISABLE_PROGRESS_BARS: 1
       HF_HUB_ENABLE_HF_TRANSFER: 0
     command: --model-id ${RERANK_MODEL_ID} --auto-truncate
-  reranking:
-    image: ${REGISTRY:-opea}/reranking-tei:${TAG:-latest}
-    container_name: reranking-tei-xeon-server
-    depends_on:
-      - tei-reranking-service
-    ports:
-      - "8000:8000"
-    ipc: host
-    environment:
-      no_proxy: ${no_proxy}
-      http_proxy: ${http_proxy}
-      https_proxy: ${https_proxy}
-      TEI_RERANKING_ENDPOINT: ${TEI_RERANKING_ENDPOINT}
-      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
-      HF_HUB_DISABLE_PROGRESS_BARS: 1
-      HF_HUB_ENABLE_HF_TRANSFER: 0
-      LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY}
-      LANGCHAIN_TRACING_V2: ${LANGCHAIN_TRACING_V2}
-      LANGCHAIN_PROJECT: "opea-reranking-service"
-    restart: unless-stopped
   tgi_service:
     image: ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
     container_name: tgi-service
@@ -125,39 +92,17 @@ services:
       HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
       HF_HUB_DISABLE_PROGRESS_BARS: 1
       HF_HUB_ENABLE_HF_TRANSFER: 0
-    command: --model-id ${LLM_MODEL_ID}
-  llm:
-    image: ${REGISTRY:-opea}/llm-tgi:${TAG:-latest}
-    container_name: llm-tgi-server
-    depends_on:
-      - tgi_service
-    ports:
-      - "9000:9000"
-    ipc: host
-    environment:
-      no_proxy: ${no_proxy}
-      http_proxy: ${http_proxy}
-      https_proxy: ${https_proxy}
-      TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT_CHATQNA}
-      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
-      HF_HUB_DISABLE_PROGRESS_BARS: 1
-      HF_HUB_ENABLE_HF_TRANSFER: 0
-      LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY}
-      LANGCHAIN_TRACING_V2: ${LANGCHAIN_TRACING_V2}
-      LANGCHAIN_PROJECT: "opea-llm-service"
-    restart: unless-stopped
+    command: --model-id ${LLM_MODEL_ID} --cuda-graphs 0
   chatqna-xeon-backend-server:
     image: ${REGISTRY:-opea}/chatqna:${TAG:-latest}
     container_name: chatqna-xeon-backend-server
     depends_on:
       - redis-vector-db
       - tei-embedding-service
-      - embedding
+      - dataprep-redis-service
       - retriever
       - tei-reranking-service
-      - reranking
       - tgi_service
-      - llm
     ports:
       - "8888:8888"
     environment:
@@ -165,10 +110,15 @@ services:
       https_proxy: ${https_proxy}
       http_proxy: ${http_proxy}
       MEGA_SERVICE_HOST_IP: ${MEGA_SERVICE_HOST_IP}
-      EMBEDDING_SERVICE_HOST_IP: ${EMBEDDING_SERVICE_HOST_IP}
+      EMBEDDING_SERVER_HOST_IP: ${EMBEDDING_SERVICE_HOST_IP}
+      EMBEDDING_SERVER_PORT: ${EMBEDDING_SERVER_PORT:-80}
       RETRIEVER_SERVICE_HOST_IP: ${RETRIEVER_SERVICE_HOST_IP}
-      RERANK_SERVICE_HOST_IP: ${RERANK_SERVICE_HOST_IP}
-      LLM_SERVICE_HOST_IP: ${LLM_SERVICE_HOST_IP_CHATQNA}
+      RERANK_SERVER_HOST_IP: ${RERANK_SERVICE_HOST_IP}
+      RERANK_SERVER_PORT: ${RERANK_SERVER_PORT:-80}
+      LLM_SERVER_HOST_IP: ${LLM_SERVICE_HOST_IP_CHATQNA}
+      LLM_SERVER_PORT: ${LLM_SERVER_PORT:-80}
+      LLM_MODEL: ${LLM_MODEL_ID}
+      LOGFLAG: ${LOGFLAG}
     ipc: host
     restart: always
   tgi_service_codegen:
diff --git a/ProductivitySuite/tests/test_compose_on_xeon.sh b/ProductivitySuite/tests/test_compose_on_xeon.sh
index b351680dfa..eb8b5977d3 100755
--- a/ProductivitySuite/tests/test_compose_on_xeon.sh
+++ b/ProductivitySuite/tests/test_compose_on_xeon.sh
@@ -74,6 +74,9 @@ function start_services() {
    export LLM_SERVICE_HOST_PORT_FAQGEN=9002
    export LLM_SERVICE_HOST_PORT_CODEGEN=9001
    export LLM_SERVICE_HOST_PORT_DOCSUM=9003
+   export RERANK_SERVER_PORT=8808
+   export EMBEDDING_SERVER_PORT=6006
+   export LLM_SERVER_PORT=9009
    export PROMPT_COLLECTION_NAME="prompt"

    # Start Docker Containers

From 03eda6bb6cd5267b6a2410ac4aa39864999f0495 Mon Sep 17 00:00:00 2001
From: "Yeoh, Hoong Tee"
Date: Thu, 7 Nov 2024 14:03:01 +0530
Subject: [PATCH 3/4] ProductivitySuite: Fix CI failure

Signed-off-by: Yeoh, Hoong Tee
---
 .../intel/cpu/xeon/compose.yaml | 60 +++++++++++++++++++
 1 file changed, 60 insertions(+)

diff --git a/ProductivitySuite/docker_compose/intel/cpu/xeon/compose.yaml b/ProductivitySuite/docker_compose/intel/cpu/xeon/compose.yaml
index 18b6031120..4bda722234 100644
--- a/ProductivitySuite/docker_compose/intel/cpu/xeon/compose.yaml
+++ b/ProductivitySuite/docker_compose/intel/cpu/xeon/compose.yaml
@@ -43,6 +43,23 @@ services:
       http_proxy: ${http_proxy}
       https_proxy: ${https_proxy}
     command: --model-id ${EMBEDDING_MODEL_ID} --auto-truncate
+  embedding:
+    image: ${REGISTRY:-opea}/embedding-tei:${TAG:-latest}
+    container_name: embedding-tei-server
+    depends_on:
+      - tei-embedding-service
+    ports:
+      - "6000:6000"
+    ipc: host
+    environment:
+      no_proxy: ${no_proxy}
+      http_proxy: ${http_proxy}
+      https_proxy: ${https_proxy}
+      TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
+      LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY}
+      LANGCHAIN_TRACING_V2: ${LANGCHAIN_TRACING_V2}
+      LANGCHAIN_PROJECT: "opea-embedding-service"
+    restart: unless-stopped
   retriever:
     image: ${REGISTRY:-opea}/retriever-redis:${TAG:-latest}
     container_name: retriever-redis-server
@@ -77,6 +94,26 @@ services:
       HF_HUB_DISABLE_PROGRESS_BARS: 1
       HF_HUB_ENABLE_HF_TRANSFER: 0
     command: --model-id ${RERANK_MODEL_ID} --auto-truncate
+  reranking:
+    image: ${REGISTRY:-opea}/reranking-tei:${TAG:-latest}
+    container_name: reranking-tei-xeon-server
+    depends_on:
+      - tei-reranking-service
+    ports:
+      - "8000:8000"
+    ipc: host
+    environment:
+      no_proxy: ${no_proxy}
+      http_proxy: ${http_proxy}
+      https_proxy: ${https_proxy}
+      TEI_RERANKING_ENDPOINT: ${TEI_RERANKING_ENDPOINT}
+      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+      HF_HUB_DISABLE_PROGRESS_BARS: 1
+      HF_HUB_ENABLE_HF_TRANSFER: 0
+      LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY}
+      LANGCHAIN_TRACING_V2: ${LANGCHAIN_TRACING_V2}
+      LANGCHAIN_PROJECT: "opea-reranking-service"
+    restart: unless-stopped
   tgi_service:
     image: ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
     container_name: tgi-service
@@ -93,6 +130,26 @@ services:
       HF_HUB_DISABLE_PROGRESS_BARS: 1
       HF_HUB_ENABLE_HF_TRANSFER: 0
     command: --model-id ${LLM_MODEL_ID} --cuda-graphs 0
+  llm:
+    image: ${REGISTRY:-opea}/llm-tgi:${TAG:-latest}
+    container_name: llm-tgi-server
+    depends_on:
+      - tgi_service
+    ports:
+      - "9000:9000"
+    ipc: host
+    environment:
+      no_proxy: ${no_proxy}
+      http_proxy: ${http_proxy}
+      https_proxy: ${https_proxy}
+      TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT_CHATQNA}
+      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+      HF_HUB_DISABLE_PROGRESS_BARS: 1
+      HF_HUB_ENABLE_HF_TRANSFER: 0
+      LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY}
+      LANGCHAIN_TRACING_V2: ${LANGCHAIN_TRACING_V2}
+      LANGCHAIN_PROJECT: "opea-llm-service"
+    restart: unless-stopped
   chatqna-xeon-backend-server:
     image: ${REGISTRY:-opea}/chatqna:${TAG:-latest}
     container_name: chatqna-xeon-backend-server
@@ -103,6 +160,9 @@ services:
       - retriever
       - tei-reranking-service
       - tgi_service
+      - embedding
+      - reranking
+      - llm
     ports:
       - "8888:8888"
     environment:

From 573f55108eafd94c4828371f96b91d3a2352724d Mon Sep 17 00:00:00 2001
From: "Yeoh, Hoong Tee"
Date: Thu, 7 Nov 2024 15:25:47 +0530
Subject: [PATCH 4/4] ProductivitySuite: Fix docsum test

Signed-off-by: Yeoh, Hoong Tee
---
 ProductivitySuite/tests/test_compose_on_xeon.sh | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/ProductivitySuite/tests/test_compose_on_xeon.sh b/ProductivitySuite/tests/test_compose_on_xeon.sh
index eb8b5977d3..11a45f734a 100755
--- a/ProductivitySuite/tests/test_compose_on_xeon.sh
+++ b/ProductivitySuite/tests/test_compose_on_xeon.sh
@@ -119,6 +119,9 @@ function validate_service() {
        HTTP_RESPONSE=$(curl --silent --write-out "HTTPSTATUS:%{http_code}" -X POST -H 'Content-Type: application/json' "$URL")
    elif [[ $SERVICE_NAME == *"dataprep_del"* ]]; then
        HTTP_RESPONSE=$(curl --silent --write-out "HTTPSTATUS:%{http_code}" -X POST -d '{"file_path": "all"}' -H 'Content-Type: application/json' "$URL")
+   elif [[ $SERVICE_NAME == *"docsum-xeon-backend-server"* ]]; then
+       local INPUT_DATA="messages=Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."
+       HTTP_RESPONSE=$(curl --silent --write-out "HTTPSTATUS:%{http_code}" -X POST -F "$INPUT_DATA" -H 'Content-Type: multipart/form-data' "$URL")
    else
        HTTP_RESPONSE=$(curl --silent --write-out "HTTPSTATUS:%{http_code}" -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL")
    fi
@@ -318,7 +321,7 @@ function validate_megaservice() {
    # Curl the DocSum Mega Service
    validate_service \
        "${ip_address}:8890/v1/docsum" \
-       "toolkit" \
+       "embedding" \
        "docsum-xeon-backend-server" \
        "docsum-xeon-backend-server" \
        '{"messages": "Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."}'
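
For reference, the DocSum request exercised by the updated test can be reproduced by hand. A minimal sketch, assuming the 8890 port mapping used in the test above and with ${host_ip} standing in for the machine's external IP; the payload here is shortened, and the expected-result argument ("embedding") is presumably the string the harness looks for in the response:

    # Manual spot-check mirroring the test's multipart request to the DocSum megaservice.
    # host_ip is a placeholder; endpoint and form field are taken from validate_service above.
    curl "http://${host_ip}:8890/v1/docsum" \
        -X POST \
        -H 'Content-Type: multipart/form-data' \
        -F "messages=Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models."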