diff --git a/ProductivitySuite/docker_compose/intel/cpu/xeon/README.md b/ProductivitySuite/docker_compose/intel/cpu/xeon/README.md
index c5463ad103..5533f0750b 100644
--- a/ProductivitySuite/docker_compose/intel/cpu/xeon/README.md
+++ b/ProductivitySuite/docker_compose/intel/cpu/xeon/README.md
@@ -175,6 +175,9 @@ export LLM_SERVICE_HOST_PORT_FAQGEN=9002
 export LLM_SERVICE_HOST_PORT_CODEGEN=9001
 export LLM_SERVICE_HOST_PORT_DOCSUM=9003
 export PROMPT_COLLECTION_NAME="prompt"
+export RERANK_SERVER_PORT=8808
+export EMBEDDING_SERVER_PORT=6006
+export LLM_SERVER_PORT=9009
 ```
 
 Note: Please replace with `host_ip` with you external IP address, do not use localhost.
diff --git a/ProductivitySuite/docker_compose/intel/cpu/xeon/compose.yaml b/ProductivitySuite/docker_compose/intel/cpu/xeon/compose.yaml
index 2e56d65844..4bda722234 100644
--- a/ProductivitySuite/docker_compose/intel/cpu/xeon/compose.yaml
+++ b/ProductivitySuite/docker_compose/intel/cpu/xeon/compose.yaml
@@ -26,7 +26,10 @@ services:
       http_proxy: ${http_proxy}
       https_proxy: ${https_proxy}
       REDIS_URL: ${REDIS_URL}
+      REDIS_HOST: redis-vector-db
       INDEX_NAME: ${INDEX_NAME}
+      TEI_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
+      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
   tei-embedding-service:
     image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
     container_name: tei-embedding-server
@@ -70,6 +73,7 @@ services:
       http_proxy: ${http_proxy}
       https_proxy: ${https_proxy}
       REDIS_URL: ${REDIS_URL}
+      REDIS_HOST: redis-vector-db
       INDEX_NAME: ${INDEX_NAME}
       TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
       HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
@@ -111,7 +115,7 @@ services:
       LANGCHAIN_PROJECT: "opea-reranking-service"
     restart: unless-stopped
   tgi_service:
-    image: ghcr.io/huggingface/text-generation-inference:2.1.0
+    image: ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
     container_name: tgi-service
     ports:
       - "9009:80"
@@ -125,7 +129,7 @@ services:
       HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
       HF_HUB_DISABLE_PROGRESS_BARS: 1
       HF_HUB_ENABLE_HF_TRANSFER: 0
-    command: --model-id ${LLM_MODEL_ID}
+    command: --model-id ${LLM_MODEL_ID} --cuda-graphs 0
   llm:
     image: ${REGISTRY:-opea}/llm-tgi:${TAG:-latest}
     container_name: llm-tgi-server
@@ -152,11 +156,12 @@ services:
     depends_on:
       - redis-vector-db
       - tei-embedding-service
-      - embedding
+      - dataprep-redis-service
       - retriever
       - tei-reranking-service
-      - reranking
       - tgi_service
+      - embedding
+      - reranking
       - llm
     ports:
       - "8888:8888"
@@ -165,14 +170,19 @@ services:
       https_proxy: ${https_proxy}
       http_proxy: ${http_proxy}
       MEGA_SERVICE_HOST_IP: ${MEGA_SERVICE_HOST_IP}
-      EMBEDDING_SERVICE_HOST_IP: ${EMBEDDING_SERVICE_HOST_IP}
+      EMBEDDING_SERVER_HOST_IP: ${EMBEDDING_SERVICE_HOST_IP}
+      EMBEDDING_SERVER_PORT: ${EMBEDDING_SERVER_PORT:-80}
       RETRIEVER_SERVICE_HOST_IP: ${RETRIEVER_SERVICE_HOST_IP}
-      RERANK_SERVICE_HOST_IP: ${RERANK_SERVICE_HOST_IP}
-      LLM_SERVICE_HOST_IP: ${LLM_SERVICE_HOST_IP_CHATQNA}
+      RERANK_SERVER_HOST_IP: ${RERANK_SERVICE_HOST_IP}
+      RERANK_SERVER_PORT: ${RERANK_SERVER_PORT:-80}
+      LLM_SERVER_HOST_IP: ${LLM_SERVICE_HOST_IP_CHATQNA}
+      LLM_SERVER_PORT: ${LLM_SERVER_PORT:-80}
+      LLM_MODEL: ${LLM_MODEL_ID}
+      LOGFLAG: ${LOGFLAG}
     ipc: host
     restart: always
   tgi_service_codegen:
-    image: ghcr.io/huggingface/text-generation-inference:2.1.0
+    image: ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
     container_name: tgi_service_codegen
     ports:
       - "8028:80"
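The compose changes above rename the ChatQnA backend's gateway variables from `*_SERVICE_HOST_IP` to split `*_SERVER_HOST_IP`/`*_SERVER_PORT` pairs, where `${VAR:-80}` falls back to the in-container port 80 whenever a variable is unset or empty. A minimal sketch of how that substitution can be checked with `docker compose config`; the port values come from the README exports in this patch, while the grep filter and working directory are just illustrative:

```bash
# Sketch only: render the resolved compose file and inspect the ports the
# chatqna backend will target. ${VAR:-80} in compose.yaml means an unset or
# empty variable falls back to 80, the port inside the service containers.
cd ProductivitySuite/docker_compose/intel/cpu/xeon

unset EMBEDDING_SERVER_PORT RERANK_SERVER_PORT LLM_SERVER_PORT
docker compose -f compose.yaml config | grep SERVER_PORT   # all resolve to 80

# With the exports this patch adds to the README, the host-mapped ports win:
export EMBEDDING_SERVER_PORT=6006 RERANK_SERVER_PORT=8808 LLM_SERVER_PORT=9009
docker compose -f compose.yaml config | grep SERVER_PORT   # 6006 / 8808 / 9009
```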
diff --git a/ProductivitySuite/tests/test_compose_on_xeon.sh b/ProductivitySuite/tests/test_compose_on_xeon.sh
index 7c18f35d42..11a45f734a 100755
--- a/ProductivitySuite/tests/test_compose_on_xeon.sh
+++ b/ProductivitySuite/tests/test_compose_on_xeon.sh
@@ -22,7 +22,7 @@ function build_docker_images() {
     docker compose -f build.yaml build --no-cache > ${LOG_PATH}/docker_image_build.log
 
     docker pull ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
-    docker pull ghcr.io/huggingface/text-generation-inference:2.1.0
+    docker pull ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
     docker images && sleep 1s
 }
 
@@ -74,6 +74,9 @@ function start_services() {
     export LLM_SERVICE_HOST_PORT_FAQGEN=9002
     export LLM_SERVICE_HOST_PORT_CODEGEN=9001
     export LLM_SERVICE_HOST_PORT_DOCSUM=9003
+    export RERANK_SERVER_PORT=8808
+    export EMBEDDING_SERVER_PORT=6006
+    export LLM_SERVER_PORT=9009
     export PROMPT_COLLECTION_NAME="prompt"
 
     # Start Docker Containers
@@ -116,6 +119,9 @@ function validate_service() {
         HTTP_RESPONSE=$(curl --silent --write-out "HTTPSTATUS:%{http_code}" -X POST -H 'Content-Type: application/json' "$URL")
     elif [[ $SERVICE_NAME == *"dataprep_del"* ]]; then
         HTTP_RESPONSE=$(curl --silent --write-out "HTTPSTATUS:%{http_code}" -X POST -d '{"file_path": "all"}' -H 'Content-Type: application/json' "$URL")
+    elif [[ $SERVICE_NAME == *"docsum-xeon-backend-server"* ]]; then
+        local INPUT_DATA="messages=Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."
+        HTTP_RESPONSE=$(curl --silent --write-out "HTTPSTATUS:%{http_code}" -X POST -F "$INPUT_DATA" -H 'Content-Type: multipart/form-data' "$URL")
     else
         HTTP_RESPONSE=$(curl --silent --write-out "HTTPSTATUS:%{http_code}" -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL")
     fi
@@ -315,7 +321,7 @@ function validate_megaservice() {
     # Curl the DocSum Mega Service
     validate_service \
        "${ip_address}:8890/v1/docsum" \
-        "toolkit" \
+        "embedding" \
         "docsum-xeon-backend-server" \
         "docsum-xeon-backend-server" \
         '{"messages": "Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."}'
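The new `docsum-xeon-backend-server` branch in `validate_service` posts the message as multipart form data rather than JSON, and the expected substring for the DocSum megaservice check changes from "toolkit" to "embedding", presumably because the summary returned by the updated TGI stack is not guaranteed to echo the word "toolkit". A standalone sketch of the same probe, assuming the gateway is published on port 8890 at `/v1/docsum` as in compose.yaml; deriving the host IP from `hostname -I` is illustrative, since the script uses its own `ip_address` variable:

```bash
# Sketch only: reproduce the DocSum multipart request by hand. With -F,
# curl generates the multipart/form-data Content-Type (including the
# boundary parameter) itself, so no explicit -H header is required here.
ip_address=$(hostname -I | awk '{print $1}')
curl --silent --write-out "HTTPSTATUS:%{http_code}" \
    -X POST "http://${ip_address}:8890/v1/docsum" \
    -F "messages=Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models."
```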