From d70ae5bc7b93a3d1b5999d50f96232fc12346c03 Mon Sep 17 00:00:00 2001
From: "Yeoh, Hoong Tee"
Date: Mon, 4 Nov 2024 11:32:22 +0530
Subject: [PATCH 1/4] ProductivitySuite: Update TGI CPU image version to 2.4.0

Update the TGI CPU image to 2.4.0-intel-cpu so that it stays aligned with
the image versions used by the other services.

Signed-off-by: Yeoh, Hoong Tee
---
 ProductivitySuite/docker_compose/intel/cpu/xeon/compose.yaml | 4 ++--
 ProductivitySuite/tests/test_compose_on_xeon.sh              | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/ProductivitySuite/docker_compose/intel/cpu/xeon/compose.yaml b/ProductivitySuite/docker_compose/intel/cpu/xeon/compose.yaml
index 2e56d65844..8c3ffa477b 100644
--- a/ProductivitySuite/docker_compose/intel/cpu/xeon/compose.yaml
+++ b/ProductivitySuite/docker_compose/intel/cpu/xeon/compose.yaml
@@ -111,7 +111,7 @@ services:
       LANGCHAIN_PROJECT: "opea-reranking-service"
     restart: unless-stopped
   tgi_service:
-    image: ghcr.io/huggingface/text-generation-inference:2.1.0
+    image: ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
     container_name: tgi-service
     ports:
       - "9009:80"
@@ -172,7 +172,7 @@ services:
     ipc: host
     restart: always
   tgi_service_codegen:
-    image: ghcr.io/huggingface/text-generation-inference:2.1.0
+    image: ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
     container_name: tgi_service_codegen
     ports:
       - "8028:80"
diff --git a/ProductivitySuite/tests/test_compose_on_xeon.sh b/ProductivitySuite/tests/test_compose_on_xeon.sh
index 7c18f35d42..b351680dfa 100755
--- a/ProductivitySuite/tests/test_compose_on_xeon.sh
+++ b/ProductivitySuite/tests/test_compose_on_xeon.sh
@@ -22,7 +22,7 @@ function build_docker_images() {
     docker compose -f build.yaml build --no-cache > ${LOG_PATH}/docker_image_build.log

     docker pull ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
-    docker pull ghcr.io/huggingface/text-generation-inference:2.1.0
+    docker pull ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
     docker images && sleep 1s
 }

From b95d93eab7c2b0bc16800f28449b87666cff9dce Mon Sep 17 00:00:00 2001
From: "Yeoh, Hoong Tee"
Date: Thu, 7 Nov 2024 13:41:58 +0530
Subject: [PATCH 2/4] ProductivitySuite: Update compose.yaml and test

Update the ChatQnA compose.yaml environment variables to match the recent
changes in the ChatQnA megaservice.

Signed-off-by: Yeoh, Hoong Tee
---
 .../docker_compose/intel/cpu/xeon/README.md |  3 +
 .../intel/cpu/xeon/compose.yaml             | 78 ++++----------------
 .../tests/test_compose_on_xeon.sh           |  3 +
 3 files changed, 20 insertions(+), 64 deletions(-)

diff --git a/ProductivitySuite/docker_compose/intel/cpu/xeon/README.md b/ProductivitySuite/docker_compose/intel/cpu/xeon/README.md
index c5463ad103..5533f0750b 100644
--- a/ProductivitySuite/docker_compose/intel/cpu/xeon/README.md
+++ b/ProductivitySuite/docker_compose/intel/cpu/xeon/README.md
@@ -175,6 +175,9 @@ export LLM_SERVICE_HOST_PORT_FAQGEN=9002
 export LLM_SERVICE_HOST_PORT_CODEGEN=9001
 export LLM_SERVICE_HOST_PORT_DOCSUM=9003
 export PROMPT_COLLECTION_NAME="prompt"
+export RERANK_SERVER_PORT=8808
+export EMBEDDING_SERVER_PORT=6006
+export LLM_SERVER_PORT=9009
 ```

 Note: Please replace `host_ip` with your external IP address; do not use localhost.
diff --git a/ProductivitySuite/docker_compose/intel/cpu/xeon/compose.yaml b/ProductivitySuite/docker_compose/intel/cpu/xeon/compose.yaml
index 8c3ffa477b..18b6031120 100644
--- a/ProductivitySuite/docker_compose/intel/cpu/xeon/compose.yaml
+++ b/ProductivitySuite/docker_compose/intel/cpu/xeon/compose.yaml
@@ -26,7 +26,10 @@ services:
       http_proxy: ${http_proxy}
       https_proxy: ${https_proxy}
       REDIS_URL: ${REDIS_URL}
+      REDIS_HOST: redis-vector-db
       INDEX_NAME: ${INDEX_NAME}
+      TEI_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
+      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
   tei-embedding-service:
     image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
     container_name: tei-embedding-server
@@ -40,23 +43,6 @@ services:
       http_proxy: ${http_proxy}
       https_proxy: ${https_proxy}
     command: --model-id ${EMBEDDING_MODEL_ID} --auto-truncate
-  embedding:
-    image: ${REGISTRY:-opea}/embedding-tei:${TAG:-latest}
-    container_name: embedding-tei-server
-    depends_on:
-      - tei-embedding-service
-    ports:
-      - "6000:6000"
-    ipc: host
-    environment:
-      no_proxy: ${no_proxy}
-      http_proxy: ${http_proxy}
-      https_proxy: ${https_proxy}
-      TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
-      LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY}
-      LANGCHAIN_TRACING_V2: ${LANGCHAIN_TRACING_V2}
-      LANGCHAIN_PROJECT: "opea-embedding-service"
-    restart: unless-stopped
   retriever:
     image: ${REGISTRY:-opea}/retriever-redis:${TAG:-latest}
     container_name: retriever-redis-server
@@ -70,6 +56,7 @@ services:
       http_proxy: ${http_proxy}
       https_proxy: ${https_proxy}
       REDIS_URL: ${REDIS_URL}
+      REDIS_HOST: redis-vector-db
       INDEX_NAME: ${INDEX_NAME}
       TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
       HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
@@ -90,26 +77,6 @@ services:
       HF_HUB_DISABLE_PROGRESS_BARS: 1
       HF_HUB_ENABLE_HF_TRANSFER: 0
     command: --model-id ${RERANK_MODEL_ID} --auto-truncate
-  reranking:
-    image: ${REGISTRY:-opea}/reranking-tei:${TAG:-latest}
-    container_name: reranking-tei-xeon-server
-    depends_on:
-      - tei-reranking-service
-    ports:
-      - "8000:8000"
-    ipc: host
-    environment:
-      no_proxy: ${no_proxy}
-      http_proxy: ${http_proxy}
-      https_proxy: ${https_proxy}
-      TEI_RERANKING_ENDPOINT: ${TEI_RERANKING_ENDPOINT}
-      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
-      HF_HUB_DISABLE_PROGRESS_BARS: 1
-      HF_HUB_ENABLE_HF_TRANSFER: 0
-      LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY}
-      LANGCHAIN_TRACING_V2: ${LANGCHAIN_TRACING_V2}
-      LANGCHAIN_PROJECT: "opea-reranking-service"
-    restart: unless-stopped
   tgi_service:
     image: ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
     container_name: tgi-service
@@ -125,39 +92,17 @@ services:
       HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
       HF_HUB_DISABLE_PROGRESS_BARS: 1
       HF_HUB_ENABLE_HF_TRANSFER: 0
-    command: --model-id ${LLM_MODEL_ID}
-  llm:
-    image: ${REGISTRY:-opea}/llm-tgi:${TAG:-latest}
-    container_name: llm-tgi-server
-    depends_on:
-      - tgi_service
-    ports:
-      - "9000:9000"
-    ipc: host
-    environment:
-      no_proxy: ${no_proxy}
-      http_proxy: ${http_proxy}
-      https_proxy: ${https_proxy}
-      TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT_CHATQNA}
-      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
-      HF_HUB_DISABLE_PROGRESS_BARS: 1
-      HF_HUB_ENABLE_HF_TRANSFER: 0
-      LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY}
-      LANGCHAIN_TRACING_V2: ${LANGCHAIN_TRACING_V2}
-      LANGCHAIN_PROJECT: "opea-llm-service"
-    restart: unless-stopped
+    command: --model-id ${LLM_MODEL_ID} --cuda-graphs 0
   chatqna-xeon-backend-server:
     image: ${REGISTRY:-opea}/chatqna:${TAG:-latest}
     container_name: chatqna-xeon-backend-server
     depends_on:
       - redis-vector-db
       - tei-embedding-service
-      - embedding
+      - dataprep-redis-service
       - retriever
       - tei-reranking-service
-      - reranking
       - tgi_service
-      - llm
     ports:
       - "8888:8888"
     environment:
@@ -165,10 +110,15 @@ services:
       https_proxy: ${https_proxy}
       http_proxy: ${http_proxy}
       MEGA_SERVICE_HOST_IP: ${MEGA_SERVICE_HOST_IP}
-      EMBEDDING_SERVICE_HOST_IP: ${EMBEDDING_SERVICE_HOST_IP}
+      EMBEDDING_SERVER_HOST_IP: ${EMBEDDING_SERVICE_HOST_IP}
+      EMBEDDING_SERVER_PORT: ${EMBEDDING_SERVER_PORT:-80}
       RETRIEVER_SERVICE_HOST_IP: ${RETRIEVER_SERVICE_HOST_IP}
-      RERANK_SERVICE_HOST_IP: ${RERANK_SERVICE_HOST_IP}
-      LLM_SERVICE_HOST_IP: ${LLM_SERVICE_HOST_IP_CHATQNA}
+      RERANK_SERVER_HOST_IP: ${RERANK_SERVICE_HOST_IP}
+      RERANK_SERVER_PORT: ${RERANK_SERVER_PORT:-80}
+      LLM_SERVER_HOST_IP: ${LLM_SERVICE_HOST_IP_CHATQNA}
+      LLM_SERVER_PORT: ${LLM_SERVER_PORT:-80}
+      LLM_MODEL: ${LLM_MODEL_ID}
+      LOGFLAG: ${LOGFLAG}
     ipc: host
     restart: always
   tgi_service_codegen:
diff --git a/ProductivitySuite/tests/test_compose_on_xeon.sh b/ProductivitySuite/tests/test_compose_on_xeon.sh
index b351680dfa..eb8b5977d3 100755
--- a/ProductivitySuite/tests/test_compose_on_xeon.sh
+++ b/ProductivitySuite/tests/test_compose_on_xeon.sh
@@ -74,6 +74,9 @@ function start_services() {
    export LLM_SERVICE_HOST_PORT_FAQGEN=9002
    export LLM_SERVICE_HOST_PORT_CODEGEN=9001
    export LLM_SERVICE_HOST_PORT_DOCSUM=9003
+   export RERANK_SERVER_PORT=8808
+   export EMBEDDING_SERVER_PORT=6006
+   export LLM_SERVER_PORT=9009
    export PROMPT_COLLECTION_NAME="prompt"

    # Start Docker Containers

From 03eda6bb6cd5267b6a2410ac4aa39864999f0495 Mon Sep 17 00:00:00 2001
From: "Yeoh, Hoong Tee"
Date: Thu, 7 Nov 2024 14:03:01 +0530
Subject: [PATCH 3/4] ProductivitySuite: Fix CI failure

Signed-off-by: Yeoh, Hoong Tee
---
 .../intel/cpu/xeon/compose.yaml | 60 +++++++++++++++++++
 1 file changed, 60 insertions(+)

diff --git a/ProductivitySuite/docker_compose/intel/cpu/xeon/compose.yaml b/ProductivitySuite/docker_compose/intel/cpu/xeon/compose.yaml
index 18b6031120..4bda722234 100644
--- a/ProductivitySuite/docker_compose/intel/cpu/xeon/compose.yaml
+++ b/ProductivitySuite/docker_compose/intel/cpu/xeon/compose.yaml
@@ -43,6 +43,23 @@ services:
       http_proxy: ${http_proxy}
       https_proxy: ${https_proxy}
     command: --model-id ${EMBEDDING_MODEL_ID} --auto-truncate
+  embedding:
+    image: ${REGISTRY:-opea}/embedding-tei:${TAG:-latest}
+    container_name: embedding-tei-server
+    depends_on:
+      - tei-embedding-service
+    ports:
+      - "6000:6000"
+    ipc: host
+    environment:
+      no_proxy: ${no_proxy}
+      http_proxy: ${http_proxy}
+      https_proxy: ${https_proxy}
+      TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
+      LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY}
+      LANGCHAIN_TRACING_V2: ${LANGCHAIN_TRACING_V2}
+      LANGCHAIN_PROJECT: "opea-embedding-service"
+    restart: unless-stopped
   retriever:
     image: ${REGISTRY:-opea}/retriever-redis:${TAG:-latest}
     container_name: retriever-redis-server
@@ -77,6 +94,26 @@ services:
       HF_HUB_DISABLE_PROGRESS_BARS: 1
       HF_HUB_ENABLE_HF_TRANSFER: 0
     command: --model-id ${RERANK_MODEL_ID} --auto-truncate
+  reranking:
+    image: ${REGISTRY:-opea}/reranking-tei:${TAG:-latest}
+    container_name: reranking-tei-xeon-server
+    depends_on:
+      - tei-reranking-service
+    ports:
+      - "8000:8000"
+    ipc: host
+    environment:
+      no_proxy: ${no_proxy}
+      http_proxy: ${http_proxy}
+      https_proxy: ${https_proxy}
+      TEI_RERANKING_ENDPOINT: ${TEI_RERANKING_ENDPOINT}
+      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+      HF_HUB_DISABLE_PROGRESS_BARS: 1
+      HF_HUB_ENABLE_HF_TRANSFER: 0
+      LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY}
+      LANGCHAIN_TRACING_V2: ${LANGCHAIN_TRACING_V2}
+      LANGCHAIN_PROJECT: "opea-reranking-service"
+    restart: unless-stopped
   tgi_service:
     image: ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
     container_name: tgi-service
@@ -93,6 +130,26 @@ services:
       HF_HUB_DISABLE_PROGRESS_BARS: 1
       HF_HUB_ENABLE_HF_TRANSFER: 0
     command: --model-id ${LLM_MODEL_ID} --cuda-graphs 0
+  llm:
+    image: ${REGISTRY:-opea}/llm-tgi:${TAG:-latest}
+    container_name: llm-tgi-server
+    depends_on:
+      - tgi_service
+    ports:
+      - "9000:9000"
+    ipc: host
+    environment:
+      no_proxy: ${no_proxy}
+      http_proxy: ${http_proxy}
+      https_proxy: ${https_proxy}
+      TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT_CHATQNA}
+      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+      HF_HUB_DISABLE_PROGRESS_BARS: 1
+      HF_HUB_ENABLE_HF_TRANSFER: 0
+      LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY}
+      LANGCHAIN_TRACING_V2: ${LANGCHAIN_TRACING_V2}
+      LANGCHAIN_PROJECT: "opea-llm-service"
+    restart: unless-stopped
   chatqna-xeon-backend-server:
     image: ${REGISTRY:-opea}/chatqna:${TAG:-latest}
     container_name: chatqna-xeon-backend-server
@@ -103,6 +160,9 @@ services:
       - retriever
       - tei-reranking-service
       - tgi_service
+      - embedding
+      - reranking
+      - llm
     ports:
       - "8888:8888"
     environment:

From 573f55108eafd94c4828371f96b91d3a2352724d Mon Sep 17 00:00:00 2001
From: "Yeoh, Hoong Tee"
Date: Thu, 7 Nov 2024 15:25:47 +0530
Subject: [PATCH 4/4] ProductivitySuite: Fix docsum test

Signed-off-by: Yeoh, Hoong Tee
---
 ProductivitySuite/tests/test_compose_on_xeon.sh | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/ProductivitySuite/tests/test_compose_on_xeon.sh b/ProductivitySuite/tests/test_compose_on_xeon.sh
index eb8b5977d3..11a45f734a 100755
--- a/ProductivitySuite/tests/test_compose_on_xeon.sh
+++ b/ProductivitySuite/tests/test_compose_on_xeon.sh
@@ -119,6 +119,9 @@ function validate_service() {
        HTTP_RESPONSE=$(curl --silent --write-out "HTTPSTATUS:%{http_code}" -X POST -H 'Content-Type: application/json' "$URL")
    elif [[ $SERVICE_NAME == *"dataprep_del"* ]]; then
        HTTP_RESPONSE=$(curl --silent --write-out "HTTPSTATUS:%{http_code}" -X POST -d '{"file_path": "all"}' -H 'Content-Type: application/json' "$URL")
+   elif [[ $SERVICE_NAME == *"docsum-xeon-backend-server"* ]]; then
+       local INPUT_DATA="messages=Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."
+       HTTP_RESPONSE=$(curl --silent --write-out "HTTPSTATUS:%{http_code}" -X POST -F "$INPUT_DATA" -H 'Content-Type: multipart/form-data' "$URL")
    else
        HTTP_RESPONSE=$(curl --silent --write-out "HTTPSTATUS:%{http_code}" -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL")
    fi
@@ -318,7 +321,7 @@ function validate_megaservice() {
    # Curl the DocSum Mega Service
    validate_service \
        "${ip_address}:8890/v1/docsum" \
-       "toolkit" \
+       "embedding" \
        "docsum-xeon-backend-server" \
        "docsum-xeon-backend-server" \
        '{"messages": "Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."}'
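
For reference, the DocSum request exercised by the updated test can be reproduced by hand. A minimal sketch, assuming the 8890 port mapping used in the test above and with ${host_ip} standing in for the machine's external IP; the payload here is shortened, and the expected-result argument ("embedding") is presumably the string the harness looks for in the response:

    # Manual spot-check mirroring the test's multipart request to the DocSum megaservice.
    # host_ip is a placeholder; endpoint and form field are taken from validate_service above.
    curl "http://${host_ip}:8890/v1/docsum" \
        -X POST \
        -H 'Content-Type: multipart/form-data' \
        -F "messages=Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models."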