3 changes: 3 additions & 0 deletions ProductivitySuite/docker_compose/intel/cpu/xeon/README.md
@@ -175,6 +175,9 @@ export LLM_SERVICE_HOST_PORT_FAQGEN=9002
export LLM_SERVICE_HOST_PORT_CODEGEN=9001
export LLM_SERVICE_HOST_PORT_DOCSUM=9003
export PROMPT_COLLECTION_NAME="prompt"
+export RERANK_SERVER_PORT=8808
+export EMBEDDING_SERVER_PORT=6006
+export LLM_SERVER_PORT=9009
```

Note: Please replace `host_ip` with your external IP address; do not use localhost.
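The three new exports match the host ports this compose file publishes: 6006 for the TEI embedding server, 8808 for the TEI reranking server, and 9009 for the TGI LLM server. As a minimal sanity check after `docker compose up -d` (a sketch, assuming the default ports above and an exported `host_ip`; the `/embed`, `/rerank`, and `/generate` routes are the standard TEI/TGI APIs):

```bash
# TEI embedding server (EMBEDDING_SERVER_PORT=6006)
curl http://${host_ip}:6006/embed -X POST \
  -H 'Content-Type: application/json' \
  -d '{"inputs": "What is deep learning?"}'

# TEI reranking server (RERANK_SERVER_PORT=8808)
curl http://${host_ip}:8808/rerank -X POST \
  -H 'Content-Type: application/json' \
  -d '{"query": "What is deep learning?", "texts": ["Deep learning is a subset of machine learning.", "Paris is the capital of France."]}'

# TGI LLM server (LLM_SERVER_PORT=9009)
curl http://${host_ip}:9009/generate -X POST \
  -H 'Content-Type: application/json' \
  -d '{"inputs": "What is deep learning?", "parameters": {"max_new_tokens": 17}}'
```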
26 changes: 18 additions & 8 deletions ProductivitySuite/docker_compose/intel/cpu/xeon/compose.yaml
@@ -26,7 +26,10 @@ services:
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      REDIS_URL: ${REDIS_URL}
+     REDIS_HOST: redis-vector-db
      INDEX_NAME: ${INDEX_NAME}
+     TEI_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
+     HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
  tei-embedding-service:
    image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
    container_name: tei-embedding-server
@@ -70,6 +73,7 @@ services:
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      REDIS_URL: ${REDIS_URL}
+     REDIS_HOST: redis-vector-db
      INDEX_NAME: ${INDEX_NAME}
      TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
@@ -111,7 +115,7 @@ services:
      LANGCHAIN_PROJECT: "opea-reranking-service"
    restart: unless-stopped
  tgi_service:
-   image: ghcr.io/huggingface/text-generation-inference:2.1.0
+   image: ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
    container_name: tgi-service
    ports:
      - "9009:80"
@@ -125,7 +129,7 @@
      HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
      HF_HUB_DISABLE_PROGRESS_BARS: 1
      HF_HUB_ENABLE_HF_TRANSFER: 0
-   command: --model-id ${LLM_MODEL_ID}
+   command: --model-id ${LLM_MODEL_ID} --cuda-graphs 0
  llm:
    image: ${REGISTRY:-opea}/llm-tgi:${TAG:-latest}
    container_name: llm-tgi-server
@@ -152,11 +156,12 @@
    depends_on:
      - redis-vector-db
      - tei-embedding-service
-     - embedding
      - dataprep-redis-service
      - retriever
      - tei-reranking-service
-     - reranking
      - tgi_service
+     - embedding
+     - reranking
+     - llm
    ports:
      - "8888:8888"
@@ -165,14 +170,19 @@
      https_proxy: ${https_proxy}
      http_proxy: ${http_proxy}
      MEGA_SERVICE_HOST_IP: ${MEGA_SERVICE_HOST_IP}
-     EMBEDDING_SERVICE_HOST_IP: ${EMBEDDING_SERVICE_HOST_IP}
+     EMBEDDING_SERVER_HOST_IP: ${EMBEDDING_SERVICE_HOST_IP}
+     EMBEDDING_SERVER_PORT: ${EMBEDDING_SERVER_PORT:-80}
      RETRIEVER_SERVICE_HOST_IP: ${RETRIEVER_SERVICE_HOST_IP}
-     RERANK_SERVICE_HOST_IP: ${RERANK_SERVICE_HOST_IP}
-     LLM_SERVICE_HOST_IP: ${LLM_SERVICE_HOST_IP_CHATQNA}
+     RERANK_SERVER_HOST_IP: ${RERANK_SERVICE_HOST_IP}
+     RERANK_SERVER_PORT: ${RERANK_SERVER_PORT:-80}
+     LLM_SERVER_HOST_IP: ${LLM_SERVICE_HOST_IP_CHATQNA}
+     LLM_SERVER_PORT: ${LLM_SERVER_PORT:-80}
+     LLM_MODEL: ${LLM_MODEL_ID}
+     LOGFLAG: ${LOGFLAG}
    ipc: host
    restart: always
  tgi_service_codegen:
-   image: ghcr.io/huggingface/text-generation-inference:2.1.0
+   image: ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
    container_name: tgi_service_codegen
    ports:
      - "8028:80"
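Two review notes on the compose changes. The `${VAR:-80}` syntax used for the new `*_SERVER_PORT` variables falls back to 80 (the port TEI and TGI listen on inside their containers) when the variable is unset, so the mega-service still resolves if the exports are skipped. And `--cuda-graphs 0` disables CUDA graph capture, which does not apply on the CPU-only `2.4.0-intel-cpu` image. A quick way to spot-check the interpolated values before starting anything (the grep pattern is only illustrative):

```bash
# Render compose.yaml with the current shell environment applied and
# inspect the new *_SERVER_* settings of the backend service.
docker compose -f compose.yaml config | grep -E 'SERVER_(HOST_IP|PORT)'
```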
10 changes: 8 additions & 2 deletions ProductivitySuite/tests/test_compose_on_xeon.sh
@@ -22,7 +22,7 @@ function build_docker_images() {
    docker compose -f build.yaml build --no-cache > ${LOG_PATH}/docker_image_build.log

    docker pull ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
-   docker pull ghcr.io/huggingface/text-generation-inference:2.1.0
+   docker pull ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
    docker images && sleep 1s
}

@@ -74,6 +74,9 @@ function start_services() {
    export LLM_SERVICE_HOST_PORT_FAQGEN=9002
    export LLM_SERVICE_HOST_PORT_CODEGEN=9001
    export LLM_SERVICE_HOST_PORT_DOCSUM=9003
+   export RERANK_SERVER_PORT=8808
+   export EMBEDDING_SERVER_PORT=6006
+   export LLM_SERVER_PORT=9009
    export PROMPT_COLLECTION_NAME="prompt"

    # Start Docker Containers
@@ -116,6 +119,9 @@ function validate_service() {
        HTTP_RESPONSE=$(curl --silent --write-out "HTTPSTATUS:%{http_code}" -X POST -H 'Content-Type: application/json' "$URL")
    elif [[ $SERVICE_NAME == *"dataprep_del"* ]]; then
        HTTP_RESPONSE=$(curl --silent --write-out "HTTPSTATUS:%{http_code}" -X POST -d '{"file_path": "all"}' -H 'Content-Type: application/json' "$URL")
+   elif [[ $SERVICE_NAME == *"docsum-xeon-backend-server"* ]]; then
+       local INPUT_DATA="messages=Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."
+       HTTP_RESPONSE=$(curl --silent --write-out "HTTPSTATUS:%{http_code}" -X POST -F "$INPUT_DATA" -H 'Content-Type: multipart/form-data' "$URL")
    else
        HTTP_RESPONSE=$(curl --silent --write-out "HTTPSTATUS:%{http_code}" -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL")
    fi
@@ -315,7 +321,7 @@ function validate_megaservice() {
    # Curl the DocSum Mega Service
    validate_service \
        "${ip_address}:8890/v1/docsum" \
-       "toolkit" \
+       "embedding" \
        "docsum-xeon-backend-server" \
        "docsum-xeon-backend-server" \
        '{"messages": "Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."}'
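The new `docsum-xeon-backend-server` branch in `validate_service` sends the DocSum payload as a multipart form field rather than a JSON body, and the expected substring checked in the megaservice response changes from "toolkit" to "embedding". Reproduced by hand, the request the branch issues is roughly the following (a sketch; with `-F`, curl normally builds the `multipart/form-data` content type and boundary itself, so this sketch omits the explicit header):

```bash
# Approximation of the request the new test branch sends to DocSum.
curl --silent -X POST \
  -F "messages=Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models." \
  "http://${ip_address}:8890/v1/docsum"
```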