Skip to content

Commit 8102235

Browse files
authored
Enable OpenTelemetry Tracing for ChatQnA TGI serving on Gaudi (#1316)
Signed-off-by: Tsai, Louie <louie.tsai@intel.com>
1 parent ddacb7e commit 8102235

File tree

4 files changed

+47
-3
lines changed

4 files changed

+47
-3
lines changed

ChatQnA/README.md

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -344,3 +344,22 @@ OPEA microservice deployment can easily be monitored through Grafana dashboards
344344

345345
![chatqna dashboards](./assets/img/chatqna_dashboards.png)
346346
![tgi dashboard](./assets/img/tgi_dashboard.png)
347+
348+
## Tracing Services with OpenTelemetry Tracing and Jaeger
349+
350+
> NOTE: Support is currently limited — only LLM inference serving with TGI on Gaudi is enabled for this feature.
351+
352+
OPEA microservice and TGI/TEI serving can easily be traced through Jaeger dashboards in conjunction with OpenTelemetry Tracing feature. Follow the [README](https://github.com/opea-project/GenAIComps/tree/main/comps/cores/telemetry#tracing) to trace additional functions if needed.
353+
354+
Tracing data is exported to Jaeger through the OTLP HTTP endpoint at http://{EXTERNAL_IP}:4318/v1/traces.
355+
Users can also obtain the external IP with the command below.
356+
357+
```bash
358+
ip route get 8.8.8.8 | grep -oP 'src \K[^ ]+'
359+
```
360+
361+
For TGI serving on Gaudi, users can see the different traced services, such as OPEA, TEI, and TGI.
362+
![Screenshot from 2024-12-27 11-58-18](https://github.com/user-attachments/assets/6126fa70-e830-4780-bd3f-83cb6eff064e)
363+
364+
Here is a screenshot of one trace of a TGI serving request.
365+
![Screenshot from 2024-12-27 11-26-25](https://github.com/user-attachments/assets/3a7c51c6-f422-41eb-8e82-c3df52cd48b8)

ChatQnA/docker_compose/intel/hpu/gaudi/compose.yaml

Lines changed: 21 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ services:
2525
INDEX_NAME: ${INDEX_NAME}
2626
TEI_ENDPOINT: http://tei-embedding-service:80
2727
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
28+
TELEMETRY_ENDPOINT: ${TELEMETRY_ENDPOINT}
2829
tei-embedding-service:
2930
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
3031
container_name: tei-embedding-gaudi-server
@@ -37,7 +38,7 @@ services:
3738
no_proxy: ${no_proxy}
3839
http_proxy: ${http_proxy}
3940
https_proxy: ${https_proxy}
40-
command: --model-id ${EMBEDDING_MODEL_ID} --auto-truncate
41+
command: --model-id ${EMBEDDING_MODEL_ID} --auto-truncate --otlp-endpoint $OTEL_EXPORTER_OTLP_TRACES_ENDPOINT
4142
retriever:
4243
image: ${REGISTRY:-opea}/retriever-redis:${TAG:-latest}
4344
container_name: retriever-redis-server
@@ -55,6 +56,7 @@ services:
5556
INDEX_NAME: ${INDEX_NAME}
5657
TEI_EMBEDDING_ENDPOINT: http://tei-embedding-service:80
5758
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
59+
TELEMETRY_ENDPOINT: ${TELEMETRY_ENDPOINT}
5860
restart: unless-stopped
5961
tei-reranking-service:
6062
image: ghcr.io/huggingface/tei-gaudi:1.5.0
@@ -76,7 +78,7 @@ services:
7678
HABANA_VISIBLE_DEVICES: all
7779
OMPI_MCA_btl_vader_single_copy_mechanism: none
7880
MAX_WARMUP_SEQUENCE_LENGTH: 512
79-
command: --model-id ${RERANK_MODEL_ID} --auto-truncate
81+
command: --model-id ${RERANK_MODEL_ID} --auto-truncate --otlp-endpoint $OTEL_EXPORTER_OTLP_TRACES_ENDPOINT
8082
tgi-service:
8183
image: ghcr.io/huggingface/tgi-gaudi:2.0.6
8284
container_name: tgi-gaudi-server
@@ -101,7 +103,22 @@ services:
101103
cap_add:
102104
- SYS_NICE
103105
ipc: host
104-
command: --model-id ${LLM_MODEL_ID} --max-input-length 2048 --max-total-tokens 4096
106+
command: --model-id ${LLM_MODEL_ID} --max-input-length 2048 --max-total-tokens 4096 --otlp-endpoint $OTEL_EXPORTER_OTLP_TRACES_ENDPOINT
107+
jaeger:
108+
image: jaegertracing/all-in-one:latest
109+
container_name: jaeger
110+
ports:
111+
- "16686:16686"
112+
- "4317:4317"
113+
- "4318:4318"
114+
- "9411:9411"
115+
ipc: host
116+
environment:
117+
no_proxy: ${no_proxy}
118+
http_proxy: ${http_proxy}
119+
https_proxy: ${https_proxy}
120+
COLLECTOR_ZIPKIN_HOST_PORT: 9411
121+
restart: unless-stopped
105122
chatqna-gaudi-backend-server:
106123
image: ${REGISTRY:-opea}/chatqna:${TAG:-latest}
107124
container_name: chatqna-gaudi-backend-server
@@ -127,6 +144,7 @@ services:
127144
- LLM_SERVER_PORT=${LLM_SERVER_PORT:-80}
128145
- LLM_MODEL=${LLM_MODEL_ID}
129146
- LOGFLAG=${LOGFLAG}
147+
- TELEMETRY_ENDPOINT=${TELEMETRY_ENDPOINT}
130148
ipc: host
131149
restart: always
132150
chatqna-gaudi-ui-server:

ChatQnA/docker_compose/intel/hpu/gaudi/set_env.sh

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,3 +14,7 @@ export INDEX_NAME="rag-redis"
1414
# Set it as a non-null string, such as true, if you want to enable logging facility,
1515
# otherwise, keep it as "" to disable it.
1616
export LOGFLAG=""
17+
# Set OpenTelemetry Tracing Endpoint
18+
export JAEGER_IP=$(ip route get 8.8.8.8 | grep -oP 'src \K[^ ]+')
19+
export OTEL_EXPORTER_OTLP_TRACES_ENDPOINT=grpc://$JAEGER_IP:4317
20+
export TELEMETRY_ENDPOINT=http://$JAEGER_IP:4318/v1/traces

ChatQnA/tests/test_compose_on_gaudi.sh

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,9 @@ function start_services() {
3636
export LLM_MODEL_ID="meta-llama/Meta-Llama-3-8B-Instruct"
3737
export INDEX_NAME="rag-redis"
3838
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
39+
export JAEGER_IP=$(ip route get 8.8.8.8 | grep -oP 'src \K[^ ]+')
40+
export OTEL_EXPORTER_OTLP_TRACES_ENDPOINT=grpc://$JAEGER_IP:4317
41+
export TELEMETRY_ENDPOINT=http://$JAEGER_IP:4318/v1/traces
3942

4043
# Start Docker Containers
4144
sed -i "s|container_name: chatqna-gaudi-backend-server|container_name: chatqna-gaudi-backend-server\n volumes:\n - \"${WORKPATH}\/docker_image_build\/GenAIComps:\/home\/user\/GenAIComps\"|g" compose.yaml

0 commit comments

Comments
 (0)