Commit 80e3e2a (1 parent: 8c384e0)

Update manifest for FaqGen (#582)

* update tgi version
* add k8s for faq
* add benchmark for faq
* refine k8s for faq
* add tuning for faq
* add prompts with different length for faq
* add tgi docker for llama3.1
* remove useless code
* remove nodeselector
* remove hf token
* refine code structure
* [pre-commit.ci] auto fixes from pre-commit.com hooks (see https://pre-commit.ci)
* fix readme

Signed-off-by: Xinyao Wang <xinyao.wang@intel.com>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>

File tree: 6 files changed (+308, -330 lines)


FaqGen/docker/gaudi/README.md (2 additions, 2 deletions)

````diff
@@ -16,7 +16,7 @@ cd GenAIComps
 As TGI Gaudi has been officially published as a Docker image, we simply need to pull it:
 
 ```bash
-docker pull ghcr.io/huggingface/tgi-gaudi:1.2.1
+docker pull ghcr.io/huggingface/tgi-gaudi:2.0.1
 ```
 
 ### 2. Build LLM Image
@@ -56,7 +56,7 @@ docker build -t opea/faqgen-react-ui:latest --build-arg https_proxy=$https_proxy
 
 Then run the command `docker images`, you will have the following Docker Images:
 
-1. `ghcr.io/huggingface/tgi-gaudi:1.2.1`
+1. `ghcr.io/huggingface/tgi-gaudi:2.0.1`
 2. `opea/llm-faqgen-tgi:latest`
 3. `opea/faqgen:latest`
 4. `opea/faqgen-ui:latest`
````

FaqGen/docker/gaudi/compose.yaml (4 additions, 2 deletions)

```diff
@@ -17,12 +17,14 @@ services:
       https_proxy: ${https_proxy}
       HABANA_VISIBLE_DEVICES: all
       OMPI_MCA_btl_vader_single_copy_mechanism: none
-      HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+      HUGGING_FACE_HUB_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+      PREFILL_BATCH_BUCKET_SIZE: 1
+      BATCH_BUCKET_SIZE: 8
     runtime: habana
     cap_add:
       - SYS_NICE
     ipc: host
-    command: --model-id ${LLM_MODEL_ID} --max-input-length 1024 --max-total-tokens 2048
+    command: --model-id ${LLM_MODEL_ID} --max-input-length 2048 --max-total-tokens 4096 --max-batch-total-tokens 65536 --max-batch-prefill-tokens 4096
   llm_faqgen:
     image: opea/llm-faqgen-tgi:latest
     container_name: llm-faqgen-server
```
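The retuned launcher flags in this diff are related: `--max-total-tokens` caps a single request (input plus output), while `--max-batch-total-tokens` caps the whole running batch. A minimal sketch of the worst-case concurrency this implies (the variable names are ours, not from the commit):

```shell
# Values taken from the new `command:` line above.
max_total_tokens=4096          # per-request cap (input + output tokens)
max_batch_total_tokens=65536   # cap across the whole running batch

# Worst case: every request uses its full per-request token budget.
max_concurrent=$(( max_batch_total_tokens / max_total_tokens ))
echo "$max_concurrent"
```

Under these settings, at most 16 full-length requests fit in one batch; shorter requests allow more to run concurrently.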

FaqGen/kubernetes/manifests/README.md (12 additions, 1 deletion)

````diff
@@ -23,13 +23,24 @@ sed -i "s/insert-your-huggingface-token-here/${HUGGINGFACEHUB_API_TOKEN}/g" faqg
 kubectl apply -f faqgen.yaml
 ```
 
+## Deploy UI
+
+```
+cd GenAIExamples/FaqGen/kubernetes/manifests/
+kubectl get svc # get ip address
+ip_address="" # according to your svc address
+sed -i "s/insert_your_ip_here/${ip_address}/g" ui.yaml
+kubectl apply -f ui.yaml
+```
+
 ## Verify Services
 
 Make sure all the pods are running, and restart the faqgen-xxxx pod if necessary.
 
 ```
 kubectl get pods
-curl http://${host_ip}:8888/v1/faqgen -H "Content-Type: application/json" -d '{
+port=7779 # 7779 for gaudi, 7778 for xeon
+curl http://${host_ip}:7779/v1/faqgen -H "Content-Type: application/json" -d '{
   "messages": "Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."
 }'
 ```
````
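The port comment added in this diff (7779 for Gaudi, 7778 for Xeon) can be captured in a small hypothetical helper; the `platform` variable is an assumption for illustration, not part of the commit:

```shell
platform="gaudi"   # assumption: set to "gaudi" or "xeon" for your deployment

# Pick the FaqGen mega-service port per the README's note.
if [ "$platform" = "gaudi" ]; then
  port=7779
else
  port=7778
fi
echo "$port"
```

The chosen `port` can then replace the hard-coded `7779` in the `curl` verification command above.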
