93 changes: 51 additions & 42 deletions ChatQnA/docker_compose/intel/cpu/xeon/README.md
@@ -432,57 +432,66 @@ curl -X POST "http://${host_ip}:6007/v1/dataprep/delete_file" \


### Profile Microservices

To further analyze microservice performance, users can follow the instructions below to profile the microservices.

#### 1. vLLM backend Service

Users can follow the previous section to test the vLLM microservice or the ChatQnA MegaService.
By default, vLLM profiling is not enabled. Users can start and stop profiling with the commands below.

##### Start vLLM profiling

```bash
curl http://${host_ip}:9009/start_profile \
-H "Content-Type: application/json" \
-d '{"model": "Intel/neural-chat-7b-v3-3"}'
```

Users will see the Docker logs below from `vllm-service` if profiling started correctly.

```bash
INFO api_server.py:361] Starting profiler...
INFO api_server.py:363] Profiler started.
INFO: x.x.x.x:35940 - "POST /start_profile HTTP/1.1" 200 OK
```

After vLLM profiling has started, users can ask questions and get responses from the vLLM microservice or the ChatQnA MegaService.
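
For example, a minimal request while the profiler is running could look like the sketch below; it assumes the ChatQnA MegaService is exposed on port 8888 at `/v1/chatqna`, as configured earlier in this guide, so adjust the host and port to your deployment.

```bash
# Send one sample query so the profiler captures a full request/response cycle.
# Port 8888 and the /v1/chatqna route are assumptions based on the default compose
# setup described earlier; change them if your deployment differs.
curl http://${host_ip}:8888/v1/chatqna \
    -H "Content-Type: application/json" \
    -d '{"messages": "What is the revenue of Nike in 2023?"}'
```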

##### Stop vLLM profiling

With the following command, users can stop vLLM profiling and generate a `*.pt.trace.json.gz` file as the profiling result
under the `/mnt` folder inside the vllm-service Docker instance.

```bash
# vLLM Service
curl http://${host_ip}:9009/stop_profile \
-H "Content-Type: application/json" \
-d '{"model": "Intel/neural-chat-7b-v3-3"}'
```

Users will see the Docker logs below from `vllm-service` if profiling stopped correctly.

```bash
INFO api_server.py:368] Stopping profiler...
INFO api_server.py:370] Profiler stopped.
INFO: x.x.x.x:41614 - "POST /stop_profile HTTP/1.1" 200 OK
```

After vLLM profiling has stopped, use the command below to copy the `*.pt.trace.json.gz` file out of the `/mnt` folder.

```bash
docker cp vllm-service:/mnt/ .
```
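
As a quick sanity check (a minimal sketch; the exact trace file name contains a timestamp and depends on when profiling ran), list the copied traces on the host:

```bash
# docker cp above copies the container's /mnt directory into ./mnt on the host
ls -lh ./mnt/*.pt.trace.json.gz
```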

##### Check profiling result

Open a web browser, go to `chrome://tracing` or `https://ui.perfetto.dev`, and load the `json.gz` file. You should see the vLLM profiling result as in the diagram below.
![image](https://github.com/user-attachments/assets/55c7097e-5574-41dc-97a7-5e87c31bc286)
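
If the browser does not load the compressed trace directly, it can be decompressed first (an optional step; assumes `gunzip` is available on the host):

```bash
# -k keeps the original .gz file and writes an uncompressed .pt.trace.json next to it
gunzip -k ./mnt/*.pt.trace.json.gz
```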


## 🚀 Launch the UI

### Launch with origin port
1 change: 1 addition & 0 deletions FaqGen/docker_compose/intel/cpu/xeon/README.md
@@ -79,6 +79,7 @@ export TGI_LLM_ENDPOINT="http://${your_ip}:8008"
export HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token}
export MEGA_SERVICE_HOST_IP=${host_ip}
export LLM_SERVICE_HOST_IP=${host_ip}
export LLM_SERVICE_PORT=9000
export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:8888/v1/faqgen"
```
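
After exporting these variables, a quick check of the FaqGen LLM microservice on the new `LLM_SERVICE_PORT` could look like the sketch below; the `/v1/faqgen` route and the `query` payload are assumptions based on the defaults used elsewhere in this example, so verify them against your running service.

```bash
# Assumes the FaqGen LLM microservice listens on ${host_ip}:${LLM_SERVICE_PORT} (9000 here)
# and exposes a /v1/faqgen endpoint that accepts a "query" field; adjust if it differs.
curl "http://${host_ip}:9000/v1/faqgen" \
  -X POST \
  -H "Content-Type: application/json" \
  -d '{"query": "What is Text Embeddings Inference?", "max_new_tokens": 128}'
```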

3 changes: 2 additions & 1 deletion FaqGen/docker_compose/intel/cpu/xeon/compose.yaml
@@ -46,6 +46,7 @@ services:
- http_proxy=${http_proxy}
- MEGA_SERVICE_HOST_IP=${MEGA_SERVICE_HOST_IP}
- LLM_SERVICE_HOST_IP=${LLM_SERVICE_HOST_IP}
- LLM_SERVICE_PORT=${LLM_SERVICE_PORT}
ipc: host
restart: always
faqgen-xeon-ui-server:
@@ -59,7 +60,7 @@
- no_proxy=${no_proxy}
- https_proxy=${https_proxy}
- http_proxy=${http_proxy}
-  - DOC_BASE_URL=${BACKEND_SERVICE_ENDPOINT}
+  - FAQ_BASE_URL=${BACKEND_SERVICE_ENDPOINT}
ipc: host
restart: always
networks:
1 change: 1 addition & 0 deletions FaqGen/docker_compose/intel/hpu/gaudi/README.md
@@ -80,6 +80,7 @@ export TGI_LLM_ENDPOINT="http://${your_ip}:8008"
export HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token}
export MEGA_SERVICE_HOST_IP=${host_ip}
export LLM_SERVICE_HOST_IP=${host_ip}
export LLM_SERVICE_PORT=9000
export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:8888/v1/faqgen"
```

3 changes: 2 additions & 1 deletion FaqGen/docker_compose/intel/hpu/gaudi/compose.yaml
@@ -56,6 +56,7 @@ services:
- http_proxy=${http_proxy}
- MEGA_SERVICE_HOST_IP=${MEGA_SERVICE_HOST_IP}
- LLM_SERVICE_HOST_IP=${LLM_SERVICE_HOST_IP}
- LLM_SERVICE_PORT=${LLM_SERVICE_PORT}
ipc: host
restart: always
faqgen-gaudi-ui-server:
@@ -69,7 +70,7 @@
- no_proxy=${no_proxy}
- https_proxy=${https_proxy}
- http_proxy=${http_proxy}
-  - DOC_BASE_URL=${BACKEND_SERVICE_ENDPOINT}
+  - FAQ_BASE_URL=${BACKEND_SERVICE_ENDPOINT}
ipc: host
restart: always

@@ -264,7 +264,7 @@ spec:
containers:
- name: faqgen-react-ui
env:
-  - name: DOC_BASE_URL
+  - name: FAQ_BASE_URL
value: "http://faqgen:8888/v1/faqgen"
- name: http_proxy
value:
2 changes: 1 addition & 1 deletion FaqGen/kubernetes/intel/cpu/xeon/manifest/faqgen_ui.yaml
@@ -22,7 +22,7 @@ spec:
containers:
- name: faq-mega-ui-deploy
env:
-  - name: DOC_BASE_URL
+  - name: FAQ_BASE_URL
value: http://{insert_your_ip_here}:7779/v1/faqgen
image: opea/faqgen-ui:latest
imagePullPolicy: IfNotPresent
@@ -22,7 +22,7 @@ spec:
containers:
- name: faq-mega-ui-deploy
env:
-  - name: DOC_BASE_URL
+  - name: FAQ_BASE_URL
value: http://{insert_your_ip_here}:7779/v1/faqgen
image: opea/faqgen-ui:latest
imagePullPolicy: IfNotPresent
1 change: 1 addition & 0 deletions FaqGen/tests/test_compose_on_gaudi.sh
@@ -34,6 +34,7 @@ function start_services() {
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
export MEGA_SERVICE_HOST_IP=${ip_address}
export LLM_SERVICE_HOST_IP=${ip_address}
export LLM_SERVICE_PORT=9000
export BACKEND_SERVICE_ENDPOINT="http://${ip_address}:8888/v1/faqgen"

sed -i "s/backend_address/$ip_address/g" $WORKPATH/ui/svelte/.env
1 change: 1 addition & 0 deletions FaqGen/tests/test_compose_on_xeon.sh
@@ -34,6 +34,7 @@ function start_services() {
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
export MEGA_SERVICE_HOST_IP=${ip_address}
export LLM_SERVICE_HOST_IP=${ip_address}
export LLM_SERVICE_PORT=9000
export BACKEND_SERVICE_ENDPOINT="http://${ip_address}:8888/v1/faqgen"

sed -i "s/backend_address/$ip_address/g" $WORKPATH/ui/svelte/.env