3 changes: 2 additions & 1 deletion ChatQnA/chatqna.py
@@ -47,6 +47,7 @@ def generate_rag_prompt(question, documents):
RERANK_SERVER_PORT = int(os.getenv("RERANK_SERVER_PORT", 80))
LLM_SERVER_HOST_IP = os.getenv("LLM_SERVER_HOST_IP", "0.0.0.0")
LLM_SERVER_PORT = int(os.getenv("LLM_SERVER_PORT", 80))
LLM_MODEL = os.getenv("LLM_MODEL", "Intel/neural-chat-7b-v3-3")


def align_inputs(self, inputs, cur_node, runtime_graph, llm_parameters_dict, **kwargs):
@@ -61,7 +62,7 @@ def align_inputs(self, inputs, cur_node, runtime_graph, llm_parameters_dict, **k
elif self.services[cur_node].service_type == ServiceType.LLM:
# convert TGI/vLLM to unified OpenAI /v1/chat/completions format
next_inputs = {}
next_inputs["model"] = "tgi" # specifically clarify the fake model to make the format unified
next_inputs["model"] = LLM_MODEL
next_inputs["messages"] = [{"role": "user", "content": inputs["inputs"]}]
next_inputs["max_tokens"] = llm_parameters_dict["max_tokens"]
next_inputs["top_p"] = llm_parameters_dict["top_p"]
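
For context, `align_inputs` now forwards the configured model name instead of the hard-coded `"tgi"` placeholder. A minimal sketch of the resulting OpenAI-style request to the backing LLM server (host, port, and model come from the `LLM_SERVER_HOST_IP`, `LLM_SERVER_PORT`, and `LLM_MODEL` variables above; exact values depend on your deployment):

```bash
# Illustrative only: the MegaService sends an equivalent payload internally.
curl http://${LLM_SERVER_HOST_IP}:${LLM_SERVER_PORT}/v1/chat/completions \
  -H "Content-Type: application/json" \
  -d '{
        "model": "'"${LLM_MODEL}"'",
        "messages": [{"role": "user", "content": "What is Deep Learning?"}],
        "max_tokens": 17
      }'
```
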
75 changes: 30 additions & 45 deletions ChatQnA/docker_compose/intel/cpu/aipc/README.md
@@ -78,26 +78,27 @@ llama3.2:latest a80c4f17acd5 2.0 GB 2 minutes ago
Access the Ollama service to verify that it is functioning correctly.

```bash
curl http://${host_ip}:11434/api/generate -d '{"model": "llama3.2", "prompt":"What is Deep Learning?"}'
curl http://${host_ip}:11434/v1/chat/completions \
-H "Content-Type: application/json" \
-d '{
"model": "llama3.2",
"messages": [
{
"role": "system",
"content": "You are a helpful assistant."
},
{
"role": "user",
"content": "Hello!"
}
]
}'
```

The output is similar to the following:

```
{"model":"llama3.2","created_at":"2024-10-12T12:55:28.098813868Z","response":"Deep","done":false}
{"model":"llama3.2","created_at":"2024-10-12T12:55:28.124514468Z","response":" learning","done":false}
{"model":"llama3.2","created_at":"2024-10-12T12:55:28.149754216Z","response":" is","done":false}
{"model":"llama3.2","created_at":"2024-10-12T12:55:28.180420784Z","response":" a","done":false}
{"model":"llama3.2","created_at":"2024-10-12T12:55:28.229185873Z","response":" subset","done":false}
{"model":"llama3.2","created_at":"2024-10-12T12:55:28.263956118Z","response":" of","done":false}
{"model":"llama3.2","created_at":"2024-10-12T12:55:28.289097354Z","response":" machine","done":false}
{"model":"llama3.2","created_at":"2024-10-12T12:55:28.316838918Z","response":" learning","done":false}
{"model":"llama3.2","created_at":"2024-10-12T12:55:28.342309506Z","response":" that","done":false}
{"model":"llama3.2","created_at":"2024-10-12T12:55:28.367221264Z","response":" involves","done":false}
{"model":"llama3.2","created_at":"2024-10-12T12:55:28.39205893Z","response":" the","done":false}
{"model":"llama3.2","created_at":"2024-10-12T12:55:28.417933974Z","response":" use","done":false}
{"model":"llama3.2","created_at":"2024-10-12T12:55:28.443110388Z","response":" of","done":false}
...
{"id":"chatcmpl-4","object":"chat.completion","created":1729232496,"model":"llama3.2","system_fingerprint":"fp_ollama","choices":[{"index":0,"message":{"role":"assistant","content":"How can I assist you today? Are you looking for information, answers to a question, or just need someone to chat with? I'm here to help in any way I can."},"finish_reason":"stop"}],"usage":{"prompt_tokens":33,"completion_tokens":38,"total_tokens":71}}
```

## 🚀 Build Docker Images
@@ -122,20 +123,14 @@ export https_proxy="Your_HTTPs_Proxy"
docker build --no-cache -t opea/retriever-redis:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/retrievers/redis/langchain/Dockerfile .
```

### 2 Build LLM Image

```bash
docker build --no-cache -t opea/llm-ollama:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/llms/text-generation/ollama/langchain/Dockerfile .
```

### 3. Build Dataprep Image
### 2. Build Dataprep Image

```bash
docker build --no-cache -t opea/dataprep-redis:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/dataprep/redis/langchain/Dockerfile .
cd ..
```

### 4. Build MegaService Docker Image
### 3. Build MegaService Docker Image

To construct the MegaService, we use the [GenAIComps](https://github.com/opea-project/GenAIComps.git) microservice pipeline within the `chatqna.py` Python script. Build the MegaService Docker image with the command below:

@@ -146,7 +141,7 @@ cd GenAIExamples/ChatQnA
docker build --no-cache -t opea/chatqna:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile .
```

### 5. Build UI Docker Image
### 4. Build UI Docker Image

Build the frontend Docker image with the command below:

@@ -155,7 +150,7 @@ cd ~/OPEA/GenAIExamples/ChatQnA/ui
docker build --no-cache -t opea/chatqna-ui:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f ./docker/Dockerfile .
```

### 6. Build Nginx Docker Image
### 5. Build Nginx Docker Image

```bash
cd GenAIComps
@@ -166,10 +161,9 @@ Then run the command `docker images`, you will have the following 6 Docker Image

1. `opea/dataprep-redis:latest`
2. `opea/retriever-redis:latest`
3. `opea/llm-ollama:latest`
4. `opea/chatqna:latest`
5. `opea/chatqna-ui:latest`
6. `opea/nginx:latest`
3. `opea/chatqna:latest`
4. `opea/chatqna-ui:latest`
5. `opea/nginx:latest`

## 🚀 Start Microservices

@@ -195,10 +189,10 @@ For Linux users, please run `hostname -I | awk '{print $1}'`. For Windows users,
export your_hf_api_token="Your_Huggingface_API_Token"
```

**Append the value of the public IP address to the no_proxy list**
**Append the value of the public IP address to the no_proxy list if you are in a proxy environment**

```
export your_no_proxy=${your_no_proxy},"External_Public_IP"
export your_no_proxy=${your_no_proxy},"External_Public_IP",chatqna-aipc-backend-server,tei-embedding-service,retriever,tei-reranking-service,redis-vector-db,dataprep-redis-service
```

- Linux PC
@@ -211,7 +205,7 @@ export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
export RERANK_MODEL_ID="BAAI/bge-reranker-base"
export INDEX_NAME="rag-redis"
export HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token}
export OLLAMA_ENDPOINT=http://${host_ip}:11434
export OLLAMA_HOST=${host_ip}
export OLLAMA_MODEL="llama3.2"
```
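
Before starting the stack, it can help to confirm that Ollama is reachable at the host just exported (a quick check, assuming Ollama listens on its default port 11434):

```bash
# Lists the locally available models; llama3.2 should appear once pulled.
curl http://${OLLAMA_HOST}:11434/api/tags
```
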

@@ -222,7 +216,7 @@ set EMBEDDING_MODEL_ID=BAAI/bge-base-en-v1.5
set RERANK_MODEL_ID=BAAI/bge-reranker-base
set INDEX_NAME=rag-redis
set HUGGINGFACEHUB_API_TOKEN=%your_hf_api_token%
set OLLAMA_ENDPOINT=http://host.docker.internal:11434
set OLLAMA_HOST=host.docker.internal
set OLLAMA_MODEL="llama3.2"
```

@@ -277,24 +271,15 @@ For details on how to verify the correctness of the response, refer to [how-to-v
curl http://${host_ip}:11434/api/generate -d '{"model": "llama3.2", "prompt":"What is Deep Learning?"}'
```

5. LLM Microservice

```bash
curl http://${host_ip}:9000/v1/chat/completions\
-X POST \
-d '{"query":"What is Deep Learning?","max_tokens":17,"top_k":10,"top_p":0.95,"typical_p":0.95,"temperature":0.01,"repetition_penalty":1.03,"streaming":true}' \
-H 'Content-Type: application/json'
```

6. MegaService
5. MegaService

```bash
curl http://${host_ip}:8888/v1/chatqna -H "Content-Type: application/json" -d '{
"messages": "What is the revenue of Nike in 2023?"
}'
```

7. Upload RAG Files through Dataprep Microservice (Optional)
6. Upload RAG Files through Dataprep Microservice (Optional)

To chat with retrieved information, you need to upload a file using the Dataprep service, as sketched below.
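
For reference, a minimal upload sketch (assuming the dataprep-redis-service from the compose file, exposed on port 6007 with a `/v1/dataprep` endpoint; replace the file name with your own document):

```bash
curl -X POST "http://${host_ip}:6007/v1/dataprep" \
  -H "Content-Type: multipart/form-data" \
  -F "files=@./your_document.pdf"
```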

@@ -334,4 +319,4 @@ the output is:

## 🚀 Launch the UI

To access the frontend, open the following URL in your browser: http://{host_ip}:5173.
To access the frontend, open the following URL in your browser: http://{host_ip}:80.
41 changes: 13 additions & 28 deletions ChatQnA/docker_compose/intel/cpu/aipc/compose.yaml
@@ -72,22 +72,7 @@ services:
HF_HUB_DISABLE_PROGRESS_BARS: 1
HF_HUB_ENABLE_HF_TRANSFER: 0
command: --model-id ${RERANK_MODEL_ID} --auto-truncate
llm:
image: ${REGISTRY:-opea}/llm-ollama
container_name: llm-ollama
ports:
- "9000:9000"
ipc: host
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
HF_HUB_DISABLE_PROGRESS_BARS: 1
HF_HUB_ENABLE_HF_TRANSFER: 0
OLLAMA_ENDPOINT: ${OLLAMA_ENDPOINT}
OLLAMA_MODEL: ${OLLAMA_MODEL}
chaqna-aipc-backend-server:
chatqna-aipc-backend-server:
image: ${REGISTRY:-opea}/chatqna:${TAG:-latest}
container_name: chatqna-aipc-backend-server
depends_on:
@@ -96,29 +81,29 @@ services:
- tei-embedding-service
- retriever
- tei-reranking-service
- llm
ports:
- "8888:8888"
environment:
- no_proxy=${no_proxy}
- https_proxy=${https_proxy}
- http_proxy=${http_proxy}
- MEGA_SERVICE_HOST_IP=chaqna-aipc-backend-server
- MEGA_SERVICE_HOST_IP=chatqna-aipc-backend-server
- EMBEDDING_SERVER_HOST_IP=tei-embedding-service
- EMBEDDING_SERVER_PORT=80
- RETRIEVER_SERVICE_HOST_IP=retriever
- RERANK_SERVER_HOST_IP=tei-reranking-service
- RERANK_SERVER_PORT=80
- LLM_SERVER_HOST_IP=llm
- LLM_SERVER_PORT=9000
- LLM_SERVER_HOST_IP=${OLLAMA_HOST}
- LLM_SERVER_PORT=11434
- LLM_MODEL=${OLLAMA_MODEL}
- LOGFLAG=${LOGFLAG}
ipc: host
restart: always
chaqna-aipc-ui-server:
chatqna-aipc-ui-server:
image: ${REGISTRY:-opea}/chatqna-ui:${TAG:-latest}
container_name: chatqna-aipc-ui-server
depends_on:
- chaqna-aipc-backend-server
- chatqna-aipc-backend-server
ports:
- "5173:5173"
environment:
@@ -127,22 +112,22 @@ services:
- http_proxy=${http_proxy}
ipc: host
restart: always
chaqna-aipc-nginx-server:
chatqna-aipc-nginx-server:
image: ${REGISTRY:-opea}/nginx:${TAG:-latest}
container_name: chaqna-aipc-nginx-server
container_name: chatqna-aipc-nginx-server
depends_on:
- chaqna-aipc-backend-server
- chaqna-aipc-ui-server
- chatqna-aipc-backend-server
- chatqna-aipc-ui-server
ports:
- "${NGINX_PORT:-80}:80"
environment:
- no_proxy=${no_proxy}
- https_proxy=${https_proxy}
- http_proxy=${http_proxy}
- FRONTEND_SERVICE_IP=chatqna-xeon-ui-server
- FRONTEND_SERVICE_IP=chatqna-aipc-ui-server
- FRONTEND_SERVICE_PORT=5173
- BACKEND_SERVICE_NAME=chatqna
- BACKEND_SERVICE_IP=chatqna-xeon-backend-server
- BACKEND_SERVICE_IP=chatqna-aipc-backend-server
- BACKEND_SERVICE_PORT=8888
- DATAPREP_SERVICE_IP=dataprep-redis-service
- DATAPREP_SERVICE_PORT=6007
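
With the service names corrected, the AIPC stack can be brought up and the running containers spot-checked (a sketch, assuming Docker Compose v2 and the aipc compose file path shown above):

```bash
cd GenAIExamples/ChatQnA/docker_compose/intel/cpu/aipc
docker compose -f compose.yaml up -d
# Expect chatqna-aipc-backend-server, chatqna-aipc-ui-server, and chatqna-aipc-nginx-server.
docker ps --format '{{.Names}}' | grep chatqna-aipc
```
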
2 changes: 1 addition & 1 deletion ChatQnA/docker_compose/intel/cpu/aipc/set_env.sh
@@ -16,5 +16,5 @@ export HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token}
export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
export RERANK_MODEL_ID="BAAI/bge-reranker-base"
export INDEX_NAME="rag-redis"
export OLLAMA_ENDPOINT=http://${host_ip}:11434
export OLLAMA_HOST=${host_ip}
export OLLAMA_MODEL="llama3.2"
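
A possible way to apply these settings and stage the model before starting the containers (a sketch; assumes Ollama is installed natively on the AI PC host):

```bash
source ChatQnA/docker_compose/intel/cpu/aipc/set_env.sh
# Pre-pull the model so the first query does not block on a download.
ollama pull "${OLLAMA_MODEL}"
ollama list   # the model should be listed before the compose stack is started
```
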
9 changes: 5 additions & 4 deletions ChatQnA/docker_compose/intel/cpu/xeon/README.md
@@ -17,8 +17,6 @@ To set up environment variables for deploying ChatQnA services, follow these ste
```bash
# Example: host_ip="192.168.1.1"
export host_ip="External_Public_IP"
# Example: no_proxy="localhost, 127.0.0.1, 192.168.1.1"
export no_proxy="Your_No_Proxy"
export HUGGINGFACEHUB_API_TOKEN="Your_Huggingface_API_Token"
```

@@ -27,6 +25,9 @@ To set up environment variables for deploying ChatQnA services, follow these ste
```bash
export http_proxy="Your_HTTP_Proxy"
export https_proxy="Your_HTTPs_Proxy"
# Example: no_proxy="localhost, 127.0.0.1, 192.168.1.1"
export no_proxy="Your_No_Proxy",chatqna-xeon-ui-server,chatqna-xeon-backend-server,dataprep-redis-service,tei-embedding-service,retriever,tei-reranking-service,tgi-service,vllm_service
```

3. Set up other environment variables:
@@ -213,8 +214,6 @@ For users in China who are unable to download models directly from Huggingface,
```bash
# Example: host_ip="192.168.1.1"
export host_ip="External_Public_IP"
# Example: no_proxy="localhost, 127.0.0.1, 192.168.1.1"
export no_proxy="Your_No_Proxy"
export HUGGINGFACEHUB_API_TOKEN="Your_Huggingface_API_Token"
# Example: NGINX_PORT=80
export NGINX_PORT=${your_nginx_port}
@@ -225,6 +224,8 @@ For users in China who are unable to download models directly from Huggingface,
```bash
export http_proxy="Your_HTTP_Proxy"
export https_proxy="Your_HTTPs_Proxy"
# Example: no_proxy="localhost, 127.0.0.1, 192.168.1.1"
export no_proxy="Your_No_Proxy",chatqna-xeon-ui-server,chatqna-xeon-backend-server,dataprep-redis-service,tei-embedding-service,retriever,tei-reranking-service,tgi-service,vllm_service
```

3. Set up other environment variables:
4 changes: 2 additions & 2 deletions ChatQnA/docker_compose/intel/cpu/xeon/README_qdrant.md
@@ -167,10 +167,10 @@ export host_ip="External_Public_IP"
export your_hf_api_token="Your_Huggingface_API_Token"
```

**Append the value of the public IP address to the no_proxy list**
**Append the value of the public IP address to the no_proxy list if you are in a proxy environment**

```
export your_no_proxy=${your_no_proxy},"External_Public_IP"
export your_no_proxy=${your_no_proxy},"External_Public_IP",chatqna-xeon-ui-server,chatqna-xeon-backend-server,dataprep-qdrant-service,tei-embedding-service,retriever,tei-reranking-service,tgi-service
```

```bash
1 change: 1 addition & 0 deletions ChatQnA/docker_compose/intel/cpu/xeon/compose.yaml
@@ -112,6 +112,7 @@ services:
- RERANK_SERVER_PORT=${RERANK_SERVER_PORT:-80}
- LLM_SERVER_HOST_IP=tgi-service
- LLM_SERVER_PORT=${LLM_SERVER_PORT:-80}
- LLM_MODEL=${LLM_MODEL_ID}
- LOGFLAG=${LOGFLAG}
ipc: host
restart: always
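
One way to confirm that the new variable actually reaches the Xeon backend once the stack is running (a sketch; the container name follows the compose file above):

```bash
docker exec chatqna-xeon-backend-server env | grep -E '^LLM_(MODEL|SERVER)'
```
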
@@ -111,6 +111,7 @@ services:
- RERANK_SERVER_PORT=${RERANK_SERVER_PORT:-80}
- LLM_SERVER_HOST_IP=tgi-service
- LLM_SERVER_PORT=${LLM_SERVER_PORT:-80}
- LLM_MODEL=${LLM_MODEL_ID}
- LOGFLAG=${LOGFLAG}
ipc: host
restart: always
1 change: 1 addition & 0 deletions ChatQnA/docker_compose/intel/cpu/xeon/compose_vllm.yaml
@@ -110,6 +110,7 @@ services:
- RERANK_SERVER_PORT=${RERANK_SERVER_PORT:-80}
- LLM_SERVER_HOST_IP=vllm_service
- LLM_SERVER_PORT=${LLM_SERVER_PORT:-80}
- LLM_MODEL=${LLM_MODEL_ID}
- LOGFLAG=${LOGFLAG}
ipc: host
restart: always
@@ -93,6 +93,7 @@ services:
- RETRIEVER_SERVICE_HOST_IP=retriever
- LLM_SERVER_HOST_IP=tgi-service
- LLM_SERVER_PORT=${LLM_SERVER_PORT:-80}
- LLM_MODEL=${LLM_MODEL_ID}
- LOGFLAG=${LOGFLAG}
ipc: host
restart: always
8 changes: 4 additions & 4 deletions ChatQnA/docker_compose/intel/hpu/gaudi/README.md
@@ -17,8 +17,6 @@ To set up environment variables for deploying ChatQnA services, follow these ste
```bash
# Example: host_ip="192.168.1.1"
export host_ip="External_Public_IP"
# Example: no_proxy="localhost, 127.0.0.1, 192.168.1.1"
export no_proxy="Your_No_Proxy"
export HUGGINGFACEHUB_API_TOKEN="Your_Huggingface_API_Token"
```

@@ -27,6 +25,9 @@ To set up environment variables for deploying ChatQnA services, follow these ste
```bash
export http_proxy="Your_HTTP_Proxy"
export https_proxy="Your_HTTPs_Proxy"
# Example: no_proxy="localhost, 127.0.0.1, 192.168.1.1"
export no_proxy="Your_No_Proxy",chatqna-gaudi-ui-server,chatqna-gaudi-backend-server,dataprep-redis-service,tei-embedding-service,retriever,tei-reranking-service,tgi-service,vllm_service,vllm-ray-service,guardrails
```

3. Set up other environment variables:
@@ -211,8 +211,6 @@ For users in China who are unable to download models directly from Huggingface,
```bash
# Example: host_ip="192.168.1.1"
export host_ip="External_Public_IP"
# Example: no_proxy="localhost, 127.0.0.1, 192.168.1.1"
export no_proxy="Your_No_Proxy"
export HUGGINGFACEHUB_API_TOKEN="Your_Huggingface_API_Token"
# Example: NGINX_PORT=80
export NGINX_PORT=${your_nginx_port}
Expand All @@ -223,6 +221,8 @@ For users in China who are unable to download models directly from Huggingface,
```bash
export http_proxy="Your_HTTP_Proxy"
export https_proxy="Your_HTTPs_Proxy"
# Example: no_proxy="localhost, 127.0.0.1, 192.168.1.1"
export no_proxy="Your_No_Proxy",chatqna-gaudi-ui-server,chatqna-gaudi-backend-server,dataprep-redis-service,tei-embedding-service,retriever,tei-reranking-service,tgi-service,vllm_service,vllm-ray-service,guardrails
```

3. Set up other environment variables: