@@ -173,9 +173,9 @@ export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3"
173
173
export LLM_MODEL_ID_NAME="neural-chat-7b-v3-3"
174
174
export TEI_EMBEDDING_ENDPOINT="http://${host_ip}:8090"
175
175
export TEI_RERANKING_ENDPOINT="http://${host_ip}:8808"
176
- export TGI_LLM_ENDPOINT="http://${host_ip}:8008"
177
- export vLLM_LLM_ENDPOINT="http://${host_ip}:8008"
178
- export vLLM_RAY_LLM_ENDPOINT="http://${host_ip}:8008"
176
+ export TGI_LLM_ENDPOINT="http://${host_ip}:8005"
177
+ export vLLM_LLM_ENDPOINT="http://${host_ip}:8007"
178
+ export vLLM_RAY_LLM_ENDPOINT="http://${host_ip}:8006"
179
179
export LLM_SERVICE_PORT=9000
180
180
export REDIS_URL="redis://${host_ip}:6379"
181
181
export INDEX_NAME="rag-redis"
@@ -296,15 +296,15 @@ curl http://${host_ip}:8000/v1/reranking \
296
296
297
297
```bash
298
298
# TGI Service
299
- curl http://${host_ip}:8008/generate \
299
+ curl http://${host_ip}:8005/generate \
300
300
-X POST \
301
301
-d '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":64, "do_sample": true}}' \
302
302
-H 'Content-Type: application/json'
303
303
```
304
304
305
305
```bash
306
306
# vLLM Service
307
- curl http://${host_ip}:8008/v1/completions \
307
+ curl http://${host_ip}:8007/v1/completions \
308
308
-H "Content-Type: application/json" \
309
309
-d '{
310
310
"model": "${LLM_MODEL_ID}",
@@ -316,7 +316,7 @@ curl http://${host_ip}:8008/v1/completions \
316
316
317
317
```bash
318
318
# vLLM-on-Ray Service
319
- curl http://${host_ip}:8008/v1/chat/completions \
319
+ curl http://${host_ip}:8006/v1/chat/completions \
320
320
-H "Content-Type: application/json" \
321
321
-d '{"model": "${LLM_MODEL_ID}", "messages": [{"role": "user", "content": "What is Deep Learning?"}]}'
322
322
```
0 commit comments