Skip to content

Commit ba78b4c

Browse files
authored
update manifests for v0.9 (#632)
* update model HF TOKEN variables & reranking name for v0.9
1 parent 01c1b75 commit ba78b4c

11 files changed

+28
-28
lines changed

ChatQnA/benchmark/four_gaudi/embedding-dependency_run.yaml

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -7,7 +7,7 @@ metadata:
77
name: embedding-dependency-deploy
88
namespace: default
99
spec:
10-
replicas: 6
10+
replicas: 4
1111
selector:
1212
matchLabels:
1313
app: embedding-dependency-deploy
@@ -48,7 +48,7 @@ spec:
4848
volumes:
4949
- name: model-volume
5050
hostPath:
51-
path: /home/sdp/cesg
51+
path: /mnt/models
5252
type: Directory
5353
- name: shm
5454
emptyDir:

ChatQnA/benchmark/four_gaudi/llm-dependency_run.yaml

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -35,9 +35,9 @@ spec:
3535
- --model-id
3636
- $(LLM_MODEL_ID)
3737
- --max-input-length
38-
- '1024'
39-
- --max-total-tokens
4038
- '2048'
39+
- --max-total-tokens
40+
- '4096'
4141
- --max-batch-total-tokens
4242
- '65536'
4343
- --max-batch-prefill-tokens
@@ -62,12 +62,12 @@ spec:
6262
- name: HABANA_VISIBLE_DEVICES
6363
value: all
6464
- name: HF_TOKEN
65-
value: $(HF_TOKEN)
65+
value: ${HF_TOKEN}
6666
serviceAccountName: default
6767
volumes:
6868
- name: model-volume
6969
hostPath:
70-
path: /home/sdp/cesg
70+
path: /mnt/models
7171
type: Directory
7272
- name: shm
7373
emptyDir:

ChatQnA/benchmark/four_gaudi/reranking-dependency_run.yaml

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -31,7 +31,7 @@ spec:
3131
- envFrom:
3232
- configMapRef:
3333
name: qna-config
34-
image: tei_gaudi:rerank
34+
image: opea/tei-gaudi:latest
3535
name: reranking-dependency-deploy
3636
args:
3737
- --model-id
@@ -57,14 +57,14 @@ spec:
5757
- name: HABANA_VISIBLE_DEVICES
5858
value: all
5959
- name: HF_TOKEN
60-
value: $(HF_TOKEN)
60+
value: ${HF_TOKEN}
6161
- name: MAX_WARMUP_SEQUENCE_LENGTH
6262
value: '512'
6363
serviceAccountName: default
6464
volumes:
6565
- name: model-volume
6666
hostPath:
67-
path: /home/sdp/cesg
67+
path: /mnt/models
6868
type: Directory
6969
- name: shm
7070
emptyDir:

ChatQnA/benchmark/single_gaudi/chatqna_config_map.yaml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -15,7 +15,7 @@ data:
1515
TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009
1616
REDIS_URL: redis://vector-db.default.svc.cluster.local:6379
1717
INDEX_NAME: rag-redis
18-
HUGGINGFACEHUB_API_TOKEN: {HF_TOKEN}
18+
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
1919
EMBEDDING_SERVICE_HOST_IP: embedding-svc
2020
RETRIEVER_SERVICE_HOST_IP: retriever-svc
2121
RERANK_SERVICE_HOST_IP: reranking-svc

ChatQnA/benchmark/single_gaudi/embedding-dependency_run.yaml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -48,7 +48,7 @@ spec:
4848
volumes:
4949
- name: model-volume
5050
hostPath:
51-
path: /home/sdp/cesg
51+
path: /mnt/models
5252
type: Directory
5353
- name: shm
5454
emptyDir:

ChatQnA/benchmark/single_gaudi/llm-dependency_run.yaml

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -35,9 +35,9 @@ spec:
3535
- --model-id
3636
- $(LLM_MODEL_ID)
3737
- --max-input-length
38-
- '1024'
39-
- --max-total-tokens
4038
- '2048'
39+
- --max-total-tokens
40+
- '4096'
4141
- --max-batch-total-tokens
4242
- '65536'
4343
- --max-batch-prefill-tokens
@@ -62,12 +62,12 @@ spec:
6262
- name: HABANA_VISIBLE_DEVICES
6363
value: all
6464
- name: HF_TOKEN
65-
value: $(HF_TOKEN)
65+
value: ${HF_TOKEN}
6666
serviceAccountName: default
6767
volumes:
6868
- name: model-volume
6969
hostPath:
70-
path: /home/sdp/cesg
70+
path: /mnt/models
7171
type: Directory
7272
- name: shm
7373
emptyDir:

ChatQnA/benchmark/single_gaudi/reranking-dependency_run.yaml

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -31,7 +31,7 @@ spec:
3131
- envFrom:
3232
- configMapRef:
3333
name: qna-config
34-
image: tei_gaudi:rerank
34+
image: opea/tei-gaudi:latest
3535
name: reranking-dependency-deploy
3636
args:
3737
- --model-id
@@ -57,14 +57,14 @@ spec:
5757
- name: HABANA_VISIBLE_DEVICES
5858
value: all
5959
- name: HF_TOKEN
60-
value: $(HF_TOKEN)
60+
value: ${HF_TOKEN}
6161
- name: MAX_WARMUP_SEQUENCE_LENGTH
6262
value: '512'
6363
serviceAccountName: default
6464
volumes:
6565
- name: model-volume
6666
hostPath:
67-
path: /home/sdp/cesg
67+
path: /mnt/models
6868
type: Directory
6969
- name: shm
7070
emptyDir:

ChatQnA/benchmark/two_gaudi/chatqna_config_map.yaml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -15,7 +15,7 @@ data:
1515
TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009
1616
REDIS_URL: redis://vector-db.default.svc.cluster.local:6379
1717
INDEX_NAME: rag-redis
18-
HUGGINGFACEHUB_API_TOKEN: {HF_TOKEN}
18+
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
1919
EMBEDDING_SERVICE_HOST_IP: embedding-svc
2020
RETRIEVER_SERVICE_HOST_IP: retriever-svc
2121
RERANK_SERVICE_HOST_IP: reranking-svc

ChatQnA/benchmark/two_gaudi/embedding-dependency_run.yaml

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -7,7 +7,7 @@ metadata:
77
name: embedding-dependency-deploy
88
namespace: default
99
spec:
10-
replicas: 3
10+
replicas: 2
1111
selector:
1212
matchLabels:
1313
app: embedding-dependency-deploy
@@ -48,7 +48,7 @@ spec:
4848
volumes:
4949
- name: model-volume
5050
hostPath:
51-
path: /home/sdp/cesg
51+
path: /mnt/models
5252
type: Directory
5353
- name: shm
5454
emptyDir:

ChatQnA/benchmark/two_gaudi/llm-dependency_run.yaml

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -35,9 +35,9 @@ spec:
3535
- --model-id
3636
- $(LLM_MODEL_ID)
3737
- --max-input-length
38-
- '1024'
39-
- --max-total-tokens
4038
- '2048'
39+
- --max-total-tokens
40+
- '4096'
4141
- --max-batch-total-tokens
4242
- '65536'
4343
- --max-batch-prefill-tokens
@@ -62,12 +62,12 @@ spec:
6262
- name: HABANA_VISIBLE_DEVICES
6363
value: all
6464
- name: HF_TOKEN
65-
value: $(HF_TOKEN)
65+
value: ${HF_TOKEN}
6666
serviceAccountName: default
6767
volumes:
6868
- name: model-volume
6969
hostPath:
70-
path: /home/sdp/cesg
70+
path: /mnt/models
7171
type: Directory
7272
- name: shm
7373
emptyDir:

0 commit comments

Comments
 (0)