File tree Expand file tree Collapse file tree 11 files changed +28
-28
lines changed Expand file tree Collapse file tree 11 files changed +28
-28
lines changed Original file line number Diff line number Diff line change @@ -7,7 +7,7 @@ metadata:
7
7
name : embedding-dependency-deploy
8
8
namespace : default
9
9
spec :
10
- replicas : 6
10
+ replicas : 4
11
11
selector :
12
12
matchLabels :
13
13
app : embedding-dependency-deploy
48
48
volumes :
49
49
- name : model-volume
50
50
hostPath :
51
- path : /home/sdp/cesg
51
+ path : /mnt/models
52
52
type : Directory
53
53
- name : shm
54
54
emptyDir :
Original file line number Diff line number Diff line change 35
35
- --model-id
36
36
- $(LLM_MODEL_ID)
37
37
- --max-input-length
38
- - ' 1024'
39
- - --max-total-tokens
40
38
- ' 2048'
39
+ - --max-total-tokens
40
+ - ' 4096'
41
41
- --max-batch-total-tokens
42
42
- ' 65536'
43
43
- --max-batch-prefill-tokens
@@ -62,12 +62,12 @@ spec:
62
62
- name : HABANA_VISIBLE_DEVICES
63
63
value : all
64
64
- name : HF_TOKEN
65
- value : $( HF_TOKEN)
65
+ value : ${ HF_TOKEN}
66
66
serviceAccountName : default
67
67
volumes :
68
68
- name : model-volume
69
69
hostPath :
70
- path : /home/sdp/cesg
70
+ path : /mnt/models
71
71
type : Directory
72
72
- name : shm
73
73
emptyDir :
Original file line number Diff line number Diff line change 31
31
- envFrom :
32
32
- configMapRef :
33
33
name : qna-config
34
- image : tei_gaudi:rerank
34
+ image : opea/tei-gaudi:latest
35
35
name : reranking-dependency-deploy
36
36
args :
37
37
- --model-id
@@ -57,14 +57,14 @@ spec:
57
57
- name : HABANA_VISIBLE_DEVICES
58
58
value : all
59
59
- name : HF_TOKEN
60
- value : $( HF_TOKEN)
60
+ value : ${ HF_TOKEN}
61
61
- name : MAX_WARMUP_SEQUENCE_LENGTH
62
62
value : ' 512'
63
63
serviceAccountName : default
64
64
volumes :
65
65
- name : model-volume
66
66
hostPath :
67
- path : /home/sdp/cesg
67
+ path : /mnt/models
68
68
type : Directory
69
69
- name : shm
70
70
emptyDir :
Original file line number Diff line number Diff line change 15
15
TGI_LLM_ENDPOINT : http://llm-dependency-svc.default.svc.cluster.local:9009
16
16
REDIS_URL : redis://vector-db.default.svc.cluster.local:6379
17
17
INDEX_NAME : rag-redis
18
- HUGGINGFACEHUB_API_TOKEN : {HF_TOKEN}
18
+ HUGGINGFACEHUB_API_TOKEN : $ {HF_TOKEN}
19
19
EMBEDDING_SERVICE_HOST_IP : embedding-svc
20
20
RETRIEVER_SERVICE_HOST_IP : retriever-svc
21
21
RERANK_SERVICE_HOST_IP : reranking-svc
Original file line number Diff line number Diff line change 48
48
volumes :
49
49
- name : model-volume
50
50
hostPath :
51
- path : /home/sdp/cesg
51
+ path : /mnt/models
52
52
type : Directory
53
53
- name : shm
54
54
emptyDir :
Original file line number Diff line number Diff line change 35
35
- --model-id
36
36
- $(LLM_MODEL_ID)
37
37
- --max-input-length
38
- - ' 1024'
39
- - --max-total-tokens
40
38
- ' 2048'
39
+ - --max-total-tokens
40
+ - ' 4096'
41
41
- --max-batch-total-tokens
42
42
- ' 65536'
43
43
- --max-batch-prefill-tokens
@@ -62,12 +62,12 @@ spec:
62
62
- name : HABANA_VISIBLE_DEVICES
63
63
value : all
64
64
- name : HF_TOKEN
65
- value : $( HF_TOKEN)
65
+ value : ${ HF_TOKEN}
66
66
serviceAccountName : default
67
67
volumes :
68
68
- name : model-volume
69
69
hostPath :
70
- path : /home/sdp/cesg
70
+ path : /mnt/models
71
71
type : Directory
72
72
- name : shm
73
73
emptyDir :
Original file line number Diff line number Diff line change 31
31
- envFrom :
32
32
- configMapRef :
33
33
name : qna-config
34
- image : tei_gaudi:rerank
34
+ image : opea/tei-gaudi:latest
35
35
name : reranking-dependency-deploy
36
36
args :
37
37
- --model-id
@@ -57,14 +57,14 @@ spec:
57
57
- name : HABANA_VISIBLE_DEVICES
58
58
value : all
59
59
- name : HF_TOKEN
60
- value : $( HF_TOKEN)
60
+ value : ${ HF_TOKEN}
61
61
- name : MAX_WARMUP_SEQUENCE_LENGTH
62
62
value : ' 512'
63
63
serviceAccountName : default
64
64
volumes :
65
65
- name : model-volume
66
66
hostPath :
67
- path : /home/sdp/cesg
67
+ path : /mnt/models
68
68
type : Directory
69
69
- name : shm
70
70
emptyDir :
Original file line number Diff line number Diff line change 15
15
TGI_LLM_ENDPOINT : http://llm-dependency-svc.default.svc.cluster.local:9009
16
16
REDIS_URL : redis://vector-db.default.svc.cluster.local:6379
17
17
INDEX_NAME : rag-redis
18
- HUGGINGFACEHUB_API_TOKEN : {HF_TOKEN}
18
+ HUGGINGFACEHUB_API_TOKEN : $ {HF_TOKEN}
19
19
EMBEDDING_SERVICE_HOST_IP : embedding-svc
20
20
RETRIEVER_SERVICE_HOST_IP : retriever-svc
21
21
RERANK_SERVICE_HOST_IP : reranking-svc
Original file line number Diff line number Diff line change @@ -7,7 +7,7 @@ metadata:
7
7
name : embedding-dependency-deploy
8
8
namespace : default
9
9
spec :
10
- replicas : 3
10
+ replicas : 2
11
11
selector :
12
12
matchLabels :
13
13
app : embedding-dependency-deploy
48
48
volumes :
49
49
- name : model-volume
50
50
hostPath :
51
- path : /home/sdp/cesg
51
+ path : /mnt/models
52
52
type : Directory
53
53
- name : shm
54
54
emptyDir :
Original file line number Diff line number Diff line change 35
35
- --model-id
36
36
- $(LLM_MODEL_ID)
37
37
- --max-input-length
38
- - ' 1024'
39
- - --max-total-tokens
40
38
- ' 2048'
39
+ - --max-total-tokens
40
+ - ' 4096'
41
41
- --max-batch-total-tokens
42
42
- ' 65536'
43
43
- --max-batch-prefill-tokens
@@ -62,12 +62,12 @@ spec:
62
62
- name : HABANA_VISIBLE_DEVICES
63
63
value : all
64
64
- name : HF_TOKEN
65
- value : $( HF_TOKEN)
65
+ value : ${ HF_TOKEN}
66
66
serviceAccountName : default
67
67
volumes :
68
68
- name : model-volume
69
69
hostPath :
70
- path : /home/sdp/cesg
70
+ path : /mnt/models
71
71
type : Directory
72
72
- name : shm
73
73
emptyDir :
You can’t perform that action at this time.
0 commit comments