Skip to content

Commit ac34860

Browse files
[ChatQnA] update OOB & Tuned manifests (#738)
* update OOB manifests * update tgi parameters * update OOB manifests for w/o rerank * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * update tgi parameters * update tgi parameters for v0.9 w/o rerank * update OOB manifests 2.0.4->2.0.1 for w/o rerank * update tuned manifests * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * update tuned manifests --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
1 parent e0bc5f2 commit ac34860

File tree

122 files changed

+5428
-0
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

122 files changed

+5428
-0
lines changed
Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
# Copyright (C) 2024 Intel Corporation
2+
# SPDX-License-Identifier: Apache-2.0
3+
4+
# Deployment for the ChatQnA backend server: one replica of opea/chatqna
# listening on container port 8888, configured from the qna-config ConfigMap.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: chatqna-backend-server-deploy
  namespace: default
spec:
  replicas: 1
  selector:
    matchLabels:
      app: chatqna-backend-server-deploy
  template:
    metadata:
      labels:
        app: chatqna-backend-server-deploy
      annotations:
        # Annotation values are strings, so the boolean-looking value is quoted.
        sidecar.istio.io/rewriteAppHTTPProbers: "true"
    spec:
      serviceAccountName: default
      # Only schedule onto nodes labelled node-type=chatqna-opea.
      nodeSelector:
        node-type: chatqna-opea
      hostIPC: true
      # Prefer an even spread of this app's pods across hostnames (skew <= 1),
      # but still schedule when that cannot be satisfied.
      topologySpreadConstraints:
        - topologyKey: kubernetes.io/hostname
          maxSkew: 1
          whenUnsatisfiable: ScheduleAnyway
          labelSelector:
            matchLabels:
              app: chatqna-backend-server-deploy
      containers:
        - name: chatqna-backend-server-deploy
          image: opea/chatqna:latest
          imagePullPolicy: IfNotPresent
          # Explicit null: no args are passed to the container.
          args: null
          # Every key of the qna-config ConfigMap becomes an environment variable.
          envFrom:
            - configMapRef:
                name: qna-config
          ports:
            - containerPort: 8888
42+
---
43+
# NodePort Service exposing the chatqna-backend-server-deploy pods.
apiVersion: v1
kind: Service
metadata:
  name: chatqna-backend-server-svc
  # Pinned to the namespace of the Deployment it selects. A Service only
  # matches pods in its own namespace, so leaving this unset would detach it
  # from the pods whenever the kubectl context namespace is not "default".
  namespace: default
spec:
  type: NodePort
  selector:
    app: chatqna-backend-server-deploy
  ports:
    - name: service
      port: 8888
      targetPort: 8888
      # Fixed port opened on every node for external access.
      nodePort: 30888
Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
# Copyright (C) 2024 Intel Corporation
2+
# SPDX-License-Identifier: Apache-2.0
3+
4+
# Deployment for the embedding model server: one replica of the
# text-embeddings-inference CPU image, serving on container port 80.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: embedding-dependency-deploy
  namespace: default
spec:
  replicas: 1
  selector:
    matchLabels:
      app: embedding-dependency-deploy
  template:
    metadata:
      labels:
        app: embedding-dependency-deploy
      annotations:
        # Annotation values are strings, so the boolean-looking value is quoted.
        sidecar.istio.io/rewriteAppHTTPProbers: "true"
    spec:
      serviceAccountName: default
      # Only schedule onto nodes labelled node-type=chatqna-opea.
      nodeSelector:
        node-type: chatqna-opea
      volumes:
        # Host directory holding model files; type Directory requires the path
        # to already exist on the node.
        - name: model-volume
          hostPath:
            path: /mnt/models
            type: Directory
        # Memory-backed scratch volume, capped at 1Gi, mounted as /dev/shm.
        - name: shm
          emptyDir:
            medium: Memory
            sizeLimit: 1Gi
      containers:
        - name: embedding-dependency-deploy
          image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
          # $(EMBEDDING_MODEL_ID) is expanded by Kubernetes from the container
          # environment, which is populated from qna-config below.
          args:
            - --model-id
            - $(EMBEDDING_MODEL_ID)
            - --auto-truncate
          envFrom:
            - configMapRef:
                name: qna-config
          ports:
            - containerPort: 80
          volumeMounts:
            - name: model-volume
              mountPath: /data
            - name: shm
              mountPath: /dev/shm
50+
---
51+
# ClusterIP Service for the embedding-dependency-deploy pods.
apiVersion: v1
kind: Service
metadata:
  name: embedding-dependency-svc
  # Pinned to the namespace of the Deployment it selects. A Service only
  # matches pods in its own namespace, so leaving this unset would detach it
  # from the pods whenever the kubectl context namespace is not "default".
  namespace: default
spec:
  type: ClusterIP
  selector:
    app: embedding-dependency-deploy
  ports:
    # Clients connect on 6006; traffic is forwarded to container port 80.
    - name: service
      port: 6006
      targetPort: 80
Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
# Copyright (C) 2024 Intel Corporation
2+
# SPDX-License-Identifier: Apache-2.0
3+
4+
# Deployment for the embedding microservice: one replica of opea/embedding-tei
# listening on container port 6000, configured from the qna-config ConfigMap.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: embedding-deploy
  namespace: default
spec:
  replicas: 1
  selector:
    matchLabels:
      app: embedding-deploy
  template:
    metadata:
      labels:
        app: embedding-deploy
      annotations:
        # Annotation values are strings, so the boolean-looking value is quoted.
        sidecar.istio.io/rewriteAppHTTPProbers: "true"
    spec:
      serviceAccountName: default
      # Only schedule onto nodes labelled node-type=chatqna-opea.
      nodeSelector:
        node-type: chatqna-opea
      hostIPC: true
      # Prefer an even spread of this app's pods across hostnames (skew <= 1),
      # but still schedule when that cannot be satisfied.
      topologySpreadConstraints:
        - topologyKey: kubernetes.io/hostname
          maxSkew: 1
          whenUnsatisfiable: ScheduleAnyway
          labelSelector:
            matchLabels:
              app: embedding-deploy
      containers:
        - name: embedding-deploy
          image: opea/embedding-tei:latest
          imagePullPolicy: IfNotPresent
          # Explicit null: no args are passed to the container.
          args: null
          # Every key of the qna-config ConfigMap becomes an environment variable.
          envFrom:
            - configMapRef:
                name: qna-config
          ports:
            - containerPort: 6000
42+
---
43+
# ClusterIP Service for the embedding-deploy pods.
apiVersion: v1
kind: Service
metadata:
  name: embedding-svc
  # Pinned to the namespace of the Deployment it selects. A Service only
  # matches pods in its own namespace, so leaving this unset would detach it
  # from the pods whenever the kubectl context namespace is not "default".
  namespace: default
spec:
  type: ClusterIP
  selector:
    app: embedding-deploy
  ports:
    - name: service
      port: 6000
      targetPort: 6000
Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,88 @@
1+
# Copyright (C) 2024 Intel Corporation
2+
# SPDX-License-Identifier: Apache-2.0
3+
4+
# Deployment for the LLM model server: 31 replicas of the tgi-gaudi image,
# each limited to one habana.ai/gaudi device and serving on container port 80.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: llm-dependency-deploy
  namespace: default
spec:
  replicas: 31
  selector:
    matchLabels:
      app: llm-dependency-deploy
  template:
    metadata:
      labels:
        app: llm-dependency-deploy
      annotations:
        # Annotation values are strings, so the boolean-looking value is quoted.
        sidecar.istio.io/rewriteAppHTTPProbers: "true"
    spec:
      serviceAccountName: default
      # Only schedule onto nodes labelled node-type=chatqna-opea.
      nodeSelector:
        node-type: chatqna-opea
      hostIPC: true
      volumes:
        # Host directory holding model files; type Directory requires the path
        # to already exist on the node.
        - name: model-volume
          hostPath:
            path: /mnt/models
            type: Directory
        # Memory-backed scratch volume, capped at 1Gi, mounted as /dev/shm.
        - name: shm
          emptyDir:
            medium: Memory
            sizeLimit: 1Gi
      containers:
        # Note: the container name carries a "-demo" suffix that the
        # Deployment name does not.
        - name: llm-dependency-deploy-demo
          image: ghcr.io/huggingface/tgi-gaudi:2.0.1
          securityContext:
            capabilities:
              add:
                - SYS_NICE
          # $(LLM_MODEL_ID) is expanded by Kubernetes from the container
          # environment. Numeric flag values are quoted because every args
          # entry must be a string.
          args:
            - --model-id
            - $(LLM_MODEL_ID)
            - --max-input-length
            - "2048"
            - --max-total-tokens
            - "4096"
            - --max-batch-total-tokens
            - "65536"
            - --max-batch-prefill-tokens
            - "4096"
          envFrom:
            - configMapRef:
                name: qna-config
          env:
            - name: OMPI_MCA_btl_vader_single_copy_mechanism
              value: none
            - name: PT_HPU_ENABLE_LAZY_COLLECTIVES
              value: "true"
            - name: runtime
              value: habana
            - name: HABANA_VISIBLE_DEVICES
              value: all
            # NOTE(review): literal placeholder. Kubernetes expands only the
            # $(VAR) form, so this is presumably substituted by external
            # tooling before the manifest is applied - confirm.
            - name: HF_TOKEN
              value: '${HF_TOKEN}'
          ports:
            - containerPort: 80
          resources:
            limits:
              habana.ai/gaudi: 1
          volumeMounts:
            - name: model-volume
              mountPath: /data
            - name: shm
              mountPath: /dev/shm
76+
---
77+
# ClusterIP Service for the llm-dependency-deploy pods.
apiVersion: v1
kind: Service
metadata:
  name: llm-dependency-svc
  # Pinned to the namespace of the Deployment it selects. A Service only
  # matches pods in its own namespace, so leaving this unset would detach it
  # from the pods whenever the kubectl context namespace is not "default".
  namespace: default
spec:
  type: ClusterIP
  selector:
    app: llm-dependency-deploy
  ports:
    # Clients connect on 9009; traffic is forwarded to container port 80.
    - name: service
      port: 9009
      targetPort: 80
Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
# Copyright (C) 2024 Intel Corporation
2+
# SPDX-License-Identifier: Apache-2.0
3+
4+
# Deployment for the LLM microservice: one replica of opea/llm-tgi listening
# on container port 9000, configured from the qna-config ConfigMap.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: llm-deploy
  namespace: default
spec:
  replicas: 1
  selector:
    matchLabels:
      app: llm-deploy
  template:
    metadata:
      labels:
        app: llm-deploy
      annotations:
        # Annotation values are strings, so the boolean-looking value is quoted.
        sidecar.istio.io/rewriteAppHTTPProbers: "true"
    spec:
      serviceAccountName: default
      # Only schedule onto nodes labelled node-type=chatqna-opea.
      nodeSelector:
        node-type: chatqna-opea
      hostIPC: true
      # Prefer an even spread of this app's pods across hostnames (skew <= 1),
      # but still schedule when that cannot be satisfied.
      topologySpreadConstraints:
        - topologyKey: kubernetes.io/hostname
          maxSkew: 1
          whenUnsatisfiable: ScheduleAnyway
          labelSelector:
            matchLabels:
              app: llm-deploy
      containers:
        - name: llm-deploy
          image: opea/llm-tgi:latest
          imagePullPolicy: IfNotPresent
          # Explicit null: no args are passed to the container.
          args: null
          # Every key of the qna-config ConfigMap becomes an environment variable.
          envFrom:
            - configMapRef:
                name: qna-config
          ports:
            - containerPort: 9000
42+
---
43+
# ClusterIP Service for the llm-deploy pods.
apiVersion: v1
kind: Service
metadata:
  name: llm-svc
  # Pinned to the namespace of the Deployment it selects. A Service only
  # matches pods in its own namespace, so leaving this unset would detach it
  # from the pods whenever the kubectl context namespace is not "default".
  namespace: default
spec:
  type: ClusterIP
  selector:
    app: llm-deploy
  ports:
    - name: service
      port: 9000
      targetPort: 9000
Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
# Copyright (C) 2024 Intel Corporation
2+
# SPDX-License-Identifier: Apache-2.0
3+
4+
# Deployment for the reranking microservice: one replica of opea/reranking-tei
# listening on container port 8000, configured from the qna-config ConfigMap.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: reranking-deploy
  namespace: default
spec:
  replicas: 1
  selector:
    matchLabels:
      app: reranking-deploy
  template:
    metadata:
      labels:
        app: reranking-deploy
      annotations:
        # Annotation values are strings, so the boolean-looking value is quoted.
        sidecar.istio.io/rewriteAppHTTPProbers: "true"
    spec:
      serviceAccountName: default
      # Only schedule onto nodes labelled node-type=chatqna-opea.
      nodeSelector:
        node-type: chatqna-opea
      hostIPC: true
      # Prefer an even spread of this app's pods across hostnames (skew <= 1),
      # but still schedule when that cannot be satisfied.
      topologySpreadConstraints:
        - topologyKey: kubernetes.io/hostname
          maxSkew: 1
          whenUnsatisfiable: ScheduleAnyway
          labelSelector:
            matchLabels:
              app: reranking-deploy
      containers:
        - name: reranking-deploy
          image: opea/reranking-tei:latest
          imagePullPolicy: IfNotPresent
          # Explicit null: no args are passed to the container.
          args: null
          # Every key of the qna-config ConfigMap becomes an environment variable.
          envFrom:
            - configMapRef:
                name: qna-config
          ports:
            - containerPort: 8000
42+
---
43+
# ClusterIP Service for the reranking-deploy pods.
apiVersion: v1
kind: Service
metadata:
  name: reranking-svc
  # Pinned to the namespace of the Deployment it selects. A Service only
  # matches pods in its own namespace, so leaving this unset would detach it
  # from the pods whenever the kubectl context namespace is not "default".
  namespace: default
spec:
  type: ClusterIP
  selector:
    app: reranking-deploy
  ports:
    - name: service
      port: 8000
      targetPort: 8000

0 commit comments

Comments
 (0)