Skip to content

Commit af47b3c

Browse files
authored
helm: Add K8S probes to retriever-usvc (#244)
- Add K8S probes to retriever-usvc
- Remove redundant values in xx-values.yaml

Signed-off-by: Lianhao Lu <lianhao.lu@intel.com>
1 parent aa2730a commit af47b3c

File tree

7 files changed

+81
-105
lines changed

7 files changed

+81
-105
lines changed

helm-charts/chatqna/gaudi-values.yaml

Lines changed: 5 additions & 41 deletions
Original file line number | Diff line number | Diff line change
@@ -1,34 +1,6 @@
11
# Copyright (C) 2024 Intel Corporation
22
# SPDX-License-Identifier: Apache-2.0
33

4-
# Default values for chatqna.
5-
# This is a YAML-formatted file.
6-
# Declare variables to be passed into your templates.
7-
8-
replicaCount: 1
9-
10-
image:
11-
repository: opea/chatqna:latest
12-
pullPolicy: IfNotPresent
13-
# Overrides the image tag whose default is the chart appVersion.
14-
# tag: "1.0"
15-
16-
port: 8888
17-
service:
18-
type: ClusterIP
19-
port: 8888
20-
21-
securityContext:
22-
readOnlyRootFilesystem: true
23-
allowPrivilegeEscalation: false
24-
runAsNonRoot: true
25-
runAsUser: 1000
26-
capabilities:
27-
drop:
28-
- ALL
29-
seccompProfile:
30-
type: RuntimeDefault
31-
324
tei:
335
image:
346
repository: ghcr.io/huggingface/tei-gaudi
@@ -39,22 +11,14 @@ tei:
3911

4012
# To override values in subchart tgi
4113
tgi:
42-
LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
43-
# LLM_MODEL_ID: /data/OpenCodeInterpreter-DS-6.7B
4414
image:
4515
repository: ghcr.io/huggingface/tgi-gaudi
4616
tag: "2.0.1"
4717
resources:
4818
limits:
4919
habana.ai/gaudi: 1
50-
51-
global:
52-
http_proxy:
53-
https_proxy:
54-
no_proxy:
55-
HUGGINGFACEHUB_API_TOKEN: "insert-your-huggingface-token-here"
56-
LANGCHAIN_TRACING_V2: false
57-
LANGCHAIN_API_KEY: "insert-your-langchain-key-here"
58-
# set modelUseHostPath to host directory if you want to use hostPath volume for model storage
59-
# comment out modeluseHostPath if you want to download the model from huggingface
60-
modelUseHostPath: /mnt/opea-models
20+
extraArgs:
21+
- "--max-input-length"
22+
- "1024"
23+
- "--max-total-tokens"
24+
- "2048"

helm-charts/common/retriever-usvc/templates/deployment.yaml

Lines changed: 10 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -48,19 +48,17 @@ spec:
4848
volumeMounts:
4949
- mountPath: /tmp
5050
name: tmp
51-
{{- if not .Values.noProbe }}
51+
{{- if .Values.livenessProbe }}
52+
livenessProbe:
53+
{{- toYaml .Values.livenessProbe | nindent 12 }}
54+
{{- end }}
55+
{{- if .Values.readinessProbe }}
56+
readinessProbe:
57+
{{- toYaml .Values.readinessProbe | nindent 12 }}
58+
{{- end }}
59+
{{- if .Values.startupProbe }}
5260
startupProbe:
53-
exec:
54-
command:
55-
- curl
56-
{{- if .Values.TEI_EMBEDDING_ENDPOINT }}
57-
- {{ .Values.TEI_EMBEDDING_ENDPOINT }}
58-
{{- else }}
59-
- http://{{ .Release.Name }}-tei
60-
{{- end }}
61-
initialDelaySeconds: 5
62-
periodSeconds: 5
63-
failureThreshold: 120
61+
{{- toYaml .Values.startupProbe | nindent 12 }}
6462
{{- end }}
6563
resources:
6664
{{- toYaml .Values.resources | nindent 12 }}

helm-charts/common/retriever-usvc/templates/tests/test-pod.yaml

Lines changed: 8 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -15,12 +15,17 @@ spec:
1515
- name: curl
1616
#image: alpine/curl
1717
image: python:3.10.14
18-
command: ['sh', '-c']
18+
command: ['bash', '-c']
1919
args:
2020
- |
2121
your_embedding=$(python3 -c "import random; embedding = [random.uniform(-1, 1) for _ in range(768)]; print(embedding)");
22-
curl http://{{ include "retriever-usvc.fullname" . }}:{{ .Values.service.port }}/v1/retrieval -sS --fail-with-body \
22+
max_retry=20;
23+
for ((i=1; i<=max_retry; i++)); do
24+
curl http://{{ include "retriever-usvc.fullname" . }}:{{ .Values.service.port }}/v1/retrieval -sS --fail-with-body \
2325
-X POST \
2426
-d "{\"text\":\"What is the revenue of Nike in 2023?\",\"embedding\":${your_embedding}}" \
25-
-H 'Content-Type: application/json'
27+
-H 'Content-Type: application/json' && break;
28+
sleep 10;
29+
done;
30+
if [ $i -gt $max_retry ]; then echo "retriever test failed."; exit 1; fi
2631
restartPolicy: Never

helm-charts/common/retriever-usvc/values.yaml

Lines changed: 21 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -55,6 +55,27 @@ resources: {}
5555
# cpu: 100m
5656
# memory: 128Mi
5757

58+
livenessProbe:
59+
httpGet:
60+
path: v1/health_check
61+
port: retriever-usvc
62+
initialDelaySeconds: 5
63+
periodSeconds: 5
64+
failureThreshold: 24
65+
readinessProbe:
66+
httpGet:
67+
path: v1/health_check
68+
port: retriever-usvc
69+
initialDelaySeconds: 5
70+
periodSeconds: 5
71+
startupProbe:
72+
httpGet:
73+
path: v1/health_check
74+
port: retriever-usvc
75+
initialDelaySeconds: 5
76+
periodSeconds: 5
77+
failureThreshold: 120
78+
5879
nodeSelector: {}
5980

6081
tolerations: []

helm-charts/common/tgi/nv-values.yaml

Lines changed: 0 additions & 46 deletions
Original file line number | Diff line number | Diff line change
@@ -5,56 +5,10 @@
55
# This is a YAML-formatted file.
66
# Declare variables to be passed into your templates.
77

8-
replicaCount: 1
9-
10-
port: 2080
11-
128
image:
139
repository: ghcr.io/huggingface/text-generation-inference
14-
pullPolicy: IfNotPresent
15-
# Overrides the image tag whose default is the chart appVersion.
1610
tag: "2.0"
1711

18-
imagePullSecrets: []
19-
nameOverride: ""
20-
fullnameOverride: ""
21-
22-
podAnnotations: {}
23-
24-
podSecurityContext: {}
25-
# fsGroup: 2000
26-
27-
securityContext:
28-
readOnlyRootFilesystem: true
29-
allowPrivilegeEscalation: false
30-
runAsNonRoot: true
31-
runAsUser: 1000
32-
capabilities:
33-
drop:
34-
- ALL
35-
seccompProfile:
36-
type: RuntimeDefault
37-
38-
service:
39-
type: ClusterIP
40-
4112
resources:
4213
limits:
4314
nvidia.com/gpu: 1
44-
45-
nodeSelector: {}
46-
47-
tolerations: []
48-
49-
affinity: {}
50-
51-
LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
52-
53-
global:
54-
http_proxy: ""
55-
https_proxy: ""
56-
no_proxy: ""
57-
HUGGINGFACEHUB_API_TOKEN: "insert-your-huggingface-token-here"
58-
# set modelUseHostPath to host directory if you want to use hostPath volume for model storage
59-
# comment out modeluseHostPath if you want to download the model from huggingface
60-
modelUseHostPath: /mnt/opea-models

manifests/common/retriever-usvc.yaml

Lines changed: 20 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -106,6 +106,26 @@ spec:
106106
volumeMounts:
107107
- mountPath: /tmp
108108
name: tmp
109+
livenessProbe:
110+
failureThreshold: 24
111+
httpGet:
112+
path: v1/health_check
113+
port: retriever-usvc
114+
initialDelaySeconds: 5
115+
periodSeconds: 5
116+
readinessProbe:
117+
httpGet:
118+
path: v1/health_check
119+
port: retriever-usvc
120+
initialDelaySeconds: 5
121+
periodSeconds: 5
122+
startupProbe:
123+
failureThreshold: 120
124+
httpGet:
125+
path: v1/health_check
126+
port: retriever-usvc
127+
initialDelaySeconds: 5
128+
periodSeconds: 5
109129
resources:
110130
{}
111131
volumes:

manifests/common/tgi_nv.yaml

Lines changed: 17 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -16,10 +16,7 @@ metadata:
1616
data:
1717
MODEL_ID: "Intel/neural-chat-7b-v3-3"
1818
PORT: "2080"
19-
HUGGING_FACE_HUB_TOKEN: "insert-your-huggingface-token-here"
2019
HF_TOKEN: "insert-your-huggingface-token-here"
21-
MAX_INPUT_TOKENS: "1024"
22-
MAX_TOTAL_TOKENS: "4096"
2320
http_proxy: ""
2421
https_proxy: ""
2522
no_proxy: ""
@@ -102,6 +99,23 @@ spec:
10299
- name: http
103100
containerPort: 2080
104101
protocol: TCP
102+
livenessProbe:
103+
failureThreshold: 24
104+
initialDelaySeconds: 5
105+
periodSeconds: 5
106+
tcpSocket:
107+
port: http
108+
readinessProbe:
109+
initialDelaySeconds: 5
110+
periodSeconds: 5
111+
tcpSocket:
112+
port: http
113+
startupProbe:
114+
failureThreshold: 120
115+
initialDelaySeconds: 5
116+
periodSeconds: 5
117+
tcpSocket:
118+
port: http
105119
resources:
106120
limits:
107121
nvidia.com/gpu: 1

0 commit comments

Comments
 (0)