File tree Expand file tree Collapse file tree 7 files changed +81
-105
lines changed Expand file tree Collapse file tree 7 files changed +81
-105
lines changed Original file line number Diff line number Diff line change 1
1
# Copyright (C) 2024 Intel Corporation
2
2
# SPDX-License-Identifier: Apache-2.0
3
3
4
- # Default values for chatqna.
5
- # This is a YAML-formatted file.
6
- # Declare variables to be passed into your templates.
7
-
8
- replicaCount : 1
9
-
10
- image :
11
- repository : opea/chatqna:latest
12
- pullPolicy : IfNotPresent
13
- # Overrides the image tag whose default is the chart appVersion.
14
- # tag: "1.0"
15
-
16
- port : 8888
17
- service :
18
- type : ClusterIP
19
- port : 8888
20
-
21
- securityContext :
22
- readOnlyRootFilesystem : true
23
- allowPrivilegeEscalation : false
24
- runAsNonRoot : true
25
- runAsUser : 1000
26
- capabilities :
27
- drop :
28
- - ALL
29
- seccompProfile :
30
- type : RuntimeDefault
31
-
32
4
tei :
33
5
image :
34
6
repository : ghcr.io/huggingface/tei-gaudi
39
11
40
12
# To override values in subchart tgi
41
13
tgi :
42
- LLM_MODEL_ID : Intel/neural-chat-7b-v3-3
43
- # LLM_MODEL_ID: /data/OpenCodeInterpreter-DS-6.7B
44
14
image :
45
15
repository : ghcr.io/huggingface/tgi-gaudi
46
16
tag : " 2.0.1"
47
17
resources :
48
18
limits :
49
19
habana.ai/gaudi : 1
50
-
51
- global :
52
- http_proxy :
53
- https_proxy :
54
- no_proxy :
55
- HUGGINGFACEHUB_API_TOKEN : " insert-your-huggingface-token-here"
56
- LANGCHAIN_TRACING_V2 : false
57
- LANGCHAIN_API_KEY : " insert-your-langchain-key-here"
58
- # set modelUseHostPath to host directory if you want to use hostPath volume for model storage
59
- # comment out modelUseHostPath if you want to download the model from Hugging Face
60
- modelUseHostPath : /mnt/opea-models
20
+ extraArgs :
21
+ - " --max-input-length"
22
+ - " 1024"
23
+ - " --max-total-tokens"
24
+ - " 2048"
Original file line number Diff line number Diff line change @@ -48,19 +48,17 @@ spec:
48
48
volumeMounts :
49
49
- mountPath : /tmp
50
50
name : tmp
51
- {{- if not .Values.noProbe }}
51
+ {{- if .Values.livenessProbe }}
52
+ livenessProbe :
53
+ {{- toYaml .Values.livenessProbe | nindent 12 }}
54
+ {{- end }}
55
+ {{- if .Values.readinessProbe }}
56
+ readinessProbe :
57
+ {{- toYaml .Values.readinessProbe | nindent 12 }}
58
+ {{- end }}
59
+ {{- if .Values.startupProbe }}
52
60
startupProbe :
53
- exec :
54
- command :
55
- - curl
56
- {{- if .Values.TEI_EMBEDDING_ENDPOINT }}
57
- - {{ .Values.TEI_EMBEDDING_ENDPOINT }}
58
- {{- else }}
59
- - http://{{ .Release.Name }}-tei
60
- {{- end }}
61
- initialDelaySeconds : 5
62
- periodSeconds : 5
63
- failureThreshold : 120
61
+ {{- toYaml .Values.startupProbe | nindent 12 }}
64
62
{{- end }}
65
63
resources :
66
64
{{- toYaml .Values.resources | nindent 12 }}
Original file line number Diff line number Diff line change @@ -15,12 +15,17 @@ spec:
15
15
- name : curl
16
16
# image: alpine/curl
17
17
image : python:3.10.14
18
- command : ['sh ', '-c']
18
+ command : ['bash ', '-c']
19
19
args :
20
20
- |
21
21
your_embedding=$(python3 -c "import random; embedding = [random.uniform(-1, 1) for _ in range(768)]; print(embedding)");
22
- curl http://{{ include "retriever-usvc.fullname" . }}:{{ .Values.service.port }}/v1/retrieval -sS --fail-with-body \
22
+ max_retry=20;
23
+ for ((i=1; i<=max_retry; i++)); do
24
+ curl http://{{ include "retriever-usvc.fullname" . }}:{{ .Values.service.port }}/v1/retrieval -sS --fail-with-body \
23
25
-X POST \
24
26
-d "{\"text\":\"What is the revenue of Nike in 2023?\",\"embedding\":${your_embedding}}" \
25
- -H 'Content-Type: application/json'
27
+ -H 'Content-Type: application/json' && break;
28
+ sleep 10;
29
+ done;
30
+ if [ $i -gt $max_retry ]; then echo "retriever test failed."; exit 1; fi
26
31
restartPolicy : Never
Original file line number Diff line number Diff line change @@ -55,6 +55,27 @@ resources: {}
55
55
# cpu: 100m
56
56
# memory: 128Mi
57
57
58
+ livenessProbe :
59
+ httpGet :
60
+ path : v1/health_check
61
+ port : retriever-usvc
62
+ initialDelaySeconds : 5
63
+ periodSeconds : 5
64
+ failureThreshold : 24
65
+ readinessProbe :
66
+ httpGet :
67
+ path : v1/health_check
68
+ port : retriever-usvc
69
+ initialDelaySeconds : 5
70
+ periodSeconds : 5
71
+ startupProbe :
72
+ httpGet :
73
+ path : v1/health_check
74
+ port : retriever-usvc
75
+ initialDelaySeconds : 5
76
+ periodSeconds : 5
77
+ failureThreshold : 120
78
+
58
79
nodeSelector : {}
59
80
60
81
tolerations : []
Original file line number Diff line number Diff line change 5
5
# This is a YAML-formatted file.
6
6
# Declare variables to be passed into your templates.
7
7
8
- replicaCount : 1
9
-
10
- port : 2080
11
-
12
8
image :
13
9
repository : ghcr.io/huggingface/text-generation-inference
14
- pullPolicy : IfNotPresent
15
- # Overrides the image tag whose default is the chart appVersion.
16
10
tag : " 2.0"
17
11
18
- imagePullSecrets : []
19
- nameOverride : " "
20
- fullnameOverride : " "
21
-
22
- podAnnotations : {}
23
-
24
- podSecurityContext : {}
25
- # fsGroup: 2000
26
-
27
- securityContext :
28
- readOnlyRootFilesystem : true
29
- allowPrivilegeEscalation : false
30
- runAsNonRoot : true
31
- runAsUser : 1000
32
- capabilities :
33
- drop :
34
- - ALL
35
- seccompProfile :
36
- type : RuntimeDefault
37
-
38
- service :
39
- type : ClusterIP
40
-
41
12
resources :
42
13
limits :
43
14
nvidia.com/gpu : 1
44
-
45
- nodeSelector : {}
46
-
47
- tolerations : []
48
-
49
- affinity : {}
50
-
51
- LLM_MODEL_ID : Intel/neural-chat-7b-v3-3
52
-
53
- global :
54
- http_proxy : " "
55
- https_proxy : " "
56
- no_proxy : " "
57
- HUGGINGFACEHUB_API_TOKEN : " insert-your-huggingface-token-here"
58
- # set modelUseHostPath to host directory if you want to use hostPath volume for model storage
59
- # comment out modelUseHostPath if you want to download the model from Hugging Face
60
- modelUseHostPath : /mnt/opea-models
Original file line number Diff line number Diff line change @@ -106,6 +106,26 @@ spec:
106
106
volumeMounts :
107
107
- mountPath : /tmp
108
108
name : tmp
109
+ livenessProbe :
110
+ failureThreshold : 24
111
+ httpGet :
112
+ path : v1/health_check
113
+ port : retriever-usvc
114
+ initialDelaySeconds : 5
115
+ periodSeconds : 5
116
+ readinessProbe :
117
+ httpGet :
118
+ path : v1/health_check
119
+ port : retriever-usvc
120
+ initialDelaySeconds : 5
121
+ periodSeconds : 5
122
+ startupProbe :
123
+ failureThreshold : 120
124
+ httpGet :
125
+ path : v1/health_check
126
+ port : retriever-usvc
127
+ initialDelaySeconds : 5
128
+ periodSeconds : 5
109
129
resources :
110
130
{}
111
131
volumes :
Original file line number Diff line number Diff line change @@ -16,10 +16,7 @@ metadata:
16
16
data :
17
17
MODEL_ID : " Intel/neural-chat-7b-v3-3"
18
18
PORT : " 2080"
19
- HUGGING_FACE_HUB_TOKEN : " insert-your-huggingface-token-here"
20
19
HF_TOKEN : " insert-your-huggingface-token-here"
21
- MAX_INPUT_TOKENS : " 1024"
22
- MAX_TOTAL_TOKENS : " 4096"
23
20
http_proxy : " "
24
21
https_proxy : " "
25
22
no_proxy : " "
@@ -102,6 +99,23 @@ spec:
102
99
- name : http
103
100
containerPort : 2080
104
101
protocol : TCP
102
+ livenessProbe :
103
+ failureThreshold : 24
104
+ initialDelaySeconds : 5
105
+ periodSeconds : 5
106
+ tcpSocket :
107
+ port : http
108
+ readinessProbe :
109
+ initialDelaySeconds : 5
110
+ periodSeconds : 5
111
+ tcpSocket :
112
+ port : http
113
+ startupProbe :
114
+ failureThreshold : 120
115
+ initialDelaySeconds : 5
116
+ periodSeconds : 5
117
+ tcpSocket :
118
+ port : http
105
119
resources :
106
120
limits :
107
121
nvidia.com/gpu : 1
You can’t perform that action at this time.
0 commit comments