Skip to content

Commit 66de41c

Browse files
authored
Helm Chart for AgentQnA (#539)
Signed-off-by: Dolpher Du <dolpher.du@intel.com>
1 parent 6ab0b9f commit 66de41c

22 files changed

+1073
-0
lines changed

helm-charts/agentqna/.helmignore

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
# Patterns to ignore when building packages.
2+
# This supports shell glob matching, relative path matching, and
3+
# negation (prefixed with !). Only one pattern per line.
4+
.DS_Store
5+
# Common VCS dirs
6+
.git/
7+
.gitignore
8+
.bzr/
9+
.bzrignore
10+
.hg/
11+
.hgignore
12+
.svn/
13+
# Common backup files
14+
*.swp
15+
*.bak
16+
*.tmp
17+
*.orig
18+
*~
19+
# Various IDEs
20+
.project
21+
.idea/
22+
*.tmproj
23+
.vscode/

helm-charts/agentqna/Chart.yaml

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

# Umbrella chart: it defines the chart metadata and lists the subcharts that
# make up an AgentQnA deployment. All subcharts are resolved from the sibling
# "common" directory through file:// repositories.
apiVersion: v2
name: agentqna
description: The Helm chart to deploy AgentQnA
type: application
dependencies:
  # The same "agent" subchart is included twice; the aliases give each copy
  # its own name and values key (worker / supervisor).
  - name: agent
    version: 1.0.0
    alias: worker
    repository: "file://../common/agent"
  - name: agent
    version: 1.0.0
    alias: supervisor
    repository: "file://../common/agent"
  # tgi is the only dependency gated by a condition (tgi.enabled).
  - name: tgi
    version: 1.0.0
    repository: "file://../common/tgi"
    condition: tgi.enabled
  - name: tei
    version: 1.0.0
    repository: "file://../common/tei"
  - name: embedding-usvc
    version: 1.0.0
    repository: "file://../common/embedding-usvc"
  - name: teirerank
    version: 1.0.0
    repository: "file://../common/teirerank"
  - name: reranking-usvc
    version: 1.0.0
    repository: "file://../common/reranking-usvc"
  - name: redis-vector-db
    version: 1.0.0
    repository: "file://../common/redis-vector-db"
  - name: retriever-usvc
    version: 1.0.0
    repository: "file://../common/retriever-usvc"
  - name: data-prep
    version: 1.0.0
    repository: "file://../common/data-prep"
version: 1.0.0
appVersion: "v1.0"

helm-charts/agentqna/README.md

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
# AgentQnA
2+
3+
Helm chart for deploying the AgentQnA service.
4+
5+
## Deploy
6+
7+
helm repo add opea https://opea-project.github.io/GenAIInfra
8+
9+
helm install agentqna opea/agentqna --set global.HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} --set tgi.enabled=True
10+
11+
## Verify
12+
13+
To verify the installation, run the command `kubectl get pod` to make sure all pods are running.
14+
15+
### Verify the workload through curl command
16+
17+
Run the command `kubectl port-forward svc/agentqna-supervisor 9090:9090` to expose the service for access.
18+
19+
Open another terminal and run the following command to verify that the service is working:
20+
21+
```console
22+
curl http://localhost:9090/v1/chat/completions \
23+
-X POST \
24+
-H "Content-Type: application/json" \
25+
-d '{"query": "Most recent album by Michael Jackson"}'
26+
```
Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

# Accelerate inferencing in heaviest components to improve performance
# by overriding their subchart values

# NOTE(review): the nesting below was reconstructed from a flattened listing;
# confirm it against the tgi subchart's values schema.
tgi:
  accelDevice: "gaudi"
  image:
    repository: ghcr.io/huggingface/tgi-gaudi
    tag: "2.0.5"
  resources:
    limits:
      # Four Gaudi devices are requested; "--num-shard" in extraCmdArgs below
      # uses the same count - presumably the two must stay in sync (confirm).
      habana.ai/gaudi: 4
  # The settings below are all quoted so they stay strings.
  MAX_INPUT_LENGTH: "4096"
  MAX_TOTAL_TOKENS: "8192"
  CUDA_GRAPHS: ""
  OMPI_MCA_btl_vader_single_copy_mechanism: "none"
  PT_HPU_ENABLE_LAZY_COLLECTIVES: "true"
  ENABLE_HPU_GRAPH: "true"
  LIMIT_HPU_GRAPH: "true"
  USE_FLASH_ATTENTION: "true"
  FLASH_ATTENTION_RECOMPUTE: "true"
  extraCmdArgs: ["--sharded", "true", "--num-shard", "4"]
  livenessProbe:
    initialDelaySeconds: 5
    periodSeconds: 5
    timeoutSeconds: 1
  readinessProbe:
    initialDelaySeconds: 5
    periodSeconds: 5
    timeoutSeconds: 1
  startupProbe:
    initialDelaySeconds: 5
    periodSeconds: 5
    timeoutSeconds: 1
    # 120 failures x 5s period allows roughly 10 minutes for startup.
    failureThreshold: 120
Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
{{/*
Expand the name of the chart.
Uses .Values.nameOverride when it is set and .Chart.Name otherwise; the result
is cut to 63 characters and a trailing "-" is removed.
*/}}
{{- define "agentqna.name" -}}
{{- $chartName := default .Chart.Name .Values.nameOverride -}}
{{- $chartName | trunc 63 | trimSuffix "-" }}
{{- end }}
7+
{{/*
Create a default fully qualified app name.
Resolution order:
  1. .Values.fullnameOverride, when set.
  2. The release name alone, when it already contains the chart name
     (or .Values.nameOverride).
  3. "<release name>-<chart name>" otherwise.
Every result is truncated at 63 chars because some Kubernetes name fields are
limited to this (by the DNS naming spec), and a trailing "-" is removed.
*/}}
{{- define "agentqna.fullname" -}}
{{- $override := .Values.fullnameOverride -}}
{{- $name := default .Chart.Name .Values.nameOverride -}}
{{- if $override }}
{{- $override | trunc 63 | trimSuffix "-" }}
{{- else if contains $name .Release.Name }}
{{- .Release.Name | trunc 63 | trimSuffix "-" }}
{{- else }}
{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }}
{{- end }}
{{- end }}
25+
{{/*
Create chart name and version as used by the chart label.
The value is "<chart name>-<chart version>" with every "+" replaced by "_",
cut to 63 characters, and with a trailing "-" removed.
*/}}
{{- define "agentqna.chart" -}}
{{- $nameAndVersion := printf "%s-%s" .Chart.Name .Chart.Version -}}
{{- $nameAndVersion | replace "+" "_" | trunc 63 | trimSuffix "-" }}
{{- end }}
32+
{{/*
Common labels.
Emits the chart label, the selector labels, the app version label (only when
the chart declares an appVersion) and the managed-by label.
*/}}
{{- define "agentqna.labels" -}}
helm.sh/chart: {{ include "agentqna.chart" . }}
{{ include "agentqna.selectorLabels" . }}
{{- with .Chart.AppVersion }}
app.kubernetes.io/version: {{ quote . }}
{{- end }}
app.kubernetes.io/managed-by: {{ .Release.Service }}
{{- end }}
44+
{{/*
Selector labels.
Two labels: the chart name (via "agentqna.name") and the release name.
*/}}
{{- define "agentqna.selectorLabels" -}}
{{- $appName := include "agentqna.name" . -}}
app.kubernetes.io/name: {{ $appName }}
app.kubernetes.io/instance: {{ .Release.Name }}
{{- end }}
52+
{{/*
Create the name of the service account to use.
.Values.serviceAccount.name wins when it is set. Otherwise the fallback is the
chart's full name when .Values.serviceAccount.create is true, and "default"
when it is not.
*/}}
{{- define "agentqna.serviceAccountName" -}}
{{- $fallback := "default" -}}
{{- if .Values.serviceAccount.create -}}
{{- $fallback = include "agentqna.fullname" . -}}
{{- end -}}
{{- default $fallback .Values.serviceAccount.name }}
{{- end }}
Lines changed: 94 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,94 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

# Deployment for the "crag" container. Every object name and the extra "app"
# label are derived from the release name with a "-crag" suffix.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: {{ .Release.Name }}-crag
  labels:
    {{- include "agentqna.labels" . | nindent 4 }}
    app: {{ .Release.Name }}-crag
spec:
  replicas: {{ .Values.replicaCount }}
  selector:
    matchLabels:
      {{- include "agentqna.selectorLabels" . | nindent 6 }}
      app: {{ .Release.Name }}-crag
  template:
    metadata:
      {{- with .Values.podAnnotations }}
      annotations:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      labels:
        {{- include "agentqna.selectorLabels" . | nindent 8 }}
        app: {{ .Release.Name }}-crag
    spec:
      {{- with .Values.imagePullSecrets }}
      imagePullSecrets:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      securityContext:
        {{- toYaml .Values.podSecurityContext | nindent 8 }}
      containers:
        - name: {{ .Release.Name }}
          # The env key is rendered only when LOGFLAG is set, so the manifest
          # never carries an empty (null) env entry.
          {{- if .Values.LOGFLAG }}
          env:
            - name: LOGFLAG
              value: {{ .Values.LOGFLAG | quote }}
          {{- end }}
          securityContext:
            {{- toYaml .Values.securityContext | nindent 12 }}
          # The image tag falls back to the chart appVersion when
          # crag.image.tag is not set.
          image: "{{ .Values.crag.image.repository }}:{{ .Values.crag.image.tag | default .Chart.AppVersion }}"
          imagePullPolicy: {{ .Values.crag.image.pullPolicy }}
          volumeMounts:
            # /tmp is backed by the emptyDir volume declared below.
            - mountPath: /tmp
              name: tmp
          ports:
            - name: crag
              containerPort: 8000
              protocol: TCP
          resources:
            {{- toYaml .Values.resources | nindent 12 }}
      volumes:
        - name: tmp
          emptyDir: {}
      {{- with .Values.nodeSelector }}
      nodeSelector:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      {{- with .Values.affinity }}
      affinity:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      {{- with .Values.tolerations }}
      tolerations:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      {{- if .Values.evenly_distributed }}
      # Optional soft constraint (ScheduleAnyway) that spreads the pods
      # across hostnames.
      topologySpreadConstraints:
        - maxSkew: 1
          topologyKey: kubernetes.io/hostname
          whenUnsatisfiable: ScheduleAnyway
          labelSelector:
            matchLabels:
              {{- include "agentqna.selectorLabels" . | nindent 14 }}
              app: {{ .Release.Name }}-crag
      {{- end }}
78+
---
# ClusterIP Service for the crag pods: service port 8080 forwards to
# port 8000 on the pods selected below.
apiVersion: v1
kind: Service
metadata:
  name: {{ .Release.Name }}-crag
  labels:
    {{- include "agentqna.labels" . | nindent 4 }}
spec:
  type: ClusterIP
  ports:
    - port: 8080
      targetPort: 8000
      protocol: TCP
      name: crag
  # Selects pods carrying the chart selector labels plus the
  # release-specific "app" label.
  selector:
    {{- include "agentqna.selectorLabels" . | nindent 4 }}
    app: {{ .Release.Name }}-crag

0 commit comments

Comments
 (0)