Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Add Helm charts to deploy models #27

Merged
merged 7 commits into from
Nov 16, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions helm/.helmignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Directories matched by these patterns are excluded when the chart is packaged.
load_tests/
integration-tests/
docs/
4 changes: 4 additions & 0 deletions helm/Chart.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# Chart metadata for the LoRAX Helm chart.
# `name` feeds the "app.name" helper (combined with the release name, lowercased).
apiVersion: v2
name: LoRAX
# Version of the chart itself.
version: 0.1.0
description: A Helm chart for LoRAX
3 changes: 3 additions & 0 deletions helm/templates/_helpers.tpl
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
{{/*
app.name: resource/label name shared by the chart's templates.

Built as "<chart name>-<release name>", lowercased. The result is used as a
label value (the `app` label), and Kubernetes label values are limited to
63 characters, so the name is truncated to 63 characters and any trailing
"-" left behind by the truncation is removed. Names that already fit are
returned unchanged.
*/}}
{{- define "app.name" -}}
{{- printf "%s-%s" .Chart.Name .Release.Name | lower | trunc 63 | trimSuffix "-" -}}
{{- end -}}
86 changes: 86 additions & 0 deletions helm/templates/deployment.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
{{- /*
Deployment for the LoRAX server.

All tunables are read from `.Values.deployment`. The object name and the
`app` label/selector come from the "app.name" helper so that the Service
selector and the pod labels stay in sync.
*/ -}}
apiVersion: apps/v1
kind: Deployment
metadata:
  labels:
    app: {{ template "app.name" . }}
    {{- with .Values.deployment.additionalLabels }}
    {{- toYaml . | nindent 4 }}
    {{- end }}
  {{- /* Rendered only when set; values.yaml declares this key with an empty default. */}}
  {{- with .Values.deployment.additionalAnnotations }}
  annotations:
    {{- toYaml . | nindent 4 }}
  {{- end }}
  name: {{ template "app.name" . }}
  namespace: {{ .Release.Namespace }}
spec:
  replicas: {{ .Values.deployment.replicas }}
  selector:
    matchLabels:
      app: {{ template "app.name" . }}
  {{- if .Values.deployment.updateStrategy }}
  strategy: {{- toYaml .Values.deployment.updateStrategy | nindent 4 }}
  {{- end }}
  template:
    metadata:
      labels:
        app: {{ template "app.name" . }}
        {{- if .Values.deployment.additionalPodLabels }}
        {{- toYaml .Values.deployment.additionalPodLabels | nindent 8 }}
        {{- end }}
      {{- if .Values.deployment.additionalPodAnnotations }}
      annotations: {{- toYaml .Values.deployment.additionalPodAnnotations | nindent 8 }}
      {{- end }}
    spec:
      {{- if .Values.deployment.affinity }}
      affinity:
        {{- toYaml .Values.deployment.affinity | nindent 8 }}
      {{- end }}
      containers:
      - args:
        # Every argument value is quoted so it always renders as a YAML string.
        - --model-id
        - {{ .Values.deployment.args.modelId | quote }}
        - --max-input-length
        - {{ .Values.deployment.args.maxInputLength | quote }}
        - --max-total-tokens
        - {{ .Values.deployment.args.maxTotalTokens | quote }}
        - --max-batch-total-tokens
        - {{ .Values.deployment.args.maxBatchTotalTokens | quote }}
        - --max-batch-prefill-tokens
        - {{ .Values.deployment.args.maxBatchPrefillTokens | quote }}
        - --sharded
        - {{ .Values.deployment.args.sharded | quote }}
        env:
        # Same port number as the `http` containerPort declared below.
        - name: PORT
          value: "8000"
        - name: HUGGING_FACE_HUB_TOKEN
          value: {{ .Values.deployment.env.huggingFaceHubToken | quote }}
        - name: LORAX_ENABLED_MODEL_TYPES
          value: {{ .Values.deployment.env.loraxEnabledModelTypes | quote }}
        image: {{ .Values.deployment.image.repository }}:{{ .Values.deployment.image.tag }}
        imagePullPolicy: IfNotPresent
        livenessProbe: {{ toYaml .Values.deployment.livenessProbe | nindent 10 }}
        name: lorax
        ports:
        # Named port: the Service's `targetPort: http` and the default probes refer to it.
        - containerPort: 8000
          name: http
          protocol: TCP
        readinessProbe: {{ toYaml .Values.deployment.readinessProbe | nindent 10 }}
        resources: {{ toYaml .Values.deployment.resources | nindent 10 }}
        volumeMounts:
        - mountPath: /data
          name: data
        - mountPath: /dev/shm
          name: shm
      {{- if .Values.deployment.tolerations }}
      tolerations:
      {{- toYaml .Values.deployment.tolerations | nindent 6 }}
      {{- end }}
      nodeSelector: {{ toYaml .Values.deployment.nodeSelector | nindent 8 }}
      restartPolicy: Always
      schedulerName: default-scheduler
      terminationGracePeriodSeconds: 30
      {{- /* The guard must test the same key that is rendered (deployment.priorityClassName). */}}
      {{- if .Values.deployment.priorityClassName }}
      priorityClassName: {{ .Values.deployment.priorityClassName | quote }}
      {{- end }}
      volumes:
      # Memory-backed emptyDir mounted at /dev/shm.
      - emptyDir:
          medium: Memory
        name: shm
      # Explicit empty mapping rather than a null value for the volume source.
      - emptyDir: {}
        name: data
21 changes: 21 additions & 0 deletions helm/templates/service.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
{{- /*
Service exposing the LoRAX pods.

The selector uses the same "app.name" value that the Deployment puts on its
pod template, and `targetPort: http` refers to the container port named
`http` in the Deployment.
*/ -}}
apiVersion: v1
kind: Service
metadata:
  labels:
    app: {{ template "app.name" . }}
    app.kubernetes.io/name: {{ template "app.name" . }}
    {{- /* Trimmed actions so no whitespace-only lines are left in the rendered manifest. */}}
    {{- with .Values.service.additionalLabels }}
    {{- toYaml . | nindent 4 }}
    {{- end }}
  name: {{ .Values.service.name }}
  namespace: {{ .Release.Namespace }}
spec:
  ports:
  - name: http
    port: {{ .Values.service.port }}
    protocol: TCP
    targetPort: http
  selector:
    app: {{ template "app.name" . }}
  # sessionAffinity: None
  type: {{ .Values.service.serviceType }}
74 changes: 74 additions & 0 deletions helm/values.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
# Default values for the LoRAX chart.

# Settings consumed by templates/deployment.yaml.
deployment:
  # Number of pod replicas.
  replicas: 1
  # Rendered as the Deployment `strategy` when non-empty.
  updateStrategy: {}

  # Container image, rendered as "<repository>:<tag>".
  image:
    repository: 'ghcr.io/predibase/lorax'
    tag: 'f76119a'

  # Passed to the container as command-line arguments
  # (--model-id, --max-input-length, --max-total-tokens,
  # --max-batch-total-tokens, --max-batch-prefill-tokens, --sharded).
  args:
    modelId: 'mistralai/Mistral-7B-Instruct-v0.1'
    maxInputLength: 512
    maxTotalTokens: 1024
    maxBatchTotalTokens: 4096
    maxBatchPrefillTokens: 2048
    # NOTE(review): the resources below request a single GPU; confirm that
    # the server accepts sharded mode with one device.
    sharded: true

  # Exposed to the container as environment variables.
  env:
    # Your huggingface hub token. Required for some models such as the llama-2 family.
    huggingFaceHubToken: ''

    # Model types that support dynamic adapter loading
    loraxEnabledModelTypes: 'llama,mistral'

  # Container resource requests and limits (set to the same amounts).
  resources:
    limits:
      cpu: '8'
      ephemeral-storage: 100Gi
      memory: 27041Mi
      nvidia.com/gpu: '1'
    requests:
      cpu: '8'
      ephemeral-storage: 100Gi
      memory: 27041Mi
      nvidia.com/gpu: '1'

  # Both probes issue an HTTP GET to /health on the container port named `http`.
  livenessProbe:
    failureThreshold: 240
    httpGet:
      path: /health
      port: http
      scheme: HTTP
    initialDelaySeconds: 5
    periodSeconds: 5
    successThreshold: 1
    timeoutSeconds: 1

  readinessProbe:
    failureThreshold: 240
    httpGet:
      path: /health
      port: http
      scheme: HTTP
    initialDelaySeconds: 5
    periodSeconds: 5
    successThreshold: 1
    timeoutSeconds: 1

  # Optional scheduling constraints and extra metadata; empty by default.
  nodeSelector: {}
  tolerations: []
  additionalLabels: {}
  additionalPodLabels: {}

  additionalAnnotations: {}
  additionalPodAnnotations: {}
  affinity: {}

  priorityClassName: ''

# Settings consumed by templates/service.yaml.
service:
  # Name of the Service object.
  name: 'lorax'
  # Rendered as the Service `type`.
  serviceType: ClusterIP
  # Port the Service listens on; traffic goes to the container port named `http`.
  port: 80
  additionalLabels: {}