Skip to content

Commit

Permalink
Add Helm charts to deploy models (#27)
Browse files Browse the repository at this point in the history
Co-authored-by: Noah Yoshida <noahcy117@gmail.com>
  • Loading branch information
abidwael and noyoshi committed Nov 16, 2023
1 parent e59993b commit c2a24e1
Show file tree
Hide file tree
Showing 6 changed files with 191 additions and 0 deletions.
3 changes: 3 additions & 0 deletions helm/.helmignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Patterns for files/directories that Helm should leave out when packaging this chart.
# NOTE(review): none of these directories appear among the chart files shown
# with this change — confirm they exist under the chart directory.
docs/
integration-tests/
load_tests/
4 changes: 4 additions & 0 deletions helm/Chart.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# Chart metadata for the LoRAX Helm chart.
# `name` is combined with the release name (and lower-cased) by the
# "app.name" helper in templates/_helpers.tpl.
name: LoRAX
description: A Helm chart for LoRAX
# Chart version (not the version of the application image).
version: 0.1.0
# Chart API version.
apiVersion: v2
3 changes: 3 additions & 0 deletions helm/templates/_helpers.tpl
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
{{/*
app.name: the base name shared by the chart's resources and their `app` label.

Built as "<chart name>-<release name>", lower-cased. The result is used both as
a resource name and as a label value, and Kubernetes limits label values to 63
characters, so the string is truncated to 63 characters and any trailing "-"
left behind by the cut is removed. Names that already fit are unchanged.
*/}}
{{- define "app.name" -}}
{{- printf "%s-%s" .Chart.Name .Release.Name | lower | trunc 63 | trimSuffix "-" -}}
{{- end -}}
86 changes: 86 additions & 0 deletions helm/templates/deployment.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
{{- /*
Deployment that runs the LoRAX server container.

Values read (under .Values.deployment unless noted): replicas, updateStrategy,
image.repository, image.tag, args.*, env.*, resources, livenessProbe,
readinessProbe, nodeSelector, tolerations, affinity, additionalLabels,
additionalAnnotations, additionalPodLabels, additionalPodAnnotations and
priorityClassName.

priorityClassName: the guard and the rendered value previously looked at two
different keys (top-level vs. deployment-scoped), so setting only one of them
either rendered nothing or rendered an empty string. The value is now resolved
once, preferring the deployment-scoped key and falling back to the top-level
key, and the same resolved value is used for both the guard and the output.
*/ -}}
{{- $priorityClassName := .Values.deployment.priorityClassName | default .Values.priorityClassName -}}
apiVersion: apps/v1
kind: Deployment
metadata:
  labels:
    app: {{ template "app.name" . }}
    {{- with .Values.deployment.additionalLabels }}
    {{- toYaml . | nindent 4 }}
    {{- end }}
  {{- with .Values.deployment.additionalAnnotations }}
  annotations:
    {{- toYaml . | nindent 4 }}
  {{- end }}
  name: {{ template "app.name" . }}
  namespace: {{ .Release.Namespace }}
spec:
  replicas: {{ .Values.deployment.replicas }}
  selector:
    matchLabels:
      app: {{ template "app.name" . }}
  {{- with .Values.deployment.updateStrategy }}
  strategy:
    {{- toYaml . | nindent 4 }}
  {{- end }}
  template:
    metadata:
      labels:
        app: {{ template "app.name" . }}
        {{- with .Values.deployment.additionalPodLabels }}
        {{- toYaml . | nindent 8 }}
        {{- end }}
      {{- with .Values.deployment.additionalPodAnnotations }}
      annotations:
        {{- toYaml . | nindent 8 }}
      {{- end }}
    spec:
      {{- with .Values.deployment.affinity }}
      affinity:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      containers:
      - name: lorax
        image: "{{ .Values.deployment.image.repository }}:{{ .Values.deployment.image.tag }}"
        imagePullPolicy: IfNotPresent
        # Every argument value is quoted so it always reaches the container as
        # a string, even when the configured value looks like a number or bool.
        args:
        - --model-id
        - {{ .Values.deployment.args.modelId | quote }}
        - --max-input-length
        - {{ .Values.deployment.args.maxInputLength | quote }}
        - --max-total-tokens
        - {{ .Values.deployment.args.maxTotalTokens | quote }}
        - --max-batch-total-tokens
        - {{ .Values.deployment.args.maxBatchTotalTokens | quote }}
        - --max-batch-prefill-tokens
        - {{ .Values.deployment.args.maxBatchPrefillTokens | quote }}
        - --sharded
        - {{ .Values.deployment.args.sharded | quote }}
        env:
        # Must stay in sync with the containerPort below.
        - name: PORT
          value: "8000"
        # NOTE(review): the token is written in plain text into the Deployment
        # manifest; consider sourcing it from a Secret via valueFrom.secretKeyRef.
        - name: HUGGING_FACE_HUB_TOKEN
          value: {{ .Values.deployment.env.huggingFaceHubToken | quote }}
        - name: LORAX_ENABLED_MODEL_TYPES
          value: {{ .Values.deployment.env.loraxEnabledModelTypes | quote }}
        ports:
        - containerPort: 8000
          name: http
          protocol: TCP
        livenessProbe:
          {{- toYaml .Values.deployment.livenessProbe | nindent 10 }}
        readinessProbe:
          {{- toYaml .Values.deployment.readinessProbe | nindent 10 }}
        resources:
          {{- toYaml .Values.deployment.resources | nindent 10 }}
        volumeMounts:
        - mountPath: /data
          name: data
        - mountPath: /dev/shm
          name: shm
      {{- with .Values.deployment.tolerations }}
      tolerations:
        {{- toYaml . | nindent 6 }}
      {{- end }}
      {{- with .Values.deployment.nodeSelector }}
      nodeSelector:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      restartPolicy: Always
      schedulerName: default-scheduler
      terminationGracePeriodSeconds: 30
      {{- if $priorityClassName }}
      priorityClassName: {{ $priorityClassName | quote }}
      {{- end }}
      volumes:
      # Memory-backed emptyDir mounted at /dev/shm.
      - name: shm
        emptyDir:
          medium: Memory
      # Explicit empty mapping: a bare `emptyDir:` parses as null and only
      # works through API-server defaulting.
      - name: data
        emptyDir: {}
21 changes: 21 additions & 0 deletions helm/templates/service.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
{{- /*
Service that exposes the LoRAX Deployment's "http" container port.

Values read (under .Values.service): name, port, serviceType, additionalLabels.
If service.name is empty the Service falls back to the shared "app.name" helper
instead of rendering an empty (invalid) name. The selector matches the `app`
label that the Deployment puts on its pods.
*/ -}}
apiVersion: v1
kind: Service
metadata:
  labels:
    app: {{ template "app.name" . }}
    app.kubernetes.io/name: {{ template "app.name" . }}
    {{- with .Values.service.additionalLabels }}
    {{- toYaml . | nindent 4 }}
    {{- end }}
  name: {{ .Values.service.name | default (include "app.name" .) }}
  namespace: {{ .Release.Namespace }}
spec:
  ports:
  - name: http
    port: {{ .Values.service.port }}
    protocol: TCP
    # Refers to the container port by its name rather than its number.
    targetPort: http
  selector:
    app: {{ template "app.name" . }}
  # sessionAffinity: None
  type: {{ .Values.service.serviceType }}
74 changes: 74 additions & 0 deletions helm/values.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
# Default values for the LoRAX chart.
# `deployment.*` feeds templates/deployment.yaml; `service.*` feeds
# templates/service.yaml.
deployment:
  replicas: 1
  updateStrategy: {}

  # Container image, rendered as "<repository>:<tag>".
  image:
    repository: "ghcr.io/predibase/lorax"
    tag: "f76119a"

  # Each entry is passed to the container as a command-line flag
  # (--model-id, --max-input-length, --max-total-tokens,
  # --max-batch-total-tokens, --max-batch-prefill-tokens, --sharded).
  args:
    modelId: "mistralai/Mistral-7B-Instruct-v0.1"
    maxInputLength: 512
    maxTotalTokens: 1024
    maxBatchTotalTokens: 4096
    maxBatchPrefillTokens: 2048
    sharded: true

  env:
    # Hugging Face Hub token (exported as HUGGING_FACE_HUB_TOKEN).
    # Required for some models, such as the llama-2 family.
    huggingFaceHubToken: ""

    # Model types that support dynamic adapter loading
    # (exported as LORAX_ENABLED_MODEL_TYPES).
    loraxEnabledModelTypes: "llama,mistral"

  # Requests and limits are set to the same amounts.
  resources:
    limits:
      cpu: "8"
      ephemeral-storage: 100Gi
      memory: 27041Mi
      nvidia.com/gpu: "1"
    requests:
      cpu: "8"
      ephemeral-storage: 100Gi
      memory: 27041Mi
      nvidia.com/gpu: "1"

  # Both probes call GET /health on the container's "http" port.
  livenessProbe:
    failureThreshold: 240
    httpGet:
      path: /health
      port: http
      scheme: HTTP
    initialDelaySeconds: 5
    periodSeconds: 5
    successThreshold: 1
    timeoutSeconds: 1

  readinessProbe:
    failureThreshold: 240
    httpGet:
      path: /health
      port: http
      scheme: HTTP
    initialDelaySeconds: 5
    periodSeconds: 5
    successThreshold: 1
    timeoutSeconds: 1

  # Scheduling controls and extra metadata; empty values render nothing extra.
  nodeSelector: {}
  tolerations: []
  additionalLabels: {}
  additionalPodLabels: {}

  additionalAnnotations: {}
  additionalPodAnnotations: {}
  affinity: {}

  # Pod priority class name (read by the deployment template as
  # deployment.priorityClassName).
  priorityClassName: ""

service:
  name: "lorax"
  serviceType: ClusterIP
  port: 80
  additionalLabels: {}

0 comments on commit c2a24e1

Please sign in to comment.