Add Helm charts to deploy models #27

Merged · 7 commits · Nov 16, 2023
Changes from 2 commits
3 changes: 3 additions & 0 deletions helm/.helmignore
@@ -0,0 +1,3 @@
docs/
integration-tests/
load_tests/
4 changes: 4 additions & 0 deletions helm/Chart.yaml
@@ -0,0 +1,4 @@
apiVersion: v2
name: lorax  # Helm chart names should be lowercase
description: A Helm chart for LoRAX
version: 0.1.0
Empty file added helm/Chart.yaml~
3 changes: 3 additions & 0 deletions helm/templates/_helpers.tpl
@@ -0,0 +1,3 @@
{{- define "llmDeployment.name" -}}
{{ .Values.llmDeployment.name }}
{{- end -}}
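
With the shipped values.yaml, this helper resolves to the configured deployment name, so every manifest that calls it gets the same string. A minimal sketch of the expansion, assuming the default values below:

{{ template "llmDeployment.name" . }}   # renders to: llm-deployment-example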
112 changes: 112 additions & 0 deletions helm/templates/deployment.yaml
@@ -0,0 +1,112 @@
apiVersion: apps/v1
kind: Deployment
metadata:
  labels:
    app: {{ template "llmDeployment.name" . }}
    {{- if .Values.llmDeployment.k8sDeploymentObject.metadata.additionalLabels }}
    {{- toYaml .Values.llmDeployment.k8sDeploymentObject.metadata.additionalLabels | nindent 4 }}
    {{- end }}
  name: {{ template "llmDeployment.name" . }}
  namespace: {{ .Release.Namespace }}
spec:
  progressDeadlineSeconds: 600
  replicas: {{ .Values.llmDeployment.replicas }}
  revisionHistoryLimit: 10
  selector:
    matchLabels:
      app: {{ template "llmDeployment.name" . }}
  strategy:
    type: {{ .Values.llmDeployment.updateStrategy }}
  template:
    metadata:
      labels:
        app: {{ template "llmDeployment.name" . }}
        {{- if .Values.llmDeployment.k8sDeploymentObject.spec.template.metadata.additionalLabels }}
        {{- toYaml .Values.llmDeployment.k8sDeploymentObject.spec.template.metadata.additionalLabels | nindent 8 }}
        {{- end }}
      {{- if .Values.llmDeployment.k8sDeploymentObject.spec.template.metadata.additionalAnnotations }}
      annotations: {{- toYaml .Values.llmDeployment.k8sDeploymentObject.spec.template.metadata.additionalAnnotations | nindent 8 }}
      {{- end }}
    spec:
      {{- if .Values.llmDeployment.k8sDeploymentObject.spec.template.spec.affinity }}
      affinity:
        {{- toYaml .Values.llmDeployment.k8sDeploymentObject.spec.template.spec.affinity | nindent 8 }}
      {{- end }}
      containers:
      - args:
        - --model-id
        - {{ .Values.llmDeployment.args.modelId }}
        - --max-input-length
        - {{ .Values.llmDeployment.args.maxInputLength | quote }}
        - --max-total-tokens
        - {{ .Values.llmDeployment.args.maxTotalTokens | quote }}
        - --max-batch-total-tokens
        - {{ .Values.llmDeployment.args.maxBatchTotalTokens | quote }}
        - --max-batch-prefill-tokens
        - {{ .Values.llmDeployment.args.maxBatchPrefillTokens | quote }}
        - --sharded
        - {{ .Values.llmDeployment.args.sharded | quote }}
        env:
        - name: PORT
          value: "8000"
        - name: HUGGING_FACE_HUB_TOKEN
          value: {{ .Values.llmDeployment.env.huggingFaceHubToken | quote }}
        - name: LORAX_ENABLED_MODEL_TYPES
          value: {{ .Values.llmDeployment.env.loraxEnabledModelTypes | quote }}
        image: {{ .Values.llmDeployment.image.repository }}:{{ .Values.llmDeployment.image.tag }}
        imagePullPolicy: IfNotPresent
        livenessProbe:
          failureThreshold: 240  # with periodSeconds: 5, tolerates up to ~20 minutes before the probe gives up
          httpGet:
            path: /health
            port: http
            scheme: HTTP
          initialDelaySeconds: 5
          periodSeconds: 5
          successThreshold: 1
          timeoutSeconds: 1
        name: lorax
        ports:
        - containerPort: 8000
          name: http
          protocol: TCP
        readinessProbe:
          failureThreshold: 240
          httpGet:
            path: /health
            port: http
            scheme: HTTP
          initialDelaySeconds: 5
          periodSeconds: 5
          successThreshold: 1
          timeoutSeconds: 1
        resources: {{- toYaml .Values.resources | nindent 10 }}
        terminationMessagePath: /dev/termination-log
        terminationMessagePolicy: File
        volumeMounts:
        - mountPath: /data
          name: data
        - mountPath: /dev/shm
          name: shm
      dnsPolicy: ClusterFirst
      enableServiceLinks: false
      nodeSelector: {{- toYaml .Values.llmDeployment.nodeSelector | nindent 8 }}
      {{- if .Values.llmDeployment.k8sDeploymentObject.spec.template.spec.tolerations }}
      tolerations:
        {{- toYaml .Values.llmDeployment.k8sDeploymentObject.spec.template.spec.tolerations | nindent 8 }}
      {{- end }}
      restartPolicy: Always
      schedulerName: default-scheduler
      securityContext: {}
      terminationGracePeriodSeconds: 30
      volumes:
      - emptyDir:
          medium: Memory  # in-memory emptyDir backing /dev/shm
        name: shm
      - emptyDir: {}
        name: data
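
The rendered output can be checked before anything touches a cluster. A hedged sketch, assuming the chart directory is ./helm and using a hypothetical release name lorax (helm template and kubectl's client-side dry run are stock tooling, not part of this chart):

helm template lorax ./helm > rendered.yaml         # render the manifests locally
kubectl apply --dry-run=client -f rendered.yaml    # schema-check without creating anything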
26 changes: 26 additions & 0 deletions helm/templates/service.yaml
@@ -0,0 +1,26 @@
apiVersion: v1
kind: Service
metadata:
  labels:
    app: {{ template "llmDeployment.name" . }}
    app.kubernetes.io/name: {{ template "llmDeployment.name" . }}
    {{- if .Values.llmService.additionalLabels }}
    {{- toYaml .Values.llmService.additionalLabels | nindent 4 }}
    {{- end }}
  name: {{ template "llmDeployment.name" . }}
  namespace: {{ .Release.Namespace }}
spec:
  internalTrafficPolicy: Cluster
  ipFamilies:
  - IPv4
  ipFamilyPolicy: SingleStack
  ports:
  - name: http
    port: 80
    protocol: TCP
    targetPort: http
  selector:
    app: {{ template "llmDeployment.name" . }}
  sessionAffinity: None
  type: {{ .Values.llmService.serviceType }}
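
Because the Service maps port 80 to the container's named http port (8000), the /health endpoint already used by the probes doubles as a quick smoke test. A sketch assuming the default deployment name from values.yaml:

kubectl port-forward svc/llm-deployment-example 8080:80   # tunnel the Service to localhost
curl http://localhost:8080/health                         # same endpoint the probes hit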
43 changes: 43 additions & 0 deletions helm/values.yaml
@@ -0,0 +1,43 @@
llmDeployment:
  name: "llm-deployment-example"
  namespace: "default"
  replicas: 1
  updateStrategy: RollingUpdate

  # Optional passthrough sections read by templates/deployment.yaml. The empty
  # maps keep the template conditionals nil-safe; fill them in as needed, e.g.:
  #   metadata:
  #     additionalLabels:
  #       some-label: your-deployment-label
  k8sDeploymentObject:
    metadata: {}
    spec:
      template:
        metadata: {}
        spec: {}

  image:
    repository: "ghcr.io/predibase/lorax"
    tag: "f76119a"
  args:
    modelId: "model-id-from-huggingface"
    maxInputLength: 512
    maxTotalTokens: 1024
    maxBatchTotalTokens: 4096
    maxBatchPrefillTokens: 2048
    sharded: true
  env:
    huggingFaceHubToken: "your-hub-token"
    loraxEnabledModelTypes: "llama,mistral"
  nodeSelector:
    node.kubernetes.io/instance-type: g5.2xlarge  # AWS-specific instance type; adjust for your cluster

resources:
  limits:
    cpu: "8"
    ephemeral-storage: 100Gi
    memory: 27041Mi
    nvidia.com/gpu: "1"
  requests:
    cpu: "8"
    ephemeral-storage: 100Gi
    memory: 27041Mi
    nvidia.com/gpu: "1"

llmService:
  serviceType: LoadBalancer
  # additionalLabels:
  #   some-label: your-service-label
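
A hedged install sketch, assuming the chart lives in ./helm; the release name my-lorax and the Mistral model id are placeholders for whatever you deploy, and the --set flags override the values defined above:

helm install my-lorax ./helm \
  --set llmDeployment.args.modelId=mistralai/Mistral-7B-Instruct-v0.1 \
  --set llmDeployment.env.huggingFaceHubToken=$HUGGING_FACE_HUB_TOKEN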