Add Helm charts to deploy models (#27)

Co-authored-by: Noah Yoshida <noahcy117@gmail.com>
predibase · Nov 16, 2023 · c2a24e1 · c2a24e1
1 parent e59993b
commit c2a24e1
Show file tree

Hide file tree

Showing 6 changed files with 191 additions and 0 deletions.
diff --git a/helm/.helmignore b/helm/.helmignore
@@ -0,0 +1,4 @@
+# Directories Helm should skip when packaging this chart.
+integration-tests/
+load_tests/
+docs/
diff --git a/helm/Chart.yaml b/helm/Chart.yaml
@@ -0,0 +1,6 @@
+# Metadata for the LoRAX Helm chart.
+apiVersion: v2
+name: LoRAX
+# Version of the chart itself.
+version: "0.1.0"
+description: "A Helm chart for LoRAX"
diff --git a/helm/templates/_helpers.tpl b/helm/templates/_helpers.tpl
@@ -0,0 +1,9 @@
+{{/*
+app.name: shared resource name and `app` label value, built as
+"<chart name>-<release name>" and lower-cased. It is truncated to 63
+characters, the maximum length of a Kubernetes label value, and a trailing
+"-" left by the cut is dropped so the result stays a valid label value.
+*/}}
+{{- define "app.name" -}}
+{{- printf "%s-%s" .Chart.Name .Release.Name | lower | trunc 63 | trimSuffix "-" -}}
+{{- end -}}
diff --git a/helm/templates/deployment.yaml b/helm/templates/deployment.yaml
@@ -0,0 +1,102 @@
+{{- /*
+Deployment running the LoRAX server as a single `lorax` container on port
+8000, configured from the `deployment.*` values. /data is backed by an
+emptyDir and /dev/shm by a memory-backed emptyDir.
+*/ -}}
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  labels:
+    app: {{ template "app.name" . }}
+    {{- with .Values.deployment.additionalLabels }}
+    {{- toYaml . | nindent 4 }}
+    {{- end }}
+  {{- with .Values.deployment.additionalAnnotations }}
+  annotations:
+    {{- toYaml . | nindent 4 }}
+  {{- end }}
+  name: {{ template "app.name" . }}
+  namespace: {{ .Release.Namespace }}
+spec:
+  replicas: {{ .Values.deployment.replicas }}
+  selector:
+    matchLabels:
+      app: {{ template "app.name" . }}
+  {{- with .Values.deployment.updateStrategy }}
+  strategy:
+    {{- toYaml . | nindent 4 }}
+  {{- end }}
+  template:
+    metadata:
+      labels:
+        app: {{ template "app.name" . }}
+        {{- with .Values.deployment.additionalPodLabels }}
+        {{- toYaml . | nindent 8 }}
+        {{- end }}
+      {{- with .Values.deployment.additionalPodAnnotations }}
+      annotations:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+    spec:
+      {{- with .Values.deployment.affinity }}
+      affinity:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      containers:
+      - args:
+        - --model-id
+        - {{ .Values.deployment.args.modelId | quote }}
+        - --max-input-length
+        - {{ .Values.deployment.args.maxInputLength | quote }}
+        - --max-total-tokens
+        - {{ .Values.deployment.args.maxTotalTokens | quote }}
+        - --max-batch-total-tokens
+        - {{ .Values.deployment.args.maxBatchTotalTokens | quote }}
+        - --max-batch-prefill-tokens
+        - {{ .Values.deployment.args.maxBatchPrefillTokens | quote }}
+        - --sharded
+        - {{ .Values.deployment.args.sharded | quote }}
+        env:
+        - name: PORT
+          value: "8000"
+        {{- /* NOTE(review): the token is rendered in plain text into the pod spec; consider sourcing it from a Secret. */}}
+        - name: HUGGING_FACE_HUB_TOKEN
+          value: {{ .Values.deployment.env.huggingFaceHubToken | quote }}
+        - name: LORAX_ENABLED_MODEL_TYPES
+          value: {{ .Values.deployment.env.loraxEnabledModelTypes | quote }}
+        image: "{{ .Values.deployment.image.repository }}:{{ .Values.deployment.image.tag }}"
+        imagePullPolicy: IfNotPresent
+        livenessProbe: {{- toYaml .Values.deployment.livenessProbe | nindent 10 }}
+        name: lorax
+        ports:
+        - containerPort: 8000
+          name: http
+          protocol: TCP
+        readinessProbe: {{- toYaml .Values.deployment.readinessProbe | nindent 10 }}
+        resources: {{- toYaml .Values.deployment.resources | nindent 10 }}
+        volumeMounts:
+        - mountPath: /data
+          name: data
+        - mountPath: /dev/shm
+          name: shm
+      {{- with .Values.deployment.tolerations }}
+      tolerations:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      {{- with .Values.deployment.nodeSelector }}
+      nodeSelector:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      restartPolicy: Always
+      schedulerName: default-scheduler
+      terminationGracePeriodSeconds: 30
+      {{- /* Read the same key that is rendered, so a configured class is not silently dropped. */}}
+      {{- if .Values.deployment.priorityClassName }}
+      priorityClassName: {{ .Values.deployment.priorityClassName | quote }}
+      {{- end }}
+      volumes:
+      - emptyDir:
+          medium: Memory
+        name: shm
+      - emptyDir: {}
+        name: data
diff --git a/helm/templates/service.yaml b/helm/templates/service.yaml
@@ -0,0 +1,25 @@
+{{- /*
+Service exposing the LoRAX pods: forwards `service.port` to the container
+port named `http` on pods carrying the `app` label rendered by "app.name".
+*/ -}}
+apiVersion: v1
+kind: Service
+metadata:
+  labels:
+    app: {{ template "app.name" . }}
+    app.kubernetes.io/name: {{ template "app.name" . }}
+    {{- with .Values.service.additionalLabels }}
+    {{- toYaml . | nindent 4 }}
+    {{- end }}
+  name: {{ .Values.service.name }}
+  namespace: {{ .Release.Namespace }}
+spec:
+  ports:
+  - name: http
+    port: {{ .Values.service.port }}
+    protocol: TCP
+    targetPort: http
+  selector:
+    app: {{ template "app.name" . }}
+  # sessionAffinity: None
+  type: {{ .Values.service.serviceType }}
diff --git a/helm/values.yaml b/helm/values.yaml
@@ -0,0 +1,92 @@
+# Default values for the LoRAX chart.
+deployment:
+  # Number of pod replicas.
+  replicas: 1
+  # Rendered as the Deployment `strategy` when non-empty.
+  updateStrategy: {}
+
+  # Container image, rendered as "<repository>:<tag>".
+  image:
+    repository: "ghcr.io/predibase/lorax"
+    tag: "f76119a"
+
+  # Passed to the container as --model-id, --max-input-length,
+  # --max-total-tokens, --max-batch-total-tokens,
+  # --max-batch-prefill-tokens and --sharded respectively.
+  args:
+    modelId: "mistralai/Mistral-7B-Instruct-v0.1"
+    maxInputLength: 512
+    maxTotalTokens: 1024
+    maxBatchTotalTokens: 4096
+    maxBatchPrefillTokens: 2048
+    # NOTE(review): sharding is on while `resources` below requests a single
+    # GPU; confirm this combination is intended.
+    sharded: true
+
+  # Exposed to the container as environment variables.
+  env:
+    # Your Hugging Face Hub token (HUGGING_FACE_HUB_TOKEN). Required for some
+    # models such as the llama-2 family.
+    huggingFaceHubToken: ""
+
+    # Model types that support dynamic adapter loading
+    # (LORAX_ENABLED_MODEL_TYPES).
+    loraxEnabledModelTypes: "llama,mistral"
+
+  # Container resources; requests equal limits.
+  resources:
+    limits:
+      cpu: "8"
+      ephemeral-storage: 100Gi
+      memory: 27041Mi
+      nvidia.com/gpu: "1"
+    requests:
+      cpu: "8"
+      ephemeral-storage: 100Gi
+      memory: 27041Mi
+      nvidia.com/gpu: "1"
+
+  # Both probes poll GET /health on the `http` port every 5s and tolerate
+  # 240 consecutive failures.
+  livenessProbe:
+    failureThreshold: 240
+    httpGet:
+      path: /health
+      port: http
+      scheme: HTTP
+    initialDelaySeconds: 5
+    periodSeconds: 5
+    successThreshold: 1
+    timeoutSeconds: 1
+
+  readinessProbe:
+    failureThreshold: 240
+    httpGet:
+      path: /health
+      port: http
+      scheme: HTTP
+    initialDelaySeconds: 5
+    periodSeconds: 5
+    successThreshold: 1
+    timeoutSeconds: 1
+
+  # Optional scheduling constraints and extra metadata; empty by default.
+  nodeSelector: {}
+  tolerations: []
+  additionalLabels: {}
+  additionalPodLabels: {}
+
+  additionalAnnotations: {}
+  additionalPodAnnotations: {}
+  affinity: {}
+
+  priorityClassName: ""
+
+service:
+  # Name of the Service object.
+  name: "lorax"
+  serviceType: ClusterIP
+  # Port the Service listens on; traffic goes to the container port named `http`.
+  port: 80
+  additionalLabels: {}
+