From 7933a59712ddabd98c699d57e0ee8cf3ff7eef64 Mon Sep 17 00:00:00 2001 From: Stephanie Date: Wed, 12 Jun 2024 12:02:10 -0400 Subject: [PATCH 1/2] extract model server out Signed-off-by: Stephanie --- .../http/base/deployment-model-server.yaml | 90 +++++++++++++++++++ templates/http/base/deployment.yaml | 69 +------------- templates/http/base/kustomization.yaml | 2 + templates/http/base/service-model-server.yaml | 15 ++++ 4 files changed, 109 insertions(+), 67 deletions(-) create mode 100644 templates/http/base/deployment-model-server.yaml create mode 100644 templates/http/base/service-model-server.yaml diff --git a/templates/http/base/deployment-model-server.yaml b/templates/http/base/deployment-model-server.yaml new file mode 100644 index 0000000..4409cf2 --- /dev/null +++ b/templates/http/base/deployment-model-server.yaml @@ -0,0 +1,90 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + annotations: + tad.gitops.set/image: ".spec.template.spec.containers[0].image" + tad.gitops.get/image: ".spec.template.spec.containers[0].image" + tad.gitops.set/replicas: ".spec.replicas" + tad.gitops.get/replicas: ".spec.replicas" + labels: + app.kubernetes.io/instance: {{values.name}}-model-server + app.kubernetes.io/managed-by: kustomize + app.kubernetes.io/name: {{values.name}}-model-server + app.kubernetes.io/part-of: {{values.name}} + name: {{values.name}}-model-server +spec: + replicas: 1 + selector: + matchLabels: + app.kubernetes.io/instance: {{values.name}}-model-server + template: + metadata: + labels: + app.kubernetes.io/instance: {{values.name}}-model-server + spec: + {%- if values.vllmSelected == nil or not(values.vllmSelected) %} + initContainers: + - name: model-file + image: {{values.initContainer}} + command: ['/usr/bin/install', "/model/model.file", "/shared/"] + volumeMounts: + - name: model-file + mountPath: /shared + {%- endif %} + containers: + {%- if values.vllmSelected %} + - image: {{values.vllmModelServiceContainer}} + args: [ + "--model", + "{{values.vllmModelName}}", + "--port", + "{{values.modelServicePort}}", + "--download-dir", + "/models-cache", + "--max-model-len", + "{{values.maxModelLength}}"] + resources: + limits: + nvidia.com/gpu: '1' + volumeMounts: + - name: dshm + mountPath: /dev/shm + - name: models-cache + mountPath: /models-cache + {%- else %} + - env: + - name: HOST + value: "0.0.0.0" + - name: PORT + value: "{{values.modelServicePort}}" + - name: MODEL_PATH + value: /model/model.file + image: {{values.modelServiceContainer}} + volumeMounts: + - name: model-file + mountPath: /model + {%- endif %} + name: app-model-service + ports: + - containerPort: {{values.modelServicePort}} + securityContext: + runAsNonRoot: true + {%- if values.vllmSelected %} + volumes: + - name: dshm + emptyDir: + medium: Memory + sizeLimit: "2Gi" + - name: models-cache + persistentVolumeClaim: + claimName: {{values.name}} + + tolerations: + - key: nvidia.com/gpu + operator: Exists + effect: NoSchedule + {%- else %} + volumes: + - name: model-file + emptyDir: {} + {%- endif %} diff --git a/templates/http/base/deployment.yaml b/templates/http/base/deployment.yaml index aab6a21..d316e47 100644 --- a/templates/http/base/deployment.yaml +++ b/templates/http/base/deployment.yaml @@ -22,19 +22,10 @@ spec: labels: app.kubernetes.io/instance: {{values.name}} spec: - {%- if values.vllmSelected == nil or not(values.vllmSelected) %} - initContainers: - - name: model-file - image: {{values.initContainer}} - command: ['/usr/bin/install', "/model/model.file", "/shared/"] - volumeMounts: - - name: model-file - mountPath: /shared - {%- endif %} containers: - env: - name: MODEL_ENDPOINT - value: http://0.0.0.0:{{values.modelServicePort}} + value: http://{{values.name}}-model-server:{{values.modelServicePort}} {%- if values.vllmSelected %} - name: MODEL_NAME value: "{{values.vllmModelName}}" @@ -44,60 +35,4 @@ spec: ports: - containerPort: {{values.appPort}} securityContext: - runAsNonRoot: true - {%- if values.vllmSelected %} - - image: {{values.vllmModelServiceContainer}} - args: [ - "--model", - "{{values.vllmModelName}}", - "--port", - "{{values.modelServicePort}}", - "--download-dir", - "/models-cache", - "--max-model-len", - "{{values.maxModelLength}}"] - resources: - limits: - nvidia.com/gpu: '1' - volumeMounts: - - name: dshm - mountPath: /dev/shm - - name: models-cache - mountPath: /models-cache - {%- else %} - - env: - - name: HOST - value: "0.0.0.0" - - name: PORT - value: "{{values.modelServicePort}}" - - name: MODEL_PATH - value: /model/model.file - image: {{values.modelServiceContainer}} - volumeMounts: - - name: model-file - mountPath: /model - {%- endif %} - name: app-model-service - ports: - - containerPort: {{values.modelServicePort}} - securityContext: - runAsNonRoot: true - {%- if values.vllmSelected %} - volumes: - - name: dshm - emptyDir: - medium: Memory - sizeLimit: "2Gi" - - name: models-cache - persistentVolumeClaim: - claimName: {{values.name}} - - tolerations: - - key: nvidia.com/gpu - operator: Exists - effect: NoSchedule - {%- else %} - volumes: - - name: model-file - emptyDir: {} - {%- endif %} + runAsNonRoot: true \ No newline at end of file diff --git a/templates/http/base/kustomization.yaml b/templates/http/base/kustomization.yaml index d226825..95bbaa9 100644 --- a/templates/http/base/kustomization.yaml +++ b/templates/http/base/kustomization.yaml @@ -11,6 +11,8 @@ resources: {%- if values.vllmSelected %} - pvc.yaml {%- endif %} +- deployment-model-server.yaml +- service-model-server.yaml - deployment.yaml - route.yaml - service.yaml diff --git a/templates/http/base/service-model-server.yaml b/templates/http/base/service-model-server.yaml new file mode 100644 index 0000000..8a496eb --- /dev/null +++ b/templates/http/base/service-model-server.yaml @@ -0,0 +1,15 @@ +apiVersion: v1 +kind: Service +metadata: + labels: + app.kubernetes.io/instance: {{values.name}}-model-server + app.kubernetes.io/managed-by: kustomize + app.kubernetes.io/name: {{values.name}}-model-server + name: {{values.name}}-model-server +spec: + ports: + - port: {{values.modelServicePort}} + protocol: TCP + targetPort: {{values.modelServicePort}} + selector: + app.kubernetes.io/instance: {{values.name}}-model-server From b43580d0e941054f0e8e0e22ee400826f57933c6 Mon Sep 17 00:00:00 2001 From: Stephanie Date: Wed, 12 Jun 2024 14:30:23 -0400 Subject: [PATCH 2/2] fix chat format Signed-off-by: Stephanie --- templates/http/base/deployment-model-server.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/templates/http/base/deployment-model-server.yaml b/templates/http/base/deployment-model-server.yaml index 4409cf2..5d01d03 100644 --- a/templates/http/base/deployment-model-server.yaml +++ b/templates/http/base/deployment-model-server.yaml @@ -59,6 +59,8 @@ spec: value: "{{values.modelServicePort}}" - name: MODEL_PATH value: /model/model.file + - name: CHAT_FORMAT + value: openchat image: {{values.modelServiceContainer}} volumeMounts: - name: model-file