From 5704411802fad205809ef7d1c15f69f7c74ddd25 Mon Sep 17 00:00:00 2001 From: Stephanie Date: Mon, 3 Jun 2024 11:07:45 -0400 Subject: [PATCH 1/5] first commit with some vllm definitions Signed-off-by: Stephanie --- templates/http/base/deployment.yaml | 47 ++++++++++++++++++++++++-- templates/http/base/kustomization.yaml | 3 ++ templates/http/base/pvc.yaml | 12 +++++++ 3 files changed, 59 insertions(+), 3 deletions(-) create mode 100644 templates/http/base/pvc.yaml diff --git a/templates/http/base/deployment.yaml b/templates/http/base/deployment.yaml index 87fa735..bba75ab 100644 --- a/templates/http/base/deployment.yaml +++ b/templates/http/base/deployment.yaml @@ -22,6 +22,7 @@ spec: labels: app.kubernetes.io/instance: {{values.name}} spec: + {%- if !values.vllmSelected %} initContainers: - name: model-file image: {{values.initContainer}} @@ -29,6 +30,7 @@ spec: volumeMounts: - name: model-file mountPath: /shared + {%- endif %} containers: - env: - name: MODEL_ENDPOINT @@ -39,6 +41,28 @@ spec: - containerPort: {{values.appPort}} securityContext: runAsNonRoot: true + {%- if values.vllmSelected %} + - image: {{values.vllmModelServiceContainer}} + args: [ + "--model", + "{{values.vllmModelName}}", + "--port", + "{{values.modelServicePort}}", + "--download-dir", + "/models-cache"] + resources: + limits: + cpu: '8' + memory: 24Gi + nvidia.com/gpu: '1' + requests: + cpu: '6' + volumeMounts: + - name: dshm + mountPath: /dev/shm + - name: models-cache + mountPath: /models-cache + {%- else %} - env: - name: HOST value: "0.0.0.0" @@ -47,14 +71,31 @@ spec: - name: MODEL_PATH value: /model/model.file image: {{values.modelServiceContainer}} + volumeMounts: + - name: model-file + mountPath: /model + {%- endif %} name: app-model-service ports: - containerPort: {{values.modelServicePort}} securityContext: runAsNonRoot: true - volumeMounts: - - name: model-file - mountPath: /model + {%- if values.vllmSelected %} + volumes: + - name: dshm + emptyDir: + medium: Memory + sizeLimit: 
"2Gi" + - name: models-cache + persistentVolumeClaim: + claimName: vllm-models-cache + + tolerations: + - key: nvidia.com/gpu + operator: Exists + effect: NoSchedule + {%- else %} volumes: - name: model-file emptyDir: {} + {%- endif %} diff --git a/templates/http/base/kustomization.yaml b/templates/http/base/kustomization.yaml index 407ab41..d226825 100644 --- a/templates/http/base/kustomization.yaml +++ b/templates/http/base/kustomization.yaml @@ -8,6 +8,9 @@ commonLabels: app.kubernetes.io/part-of: {{values.name}} resources: - initialize-namespace.yaml +{%- if values.vllmSelected %} +- pvc.yaml +{%- endif %} - deployment.yaml - route.yaml - service.yaml diff --git a/templates/http/base/pvc.yaml b/templates/http/base/pvc.yaml new file mode 100644 index 0000000..f06aac0 --- /dev/null +++ b/templates/http/base/pvc.yaml @@ -0,0 +1,12 @@ +--- +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: vllm-models-cache +spec: + accessModes: + - ReadWriteOnce + volumeMode: Filesystem + resources: + requests: + storage: 100Gi \ No newline at end of file From d40584466eb9f0ca7cae29200f73ed49c429c85e Mon Sep 17 00:00:00 2001 From: Stephanie Date: Mon, 3 Jun 2024 17:34:32 -0400 Subject: [PATCH 2/5] add an ENV and also update the condition Signed-off-by: Stephanie --- templates/http/base/deployment.yaml | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/templates/http/base/deployment.yaml b/templates/http/base/deployment.yaml index bba75ab..3de6057 100644 --- a/templates/http/base/deployment.yaml +++ b/templates/http/base/deployment.yaml @@ -22,7 +22,7 @@ spec: labels: app.kubernetes.io/instance: {{values.name}} spec: - {%- if !values.vllmSelected %} + {%- if values.vllmSelected == "false" or values.vllmSelected == nil %} initContainers: - name: model-file image: {{values.initContainer}} @@ -42,7 +42,10 @@ spec: securityContext: runAsNonRoot: true {%- if values.vllmSelected %} - - image: {{values.vllmModelServiceContainer}} + - env: + - name: 
MODEL_NAME + value: "{{values.vllmModelName}}" + image: {{values.vllmModelServiceContainer}} args: [ "--model", "{{values.vllmModelName}}", @@ -52,11 +55,7 @@ spec: "/models-cache"] resources: limits: - cpu: '8' - memory: 24Gi nvidia.com/gpu: '1' - requests: - cpu: '6' volumeMounts: - name: dshm mountPath: /dev/shm From 41f6534e603aafcfe17e481e65af731908f04d0a Mon Sep 17 00:00:00 2001 From: Stephanie Date: Mon, 3 Jun 2024 17:37:51 -0400 Subject: [PATCH 3/5] update condition Signed-off-by: Stephanie --- templates/http/base/deployment.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/templates/http/base/deployment.yaml b/templates/http/base/deployment.yaml index 3de6057..07c8f18 100644 --- a/templates/http/base/deployment.yaml +++ b/templates/http/base/deployment.yaml @@ -22,7 +22,7 @@ spec: labels: app.kubernetes.io/instance: {{values.name}} spec: - {%- if values.vllmSelected == "false" or values.vllmSelected == nil %} + {%- if values.vllmSelected == false or values.vllmSelected == nil %} initContainers: - name: model-file image: {{values.initContainer}} From 3a0f19a1b5c326725a127aa45dc83ad7ef3241c2 Mon Sep 17 00:00:00 2001 From: Stephanie Date: Mon, 3 Jun 2024 17:43:52 -0400 Subject: [PATCH 4/5] condition worked Signed-off-by: Stephanie --- templates/http/base/deployment.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/templates/http/base/deployment.yaml b/templates/http/base/deployment.yaml index 07c8f18..3d40ed1 100644 --- a/templates/http/base/deployment.yaml +++ b/templates/http/base/deployment.yaml @@ -22,7 +22,7 @@ spec: labels: app.kubernetes.io/instance: {{values.name}} spec: - {%- if values.vllmSelected == false or values.vllmSelected == nil %} + {%- if values.vllmSelected == nil or not(values.vllmSelected) %} initContainers: - name: model-file image: {{values.initContainer}} From ddf04dbf01116eecf80d13072a66c7e9a0e05260 Mon Sep 17 00:00:00 2001 From: Stephanie Date: Mon, 3 Jun 2024 17:57:13 -0400 Subject: [PATCH
5/5] move the env to right place Signed-off-by: Stephanie --- templates/http/base/deployment.yaml | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/templates/http/base/deployment.yaml b/templates/http/base/deployment.yaml index 3d40ed1..5c94a43 100644 --- a/templates/http/base/deployment.yaml +++ b/templates/http/base/deployment.yaml @@ -35,6 +35,10 @@ spec: - env: - name: MODEL_ENDPOINT value: http://0.0.0.0:{{values.modelServicePort}} + {%- if values.vllmSelected %} + - name: MODEL_NAME + value: "{{values.vllmModelName}}" + {%- endif %} image: {{values.appContainer}} name: app-inference ports: @@ -42,10 +46,7 @@ spec: securityContext: runAsNonRoot: true {%- if values.vllmSelected %} - - env: - - name: MODEL_NAME - value: "{{values.vllmModelName}}" - image: {{values.vllmModelServiceContainer}} + - image: {{values.vllmModelServiceContainer}} args: [ "--model", "{{values.vllmModelName}}",