From 5704411802fad205809ef7d1c15f69f7c74ddd25 Mon Sep 17 00:00:00 2001
From: Stephanie <yangcao@redhat.com>
Date: Mon, 3 Jun 2024 11:07:45 -0400
Subject: [PATCH 1/5] first commit with some vllm definitions

Signed-off-by: Stephanie <yangcao@redhat.com>
---
 templates/http/base/deployment.yaml    | 47 ++++++++++++++++++++++++--
 templates/http/base/kustomization.yaml |  3 ++
 templates/http/base/pvc.yaml           | 12 +++++++
 3 files changed, 59 insertions(+), 3 deletions(-)
 create mode 100644 templates/http/base/pvc.yaml

diff --git a/templates/http/base/deployment.yaml b/templates/http/base/deployment.yaml
index 87fa735..bba75ab 100644
--- a/templates/http/base/deployment.yaml
+++ b/templates/http/base/deployment.yaml
@@ -22,6 +22,7 @@ spec:
       labels:
         app.kubernetes.io/instance:  {{values.name}}
     spec:
+      {%- if !values.vllmSelected %}
       initContainers:
       - name: model-file
         image: {{values.initContainer}}
@@ -29,6 +30,7 @@ spec:
         volumeMounts:
         - name: model-file
           mountPath: /shared
+      {%- endif %}
       containers:
       - env:
         - name: MODEL_ENDPOINT
@@ -39,6 +41,28 @@ spec:
         - containerPort: {{values.appPort}}
         securityContext:
           runAsNonRoot: true
+      {%- if values.vllmSelected %}
+      - image: {{values.vllmModelServiceContainer}}
+        args: [
+            "--model",
+            "{{values.vllmModelName}}",
+            "--port",
+            "{{values.modelServicePort}}",
+            "--download-dir",
+            "/models-cache"]
+        resources:
+          limits:
+            cpu: '8'
+            memory: 24Gi
+            nvidia.com/gpu: '1'
+          requests:
+            cpu: '6'
+        volumeMounts:
+        - name: dshm
+          mountPath: /dev/shm
+        - name: models-cache
+          mountPath: /models-cache
+      {%- else %}
       - env:
         - name: HOST
           value: "0.0.0.0"
@@ -47,14 +71,31 @@ spec:
         - name: MODEL_PATH
           value: /model/model.file
         image: {{values.modelServiceContainer}}
+        volumeMounts:
+        - name: model-file
+          mountPath: /model
+      {%- endif %}
         name: app-model-service
         ports:
         - containerPort: {{values.modelServicePort}}
         securityContext:
           runAsNonRoot: true
-        volumeMounts:
-        - name: model-file
-          mountPath: /model
+      {%- if values.vllmSelected %}
+      volumes:
+      - name: dshm
+        emptyDir:
+          medium: Memory
+          sizeLimit: "2Gi"
+      - name: models-cache
+        persistentVolumeClaim:
+          claimName: vllm-models-cache
+
+      tolerations:
+        - key: nvidia.com/gpu
+          operator: Exists
+          effect: NoSchedule
+      {%- else %}
       volumes:
       - name: model-file
         emptyDir: {}
+      {%- endif %}
diff --git a/templates/http/base/kustomization.yaml b/templates/http/base/kustomization.yaml
index 407ab41..d226825 100644
--- a/templates/http/base/kustomization.yaml
+++ b/templates/http/base/kustomization.yaml
@@ -8,6 +8,9 @@ commonLabels:
   app.kubernetes.io/part-of: {{values.name}}
 resources: 
 - initialize-namespace.yaml
+{%- if values.vllmSelected %}
+- pvc.yaml
+{%- endif %}
 - deployment.yaml
 - route.yaml
 - service.yaml
diff --git a/templates/http/base/pvc.yaml b/templates/http/base/pvc.yaml
new file mode 100644
index 0000000..f06aac0
--- /dev/null
+++ b/templates/http/base/pvc.yaml
@@ -0,0 +1,12 @@
+---
+apiVersion: v1
+kind: PersistentVolumeClaim
+metadata:
+  name: vllm-models-cache
+spec:
+  accessModes:
+    - ReadWriteOnce
+  volumeMode: Filesystem
+  resources:
+    requests:
+      storage: 100Gi
\ No newline at end of file

From d40584466eb9f0ca7cae29200f73ed49c429c85e Mon Sep 17 00:00:00 2001
From: Stephanie <yangcao@redhat.com>
Date: Mon, 3 Jun 2024 17:34:32 -0400
Subject: [PATCH 2/5] add an ENV and also update the condition

Signed-off-by: Stephanie <yangcao@redhat.com>
---
 templates/http/base/deployment.yaml | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/templates/http/base/deployment.yaml b/templates/http/base/deployment.yaml
index bba75ab..3de6057 100644
--- a/templates/http/base/deployment.yaml
+++ b/templates/http/base/deployment.yaml
@@ -22,7 +22,7 @@ spec:
       labels:
         app.kubernetes.io/instance:  {{values.name}}
     spec:
-      {%- if !values.vllmSelected %}
+      {%- if values.vllmSelected == "false" or values.vllmSelected == nil %}
       initContainers:
       - name: model-file
         image: {{values.initContainer}}
@@ -42,7 +42,10 @@ spec:
         securityContext:
           runAsNonRoot: true
       {%- if values.vllmSelected %}
-      - image: {{values.vllmModelServiceContainer}}
+      - env:
+        - name: MODEL_NAME
+          value: "{{values.vllmModelName}}"
+        image: {{values.vllmModelServiceContainer}}
         args: [
             "--model",
             "{{values.vllmModelName}}",
@@ -52,11 +55,7 @@ spec:
             "/models-cache"]
         resources:
           limits:
-            cpu: '8'
-            memory: 24Gi
             nvidia.com/gpu: '1'
-          requests:
-            cpu: '6'
         volumeMounts:
         - name: dshm
           mountPath: /dev/shm

From 41f6534e603aafcfe17e481e65af731908f04d0a Mon Sep 17 00:00:00 2001
From: Stephanie <yangcao@redhat.com>
Date: Mon, 3 Jun 2024 17:37:51 -0400
Subject: [PATCH 3/5] update condition

Signed-off-by: Stephanie <yangcao@redhat.com>
---
 templates/http/base/deployment.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/templates/http/base/deployment.yaml b/templates/http/base/deployment.yaml
index 3de6057..07c8f18 100644
--- a/templates/http/base/deployment.yaml
+++ b/templates/http/base/deployment.yaml
@@ -22,7 +22,7 @@ spec:
       labels:
         app.kubernetes.io/instance:  {{values.name}}
     spec:
-      {%- if values.vllmSelected == "false" or values.vllmSelected == nil %}
+      {%- if values.vllmSelected == false or values.vllmSelected == nil %}
       initContainers:
       - name: model-file
         image: {{values.initContainer}}

From 3a0f19a1b5c326725a127aa45dc83ad7ef3241c2 Mon Sep 17 00:00:00 2001
From: Stephanie <yangcao@redhat.com>
Date: Mon, 3 Jun 2024 17:43:52 -0400
Subject: [PATCH 4/5] condition worked

Signed-off-by: Stephanie <yangcao@redhat.com>
---
 templates/http/base/deployment.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/templates/http/base/deployment.yaml b/templates/http/base/deployment.yaml
index 07c8f18..3d40ed1 100644
--- a/templates/http/base/deployment.yaml
+++ b/templates/http/base/deployment.yaml
@@ -22,7 +22,7 @@ spec:
       labels:
         app.kubernetes.io/instance:  {{values.name}}
     spec:
-      {%- if values.vllmSelected == false or values.vllmSelected == nil %}
+      {%- if values.vllmSelected == nil or not(values.vllmSelected) %}
       initContainers:
       - name: model-file
         image: {{values.initContainer}}

From ddf04dbf01116eecf80d13072a66c7e9a0e05260 Mon Sep 17 00:00:00 2001
From: Stephanie <yangcao@redhat.com>
Date: Mon, 3 Jun 2024 17:57:13 -0400
Subject: [PATCH 5/5] move the env to the right place

Signed-off-by: Stephanie <yangcao@redhat.com>
---
 templates/http/base/deployment.yaml | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/templates/http/base/deployment.yaml b/templates/http/base/deployment.yaml
index 3d40ed1..5c94a43 100644
--- a/templates/http/base/deployment.yaml
+++ b/templates/http/base/deployment.yaml
@@ -35,6 +35,10 @@ spec:
       - env:
         - name: MODEL_ENDPOINT
           value: http://0.0.0.0:{{values.modelServicePort}}
+        {%- if values.vllmSelected %}
+        - name: MODEL_NAME
+          value: "{{values.vllmModelName}}"
+        {%- endif %}
         image:  {{values.appContainer}}
         name: app-inference
         ports:
@@ -42,10 +46,7 @@ spec:
         securityContext:
           runAsNonRoot: true
       {%- if values.vllmSelected %}
-      - env:
-        - name: MODEL_NAME
-          value: "{{values.vllmModelName}}"
-        image: {{values.vllmModelServiceContainer}}
+      - image: {{values.vllmModelServiceContainer}}
         args: [
             "--model",
             "{{values.vllmModelName}}",