diff --git a/templates/http/base/deployment.yaml b/templates/http/base/deployment.yaml
index 87fa735..5c94a43 100644
--- a/templates/http/base/deployment.yaml
+++ b/templates/http/base/deployment.yaml
@@ -22,6 +22,7 @@ spec:
       labels:
         app.kubernetes.io/instance:  {{values.name}}
     spec:
+      {%- if not values.vllmSelected %}
       initContainers:
       - name: model-file
         image: {{values.initContainer}}
@@ -29,16 +30,39 @@ spec:
         volumeMounts:
         - name: model-file
           mountPath: /shared
+      {%- endif %}
       containers:
       - env:
         - name: MODEL_ENDPOINT
           value: http://0.0.0.0:{{values.modelServicePort}}
+        {%- if values.vllmSelected %}
+        - name: MODEL_NAME  # only set for vLLM deployments
+          value: "{{values.vllmModelName}}"
+        {%- endif %}
         image:  {{values.appContainer}}
         name: app-inference
         ports:
         - containerPort: {{values.appPort}}
         securityContext:
           runAsNonRoot: true
+      {%- if values.vllmSelected %}
+      - image: {{values.vllmModelServiceContainer}}
+        args:  # CLI flags for the vLLM model-service container
+        - "--model"
+        - "{{values.vllmModelName}}"
+        - "--port"
+        - "{{values.modelServicePort}}"
+        - "--download-dir"
+        - "/models-cache"  # must match the models-cache mountPath below
+        resources:
+          limits:
+            nvidia.com/gpu: '1'  # this container is limited to one GPU
+        volumeMounts:
+        - name: dshm
+          mountPath: /dev/shm
+        - name: models-cache
+          mountPath: /models-cache
+      {%- else %}
       - env:
         - name: HOST
           value: "0.0.0.0"
@@ -47,14 +71,31 @@ spec:
         - name: MODEL_PATH
           value: /model/model.file
         image: {{values.modelServiceContainer}}
+        volumeMounts:
+        - name: model-file
+          mountPath: /model
+      {%- endif %}
         name: app-model-service
         ports:
         - containerPort: {{values.modelServicePort}}
         securityContext:
           runAsNonRoot: true
-        volumeMounts:
-        - name: model-file
-          mountPath: /model
+      {%- if values.vllmSelected %}
+      volumes:
+      - name: dshm
+        emptyDir:
+          medium: Memory  # RAM-backed volume mounted at /dev/shm
+          sizeLimit: "2Gi"
+      - name: models-cache
+        persistentVolumeClaim:
+          claimName: vllm-models-cache  # PVC declared in pvc.yaml
+      # Tolerate NoSchedule taints keyed nvidia.com/gpu so the pod can land on GPU nodes.
+      tolerations:
+      - key: nvidia.com/gpu
+        operator: Exists
+        effect: NoSchedule
+      {%- else %}
       volumes:
       - name: model-file
         emptyDir: {}
+      {%- endif %}
diff --git a/templates/http/base/kustomization.yaml b/templates/http/base/kustomization.yaml
index 407ab41..d226825 100644
--- a/templates/http/base/kustomization.yaml
+++ b/templates/http/base/kustomization.yaml
@@ -8,6 +8,9 @@ commonLabels:
   app.kubernetes.io/part-of: {{values.name}}
 resources: 
 - initialize-namespace.yaml
+{%- if values.vllmSelected %}
+- pvc.yaml  # model-cache PVC, included only when vLLM is selected
+{%- endif %}
 - deployment.yaml
 - route.yaml
 - service.yaml
diff --git a/templates/http/base/pvc.yaml b/templates/http/base/pvc.yaml
new file mode 100644
index 0000000..f06aac0
--- /dev/null
+++ b/templates/http/base/pvc.yaml
@@ -0,0 +1,12 @@
+---
+apiVersion: v1
+kind: PersistentVolumeClaim
+metadata:
+  name: vllm-models-cache  # referenced by claimName in deployment.yaml
+spec:
+  accessModes:
+    - ReadWriteOnce  # NOTE(review): RWO may block rolling updates across nodes - confirm Deployment strategy
+  volumeMode: Filesystem
+  resources:
+    requests:
+      storage: 100Gi  # backs /models-cache, the vLLM --download-dir