From a8db773974f153cd66d66308de0badbceb92e1eb Mon Sep 17 00:00:00 2001 From: seongsukwon-moreh Date: Tue, 3 Feb 2026 11:42:09 +0900 Subject: [PATCH 1/2] MAF-19231: feat(preset): add new InferenceServiceTemplates for Qwen and vllm-meta-llama models - Introduced InferenceServiceTemplates for Qwen/Qwen3-1.7B across AMD MI250 and MI300x configurations, and renamed the existing vllm-meta-llama-3.2-1B-Instruct templates to carry the quickstart- prefix. - Configured environment variables and resource requests/limits for optimal performance. - Added support for different roles (consumer, producer) in the extra arguments for each template. - Ensured consistent naming conventions and labels across all new templates. --- ...a-3.2-1b-instruct-amd-mi250-tp2.helm.yaml} | 0 ...-3.2-1b-instruct-amd-mi300x-tp2.helm.yaml} | 0 ...b-instruct-decode-amd-mi250-tp2.helm.yaml} | 0 ...-instruct-decode-amd-mi300x-tp2.helm.yaml} | 0 ...-instruct-prefill-amd-mi250-tp2.helm.yaml} | 0 ...instruct-prefill-amd-mi300x-tp2.helm.yaml} | 0 ...lm-qwen-qwen3-1.7b-amd-mi250-tp2.helm.yaml | 36 ++++++++++++++++++ ...m-qwen-qwen3-1.7b-amd-mi300x-tp2.helm.yaml | 36 ++++++++++++++++++ ...-qwen3-1.7b-decode-amd-mi250-tp2.helm.yaml | 37 +++++++++++++++++++ ...qwen3-1.7b-decode-amd-mi300x-tp2.helm.yaml | 37 +++++++++++++++++++ ...qwen3-1.7b-prefill-amd-mi250-tp2.helm.yaml | 37 +++++++++++++++++++ ...wen3-1.7b-prefill-amd-mi300x-tp2.helm.yaml | 37 +++++++++++++++++++ 12 files changed, 220 insertions(+) rename deploy/helm/moai-inference-preset/templates/presets/quickstart/{vllm-meta-llama-llama-3.2-1b-instruct-amd-mi250-tp2.helm.yaml => quickstart-vllm-meta-llama-llama-3.2-1b-instruct-amd-mi250-tp2.helm.yaml} (100%) rename deploy/helm/moai-inference-preset/templates/presets/quickstart/{vllm-meta-llama-llama-3.2-1b-instruct-amd-mi300x-tp2.helm.yaml => quickstart-vllm-meta-llama-llama-3.2-1b-instruct-amd-mi300x-tp2.helm.yaml} (100%) rename deploy/helm/moai-inference-preset/templates/presets/quickstart/{vllm-meta-llama-llama-3.2-1b-instruct-decode-amd-mi250-tp2.helm.yaml => 
quickstart-vllm-meta-llama-llama-3.2-1b-instruct-decode-amd-mi250-tp2.helm.yaml} (100%) rename deploy/helm/moai-inference-preset/templates/presets/quickstart/{vllm-meta-llama-llama-3.2-1b-instruct-decode-amd-mi300x-tp2.helm.yaml => quickstart-vllm-meta-llama-llama-3.2-1b-instruct-decode-amd-mi300x-tp2.helm.yaml} (100%) rename deploy/helm/moai-inference-preset/templates/presets/quickstart/{vllm-meta-llama-llama-3.2-1b-instruct-prefill-amd-mi250-tp2.helm.yaml => quickstart-vllm-meta-llama-llama-3.2-1b-instruct-prefill-amd-mi250-tp2.helm.yaml} (100%) rename deploy/helm/moai-inference-preset/templates/presets/quickstart/{vllm-meta-llama-llama-3.2-1b-instruct-prefill-amd-mi300x-tp2.helm.yaml => quickstart-vllm-meta-llama-llama-3.2-1b-instruct-prefill-amd-mi300x-tp2.helm.yaml} (100%) create mode 100644 deploy/helm/moai-inference-preset/templates/presets/quickstart/quickstart-vllm-qwen-qwen3-1.7b-amd-mi250-tp2.helm.yaml create mode 100644 deploy/helm/moai-inference-preset/templates/presets/quickstart/quickstart-vllm-qwen-qwen3-1.7b-amd-mi300x-tp2.helm.yaml create mode 100644 deploy/helm/moai-inference-preset/templates/presets/quickstart/quickstart-vllm-qwen-qwen3-1.7b-decode-amd-mi250-tp2.helm.yaml create mode 100644 deploy/helm/moai-inference-preset/templates/presets/quickstart/quickstart-vllm-qwen-qwen3-1.7b-decode-amd-mi300x-tp2.helm.yaml create mode 100644 deploy/helm/moai-inference-preset/templates/presets/quickstart/quickstart-vllm-qwen-qwen3-1.7b-prefill-amd-mi250-tp2.helm.yaml create mode 100644 deploy/helm/moai-inference-preset/templates/presets/quickstart/quickstart-vllm-qwen-qwen3-1.7b-prefill-amd-mi300x-tp2.helm.yaml diff --git a/deploy/helm/moai-inference-preset/templates/presets/quickstart/vllm-meta-llama-llama-3.2-1b-instruct-amd-mi250-tp2.helm.yaml b/deploy/helm/moai-inference-preset/templates/presets/quickstart/quickstart-vllm-meta-llama-llama-3.2-1b-instruct-amd-mi250-tp2.helm.yaml similarity index 100% rename from 
deploy/helm/moai-inference-preset/templates/presets/quickstart/vllm-meta-llama-llama-3.2-1b-instruct-amd-mi250-tp2.helm.yaml rename to deploy/helm/moai-inference-preset/templates/presets/quickstart/quickstart-vllm-meta-llama-llama-3.2-1b-instruct-amd-mi250-tp2.helm.yaml diff --git a/deploy/helm/moai-inference-preset/templates/presets/quickstart/vllm-meta-llama-llama-3.2-1b-instruct-amd-mi300x-tp2.helm.yaml b/deploy/helm/moai-inference-preset/templates/presets/quickstart/quickstart-vllm-meta-llama-llama-3.2-1b-instruct-amd-mi300x-tp2.helm.yaml similarity index 100% rename from deploy/helm/moai-inference-preset/templates/presets/quickstart/vllm-meta-llama-llama-3.2-1b-instruct-amd-mi300x-tp2.helm.yaml rename to deploy/helm/moai-inference-preset/templates/presets/quickstart/quickstart-vllm-meta-llama-llama-3.2-1b-instruct-amd-mi300x-tp2.helm.yaml diff --git a/deploy/helm/moai-inference-preset/templates/presets/quickstart/vllm-meta-llama-llama-3.2-1b-instruct-decode-amd-mi250-tp2.helm.yaml b/deploy/helm/moai-inference-preset/templates/presets/quickstart/quickstart-vllm-meta-llama-llama-3.2-1b-instruct-decode-amd-mi250-tp2.helm.yaml similarity index 100% rename from deploy/helm/moai-inference-preset/templates/presets/quickstart/vllm-meta-llama-llama-3.2-1b-instruct-decode-amd-mi250-tp2.helm.yaml rename to deploy/helm/moai-inference-preset/templates/presets/quickstart/quickstart-vllm-meta-llama-llama-3.2-1b-instruct-decode-amd-mi250-tp2.helm.yaml diff --git a/deploy/helm/moai-inference-preset/templates/presets/quickstart/vllm-meta-llama-llama-3.2-1b-instruct-decode-amd-mi300x-tp2.helm.yaml b/deploy/helm/moai-inference-preset/templates/presets/quickstart/quickstart-vllm-meta-llama-llama-3.2-1b-instruct-decode-amd-mi300x-tp2.helm.yaml similarity index 100% rename from deploy/helm/moai-inference-preset/templates/presets/quickstart/vllm-meta-llama-llama-3.2-1b-instruct-decode-amd-mi300x-tp2.helm.yaml rename to 
deploy/helm/moai-inference-preset/templates/presets/quickstart/quickstart-vllm-meta-llama-llama-3.2-1b-instruct-decode-amd-mi300x-tp2.helm.yaml diff --git a/deploy/helm/moai-inference-preset/templates/presets/quickstart/vllm-meta-llama-llama-3.2-1b-instruct-prefill-amd-mi250-tp2.helm.yaml b/deploy/helm/moai-inference-preset/templates/presets/quickstart/quickstart-vllm-meta-llama-llama-3.2-1b-instruct-prefill-amd-mi250-tp2.helm.yaml similarity index 100% rename from deploy/helm/moai-inference-preset/templates/presets/quickstart/vllm-meta-llama-llama-3.2-1b-instruct-prefill-amd-mi250-tp2.helm.yaml rename to deploy/helm/moai-inference-preset/templates/presets/quickstart/quickstart-vllm-meta-llama-llama-3.2-1b-instruct-prefill-amd-mi250-tp2.helm.yaml diff --git a/deploy/helm/moai-inference-preset/templates/presets/quickstart/vllm-meta-llama-llama-3.2-1b-instruct-prefill-amd-mi300x-tp2.helm.yaml b/deploy/helm/moai-inference-preset/templates/presets/quickstart/quickstart-vllm-meta-llama-llama-3.2-1b-instruct-prefill-amd-mi300x-tp2.helm.yaml similarity index 100% rename from deploy/helm/moai-inference-preset/templates/presets/quickstart/vllm-meta-llama-llama-3.2-1b-instruct-prefill-amd-mi300x-tp2.helm.yaml rename to deploy/helm/moai-inference-preset/templates/presets/quickstart/quickstart-vllm-meta-llama-llama-3.2-1b-instruct-prefill-amd-mi300x-tp2.helm.yaml diff --git a/deploy/helm/moai-inference-preset/templates/presets/quickstart/quickstart-vllm-qwen-qwen3-1.7b-amd-mi250-tp2.helm.yaml b/deploy/helm/moai-inference-preset/templates/presets/quickstart/quickstart-vllm-qwen-qwen3-1.7b-amd-mi250-tp2.helm.yaml new file mode 100644 index 00000000..61a0caa1 --- /dev/null +++ b/deploy/helm/moai-inference-preset/templates/presets/quickstart/quickstart-vllm-qwen-qwen3-1.7b-amd-mi250-tp2.helm.yaml @@ -0,0 +1,36 @@ +apiVersion: odin.moreh.io/v1alpha1 +kind: InferenceServiceTemplate +metadata: + name: quickstart-vllm-qwen-qwen3-1.7b-amd-mi250-tp2 + namespace: {{ include 
"common.names.namespace" . }} + labels: + {{- include "mif.preset.labels" . | nindent 4 }} +spec: + parallelism: + tensor: 2 + template: + spec: + containers: + - name: main + image: 255250787067.dkr.ecr.ap-northeast-2.amazonaws.com/quickstart/moreh-vllm:20250915.1 + env: + - name: ISVC_MODEL_NAME + value: Qwen/Qwen3-1.7B + - name: ISVC_EXTRA_ARGS + value: >- + --disable-uvicorn-access-log + --no-enable-log-requests + --max-model-len 16384 + --max-num-batched-tokens 8192 + resources: + requests: + amd.com/gpu: 2 + limits: + amd.com/gpu: 2 + nodeSelector: + moai.moreh.io/accelerator.vendor: amd + moai.moreh.io/accelerator.model: mi250 + tolerations: + - key: "amd.com/gpu" + operator: "Exists" + effect: "NoSchedule" diff --git a/deploy/helm/moai-inference-preset/templates/presets/quickstart/quickstart-vllm-qwen-qwen3-1.7b-amd-mi300x-tp2.helm.yaml b/deploy/helm/moai-inference-preset/templates/presets/quickstart/quickstart-vllm-qwen-qwen3-1.7b-amd-mi300x-tp2.helm.yaml new file mode 100644 index 00000000..d10807c0 --- /dev/null +++ b/deploy/helm/moai-inference-preset/templates/presets/quickstart/quickstart-vllm-qwen-qwen3-1.7b-amd-mi300x-tp2.helm.yaml @@ -0,0 +1,36 @@ +apiVersion: odin.moreh.io/v1alpha1 +kind: InferenceServiceTemplate +metadata: + name: quickstart-vllm-qwen-qwen3-1.7b-amd-mi300x-tp2 + namespace: {{ include "common.names.namespace" . }} + labels: + {{- include "mif.preset.labels" . 
| nindent 4 }} +spec: + parallelism: + tensor: 2 + template: + spec: + containers: + - name: main + image: 255250787067.dkr.ecr.ap-northeast-2.amazonaws.com/quickstart/moreh-vllm:20250915.1 + env: + - name: ISVC_MODEL_NAME + value: Qwen/Qwen3-1.7B + - name: ISVC_EXTRA_ARGS + value: >- + --disable-uvicorn-access-log + --no-enable-log-requests + --max-model-len 16384 + --max-num-batched-tokens 8192 + resources: + requests: + amd.com/gpu: 2 + limits: + amd.com/gpu: 2 + nodeSelector: + moai.moreh.io/accelerator.vendor: amd + moai.moreh.io/accelerator.model: mi300x + tolerations: + - key: "amd.com/gpu" + operator: "Exists" + effect: "NoSchedule" diff --git a/deploy/helm/moai-inference-preset/templates/presets/quickstart/quickstart-vllm-qwen-qwen3-1.7b-decode-amd-mi250-tp2.helm.yaml b/deploy/helm/moai-inference-preset/templates/presets/quickstart/quickstart-vllm-qwen-qwen3-1.7b-decode-amd-mi250-tp2.helm.yaml new file mode 100644 index 00000000..b56fa846 --- /dev/null +++ b/deploy/helm/moai-inference-preset/templates/presets/quickstart/quickstart-vllm-qwen-qwen3-1.7b-decode-amd-mi250-tp2.helm.yaml @@ -0,0 +1,37 @@ +apiVersion: odin.moreh.io/v1alpha1 +kind: InferenceServiceTemplate +metadata: + name: quickstart-vllm-qwen-qwen3-1.7b-decode-amd-mi250-tp2 + namespace: {{ include "common.names.namespace" . }} + labels: + {{- include "mif.preset.labels" . 
| nindent 4 }} +spec: + parallelism: + tensor: 2 + template: + spec: + containers: + - name: main + image: 255250787067.dkr.ecr.ap-northeast-2.amazonaws.com/quickstart/moreh-vllm:20250915.1 + env: + - name: ISVC_MODEL_NAME + value: Qwen/Qwen3-1.7B + - name: ISVC_EXTRA_ARGS + value: >- + --disable-uvicorn-access-log + --no-enable-log-requests + --max-model-len 16384 + --max-num-batched-tokens 8192 + --kv-transfer-config '{"kv_connector":"NixlConnector","kv_role":"kv_consumer"}' + resources: + requests: + amd.com/gpu: 2 + limits: + amd.com/gpu: 2 + nodeSelector: + moai.moreh.io/accelerator.vendor: amd + moai.moreh.io/accelerator.model: mi250 + tolerations: + - key: "amd.com/gpu" + operator: "Exists" + effect: "NoSchedule" diff --git a/deploy/helm/moai-inference-preset/templates/presets/quickstart/quickstart-vllm-qwen-qwen3-1.7b-decode-amd-mi300x-tp2.helm.yaml b/deploy/helm/moai-inference-preset/templates/presets/quickstart/quickstart-vllm-qwen-qwen3-1.7b-decode-amd-mi300x-tp2.helm.yaml new file mode 100644 index 00000000..a9455339 --- /dev/null +++ b/deploy/helm/moai-inference-preset/templates/presets/quickstart/quickstart-vllm-qwen-qwen3-1.7b-decode-amd-mi300x-tp2.helm.yaml @@ -0,0 +1,37 @@ +apiVersion: odin.moreh.io/v1alpha1 +kind: InferenceServiceTemplate +metadata: + name: quickstart-vllm-qwen-qwen3-1.7b-decode-amd-mi300x-tp2 + namespace: {{ include "common.names.namespace" . }} + labels: + {{- include "mif.preset.labels" . 
| nindent 4 }} +spec: + parallelism: + tensor: 2 + template: + spec: + containers: + - name: main + image: 255250787067.dkr.ecr.ap-northeast-2.amazonaws.com/quickstart/moreh-vllm:20250915.1 + env: + - name: ISVC_MODEL_NAME + value: Qwen/Qwen3-1.7B + - name: ISVC_EXTRA_ARGS + value: >- + --disable-uvicorn-access-log + --no-enable-log-requests + --max-model-len 16384 + --max-num-batched-tokens 8192 + --kv-transfer-config '{"kv_connector":"NixlConnector","kv_role":"kv_consumer"}' + resources: + requests: + amd.com/gpu: 2 + limits: + amd.com/gpu: 2 + nodeSelector: + moai.moreh.io/accelerator.vendor: amd + moai.moreh.io/accelerator.model: mi300x + tolerations: + - key: "amd.com/gpu" + operator: "Exists" + effect: "NoSchedule" diff --git a/deploy/helm/moai-inference-preset/templates/presets/quickstart/quickstart-vllm-qwen-qwen3-1.7b-prefill-amd-mi250-tp2.helm.yaml b/deploy/helm/moai-inference-preset/templates/presets/quickstart/quickstart-vllm-qwen-qwen3-1.7b-prefill-amd-mi250-tp2.helm.yaml new file mode 100644 index 00000000..24586212 --- /dev/null +++ b/deploy/helm/moai-inference-preset/templates/presets/quickstart/quickstart-vllm-qwen-qwen3-1.7b-prefill-amd-mi250-tp2.helm.yaml @@ -0,0 +1,37 @@ +apiVersion: odin.moreh.io/v1alpha1 +kind: InferenceServiceTemplate +metadata: + name: quickstart-vllm-qwen-qwen3-1.7b-prefill-amd-mi250-tp2 + namespace: {{ include "common.names.namespace" . }} + labels: + {{- include "mif.preset.labels" . 
| nindent 4 }} +spec: + parallelism: + tensor: 2 + template: + spec: + containers: + - name: main + image: 255250787067.dkr.ecr.ap-northeast-2.amazonaws.com/quickstart/moreh-vllm:20250915.1 + env: + - name: ISVC_MODEL_NAME + value: Qwen/Qwen3-1.7B + - name: ISVC_EXTRA_ARGS + value: >- + --disable-uvicorn-access-log + --no-enable-log-requests + --max-model-len 16384 + --max-num-batched-tokens 8192 + --kv-transfer-config '{"kv_connector":"NixlConnector","kv_role":"kv_producer"}' + resources: + requests: + amd.com/gpu: 2 + limits: + amd.com/gpu: 2 + nodeSelector: + moai.moreh.io/accelerator.vendor: amd + moai.moreh.io/accelerator.model: mi250 + tolerations: + - key: "amd.com/gpu" + operator: "Exists" + effect: "NoSchedule" diff --git a/deploy/helm/moai-inference-preset/templates/presets/quickstart/quickstart-vllm-qwen-qwen3-1.7b-prefill-amd-mi300x-tp2.helm.yaml b/deploy/helm/moai-inference-preset/templates/presets/quickstart/quickstart-vllm-qwen-qwen3-1.7b-prefill-amd-mi300x-tp2.helm.yaml new file mode 100644 index 00000000..2c93b778 --- /dev/null +++ b/deploy/helm/moai-inference-preset/templates/presets/quickstart/quickstart-vllm-qwen-qwen3-1.7b-prefill-amd-mi300x-tp2.helm.yaml @@ -0,0 +1,37 @@ +apiVersion: odin.moreh.io/v1alpha1 +kind: InferenceServiceTemplate +metadata: + name: quickstart-vllm-qwen-qwen3-1.7b-prefill-amd-mi300x-tp2 + namespace: {{ include "common.names.namespace" . }} + labels: + {{- include "mif.preset.labels" . 
| nindent 4 }} +spec: + parallelism: + tensor: 2 + template: + spec: + containers: + - name: main + image: 255250787067.dkr.ecr.ap-northeast-2.amazonaws.com/quickstart/moreh-vllm:20250915.1 + env: + - name: ISVC_MODEL_NAME + value: Qwen/Qwen3-1.7B + - name: ISVC_EXTRA_ARGS + value: >- + --disable-uvicorn-access-log + --no-enable-log-requests + --max-model-len 16384 + --max-num-batched-tokens 8192 + --kv-transfer-config '{"kv_connector":"NixlConnector","kv_role":"kv_producer"}' + resources: + requests: + amd.com/gpu: 2 + limits: + amd.com/gpu: 2 + nodeSelector: + moai.moreh.io/accelerator.vendor: amd + moai.moreh.io/accelerator.model: mi300x + tolerations: + - key: "amd.com/gpu" + operator: "Exists" + effect: "NoSchedule" From fc562d3cf8441bc65f3e3d0c780c991a7d7814df Mon Sep 17 00:00:00 2001 From: seongsukwon-moreh Date: Tue, 3 Feb 2026 11:58:32 +0900 Subject: [PATCH 2/2] MAF-19231: Update Helm templates to remove quotes from toleration keys for consistency across AMD configurations --- ...meta-llama-llama-3.2-1b-instruct-amd-mi250-tp2.helm.yaml | 6 +++--- ...eta-llama-llama-3.2-1b-instruct-amd-mi300x-tp2.helm.yaml | 6 +++--- ...ama-llama-3.2-1b-instruct-decode-amd-mi250-tp2.helm.yaml | 6 +++--- ...ma-llama-3.2-1b-instruct-decode-amd-mi300x-tp2.helm.yaml | 6 +++--- ...ma-llama-3.2-1b-instruct-prefill-amd-mi250-tp2.helm.yaml | 6 +++--- ...a-llama-3.2-1b-instruct-prefill-amd-mi300x-tp2.helm.yaml | 6 +++--- .../quickstart-vllm-qwen-qwen3-1.7b-amd-mi250-tp2.helm.yaml | 6 +++--- ...quickstart-vllm-qwen-qwen3-1.7b-amd-mi300x-tp2.helm.yaml | 6 +++--- ...tart-vllm-qwen-qwen3-1.7b-decode-amd-mi250-tp2.helm.yaml | 6 +++--- ...art-vllm-qwen-qwen3-1.7b-decode-amd-mi300x-tp2.helm.yaml | 6 +++--- ...art-vllm-qwen-qwen3-1.7b-prefill-amd-mi250-tp2.helm.yaml | 6 +++--- ...rt-vllm-qwen-qwen3-1.7b-prefill-amd-mi300x-tp2.helm.yaml | 6 +++--- 12 files changed, 36 insertions(+), 36 deletions(-) diff --git 
a/deploy/helm/moai-inference-preset/templates/presets/quickstart/quickstart-vllm-meta-llama-llama-3.2-1b-instruct-amd-mi250-tp2.helm.yaml b/deploy/helm/moai-inference-preset/templates/presets/quickstart/quickstart-vllm-meta-llama-llama-3.2-1b-instruct-amd-mi250-tp2.helm.yaml index 055d88b4..fe382090 100644 --- a/deploy/helm/moai-inference-preset/templates/presets/quickstart/quickstart-vllm-meta-llama-llama-3.2-1b-instruct-amd-mi250-tp2.helm.yaml +++ b/deploy/helm/moai-inference-preset/templates/presets/quickstart/quickstart-vllm-meta-llama-llama-3.2-1b-instruct-amd-mi250-tp2.helm.yaml @@ -31,6 +31,6 @@ spec: moai.moreh.io/accelerator.vendor: amd moai.moreh.io/accelerator.model: mi250 tolerations: - - key: "amd.com/gpu" - operator: "Exists" - effect: "NoSchedule" + - key: amd.com/gpu + operator: Exists + effect: NoSchedule diff --git a/deploy/helm/moai-inference-preset/templates/presets/quickstart/quickstart-vllm-meta-llama-llama-3.2-1b-instruct-amd-mi300x-tp2.helm.yaml b/deploy/helm/moai-inference-preset/templates/presets/quickstart/quickstart-vllm-meta-llama-llama-3.2-1b-instruct-amd-mi300x-tp2.helm.yaml index 0d79e1d9..561ae37d 100644 --- a/deploy/helm/moai-inference-preset/templates/presets/quickstart/quickstart-vllm-meta-llama-llama-3.2-1b-instruct-amd-mi300x-tp2.helm.yaml +++ b/deploy/helm/moai-inference-preset/templates/presets/quickstart/quickstart-vllm-meta-llama-llama-3.2-1b-instruct-amd-mi300x-tp2.helm.yaml @@ -31,6 +31,6 @@ spec: moai.moreh.io/accelerator.vendor: amd moai.moreh.io/accelerator.model: mi300x tolerations: - - key: "amd.com/gpu" - operator: "Exists" - effect: "NoSchedule" \ No newline at end of file + - key: amd.com/gpu + operator: Exists + effect: NoSchedule \ No newline at end of file diff --git a/deploy/helm/moai-inference-preset/templates/presets/quickstart/quickstart-vllm-meta-llama-llama-3.2-1b-instruct-decode-amd-mi250-tp2.helm.yaml 
b/deploy/helm/moai-inference-preset/templates/presets/quickstart/quickstart-vllm-meta-llama-llama-3.2-1b-instruct-decode-amd-mi250-tp2.helm.yaml index 19f39af8..7f12bb61 100644 --- a/deploy/helm/moai-inference-preset/templates/presets/quickstart/quickstart-vllm-meta-llama-llama-3.2-1b-instruct-decode-amd-mi250-tp2.helm.yaml +++ b/deploy/helm/moai-inference-preset/templates/presets/quickstart/quickstart-vllm-meta-llama-llama-3.2-1b-instruct-decode-amd-mi250-tp2.helm.yaml @@ -32,6 +32,6 @@ spec: moai.moreh.io/accelerator.vendor: amd moai.moreh.io/accelerator.model: mi250 tolerations: - - key: "amd.com/gpu" - operator: "Exists" - effect: "NoSchedule" \ No newline at end of file + - key: amd.com/gpu + operator: Exists + effect: NoSchedule \ No newline at end of file diff --git a/deploy/helm/moai-inference-preset/templates/presets/quickstart/quickstart-vllm-meta-llama-llama-3.2-1b-instruct-decode-amd-mi300x-tp2.helm.yaml b/deploy/helm/moai-inference-preset/templates/presets/quickstart/quickstart-vllm-meta-llama-llama-3.2-1b-instruct-decode-amd-mi300x-tp2.helm.yaml index 067c7ab7..33b51c85 100644 --- a/deploy/helm/moai-inference-preset/templates/presets/quickstart/quickstart-vllm-meta-llama-llama-3.2-1b-instruct-decode-amd-mi300x-tp2.helm.yaml +++ b/deploy/helm/moai-inference-preset/templates/presets/quickstart/quickstart-vllm-meta-llama-llama-3.2-1b-instruct-decode-amd-mi300x-tp2.helm.yaml @@ -32,6 +32,6 @@ spec: moai.moreh.io/accelerator.vendor: amd moai.moreh.io/accelerator.model: mi300x tolerations: - - key: "amd.com/gpu" - operator: "Exists" - effect: "NoSchedule" \ No newline at end of file + - key: amd.com/gpu + operator: Exists + effect: NoSchedule \ No newline at end of file diff --git a/deploy/helm/moai-inference-preset/templates/presets/quickstart/quickstart-vllm-meta-llama-llama-3.2-1b-instruct-prefill-amd-mi250-tp2.helm.yaml 
b/deploy/helm/moai-inference-preset/templates/presets/quickstart/quickstart-vllm-meta-llama-llama-3.2-1b-instruct-prefill-amd-mi250-tp2.helm.yaml index e164b9e9..4f342988 100644 --- a/deploy/helm/moai-inference-preset/templates/presets/quickstart/quickstart-vllm-meta-llama-llama-3.2-1b-instruct-prefill-amd-mi250-tp2.helm.yaml +++ b/deploy/helm/moai-inference-preset/templates/presets/quickstart/quickstart-vllm-meta-llama-llama-3.2-1b-instruct-prefill-amd-mi250-tp2.helm.yaml @@ -32,6 +32,6 @@ spec: moai.moreh.io/accelerator.vendor: amd moai.moreh.io/accelerator.model: mi250 tolerations: - - key: "amd.com/gpu" - operator: "Exists" - effect: "NoSchedule" \ No newline at end of file + - key: amd.com/gpu + operator: Exists + effect: NoSchedule \ No newline at end of file diff --git a/deploy/helm/moai-inference-preset/templates/presets/quickstart/quickstart-vllm-meta-llama-llama-3.2-1b-instruct-prefill-amd-mi300x-tp2.helm.yaml b/deploy/helm/moai-inference-preset/templates/presets/quickstart/quickstart-vllm-meta-llama-llama-3.2-1b-instruct-prefill-amd-mi300x-tp2.helm.yaml index e9c42af1..946cc380 100644 --- a/deploy/helm/moai-inference-preset/templates/presets/quickstart/quickstart-vllm-meta-llama-llama-3.2-1b-instruct-prefill-amd-mi300x-tp2.helm.yaml +++ b/deploy/helm/moai-inference-preset/templates/presets/quickstart/quickstart-vllm-meta-llama-llama-3.2-1b-instruct-prefill-amd-mi300x-tp2.helm.yaml @@ -32,6 +32,6 @@ spec: moai.moreh.io/accelerator.vendor: amd moai.moreh.io/accelerator.model: mi300x tolerations: - - key: "amd.com/gpu" - operator: "Exists" - effect: "NoSchedule" \ No newline at end of file + - key: amd.com/gpu + operator: Exists + effect: NoSchedule \ No newline at end of file diff --git a/deploy/helm/moai-inference-preset/templates/presets/quickstart/quickstart-vllm-qwen-qwen3-1.7b-amd-mi250-tp2.helm.yaml b/deploy/helm/moai-inference-preset/templates/presets/quickstart/quickstart-vllm-qwen-qwen3-1.7b-amd-mi250-tp2.helm.yaml index 61a0caa1..49a6b790 100644 
--- a/deploy/helm/moai-inference-preset/templates/presets/quickstart/quickstart-vllm-qwen-qwen3-1.7b-amd-mi250-tp2.helm.yaml +++ b/deploy/helm/moai-inference-preset/templates/presets/quickstart/quickstart-vllm-qwen-qwen3-1.7b-amd-mi250-tp2.helm.yaml @@ -31,6 +31,6 @@ spec: moai.moreh.io/accelerator.vendor: amd moai.moreh.io/accelerator.model: mi250 tolerations: - - key: "amd.com/gpu" - operator: "Exists" - effect: "NoSchedule" + - key: amd.com/gpu + operator: Exists + effect: NoSchedule diff --git a/deploy/helm/moai-inference-preset/templates/presets/quickstart/quickstart-vllm-qwen-qwen3-1.7b-amd-mi300x-tp2.helm.yaml b/deploy/helm/moai-inference-preset/templates/presets/quickstart/quickstart-vllm-qwen-qwen3-1.7b-amd-mi300x-tp2.helm.yaml index d10807c0..9cc538e9 100644 --- a/deploy/helm/moai-inference-preset/templates/presets/quickstart/quickstart-vllm-qwen-qwen3-1.7b-amd-mi300x-tp2.helm.yaml +++ b/deploy/helm/moai-inference-preset/templates/presets/quickstart/quickstart-vllm-qwen-qwen3-1.7b-amd-mi300x-tp2.helm.yaml @@ -31,6 +31,6 @@ spec: moai.moreh.io/accelerator.vendor: amd moai.moreh.io/accelerator.model: mi300x tolerations: - - key: "amd.com/gpu" - operator: "Exists" - effect: "NoSchedule" + - key: amd.com/gpu + operator: Exists + effect: NoSchedule diff --git a/deploy/helm/moai-inference-preset/templates/presets/quickstart/quickstart-vllm-qwen-qwen3-1.7b-decode-amd-mi250-tp2.helm.yaml b/deploy/helm/moai-inference-preset/templates/presets/quickstart/quickstart-vllm-qwen-qwen3-1.7b-decode-amd-mi250-tp2.helm.yaml index b56fa846..2fa25183 100644 --- a/deploy/helm/moai-inference-preset/templates/presets/quickstart/quickstart-vllm-qwen-qwen3-1.7b-decode-amd-mi250-tp2.helm.yaml +++ b/deploy/helm/moai-inference-preset/templates/presets/quickstart/quickstart-vllm-qwen-qwen3-1.7b-decode-amd-mi250-tp2.helm.yaml @@ -32,6 +32,6 @@ spec: moai.moreh.io/accelerator.vendor: amd moai.moreh.io/accelerator.model: mi250 tolerations: - - key: "amd.com/gpu" - operator: "Exists" - 
effect: "NoSchedule" + - key: amd.com/gpu + operator: Exists + effect: NoSchedule diff --git a/deploy/helm/moai-inference-preset/templates/presets/quickstart/quickstart-vllm-qwen-qwen3-1.7b-decode-amd-mi300x-tp2.helm.yaml b/deploy/helm/moai-inference-preset/templates/presets/quickstart/quickstart-vllm-qwen-qwen3-1.7b-decode-amd-mi300x-tp2.helm.yaml index a9455339..037bbfad 100644 --- a/deploy/helm/moai-inference-preset/templates/presets/quickstart/quickstart-vllm-qwen-qwen3-1.7b-decode-amd-mi300x-tp2.helm.yaml +++ b/deploy/helm/moai-inference-preset/templates/presets/quickstart/quickstart-vllm-qwen-qwen3-1.7b-decode-amd-mi300x-tp2.helm.yaml @@ -32,6 +32,6 @@ spec: moai.moreh.io/accelerator.vendor: amd moai.moreh.io/accelerator.model: mi300x tolerations: - - key: "amd.com/gpu" - operator: "Exists" - effect: "NoSchedule" + - key: amd.com/gpu + operator: Exists + effect: NoSchedule diff --git a/deploy/helm/moai-inference-preset/templates/presets/quickstart/quickstart-vllm-qwen-qwen3-1.7b-prefill-amd-mi250-tp2.helm.yaml b/deploy/helm/moai-inference-preset/templates/presets/quickstart/quickstart-vllm-qwen-qwen3-1.7b-prefill-amd-mi250-tp2.helm.yaml index 24586212..8b3ccb40 100644 --- a/deploy/helm/moai-inference-preset/templates/presets/quickstart/quickstart-vllm-qwen-qwen3-1.7b-prefill-amd-mi250-tp2.helm.yaml +++ b/deploy/helm/moai-inference-preset/templates/presets/quickstart/quickstart-vllm-qwen-qwen3-1.7b-prefill-amd-mi250-tp2.helm.yaml @@ -32,6 +32,6 @@ spec: moai.moreh.io/accelerator.vendor: amd moai.moreh.io/accelerator.model: mi250 tolerations: - - key: "amd.com/gpu" - operator: "Exists" - effect: "NoSchedule" + - key: amd.com/gpu + operator: Exists + effect: NoSchedule diff --git a/deploy/helm/moai-inference-preset/templates/presets/quickstart/quickstart-vllm-qwen-qwen3-1.7b-prefill-amd-mi300x-tp2.helm.yaml b/deploy/helm/moai-inference-preset/templates/presets/quickstart/quickstart-vllm-qwen-qwen3-1.7b-prefill-amd-mi300x-tp2.helm.yaml index 2c93b778..936ed2fd 
100644 --- a/deploy/helm/moai-inference-preset/templates/presets/quickstart/quickstart-vllm-qwen-qwen3-1.7b-prefill-amd-mi300x-tp2.helm.yaml +++ b/deploy/helm/moai-inference-preset/templates/presets/quickstart/quickstart-vllm-qwen-qwen3-1.7b-prefill-amd-mi300x-tp2.helm.yaml @@ -32,6 +32,6 @@ spec: moai.moreh.io/accelerator.vendor: amd moai.moreh.io/accelerator.model: mi300x tolerations: - - key: "amd.com/gpu" - operator: "Exists" - effect: "NoSchedule" + - key: amd.com/gpu + operator: Exists + effect: NoSchedule