diff --git a/.github/workflows/e2e-inference-perf-p-cluster.yaml b/.github/workflows/e2e-inference-perf-p-cluster.yaml index bb37bb00..52c4990d 100644 --- a/.github/workflows/e2e-inference-perf-p-cluster.yaml +++ b/.github/workflows/e2e-inference-perf-p-cluster.yaml @@ -14,6 +14,11 @@ name: e2e-inference-perf-p-cluster # - MIF infrastructure: moai-inference-framework (Odin controller, etc.) # - Preset: moai-inference-preset # The test will only deploy heimdall and InferenceService in the test workload namespace. +# +# Result retrieval (after run): +# Inference-perf uploads results to S3 and then cleans up the workload namespace. +# To list or download result files from S3: +# aws s3 ls --profile s3 s3://moreh-benchmark/ --recursive on: workflow_dispatch: diff --git a/AGENTS.md b/AGENTS.md index fd83319f..e981d81a 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -56,6 +56,10 @@ The commit message should be structured as follows: ### E2E Test +- **Version scope**: + - E2E tests cover only `vX.Y.Z` (release) and `vX.Y.Z-rc.N` (release candidate) version formats. + - Other version formats (e.g. dev builds, custom tags) are out of scope and should not be tested in E2E. + - **Do not test resource specifications**: - Do not validate individual fields of the YAML file declaring the resource (resource spec). - Instead, create the resource and verify that its status reaches the expected state. diff --git a/test/e2e/performance/config/heimdall-values.yaml.tmpl b/test/e2e/performance/config/heimdall-values.yaml.tmpl index 88028ba8..a4d5fd1c 100644 --- a/test/e2e/performance/config/heimdall-values.yaml.tmpl +++ b/test/e2e/performance/config/heimdall-values.yaml.tmpl @@ -34,3 +34,14 @@ gateway: inferencePool: targetPorts: - number: 8000 + +{{- if not .IsKind }} +extraVolumes: + - name: models + persistentVolumeClaim: + claimName: models + +extraVolumeMounts: + - name: models + mountPath: /mnt/models +{{- end }} \ No newline at end of file diff --git a/test/e2e/performance/config/inference-service.yaml.tmpl b/test/e2e/performance/config/inference-service.yaml.tmpl index 5240c49f..d4acae8a 100644 --- a/test/e2e/performance/config/inference-service.yaml.tmpl +++ b/test/e2e/performance/config/inference-service.yaml.tmpl @@ -4,7 +4,7 @@ metadata: name: {{ .Name }} namespace: {{ .Namespace }} spec: - replicas: 1 + replicas: {{ .Replicas }} inferencePoolRefs: - name: heimdall templateRefs: @@ -31,4 +31,30 @@ spec: limits: mellanox/hca: "1" {{- end }} - + {{- if not .IsKind }} + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: moai.moreh.io/accelerator.vendor + operator: In + values: + - amd + - key: moai.moreh.io/accelerator.model + operator: In + values: + - mi250 + - key: kubernetes.io/hostname + operator: In + values: + - mi250-050 + - mi250-054 + tolerations: + - key: amd.com/gpu + operator: Exists + effect: NoSchedule + - key: benchmark + operator: Exists + effect: NoSchedule + {{- end }} diff --git a/test/e2e/performance/performance_test.go b/test/e2e/performance/performance_test.go index 433864d3..4dbe4ae8 100644 --- a/test/e2e/performance/performance_test.go +++ b/test/e2e/performance/performance_test.go @@ -39,48 +39,84 @@ var _ = Describe("Inference Performance", Label("performance"), Ordered, func() SetDefaultEventuallyPollingInterval(settings.IntervalShort) BeforeAll(func() { + isKind := !envs.SkipKind + By("creating workload namespace") Expect(utils.CreateWorkloadNamespace(envs.WorkloadNamespace, envs.MIFNamespace)).To(Succeed()) By("creating Gateway resources") Expect(utils.CreateGatewayResource(envs.WorkloadNamespace, envs.GatewayClassName, envs.IstioRev)).To(Succeed()) + var err error + if !isKind { + By("creating model PV") + pvName, err = utils.CreateModelPV(envs.WorkloadNamespace) + Expect(err).NotTo(HaveOccurred(), "failed to create model PV") + + By("creating model PVC") + pvcName, err = utils.CreateModelPVC(envs.WorkloadNamespace) + Expect(err).NotTo(HaveOccurred(), "failed to create model PVC") + } + By("installing Heimdall") data := struct { MorehRegistrySecretName string GatewayName string GatewayClass string IstioRev string + IsKind bool }{ MorehRegistrySecretName: settings.MorehRegistrySecretName, GatewayName: settings.GatewayName, GatewayClass: envs.GatewayClassName, IstioRev: envs.IstioRev, + IsKind: isKind, } values, err := utils.RenderTemplate(HeimdallValues, data) Expect(err).NotTo(HaveOccurred(), "failed to render Heimdall values template") Expect(utils.InstallHeimdall(envs.WorkloadNamespace, values)).To(Succeed()) - if envs.SkipKind { - By("creating model PV") - pvName, err = utils.CreateModelPV(envs.WorkloadNamespace) - Expect(err).NotTo(HaveOccurred(), "failed to create model PV") - - By("creating model PVC") - pvcName, err = utils.CreateModelPVC(envs.WorkloadNamespace) - Expect(err).NotTo(HaveOccurred(), "failed to create model PVC") - } - By("creating InferenceServices") - isKind := !envs.SkipKind var prefillData, decodeData utils.InferenceServiceData if isKind { - prefillData = utils.GetInferenceServiceData("prefill", envs.WorkloadNamespace, []string{"sim-prefill"}, envs.HFToken, envs.HFEndpoint, isKind) - decodeData = utils.GetInferenceServiceData("decode", envs.WorkloadNamespace, []string{"sim-decode"}, envs.HFToken, envs.HFEndpoint, isKind) + prefillData = utils.InferenceServiceData{ + Name: "prefill", + Namespace: envs.WorkloadNamespace, + Replicas: 3, + TemplateRefs: []string{"sim-prefill"}, + HFToken: envs.HFToken, + HFEndpoint: envs.HFEndpoint, + IsKind: isKind, + } + decodeData = utils.InferenceServiceData{ + Name: "decode", + Namespace: envs.WorkloadNamespace, + Replicas: 5, + TemplateRefs: []string{"sim-decode"}, + HFToken: envs.HFToken, + HFEndpoint: envs.HFEndpoint, + IsKind: isKind, + } } else { - prefillData = utils.GetInferenceServiceData("prefill", envs.WorkloadNamespace, []string{"vllm-prefill", envs.TestTemplatePrefill, "vllm-hf-hub-offline"}, envs.HFToken, envs.HFEndpoint, isKind) - decodeData = utils.GetInferenceServiceData("decode", envs.WorkloadNamespace, []string{"vllm-decode", envs.TestTemplateDecode, "vllm-hf-hub-offline"}, envs.HFToken, envs.HFEndpoint, isKind) + prefillData = utils.InferenceServiceData{ + Name: "prefill", + Namespace: envs.WorkloadNamespace, + Replicas: 3, + TemplateRefs: []string{"vllm-prefill", envs.TestTemplatePrefill, "vllm-hf-hub-offline"}, + HFToken: envs.HFToken, + HFEndpoint: envs.HFEndpoint, + IsKind: isKind, + } + decodeData = utils.InferenceServiceData{ + Name: "decode", + Namespace: envs.WorkloadNamespace, + Replicas: 5, + TemplateRefs: []string{"vllm-decode", envs.TestTemplateDecode, "vllm-hf-hub-offline"}, + HFToken: envs.HFToken, + HFEndpoint: envs.HFEndpoint, + IsKind: isKind, + } } prefillServiceName, err = utils.CreateInferenceService(envs.WorkloadNamespace, InferenceServicePath, prefillData) Expect(err).NotTo(HaveOccurred(), "failed to create prefill InferenceService") @@ -103,6 +139,9 @@ var _ = Describe("Inference Performance", Label("performance"), Ordered, func() utils.DeleteInferenceService(envs.WorkloadNamespace, prefillServiceName) utils.DeleteInferenceService(envs.WorkloadNamespace, decodeServiceName) + By("deleting Heimdall") + utils.UninstallHeimdall(envs.WorkloadNamespace) + if envs.SkipKind { By("deleting model PVC") utils.DeleteModelPVC(envs.WorkloadNamespace, pvcName) @@ -111,9 +150,6 @@ var _ = Describe("Inference Performance", Label("performance"), Ordered, func() utils.DeleteModelPV(pvName) } - By("deleting Heimdall") - utils.UninstallHeimdall(envs.WorkloadNamespace) - By("deleting Gateway resources") utils.DeleteGatewayResource(envs.WorkloadNamespace, envs.GatewayClassName) diff --git a/test/e2e/quality/config/inference-service.yaml.tmpl b/test/e2e/quality/config/inference-service.yaml.tmpl index 1238197e..f91b553e 100644 --- a/test/e2e/quality/config/inference-service.yaml.tmpl +++ b/test/e2e/quality/config/inference-service.yaml.tmpl @@ -4,7 +4,7 @@ metadata: name: {{ .Name }} namespace: {{ .Namespace }} spec: - replicas: 2 + replicas: {{ .Replicas }} inferencePoolRefs: - name: heimdall templateRefs: diff --git a/test/e2e/quality/quality_test.go b/test/e2e/quality/quality_test.go index 3a5dcd13..0d31778e 100644 --- a/test/e2e/quality/quality_test.go +++ b/test/e2e/quality/quality_test.go @@ -38,6 +38,8 @@ var _ = Describe("Quality Benchmark", Label("quality"), Ordered, func() { SetDefaultEventuallyPollingInterval(settings.IntervalShort) BeforeAll(func() { + isKind := !envs.SkipKind + By("creating workload namespace") Expect(utils.CreateWorkloadNamespace(envs.WorkloadNamespace, envs.MIFNamespace)).To(Succeed()) @@ -73,12 +75,27 @@ var _ = Describe("Quality Benchmark", Label("quality"), Ordered, func() { By("creating InferenceServices") // PD disaggregation environment cannot run tests normally, so we test in aggregate environment - isKind := !envs.SkipKind var vllmData utils.InferenceServiceData if isKind { - vllmData = utils.GetInferenceServiceData("vllm", envs.WorkloadNamespace, []string{"sim"}, envs.HFToken, envs.HFEndpoint, isKind) + vllmData = utils.InferenceServiceData{ + Name: "vllm", + Namespace: envs.WorkloadNamespace, + Replicas: 2, + TemplateRefs: []string{"sim"}, + HFToken: envs.HFToken, + HFEndpoint: envs.HFEndpoint, + IsKind: isKind, + } } else { - vllmData = utils.GetInferenceServiceData("vllm", envs.WorkloadNamespace, []string{"vllm", envs.TestTemplateDecode, "vllm-hf-hub-offline"}, envs.HFToken, envs.HFEndpoint, isKind) + vllmData = utils.InferenceServiceData{ + Name: "vllm", + Namespace: envs.WorkloadNamespace, + Replicas: 2, + TemplateRefs: []string{"vllm", envs.TestTemplateDecode, "vllm-hf-hub-offline"}, + HFToken: envs.HFToken, + HFEndpoint: envs.HFEndpoint, + IsKind: isKind, + } } vllmServiceName, err = utils.CreateInferenceService(envs.WorkloadNamespace, InferenceServicePath, vllmData) Expect(err).NotTo(HaveOccurred(), "failed to create vllm InferenceService") diff --git a/test/utils/inference_service.go b/test/utils/inference_service.go index a1d888b0..4b1a432a 100644 --- a/test/utils/inference_service.go +++ b/test/utils/inference_service.go @@ -12,23 +12,13 @@ import ( type InferenceServiceData struct { Name string Namespace string + Replicas int TemplateRefs []string HFToken string HFEndpoint string IsKind bool } -func GetInferenceServiceData(name string, namespace string, templateRefs []string, hfToken string, hfEndpoint string, isKind bool) InferenceServiceData { - return InferenceServiceData{ - Name: name, - Namespace: namespace, - TemplateRefs: templateRefs, - HFToken: hfToken, - HFEndpoint: hfEndpoint, - IsKind: isKind, - } -} - // CreateInferenceService creates an InferenceService CR in the given namespace. func CreateInferenceService(namespace string, manifestPath string, data InferenceServiceData) (string, error) { rendered, err := RenderTemplate(manifestPath, data)