Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .github/workflows/e2e-inference-perf-p-cluster.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,11 @@ name: e2e-inference-perf-p-cluster
# - MIF infrastructure: moai-inference-framework (Odin controller, etc.)
# - Preset: moai-inference-preset
# The test will only deploy heimdall and InferenceService in the test workload namespace.
#
# Result retrieval (after run):
# Inference-perf uploads results to S3 and then cleans up the workload namespace.
# To list or download result files from S3:
# aws s3 ls --profile s3 s3://moreh-benchmark/ --recursive

on:
workflow_dispatch:
Expand Down
4 changes: 4 additions & 0 deletions AGENTS.md
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,10 @@ The commit message should be structured as follows:

### E2E Test

- **Version scope**:
- E2E tests cover only `vX.Y.Z` (release) and `vX.Y.Z-rc.N` (release candidate) version formats.
- Other version formats (e.g. dev builds, custom tags) are out of scope and should not be tested in E2E.

- **Do not test resource specifications**:
- Do not validate individual fields of the YAML file declaring the resource (resource spec).
- Instead, create the resource and verify that its status reaches the expected state.
Expand Down
11 changes: 11 additions & 0 deletions test/e2e/performance/config/heimdall-values.yaml.tmpl
Original file line number Diff line number Diff line change
Expand Up @@ -34,3 +34,14 @@ gateway:
inferencePool:
targetPorts:
- number: 8000

{{- if not .IsKind }}
extraVolumes:
- name: models
persistentVolumeClaim:
claimName: models

Comment thread
This conversation was marked as resolved.
extraVolumeMounts:
- name: models
mountPath: /mnt/models
Comment thread
This conversation was marked as resolved.
{{- end }}
30 changes: 28 additions & 2 deletions test/e2e/performance/config/inference-service.yaml.tmpl
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ metadata:
name: {{ .Name }}
namespace: {{ .Namespace }}
spec:
replicas: 1
replicas: {{ .Replicas }}
inferencePoolRefs:
- name: heimdall
templateRefs:
Expand All @@ -31,4 +31,30 @@ spec:
limits:
mellanox/hca: "1"
{{- end }}

{{- if not .IsKind }}
affinity:
nodeAffinity:
requiredDuringSchedulingIgnoredDuringExecution:
nodeSelectorTerms:
- matchExpressions:
- key: moai.moreh.io/accelerator.vendor
operator: In
values:
- amd
- key: moai.moreh.io/accelerator.model
operator: In
values:
- mi250
- key: kubernetes.io/hostname
operator: In
values:
- mi250-050
- mi250-054
Comment thread
This conversation was marked as resolved.
tolerations:
- key: amd.com/gpu
operator: Exists
effect: NoSchedule
- key: benchmark
operator: Exists
effect: NoSchedule
{{- end }}
72 changes: 54 additions & 18 deletions test/e2e/performance/performance_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,48 +39,84 @@ var _ = Describe("Inference Performance", Label("performance"), Ordered, func()
SetDefaultEventuallyPollingInterval(settings.IntervalShort)

BeforeAll(func() {
isKind := !envs.SkipKind

By("creating workload namespace")
Expect(utils.CreateWorkloadNamespace(envs.WorkloadNamespace, envs.MIFNamespace)).To(Succeed())

By("creating Gateway resources")
Expect(utils.CreateGatewayResource(envs.WorkloadNamespace, envs.GatewayClassName, envs.IstioRev)).To(Succeed())

var err error
if !isKind {
By("creating model PV")
pvName, err = utils.CreateModelPV(envs.WorkloadNamespace)
Expect(err).NotTo(HaveOccurred(), "failed to create model PV")

By("creating model PVC")
pvcName, err = utils.CreateModelPVC(envs.WorkloadNamespace)
Expect(err).NotTo(HaveOccurred(), "failed to create model PVC")
}

By("installing Heimdall")
data := struct {
MorehRegistrySecretName string
GatewayName string
GatewayClass string
IstioRev string
IsKind bool
}{
MorehRegistrySecretName: settings.MorehRegistrySecretName,
GatewayName: settings.GatewayName,
GatewayClass: envs.GatewayClassName,
IstioRev: envs.IstioRev,
IsKind: isKind,
}

values, err := utils.RenderTemplate(HeimdallValues, data)
Expect(err).NotTo(HaveOccurred(), "failed to render Heimdall values template")
Expect(utils.InstallHeimdall(envs.WorkloadNamespace, values)).To(Succeed())

if envs.SkipKind {
By("creating model PV")
pvName, err = utils.CreateModelPV(envs.WorkloadNamespace)
Expect(err).NotTo(HaveOccurred(), "failed to create model PV")

By("creating model PVC")
pvcName, err = utils.CreateModelPVC(envs.WorkloadNamespace)
Expect(err).NotTo(HaveOccurred(), "failed to create model PVC")
}

By("creating InferenceServices")
isKind := !envs.SkipKind
var prefillData, decodeData utils.InferenceServiceData
if isKind {
prefillData = utils.GetInferenceServiceData("prefill", envs.WorkloadNamespace, []string{"sim-prefill"}, envs.HFToken, envs.HFEndpoint, isKind)
decodeData = utils.GetInferenceServiceData("decode", envs.WorkloadNamespace, []string{"sim-decode"}, envs.HFToken, envs.HFEndpoint, isKind)
prefillData = utils.InferenceServiceData{
Name: "prefill",
Namespace: envs.WorkloadNamespace,
Replicas: 3,
TemplateRefs: []string{"sim-prefill"},
HFToken: envs.HFToken,
HFEndpoint: envs.HFEndpoint,
IsKind: isKind,
}
decodeData = utils.InferenceServiceData{
Name: "decode",
Namespace: envs.WorkloadNamespace,
Replicas: 5,
TemplateRefs: []string{"sim-decode"},
HFToken: envs.HFToken,
HFEndpoint: envs.HFEndpoint,
IsKind: isKind,
}
} else {
prefillData = utils.GetInferenceServiceData("prefill", envs.WorkloadNamespace, []string{"vllm-prefill", envs.TestTemplatePrefill, "vllm-hf-hub-offline"}, envs.HFToken, envs.HFEndpoint, isKind)
decodeData = utils.GetInferenceServiceData("decode", envs.WorkloadNamespace, []string{"vllm-decode", envs.TestTemplateDecode, "vllm-hf-hub-offline"}, envs.HFToken, envs.HFEndpoint, isKind)
prefillData = utils.InferenceServiceData{
Name: "prefill",
Namespace: envs.WorkloadNamespace,
Replicas: 3,
TemplateRefs: []string{"vllm-prefill", envs.TestTemplatePrefill, "vllm-hf-hub-offline"},
HFToken: envs.HFToken,
HFEndpoint: envs.HFEndpoint,
IsKind: isKind,
}
decodeData = utils.InferenceServiceData{
Name: "decode",
Namespace: envs.WorkloadNamespace,
Replicas: 5,
TemplateRefs: []string{"vllm-decode", envs.TestTemplateDecode, "vllm-hf-hub-offline"},
HFToken: envs.HFToken,
HFEndpoint: envs.HFEndpoint,
IsKind: isKind,
}
}
prefillServiceName, err = utils.CreateInferenceService(envs.WorkloadNamespace, InferenceServicePath, prefillData)
Expect(err).NotTo(HaveOccurred(), "failed to create prefill InferenceService")
Expand All @@ -103,6 +139,9 @@ var _ = Describe("Inference Performance", Label("performance"), Ordered, func()
utils.DeleteInferenceService(envs.WorkloadNamespace, prefillServiceName)
utils.DeleteInferenceService(envs.WorkloadNamespace, decodeServiceName)

By("deleting Heimdall")
utils.UninstallHeimdall(envs.WorkloadNamespace)

if envs.SkipKind {
By("deleting model PVC")
utils.DeleteModelPVC(envs.WorkloadNamespace, pvcName)
Expand All @@ -111,9 +150,6 @@ var _ = Describe("Inference Performance", Label("performance"), Ordered, func()
utils.DeleteModelPV(pvName)
}

By("deleting Heimdall")
utils.UninstallHeimdall(envs.WorkloadNamespace)

By("deleting Gateway resources")
utils.DeleteGatewayResource(envs.WorkloadNamespace, envs.GatewayClassName)

Expand Down
2 changes: 1 addition & 1 deletion test/e2e/quality/config/inference-service.yaml.tmpl
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ metadata:
name: {{ .Name }}
namespace: {{ .Namespace }}
spec:
replicas: 2
replicas: {{ .Replicas }}
inferencePoolRefs:
- name: heimdall
templateRefs:
Expand Down
23 changes: 20 additions & 3 deletions test/e2e/quality/quality_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,8 @@ var _ = Describe("Quality Benchmark", Label("quality"), Ordered, func() {
SetDefaultEventuallyPollingInterval(settings.IntervalShort)

BeforeAll(func() {
isKind := !envs.SkipKind

By("creating workload namespace")
Expect(utils.CreateWorkloadNamespace(envs.WorkloadNamespace, envs.MIFNamespace)).To(Succeed())

Expand Down Expand Up @@ -73,12 +75,27 @@ var _ = Describe("Quality Benchmark", Label("quality"), Ordered, func() {

By("creating InferenceServices")
// PD disaggregation environment cannot run tests normally, so we test in aggregate environment
isKind := !envs.SkipKind
var vllmData utils.InferenceServiceData
if isKind {
vllmData = utils.GetInferenceServiceData("vllm", envs.WorkloadNamespace, []string{"sim"}, envs.HFToken, envs.HFEndpoint, isKind)
vllmData = utils.InferenceServiceData{
Name: "vllm",
Namespace: envs.WorkloadNamespace,
Replicas: 2,
TemplateRefs: []string{"sim"},
HFToken: envs.HFToken,
HFEndpoint: envs.HFEndpoint,
IsKind: isKind,
}
} else {
vllmData = utils.GetInferenceServiceData("vllm", envs.WorkloadNamespace, []string{"vllm", envs.TestTemplateDecode, "vllm-hf-hub-offline"}, envs.HFToken, envs.HFEndpoint, isKind)
vllmData = utils.InferenceServiceData{
Name: "vllm",
Namespace: envs.WorkloadNamespace,
Replicas: 2,
TemplateRefs: []string{"vllm", envs.TestTemplateDecode, "vllm-hf-hub-offline"},
HFToken: envs.HFToken,
HFEndpoint: envs.HFEndpoint,
IsKind: isKind,
}
}
vllmServiceName, err = utils.CreateInferenceService(envs.WorkloadNamespace, InferenceServicePath, vllmData)
Expect(err).NotTo(HaveOccurred(), "failed to create vllm InferenceService")
Expand Down
12 changes: 1 addition & 11 deletions test/utils/inference_service.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,23 +12,13 @@ import (
// InferenceServiceData holds the values rendered into the
// InferenceService YAML template (see CreateInferenceService).
type InferenceServiceData struct {
	Name         string   // metadata.name of the InferenceService CR
	Namespace    string   // metadata.namespace the CR is created in
	Replicas     int      // rendered into spec.replicas
	TemplateRefs []string // template names listed under spec.templateRefs
	HFToken      string   // Hugging Face token; presumably injected by the template — confirm against the manifest
	HFEndpoint   string   // Hugging Face endpoint URL; presumably injected by the template — confirm against the manifest
	IsKind       bool     // true on a kind cluster; templates use it to skip node affinity/volume sections
}

// GetInferenceServiceData assembles an InferenceServiceData from the given
// rendering inputs. Replicas is left at its zero value; callers that need a
// specific replica count should construct the struct literal directly.
func GetInferenceServiceData(name string, namespace string, templateRefs []string, hfToken string, hfEndpoint string, isKind bool) InferenceServiceData {
	var data InferenceServiceData
	data.Name = name
	data.Namespace = namespace
	data.TemplateRefs = templateRefs
	data.HFToken = hfToken
	data.HFEndpoint = hfEndpoint
	data.IsKind = isKind
	return data
}

// CreateInferenceService creates an InferenceService CR in the given namespace.
func CreateInferenceService(namespace string, manifestPath string, data InferenceServiceData) (string, error) {
rendered, err := RenderTemplate(manifestPath, data)
Expand Down
Loading