Enable MI300 CI testing. (iree-org#17842)
This commit enables MI300 GPU and model testing in CI.

ci-exactly: build_all, test_amd_mi300, build_packages, regression_test

---------

Signed-off-by: saienduri <saimanas.enduri@amd.com>
Co-authored-by: Scott Todd <scott.todd0@gmail.com>
saienduri and ScottTodd committed Jul 10, 2024
1 parent 6f25718 commit 9ac1015
Showing 7 changed files with 243 additions and 3 deletions.
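The ci-exactly: trailer tells the setup job exactly which CI jobs to enable for this change; downstream jobs gate on the resulting enabled-jobs output (see the if: contains(fromJson(needs.setup.outputs.enabled-jobs), ...) conditions in the diff below). A minimal, hypothetical sketch of how such a trailer could be turned into that output — the real parsing lives in IREE's setup scripts and may differ:

    # Hypothetical sketch only: pull the trailer out of the commit message and
    # publish it as a JSON array that later jobs read via fromJson().
    TRAILER="$(git log -1 --pretty=%B | sed -n 's/^ci-exactly: *//p')"
    ENABLED_JOBS="$(printf '%s' "${TRAILER}" | tr -d ' ' | jq -R -c 'split(",")')"
    echo "enabled-jobs=${ENABLED_JOBS}" >> "${GITHUB_OUTPUT}"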
45 changes: 45 additions & 0 deletions .github/workflows/ci.yml
@@ -430,6 +430,50 @@ jobs:
run: |
./build_tools/cmake/ctest_all.sh ${BUILD_DIR}
test_amd_mi300:
needs: [setup, build_all]
if: contains(fromJson(needs.setup.outputs.enabled-jobs), 'test_amd_mi300')
env:
BUILD_DIR: build-tests
INSTALL_DIR: ${{ needs.build_all.outputs.install-dir }}
INSTALL_DIR_ARCHIVE: ${{ needs.build_all.outputs.install-dir-archive }}
INSTALL_DIR_GCS_URL: ${{ needs.build_all.outputs.install-dir-gcs-url }}
IREE_CPU_DISABLE: 1
IREE_VULKAN_DISABLE: 1
IREE_CUDA_DISABLE: 1
IREE_HIP_DISABLE: 0
IREE_HIP_TEST_TARGET_CHIP: "gfx942"
LD_LIBRARY_PATH: /home/esaimana/Python-3.11.9
runs-on: nodai-amdgpu-mi300-x86-64
steps:
- name: Pre Checkout MI300 Step
if: contains(matrix.name, 'gfx942')
run: |
sudo chmod -R 777 ~/actions-runner/_work
- name: "Checking out repository"
uses: actions/checkout@v4.1.7
- name: "Checking out runtime submodules"
run: ./build_tools/scripts/git/update_runtime_submodules.sh
- name: "Downloading install dir archive"
run: wget "${INSTALL_DIR_GCS_URL}" -O "${INSTALL_DIR_ARCHIVE}"
- name: "Extracting install directory"
run: tar -xf "${INSTALL_DIR_ARCHIVE}"
- name: "Building tests"
run: |
./build_tools/pkgci/build_tests_using_package.sh ${INSTALL_DIR}
- name: "Running GPU tests"
env:
IREE_CTEST_LABEL_REGEX: ^requires-gpu|^driver=hip$
IREE_NVIDIA_SM80_TESTS_DISABLE: 1
IREE_MULTI_DEVICE_TESTS_DISABLE: 0
IREE_AMD_RDNA3_TESTS_DISABLE: 1
IREE_NVIDIA_GPU_TESTS_DISABLE: 0
IREE_CUDA_DISABLE: 1
IREE_CPU_DISABLE: 1
IREE_HIP_DISABLE: 0
run: |
./build_tools/cmake/ctest_all.sh ${BUILD_DIR}
test_amd_w7900:
needs: [setup, build_all]
if: contains(fromJson(needs.setup.outputs.enabled-jobs), 'test_amd_w7900')
@@ -939,6 +983,7 @@ jobs:
- test_nvidia_gpu
- test_nvidia_a100
- test_amd_mi250
- test_amd_mi300
- test_amd_w7900

# Configurations
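The test_amd_mi300 job above selects tests by label: the IREE_*_DISABLE variables disable the CPU, Vulkan, and CUDA paths while keeping HIP enabled, and IREE_CTEST_LABEL_REGEX limits CTest to GPU/HIP tests on the gfx942 runner. A simplified sketch of the filtering ctest_all.sh is assumed to perform (the real script also handles parallelism, exclusions, and device-specific setup):

    # Simplified, assumed equivalent of the label filtering inside ctest_all.sh.
    cd "${BUILD_DIR}"
    ctest \
      --label-regex "^requires-gpu|^driver=hip$" \
      --output-on-failure \
      --timeout 900    # the timeout value here is an assumption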
42 changes: 39 additions & 3 deletions .github/workflows/pkgci_regression_test.yml
@@ -144,13 +144,20 @@ jobs:
runs-on: nodai-amdgpu-w7900-x86-64

# AMD GPU
- name: amdgpu_rocm_gfx90a
- name: amdgpu_rocm_mi250_gfx90a
models-config-file: models_gpu_rocm_gfx90a.json
models-extra-flags-config-file: models_gpu_rocm_gfx90a_additional_flags.json
sdxl-unet-config-file: sdxl_scheduled_unet_gpu_rocm_gfx90a.json
sdxl-vae-config-file: sdxl_vae_decode_gpu_rocm_gfx90a.json
sdxl-clip-config-file: sdxl_prompt_encoder_gpu_rocm_gfx90a.json
runs-on: nodai-amdgpu-mi250-x86-64
- name: amdgpu_rocm_mi300_gfx942
models-config-file: models_gpu_rocm_gfx942.json
models-extra-flags-config-file: models_gpu_rocm_gfx942_additional_flags.json
sdxl-unet-config-file: sdxl_scheduled_unet_gpu_rocm_gfx942.json
sdxl-vae-config-file: sdxl_vae_decode_gpu_rocm_gfx942.json
sdxl-clip-config-file: sdxl_prompt_encoder_gpu_rocm_gfx942.json
runs-on: nodai-amdgpu-mi300-x86-64
- name: amdgpu_vulkan
models-config-file: models_gpu_vulkan.json
runs-on: nodai-amdgpu-w7900-x86-64
@@ -174,7 +181,14 @@ jobs:
SDXL_CLIP_CONFIG_FILE_PATH: build_tools/pkgci/external_test_suite/${{ matrix.sdxl-clip-config-file }}
SDXL_VAE_CONFIG_FILE_PATH: build_tools/pkgci/external_test_suite/${{ matrix.sdxl-vae-config-file }}
VENV_DIR: ${{ github.workspace }}/venv
LD_LIBRARY_PATH: /home/esaimana/Python-3.11.9
steps:
# TODO(saienduri): Find an alternative to this temporary step, which changes permissions on the GitHub Actions
# work directory so that it can be cleaned after every PR
- name: Pre Checkout MI300 Step
if: contains(matrix.name, 'gfx942')
run: |
sudo chmod -R 777 ~/actions-runner/_work
- name: Checking out IREE repository
uses: actions/checkout@v4.1.7
with:
@@ -293,8 +307,8 @@ jobs:
--durations=0 \
--config-files=${SDXL_VAE_CONFIG_FILE_PATH}
- name: "Running SDXL rocm pipeline benchmark"
if: contains(matrix.name, 'rocm')
- name: "Running SDXL rocm pipeline benchmark (mi250)"
if: contains(matrix.name, 'rocm_mi250_gfx90a')
run: |
source ${VENV_DIR}/bin/activate
pytest SHARK-TestSuite/iree_tests/benchmarks/sdxl/benchmark_sdxl_rocm.py \
@@ -313,3 +327,25 @@
--log-cli-level=info \
--retries 7
echo "$(<job_summary.md )" >> $GITHUB_STEP_SUMMARY
rm job_summary.md
- name: "Running SDXL rocm pipeline benchmark (mi300)"
if: contains(matrix.name, 'rocm_mi300_gfx942')
run: |
source ${VENV_DIR}/bin/activate
pytest SHARK-TestSuite/iree_tests/benchmarks/sdxl/benchmark_sdxl_rocm.py \
--goldentime-rocm-e2e-ms 320 \
--goldentime-rocm-unet-ms 77 \
--goldentime-rocm-clip-ms 15 \
--goldentime-rocm-vae-ms 74 \
--goldendispatch-rocm-unet 1714 \
--goldendispatch-rocm-clip 1569 \
--goldendispatch-rocm-vae 248 \
--goldensize-rocm-unet-bytes 2054938 \
--goldensize-rocm-clip-bytes 780328 \
--goldensize-rocm-vae-bytes 758509 \
--gpu-number 0 \
--rocm-chip gfx942 \
--log-cli-level=info \
--retries 7
echo "$(<job_summary.md )" >> $GITHUB_STEP_SUMMARY
28 changes: 28 additions & 0 deletions build_tools/pkgci/external_test_suite/models_gpu_rocm_gfx942.json
@@ -0,0 +1,28 @@
{
"config_name": "gpu_rocm",
"iree_compile_flags": [
"--iree-hal-target-backends=rocm",
"--iree-rocm-target-chip=gfx942",
"--iree-input-demote-f64-to-f32"
],
"iree_run_module_flags": [
"--device=hip"
],
"skip_compile_tests": [
"pytorch/models/sdxl-scheduled-unet-3-tank",
"pytorch/models/sdxl-prompt-encoder-tank",
"pytorch/models/sdxl-vae-decode-tank"
],
"skip_run_tests": [],
"expected_compile_failures": [
// TODO(#17344): need to regenerate .mlirbc
"pytorch/models/opt-125M",
"pytorch/models/resnet50",
"pytorch/models/sdxl-vae-decode-tank",

// error: 'builtin.module' op failed to run transform dialect passes
// (transform spec file is specific to SDXL?)
"sharktank/llama/open-llama-3b-v2-f16"
],
"expected_run_failures": []
}
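This config drives the external model suite on gfx942: each model is compiled with iree_compile_flags and executed with iree_run_module_flags, with the skip and expected-failure lists carving out known issues. Per model, the harness is assumed to do roughly the following (model.mlirbc and model.vmfb are placeholder names, not the suite's actual artifacts):

    # Rough sketch of one compile-and-run cycle under this config (placeholder file names).
    iree-compile model.mlirbc \
      --iree-hal-target-backends=rocm \
      --iree-rocm-target-chip=gfx942 \
      --iree-input-demote-f64-to-f32 \
      -o model.vmfb
    iree-run-module --module=model.vmfb --device=hip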
25 changes: 25 additions & 0 deletions build_tools/pkgci/external_test_suite/models_gpu_rocm_gfx942_additional_flags.json
@@ -0,0 +1,25 @@
{
"config_name": "gpu_rocm",
"iree_compile_flags": [
"--iree-hal-target-backends=rocm",
"--iree-rocm-target-chip=gfx942",
"--iree-input-demote-f64-to-f32",
"--iree-opt-const-eval=false",
"--iree-codegen-transform-dialect-library=${IREE_TEST_PATH_EXTENSION}/attention_and_matmul_spec.mlir"
],
"iree_run_module_flags": [
"--device=hip"
],
"skip_compile_tests": [
"pytorch/models/sdxl-scheduled-unet-3-tank",
"pytorch/models/sdxl-prompt-encoder-tank",
"pytorch/models/sdxl-vae-decode-tank"
],
"skip_run_tests": [],
"expected_compile_failures": [
// TODO(#17344): need to regenerate .mlirbc
"pytorch/models/opt-125M",
"pytorch/models/resnet50"
],
"expected_run_failures": []
}
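The extra-flags variant differs from the base models config mainly by disabling constant evaluation and pointing the compiler at a transform dialect spec; ${IREE_TEST_PATH_EXTENSION} is assumed to be expanded by the test harness to the directory that ships attention_and_matmul_spec.mlir. Compiling a model under this config would look roughly like the following (placeholder paths and file names):

    # Rough sketch; the IREE_TEST_PATH_EXTENSION location and model names are placeholders.
    export IREE_TEST_PATH_EXTENSION=/path/to/iree_tests/specs
    iree-compile model.mlirbc \
      --iree-hal-target-backends=rocm \
      --iree-rocm-target-chip=gfx942 \
      --iree-input-demote-f64-to-f32 \
      --iree-opt-const-eval=false \
      --iree-codegen-transform-dialect-library="${IREE_TEST_PATH_EXTENSION}/attention_and_matmul_spec.mlir" \
      -o model.vmfb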
36 changes: 36 additions & 0 deletions build_tools/pkgci/external_test_suite/sdxl_prompt_encoder_gpu_rocm_gfx942.json
@@ -0,0 +1,36 @@
{
"config_name": "gpu_rocm",
"iree_compile_flags": [
"--iree-hal-target-backends=rocm",
"--iree-rocm-target-chip=gfx942",
"--iree-input-type=torch",
"--iree-opt-const-eval=false",
"--iree-global-opt-propagate-transposes=true",
"--iree-opt-outer-dim-concat=true",
"--iree-rocm-waves-per-eu=2",
"--iree-llvmgpu-enable-prefetch",
"--iree-flow-enable-aggressive-fusion",
"--iree-global-opt-enable-fuse-horizontal-contractions=true",
"--iree-opt-aggressively-propagate-transposes=true",
"--iree-codegen-llvmgpu-use-vector-distribution=true",
"--iree-execution-model=async-external",
"--iree-preprocessing-pass-pipeline=builtin.module(iree-preprocessing-transpose-convolution-pipeline, util.func(iree-preprocessing-pad-to-intrinsics{pad-target-type=conv}))",
"--iree-scheduling-dump-statistics-format=json",
"--iree-scheduling-dump-statistics-file=compilation_info.json"
],
"iree_run_module_flags": [
"--device=hip",
"--parameters=model=real_weights.irpa",
"--input=1x64xi64=@inference_input.0.bin",
"--input=1x64xi64=@inference_input.1.bin",
"--input=1x64xi64=@inference_input.2.bin",
"--input=1x64xi64=@inference_input.3.bin",
"--expected_output=2x64x2048xf16=@inference_output.0.bin",
"--expected_output=2x1280xf16=@inference_output.1.bin",
"--expected_f16_threshold=1.0f"
],
"skip_compile_tests": [],
"skip_run_tests": [],
"expected_compile_failures": [],
"expected_run_failures": []
}
41 changes: 41 additions & 0 deletions build_tools/pkgci/external_test_suite/sdxl_scheduled_unet_gpu_rocm_gfx942.json
@@ -0,0 +1,41 @@
{
"config_name": "gpu_rocm",
"iree_compile_flags" : [
"--iree-hal-target-backends=rocm",
"--iree-rocm-target-chip=gfx942",
"--iree-opt-const-eval=false",
"--iree-codegen-transform-dialect-library=${IREE_TEST_PATH_EXTENSION}/attention_and_matmul_spec.mlir",
"--iree-global-opt-propagate-transposes=true",
"--iree-global-opt-enable-fuse-horizontal-contractions=true",
"--iree-flow-enable-aggressive-fusion=true",
"--iree-opt-aggressively-propagate-transposes=true",
"--iree-opt-outer-dim-concat=true",
"--iree-vm-target-truncate-unsupported-floats",
"--iree-llvmgpu-enable-prefetch=true",
"--iree-opt-data-tiling=false",
"--iree-codegen-gpu-native-math-precision=true",
"--iree-codegen-llvmgpu-use-vector-distribution",
"--iree-rocm-waves-per-eu=2",
"--iree-execution-model=async-external",
"--iree-preprocessing-pass-pipeline=builtin.module(iree-preprocessing-transpose-convolution-pipeline, util.func(iree-preprocessing-pad-to-intrinsics))",
"--iree-scheduling-dump-statistics-format=json",
"--iree-scheduling-dump-statistics-file=compilation_info.json"
],
"iree_run_module_flags": [
"--device=hip",
"--parameters=model=real_weights.irpa",
"--module=sdxl_scheduled_unet_pipeline_fp16_rocm.vmfb",
"--input=1x4x128x128xf16=@inference_input.0.bin",
"--input=2x64x2048xf16=@inference_input.1.bin",
"--input=2x1280xf16=@inference_input.2.bin",
"--input=1xf16=@inference_input.3.bin",
"--expected_output=1x4x128x128xf16=@inference_output.0.bin",
"--expected_f16_threshold=0.7f"
],
"skip_compile_tests": [],
"skip_run_tests": [],
"expected_compile_failures": [],
"expected_run_failures": [
"pytorch/models/sdxl-scheduled-unet-3-tank",
]
}
29 changes: 29 additions & 0 deletions build_tools/pkgci/external_test_suite/sdxl_vae_decode_gpu_rocm_gfx942.json
@@ -0,0 +1,29 @@
{
"config_name": "gpu_rocm",
"iree_compile_flags" : [
"--iree-hal-target-backends=rocm",
"--iree-rocm-target-chip=gfx942",
"--iree-opt-const-eval=false",
"--iree-global-opt-propagate-transposes=true",
"--iree-opt-outer-dim-concat=true",
"--iree-llvmgpu-enable-prefetch=true",
"--iree-rocm-waves-per-eu=2",
"--iree-flow-enable-aggressive-fusion",
"--iree-codegen-llvmgpu-use-vector-distribution=true",
"--iree-execution-model=async-external",
"--iree-preprocessing-pass-pipeline=builtin.module(iree-preprocessing-transpose-convolution-pipeline, util.func(iree-preprocessing-pad-to-intrinsics))",
"--iree-scheduling-dump-statistics-format=json",
"--iree-scheduling-dump-statistics-file=compilation_info.json"
],
"iree_run_module_flags": [
"--device=hip",
"--parameters=model=real_weights.irpa",
"--input=1x4x128x128xf16=@inference_input.0.bin",
"--expected_output=1x3x1024x1024xf16=@inference_output.0.bin",
"--expected_f16_threshold=0.4f"
],
"skip_compile_tests": [],
"skip_run_tests": [],
"expected_compile_failures": [],
"expected_run_failures": []
}
