diff --git a/.github/workflows/build-image.yml b/.github/workflows/build-image.yml
index 1be2b624a..f1d7fe5db 100644
--- a/.github/workflows/build-image.yml
+++ b/.github/workflows/build-image.yml
@@ -17,7 +17,7 @@ on:
 jobs:
   docker:
     name: Docker build ${{ matrix.name }}
-    runs-on: ubuntu-latest
+    runs-on: ${{ matrix.runner }}
     permissions:
       contents: read
       packages: write
@@ -27,29 +27,15 @@ jobs:
           - name: cuda12.2
             dockerfile: cuda12.2
             tags: superbench/main:cuda12.2
+            runner: ubuntu-latest
           - name: cuda11.1.1
             dockerfile: cuda11.1.1
             tags: superbench/main:cuda11.1.1,superbench/superbench:latest
-          - name: rocm5.1.3
-            dockerfile: rocm5.1.x
-            tags: superbench/main:rocm5.1.3
-            extra_args: >-
-              BASE_IMAGE=rocm/pytorch:rocm5.1.3_ubuntu20.04_py3.7_pytorch_1.11.0
-          - name: rocm5.1.1
-            dockerfile: rocm5.1.x
-            tags: superbench/main:rocm5.1.1
-            extra_args: >-
-              BASE_IMAGE=rocm/pytorch:rocm5.1.1_ubuntu20.04_py3.7_pytorch_1.10.0
-          - name: rocm5.0.1
-            dockerfile: rocm5.0.x
-            tags: superbench/main:rocm5.0.1
-            extra_args: >-
-              BASE_IMAGE=rocm/pytorch:rocm5.0.1_ubuntu18.04_py3.7_pytorch_1.9.0
-          - name: rocm5.0
-            dockerfile: rocm5.0.x
-            tags: superbench/main:rocm5.0
-            extra_args: >-
-              BASE_IMAGE=rocm/pytorch:rocm5.0_ubuntu18.04_py3.7_pytorch_1.9.0
+            runner: ubuntu-latest
+          - name: rocm5.7
+            dockerfile: rocm5.7.x
+            tags: superbench/main:rocm5.7
+            runner: [self-hosted, rocm-build]
     steps:
       - name: Checkout
         uses: actions/checkout@v2
@@ -57,24 +43,18 @@ jobs:
           submodules: recursive
       - name: Free disk space
         run: |
-          mkdir /tmp/emptydir
+          mkdir -p /tmp/emptydir
           for dir in /usr/share/swift /usr/share/dotnet /usr/local/share/powershell /usr/local/share/chromium /usr/local/lib/android /opt/ghc; do
             sudo rsync -a --delete /tmp/emptydir/ ${dir}
           done
           sudo apt-get clean
-          sudo docker rmi $(sudo docker images --format "{{.Repository}}:{{.Tag}}" --filter=reference="node" --filter=reference="buildpack-deps")
+          # Check if Docker images exist before trying to remove them
+          if sudo docker images -q --filter=reference="node" --filter=reference="buildpack-deps" | grep -q .; then
+            sudo docker rmi $(sudo docker images --format "{{.Repository}}:{{.Tag}}" --filter=reference="node" --filter=reference="buildpack-deps")
+          else
+            echo "No Docker images found with the specified references."
+          fi
           df -h
-      - name: Free Up GitHub Actions Ubuntu Runner Disk Space 🔧
-        uses: hirnidrin/free-disk-space@main
-        with:
-          # This might remove tools that are actually needed, if set to "true" but frees about 6 GB
-          tool-cache: false
-          # All of these default to true, but feel free to set to "false" if necessary for your workflow
-          android: true
-          dotnet: true
-          haskell: true
-          large-packages: true
-          swap-storage: true
       - name: Prepare metadata
         id: metadata
         run: |
diff --git a/dockerfile/rocm5.7.x.dockerfile b/dockerfile/rocm5.7.x.dockerfile
new file mode 100644
index 000000000..ee762e9ee
--- /dev/null
+++ b/dockerfile/rocm5.7.x.dockerfile
@@ -0,0 +1,175 @@
+ARG BASE_IMAGE=rocm/pytorch:rocm5.7_ubuntu22.04_py3.10_pytorch_2.0.1
+
+FROM ${BASE_IMAGE}
+
+# OS:
+#   - Ubuntu: 22.04
+#   - Docker Client: 20.10.8
+# ROCm:
+#   - ROCm: 5.7
+# Intel:
+#   - mlc: v3.10
+
+LABEL maintainer="SuperBench"
+
+ENV DEBIAN_FRONTEND=noninteractive
+RUN apt-get update && \
+    apt-get -q install -y --no-install-recommends \
+    autoconf \
+    automake \
+    build-essential \
+    curl \
+    dmidecode \
+    git \
+    hipify-clang \
+    iproute2 \
+    jq \
+    libaio-dev \
+    libboost-program-options-dev \
+    libcap2 \
+    libnuma-dev \
+    libpci-dev \
+    libssl-dev \
+    libtinfo5 \
+    libtool \
+    lshw \
+    net-tools \
+    numactl \
+    openssh-client \
+    openssh-server \
+    pciutils \
+    rsync \
+    sudo \
+    util-linux \
+    vim \
+    wget \
+    && \
+    rm -rf /tmp/*
+
+ARG NUM_MAKE_JOBS=16
+
+# Build CMake from source unless the installed version (full x.y.z) is already >= required_version
+RUN cmake_version=$(cmake --version 2>/dev/null | grep -oP "(?<=cmake version )\d+(\.\d+)+" || echo "0.0") && \
+    required_version="3.26.4" && \
+    if [ "$(printf "%s\n" "$required_version" "$cmake_version" | sort -V | head -n 1)" != "$required_version" ]; then \
+        echo "existing cmake version is ${cmake_version}" && \
+        cd /tmp && \
+        wget -q https://github.com/Kitware/CMake/releases/download/v${required_version}/cmake-${required_version}.tar.gz && \
+        tar xzf cmake-${required_version}.tar.gz && \
+        cd cmake-${required_version} && \
+        ./bootstrap --prefix=/usr --no-system-curl --parallel=${NUM_MAKE_JOBS} && \
+        make -j ${NUM_MAKE_JOBS} && \
+        make install && \
+        rm -rf /tmp/cmake-${required_version}*; \
+    else \
+        echo "CMake version ${cmake_version} is greater than or equal to ${required_version}"; \
+    fi
+
+# Install Docker
+ENV DOCKER_VERSION=20.10.8
+RUN cd /tmp && \
+    wget -q https://download.docker.com/linux/static/stable/x86_64/docker-${DOCKER_VERSION}.tgz -O docker.tgz && \
+    tar --extract --file docker.tgz --strip-components 1 --directory /usr/local/bin/ && \
+    rm docker.tgz
+
+# Update system config
+RUN mkdir -p /root/.ssh && \
+    touch /root/.ssh/authorized_keys && \
+    mkdir -p /var/run/sshd && \
+    sed -i "s/[# ]*PermitRootLogin prohibit-password/PermitRootLogin yes/" /etc/ssh/sshd_config && \
+    sed -i "s/[# ]*PermitUserEnvironment no/PermitUserEnvironment yes/" /etc/ssh/sshd_config && \
+    sed -i "s/[# ]*Port.*/Port 22/" /etc/ssh/sshd_config && \
+    echo "* soft nofile 1048576\n* hard nofile 1048576" >> /etc/security/limits.conf && \
+    echo "root soft nofile 1048576\nroot hard nofile 1048576" >> /etc/security/limits.conf
+
+
+# Ubuntu release of the base image; a shell `export` does not survive its RUN layer, so pass it as a build arg (override when BASE_IMAGE changes)
+ARG UBUNTU_VERSION=22.04
+ENV UBUNTU_VERSION=${UBUNTU_VERSION}
+RUN echo "Ubuntu version: $UBUNTU_VERSION"
+
+# Install OFED
+ENV OFED_VERSION=5.9-0.5.6.0
+# Install only when ofed_info is not already on PATH
+RUN if ! command -v ofed_info >/dev/null 2>&1; then \
+        echo "OFED not found. Installing OFED..."; \
+        cd /tmp && \
+        wget -q http://content.mellanox.com/ofed/MLNX_OFED-${OFED_VERSION}/MLNX_OFED_LINUX-${OFED_VERSION}-ubuntu${UBUNTU_VERSION}-x86_64.tgz && \
+        tar xzf MLNX_OFED_LINUX-${OFED_VERSION}-ubuntu${UBUNTU_VERSION}-x86_64.tgz && \
+        PATH=/usr/bin:${PATH} MLNX_OFED_LINUX-${OFED_VERSION}-ubuntu${UBUNTU_VERSION}-x86_64/mlnxofedinstall --user-space-only --without-fw-update --force --all && \
+        rm -rf MLNX_OFED_LINUX-${OFED_VERSION}* ; \
+    fi
+
+# Install UCX
+ENV UCX_VERSION=1.14.1
+RUN if [ -z "$(ls -A /opt/ucx 2>/dev/null)" ]; then \
+        echo "/opt/ucx is empty. Installing UCX..."; \
+        cd /tmp && \
+        git clone https://github.com/openucx/ucx.git -b v${UCX_VERSION} && \
+        cd ucx && \
+        ./autogen.sh && \
+        mkdir build && \
+        cd build && \
+        ../configure --prefix=${UCX_DIR:-/opt/ucx} --with-rocm=/opt/rocm --without-knem && \
+        make -j $(nproc) && make -j $(nproc) install && cd / && rm -rf /tmp/ucx ; \
+    else \
+        echo "/opt/ucx is not empty. Skipping UCX installation."; \
+    fi
+
+# Install OpenMPI
+ENV OPENMPI_VERSION=4.1.x
+# Build Open MPI only when no mpirun executable is present (mpirun is a file, so test -x, not -d)
+RUN [ -x /usr/local/bin/mpirun ] || { \
+        echo "Open MPI not found. Installing Open MPI..." && \
+        cd /tmp && \
+        git clone --recursive https://github.com/open-mpi/ompi.git -b v${OPENMPI_VERSION} && \
+        cd ompi && \
+        ./autogen.pl && \
+        mkdir build && \
+        cd build && \
+        ../configure --prefix=/usr/local --enable-orterun-prefix-by-default --enable-mpirun-prefix-by-default --enable-prte-prefix-by-default --enable-mca-no-build=btl-uct --with-ucx=/opt/ucx --with-rocm=/opt/rocm && \
+        make -j $(nproc) && \
+        make -j $(nproc) install && \
+        ldconfig && \
+        cd / && \
+        rm -rf /tmp/ompi ;\
+    }
+
+# Install Intel MLC
+RUN cd /tmp && \
+    wget -q https://downloadmirror.intel.com/763324/mlc_v3.10.tgz -O mlc.tgz && \
+    tar xzf mlc.tgz Linux/mlc && \
+    cp ./Linux/mlc /usr/local/bin/ && \
+    rm -rf ./Linux mlc.tgz
+
+# Install RCCL
+RUN cd /opt/ && \
+    git clone https://github.com/ROCmSoftwarePlatform/rccl.git && \
+    cd rccl && \
+    mkdir build && \
+    cd build && \
+    CXX=/opt/rocm/bin/hipcc cmake -DCMAKE_PREFIX_PATH=/opt/rocm/ .. && \
+    make -j${NUM_MAKE_JOBS}
+
+ENV PATH="/opt/superbench/bin:/usr/local/bin/:/opt/rocm/hip/bin/:/opt/rocm/bin/:${PATH}" \
+    LD_PRELOAD="/opt/rccl/build/librccl.so:$LD_PRELOAD" \
+    LD_LIBRARY_PATH="/opt/ucx/lib:/usr/local/lib/:/opt/rocm/lib:${LD_LIBRARY_PATH}" \
+    SB_HOME=/opt/superbench \
+    SB_MICRO_PATH=/opt/superbench \
+    ANSIBLE_DEPRECATION_WARNINGS=FALSE \
+    ANSIBLE_COLLECTIONS_PATH=/usr/share/ansible/collections
+
+RUN echo PATH="$PATH" > /etc/environment && \
+    echo LD_LIBRARY_PATH="$LD_LIBRARY_PATH" >> /etc/environment && \
+    echo SB_MICRO_PATH="$SB_MICRO_PATH" >> /etc/environment
+
+WORKDIR ${SB_HOME}
+
+ADD . .
+RUN apt install rocm-cmake -y && \
+    python3 -m pip install --upgrade pip wheel setuptools==65.7 && \
+    python3 -m pip install .[amdworker] && \
+    make postinstall
+RUN make cppbuild
+ADD third_party third_party
+RUN make RCCL_HOME=/opt/rccl/build/ ROCBLAS_BRANCH=release/rocm-rel-5.7.1.1 HIPBLASLT_BRANCH=release-staging/rocm-rel-5.7 ROCM_VER=rocm-5.5.0 -C third_party rocm -o cpu_hpl -o cpu_stream -o megatron_lm
diff --git a/docs/developer-guides/using-docker.mdx b/docs/developer-guides/using-docker.mdx
index b73891853..a4b0fb1d2 100644
--- a/docs/developer-guides/using-docker.mdx
+++ b/docs/developer-guides/using-docker.mdx
@@ -39,7 +39,7 @@ docker buildx build \
 export DOCKER_BUILDKIT=1
 docker buildx build \
   --platform linux/amd64 --cache-to type=inline,mode=max \
-  --tag superbench-dev --file dockerfile/rocm5.1.x.dockerfile .
+  --tag superbench-dev --file dockerfile/rocm5.7.x.dockerfile .
 ```
 
 
diff --git a/setup.py b/setup.py
index c78988d11..a0b859c14 100644
--- a/setup.py
+++ b/setup.py
@@ -184,7 +184,7 @@ def run(self):
             **x,
             'develop': x['dev'] + x['test'],
             'cpuworker': x['torch'],
-            'amdworker': x['torch'] + x['ort'] + x['amd'],
+            'amdworker': x['torch'] + x['amd'],
             'nvworker': x['torch'] + x['ort'] + x['nvidia'],
         }
     )(
diff --git a/superbench/benchmarks/micro_benchmarks/gpu_copy_performance/CMakeLists.txt b/superbench/benchmarks/micro_benchmarks/gpu_copy_performance/CMakeLists.txt
index 6b9b2d38f..9d12f90ce 100644
--- a/superbench/benchmarks/micro_benchmarks/gpu_copy_performance/CMakeLists.txt
+++ b/superbench/benchmarks/micro_benchmarks/gpu_copy_performance/CMakeLists.txt
@@ -14,25 +14,24 @@ if(CUDAToolkit_FOUND)
     include(../cuda_common.cmake)
     add_executable(gpu_copy gpu_copy.cu)
     set_property(TARGET gpu_copy PROPERTY CUDA_ARCHITECTURES ${NVCC_ARCHS_SUPPORTED})
+    target_link_libraries(gpu_copy numa)
 else()
     # ROCm environment
     include(../rocm_common.cmake)
-    find_package(HIP QUIET)
-    if(HIP_FOUND)
+    find_package(hip QUIET)
+    if(hip_FOUND)
         message(STATUS "Found ROCm: " ${HIP_VERSION})
 
-        # Convert cuda code to hip code inplace
-        execute_process(COMMAND hipify-perl -inplace -print-stats gpu_copy.cu
-            WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/)
+        # Convert cuda code to hip code, written to a separate .cpp so the .cu source stays untouched
+        execute_process(COMMAND hipify-perl -print-stats -o gpu_copy.cpp gpu_copy.cu WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/)
 
-        # Add HIP targets
-        set_source_files_properties(gpu_copy.cu PROPERTIES HIP_SOURCE_PROPERTY_FORMAT 1)
-        # Link with HIP
-        hip_add_executable(gpu_copy gpu_copy.cu)
+        # Build the hipified source and link the hip device lib; -O2 is set on the target itself
+        add_executable(gpu_copy gpu_copy.cpp)
+        target_compile_options(gpu_copy PRIVATE -O2)
+        target_link_libraries(gpu_copy numa hip::device)
     else()
         message(FATAL_ERROR "No CUDA or ROCm environment found.")
     endif()
 endif()
 
-install(TARGETS gpu_copy RUNTIME DESTINATION bin)
-target_link_libraries(gpu_copy numa)
+install(TARGETS gpu_copy RUNTIME DESTINATION bin)
diff --git a/superbench/benchmarks/micro_benchmarks/kernel_launch_overhead/CMakeLists.txt b/superbench/benchmarks/micro_benchmarks/kernel_launch_overhead/CMakeLists.txt
index 3c98bc7da..5f890e0f3 100644
--- a/superbench/benchmarks/micro_benchmarks/kernel_launch_overhead/CMakeLists.txt
+++ b/superbench/benchmarks/micro_benchmarks/kernel_launch_overhead/CMakeLists.txt
@@ -18,18 +18,16 @@ if(CUDAToolkit_FOUND)
 else()
     # ROCm environment
     include(../rocm_common.cmake)
-    find_package(HIP QUIET)
-    if(HIP_FOUND)
+    find_package(hip QUIET)
+    if(hip_FOUND)
         message(STATUS "Found HIP: " ${HIP_VERSION})
 
-        # Convert cuda code to hip code inplace
-        execute_process(COMMAND hipify-perl -inplace -print-stats kernel_launch.cu
-            WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/)
+        # Convert cuda code to hip code, written to a separate .cpp so the .cu source stays untouched
+        execute_process(COMMAND hipify-perl -print-stats -o kernel_launch.cpp kernel_launch.cu WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/)
 
-        # Add HIP targets
-        set_source_files_properties(kernel_launch.cu PROPERTIES HIP_SOURCE_PROPERTY_FORMAT 1)
-        # Link with HIP
-        hip_add_executable(kernel_launch_overhead kernel_launch.cu)
+        # Build the hipified source and link the hip device lib
+        add_executable(kernel_launch_overhead kernel_launch.cpp)
+        target_link_libraries(kernel_launch_overhead hip::device)
         # Install tergets
         install(TARGETS kernel_launch_overhead RUNTIME DESTINATION bin)
     else()
diff --git a/superbench/benchmarks/micro_benchmarks/rocm_common.cmake b/superbench/benchmarks/micro_benchmarks/rocm_common.cmake
index d42a25b5f..be60df127 100644
--- a/superbench/benchmarks/micro_benchmarks/rocm_common.cmake
+++ b/superbench/benchmarks/micro_benchmarks/rocm_common.cmake
@@ -3,14 +3,36 @@
 
 # Set ROCM_PATH
 if(NOT DEFINED ENV{ROCM_PATH})
-    set(ROCM_PATH /opt/rocm)
+    # Run hipconfig -R to get ROCm path
+    execute_process(
+        COMMAND hipconfig -R
+        RESULT_VARIABLE HIPCONFIG_RESULT
+        OUTPUT_VARIABLE ROCM_PATH
+        OUTPUT_STRIP_TRAILING_WHITESPACE
+    )
+
+    # Check if hipconfig was successful
+    if(NOT HIPCONFIG_RESULT EQUAL 0)
+        message(FATAL_ERROR "Failed to run hipconfig -R. Make sure ROCm is installed and hipconfig is available.")
+    endif()
+
 else()
     set(ROCM_PATH $ENV{ROCM_PATH})
 endif()
 
 # Set HIP_PATH
 if(NOT DEFINED ENV{HIP_PATH})
-    set(HIP_PATH ${ROCM_PATH}/hip)
+    execute_process(
+        COMMAND hipconfig -p
+        RESULT_VARIABLE HIPCONFIG_RESULT
+        OUTPUT_VARIABLE HIP_PATH
+        OUTPUT_STRIP_TRAILING_WHITESPACE
+    )
+
+    # Check if hipconfig was successful
+    if(NOT HIPCONFIG_RESULT EQUAL 0)
+        message(FATAL_ERROR "Failed to run hipconfig -p. Make sure ROCm is installed and hipconfig is available.")
+    endif()
 else()
     set(HIP_PATH $ENV{HIP_PATH})
 endif()
@@ -24,6 +46,8 @@ message(STATUS "CMAKE HIP ARCHITECTURES: ${CMAKE_HIP_ARCHITECTURES}")
 if(EXISTS ${HIP_PATH})
     # Search for hip in common locations
     list(APPEND CMAKE_PREFIX_PATH ${HIP_PATH} ${ROCM_PATH})
+    list(APPEND CMAKE_PREFIX_PATH /opt/rocm)
+    set(CMAKE_CXX_COMPILER "${HIP_PATH}/bin/hipcc")
     set(CMAKE_MODULE_PATH "${HIP_PATH}/cmake" ${CMAKE_MODULE_PATH})
     set(CMAKE_MODULE_PATH "${HIP_PATH}/lib/cmake/hip" ${CMAKE_MODULE_PATH})
 endif()
diff --git a/third_party/Makefile b/third_party/Makefile
index db0397cac..41550ea89 100755
--- a/third_party/Makefile
+++ b/third_party/Makefile
@@ -10,6 +10,7 @@ HPCX_HOME ?= /opt/hpcx
 
 CUDA_VER ?= $(shell nvcc --version | grep 'release' | awk '{print $$6}' | cut -c2- | cut -d '.' -f1-2)
 ROCBLAS_BRANCH ?= rocm-$(shell dpkg -l | grep 'rocm-dev ' | awk '{print $$3}' | cut -d '.' -f1-3)
+HIPBLASLT_BRANCH ?= rocm-$(shell dpkg -l | grep 'rocm-dev ' | awk '{print $$3}' | cut -d '.' -f1-3)
 
 .PHONY: all cuda_with_msccl cuda rocm common cuda_cutlass cuda_bandwidthTest cuda_nccl_tests cuda_perftest cuda_msccl rocm_perftest fio rocm_rccl_tests rocm_rocblas rocm_bandwidthTest gpcnet cuda_gpuburn cpu_stream cpu_hpl directx_amf_encoding_latency directx_amd rocm_hipblaslt megatron_lm megatron_deepspeed
 
@@ -97,12 +98,13 @@ endif
 # The version we use is the released tag which is consistent with the rocm version in the environment or docker.
 # Since it takes several hours to build, avoid to build again if rocblas-bench exsists.
 rocm_rocblas: sb_micro_path
-ifeq (, $(wildcard $(SB_MICRO_PATH)/bin/rocblas-bench))
-	if [ -d rocBLAS ]; then rm -rf rocBLAS; fi
-	git clone -b ${ROCBLAS_BRANCH} https://github.com/ROCmSoftwarePlatform/rocBLAS.git ./rocBLAS
-	cd ./rocBLAS && ./install.sh --dependencies --clients-only
-	cp -v ./rocBLAS/build/release/clients/staging/rocblas-bench $(SB_MICRO_PATH)/bin/
-endif
+	@if [ ! -e $(SB_MICRO_PATH)/bin/rocblas-bench ] && [ -z `which rocblas-bench` ]; then \
+		if [ -d rocBLAS ]; then rm -rf rocBLAS; fi; \
+		git clone -b ${ROCBLAS_BRANCH} https://github.com/ROCmSoftwarePlatform/rocBLAS.git ./rocBLAS; \
+		sed -i 's|#include "gemm.hpp"|#include "Tensile/gemm.hpp"|' rocBLAS/clients/benchmarks/../../library/src/blas3/rocblas_trtri.hpp; \
+		cd ./rocBLAS && ./install.sh --dependencies --clients-only; \
+		cp -v $(CURDIR)/rocBLAS/build/release/clients/staging/rocblas-bench $(SB_MICRO_PATH)/bin/; \
+	fi
 
 # Build hipblaslt-bench.
 # hipBLASLt is released with rocm, like rocm-4.2.0 and so on.
@@ -111,18 +113,18 @@ endif
 rocm_hipblaslt: sb_micro_path
 	@if [ ! -e $(SB_MICRO_PATH)/bin/hipblaslt-bench ] && [ -z `which hipblaslt-bench` ]; then \
 		if [ -d hipBLASLt ]; then rm -rf hipBLASLt; fi; \
-		git clone -b ${ROCBLAS_BRANCH} https://github.com/ROCmSoftwarePlatform/hipBLASLt.git ./hipBLASLt; \
+		git clone -b ${HIPBLASLT_BRANCH} https://github.com/ROCmSoftwarePlatform/hipBLASLt.git ./hipBLASLt; \
 		cd ./hipBLASLt && ./install.sh -dc; \
-		cp -v ./hipBLASLt/build/release/clients/staging/hipblaslt-bench $(SB_MICRO_PATH)/bin/; \
+		cp -v $(CURDIR)/hipBLASLt/build/release/clients/staging/hipblaslt-bench $(SB_MICRO_PATH)/bin/; \
 	fi
 
 # Build hipBusBandwidth.
 # HIP is released with rocm, like rocm-4.2.0 and so on.
 # The version we use is the released tag which is consistent with the rocm version in the environment or docker.
 rocm_bandwidthTest: sb_micro_path
-	cp -r -v $(shell hipconfig -p)/samples/1_Utils/hipBusBandwidth ./
-	cd ./hipBusBandwidth/ && mkdir -p build && cd build && cmake .. && make
-	cp -v ./hipBusBandwidth/build/hipBusBandwidth $(SB_MICRO_PATH)/bin/
+	rm -rf ./HIP && git clone -b ${ROCM_VER} https://github.com/ROCm-Developer-Tools/HIP.git
+	cd ./HIP/samples/1_Utils/hipBusBandwidth/ && mkdir -p build && cd build && cmake .. && make
+	cp -v ./HIP/samples/1_Utils/hipBusBandwidth/build/hipBusBandwidth $(SB_MICRO_PATH)/bin/
 
 # Build GPCNET from commit c56fd9.
 gpcnet: sb_micro_path