Skip to content

Commit

Permalink
Release - SuperBench v0.3.0 (#212)
Browse files Browse the repository at this point in the history
**Description**

Cherry-pick bug fixes from v0.3.0 to main.

**Major Revisions**
* Docs - Upgrade version and release note (#209)
* Benchmarks: Build Pipeline - Update rccl-test git submodule to dc1ad48 (#210)
* Benchmarks: Update - Update benchmarks in configuration file (#208)
* CI/CD - Update GitHub Action VM (#211)
* Benchmarks: Fix Bug - Fix wrong parameters for gpu-sm-copy-bw in configuration examples (#203)
* CI/CD - Fix bug in build image for push event (#205)
* Benchmark: Fix Bug - fix error message of communication-computation-overlap (#204)
* Tool: Fix bug - Fix function naming issue in system info (#200)
* CI/CD - Push images in GitHub Action (#202)
* Bug - Fix torch.distributed command for single node (#201)
* CLI - Integrate system info for node (#199)
* Benchmarks: Code Revision - Revise CMake files for microbenchmarks. (#196)
* CI/CD - Add ROCm image build in GitHub Actions (#194)
* Bug: Fix bug - fix bug of hipBusBandwidth build (#193)
* Benchmarks: Build Pipeline - Restore rocblas build logic (#197)
* Bug: Fix Bug - Add barrier before 'destroy_process_group' in model benchmarks (#198)
* Bug - Revise 'docker run' in sb deploy (#195)
* Bug - Fix Bug: fix wrong parameter name 'operations' (should be 'operation') in rccl-bw of hpe config (#190)

Co-authored-by: Yuting Jiang <v-yujiang@microsoft.com>
Co-authored-by: Guoshuai Zhao <guzhao@microsoft.com>
Co-authored-by: Ziyue Yang <ziyyang@microsoft.com>
  • Loading branch information
4 people committed Sep 26, 2021
1 parent 37b15db commit dfbd70b
Show file tree
Hide file tree
Showing 35 changed files with 538 additions and 251 deletions.
57 changes: 47 additions & 10 deletions .github/workflows/build-image.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,15 +4,32 @@ on:
push:
branches:
- main
- release/*
pull_request:
branches:
- main
- release/*
release:
types:
- published
workflow_dispatch:

jobs:
docker:
name: Docker build
name: Docker build ${{ matrix.name }}
runs-on: ubuntu-latest
permissions:
contents: read
packages: write
strategy:
matrix:
include:
- name: cuda11.1.1
tags: superbench/main:cuda11.1.1,superbench/superbench:latest
- name: rocm4.2-pytorch1.7.0
tags: superbench/main:rocm4.2-pytorch1.7.0
- name: rocm4.0-pytorch1.7.0
tags: superbench/main:rocm4.0-pytorch1.7.0
steps:
- name: Checkout
uses: actions/checkout@v2
Expand All @@ -26,43 +43,63 @@ jobs:
done
sudo apt-get clean
df -h
echo 'nproc: '$(nproc)
- name: Prepare metadata
id: metadata
run: |
DOCKER_IMAGE=superbench/superbench
IMAGE_TAG=latest
TAGS=${{ matrix.tags }}
if [[ "${{ github.event_name }}" == "push" ]] && [[ "${{ github.ref }}" == "refs/heads/release/"* ]]; then
TAGS=$(sed "s/main:/release:${GITHUB_REF##*/}-/g" <<< ${TAGS})
fi
if [[ "${{ github.event_name }}" == "pull_request" ]] && [[ "${{ github.base_ref }}" == "release/"* ]]; then
TAGS=$(sed "s/main:/release:${GITHUB_BASE_REF##*/}-/g" <<< ${TAGS})
fi
if [[ "${{ github.event_name }}" == "release" ]]; then
TAGS=$(sed "s/main:/superbench:${GITHUB_REF##*/}-/g" <<< ${TAGS})
GHCR_TAG=$(cut -d, -f1 <<< ${TAGS} | sed "s#superbench/superbench#ghcr.io/${{ github.repository }}/superbench#g")
TAGS="${TAGS},${GHCR_TAG}"
fi
if [[ "${{ github.event_name }}" == "workflow_dispatch" ]]; then
TAGS=$(sed "s/main:/dev:/g" <<< ${TAGS})
fi
DOCKERFILE=dockerfile/${{ matrix.name }}.dockerfile
DOCKERFILE=dockerfile/cuda11.1.1.dockerfile
TAGS="${DOCKER_IMAGE}:${IMAGE_TAG}"
CACHE_FROM="type=registry,ref=${DOCKER_IMAGE}:${IMAGE_TAG}"
CACHE_FROM="type=registry,ref=$(cut -d, -f1 <<< ${TAGS})"
CACHE_TO=""
if [ "${{ github.event_name }}" = "push" ]; then
if [[ "${{ github.event_name }}" != "pull_request" ]]; then
CACHE_TO="type=inline,mode=max"
fi
echo ::set-output name=dockerfile::${DOCKERFILE}
echo ::set-output name=tags::${TAGS}
echo ::set-output name=cache_from::${CACHE_FROM}
echo ::set-output name=cache_to::${CACHE_TO}
- name: Echo image tag
run: echo ${{ steps.metadata.outputs.tags }}
- name: Set up QEMU
uses: docker/setup-qemu-action@v1
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v1
- name: Login to Docker Hub
uses: docker/login-action@v1
if: ${{ github.event_name == 'push' }}
if: ${{ github.event_name != 'pull_request' }}
with:
username: ${{ secrets.DOCKERHUB_USERNAME }}
password: ${{ secrets.DOCKERHUB_TOKEN }}
- name: Login to the GitHub Container Registry
uses: docker/login-action@v1
if: ${{ github.event_name == 'release' }}
with:
registry: ghcr.io
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}
- name: Build and push
id: docker_build
uses: docker/build-push-action@v2
with:
platforms: linux/amd64
context: .
file: ${{ steps.metadata.outputs.dockerfile }}
push: ${{ github.event_name == 'push' }}
push: ${{ github.event_name != 'pull_request' }}
tags: ${{ steps.metadata.outputs.tags }}
cache-from: ${{ steps.metadata.outputs.cache_from }}
cache-to: ${{ steps.metadata.outputs.cache_to }}
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/lint.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ on:
jobs:
spelling:
name: Spelling check
runs-on: ubuntu-16.04
runs-on: ubuntu-latest
steps:
- name: Checkout
uses: actions/checkout@v2
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@

__SuperBench__ is a validation and profiling tool for AI infrastructure.

📢 [v0.2.1](https://github.com/microsoft/superbenchmark/releases/tag/v0.2.1) has been released!
📢 [v0.3.0](https://github.com/microsoft/superbenchmark/releases/tag/v0.3.0) has been released!

## _Check [aka.ms/superbench](https://aka.ms/superbench) for more details._

Expand Down
2 changes: 1 addition & 1 deletion dockerfile/rocm4.0-pytorch1.7.0.dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ ENV PATH="${PATH}" \
WORKDIR ${SB_HOME}

ADD third_party third_party
RUN ROCM_VERSION=rocm-4.0.0 make -j -C third_party rocm
RUN ROCM_VERSION=rocm-4.0.0 make -j -C third_party -o rocm_rocblas rocm

# Workaround for image having package installed in user path
RUN mv /root/.local/bin/* /opt/conda/bin/ && \
Expand Down
5 changes: 4 additions & 1 deletion docs/developer-guides/using-docker.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,10 @@ docker buildx build \
<TabItem value='rocm'>

```bash
# coming soon
export DOCKER_BUILDKIT=1
docker buildx build \
--platform linux/amd64 --cache-to type=inline,mode=max \
--tag superbench-dev --file dockerfile/rocm4.2-pytorch1.7.0.dockerfile .
```

</TabItem>
Expand Down
2 changes: 1 addition & 1 deletion docs/getting-started/installation.md
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ You can clone the source from GitHub and build it.
:::note Note
You should check out the corresponding tag to use a release version, for example,

`git clone -b v0.2.1 https://github.com/microsoft/superbenchmark`
`git clone -b v0.3.0 https://github.com/microsoft/superbenchmark`
:::

```bash
Expand Down
2 changes: 1 addition & 1 deletion docs/getting-started/run-superbench.md
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ sb deploy -f remote.ini --host-password [password]
:::note Note
You should deploy the corresponding Docker image to use a release version, for example,

`sb deploy -f local.ini -i superbench/superbench:v0.2.1-cuda11.1.1`
`sb deploy -f local.ini -i superbench/superbench:v0.3.0-cuda11.1.1`
:::

## Run
Expand Down
2 changes: 1 addition & 1 deletion docs/superbench-config.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ superbench:
<TabItem value='example'>

```yaml
version: v0.2
version: v0.3
superbench:
enable: benchmark_1
var:
Expand Down
6 changes: 5 additions & 1 deletion docs/tutorial/container-images.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -29,13 +29,17 @@ available tags are listed below for all stable versions.

| Tag | Description |
| ----------------- | ---------------------------------- |
| v0.3.0-cuda11.1.1 | SuperBench v0.3.0 with CUDA 11.1.1 |
| v0.2.1-cuda11.1.1 | SuperBench v0.2.1 with CUDA 11.1.1 |
| v0.2.0-cuda11.1.1 | SuperBench v0.2.0 with CUDA 11.1.1 |

</TabItem>
<TabItem value='rocm'>

Coming soon.
| Tag | Description |
| --------------------------- | ---------------------------------------------- |
| v0.3.0-rocm4.2-pytorch1.7.0 | SuperBench v0.3.0 with ROCm 4.2, PyTorch 1.7.0 |
| v0.3.0-rocm4.0-pytorch1.7.0 | SuperBench v0.3.0 with ROCm 4.0, PyTorch 1.7.0 |

</TabItem>
</Tabs>
2 changes: 1 addition & 1 deletion superbench/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,5 +6,5 @@
Provide hardware and software benchmarks for AI systems.
"""

__version__ = '0.2.1'
__version__ = '0.3.0'
__author__ = 'Microsoft'
4 changes: 3 additions & 1 deletion superbench/benchmarks/build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
# Copyright (c) Microsoft Corporation - All rights reserved
# Licensed under the MIT License

set -e

SB_MICRO_PATH="${SB_MICRO_PATH:-/usr/local}"

Expand All @@ -12,6 +13,7 @@ for dir in micro_benchmarks/*/ ; do
BUILD_ROOT=$dir/build
mkdir -p $BUILD_ROOT
cmake -DCMAKE_INSTALL_PREFIX=$SB_MICRO_PATH -DCMAKE_BUILD_TYPE=Release -S $SOURCE_DIR -B $BUILD_ROOT
cmake --build $BUILD_ROOT --target install
cmake --build $BUILD_ROOT
cmake --install $BUILD_ROOT
fi
done
Original file line number Diff line number Diff line change
Expand Up @@ -264,11 +264,7 @@ def _postprocess(self):
torch.distributed.destroy_process_group()
except BaseException as e:
self._result.set_return_code(ReturnCode.DISTRIBUTED_SETTING_DESTROY_FAILURE)
logger.error(
'Post process failed - benchmark: {}, mode: {}, message: {}.'.format(
self._name, self._args.mode, str(e)
)
)
logger.error('Post process failed - benchmark: {}, message: {}.'.format(self._name, str(e)))
return False

return True
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,9 @@
cmake_minimum_required(VERSION 3.18)
project(cublas_benchmark LANGUAGES CXX)

include(../cuda_common.cmake)
find_package(CUDAToolkit QUIET)
if(CUDAToolkit_FOUND)
include(../cuda_common.cmake)
set(SRC "cublas_helper.cpp" CACHE STRING "source file")
set(TARGET_NAME "cublas_function" CACHE STRING "target name")

Expand All @@ -25,8 +25,8 @@ if(CUDAToolkit_FOUND)
add_subdirectory(${json_SOURCE_DIR} ${json_BINARY_DIR} EXCLUDE_FROM_ALL)
endif()

add_executable(cublas_benchmark cublas_test.cpp)
target_link_libraries(cublas_benchmark ${TARGET_NAME} nlohmann_json::nlohmann_json CUDA::cudart CUDA::cublas)
add_executable(cublas_benchmark cublas_test.cpp)
target_link_libraries(cublas_benchmark ${TARGET_NAME} nlohmann_json::nlohmann_json CUDA::cudart CUDA::cublas)

install(TARGETS cublas_benchmark ${TARGET_NAME} RUNTIME DESTINATION bin LIBRARY DESTINATION lib)
endif()
2 changes: 2 additions & 0 deletions superbench/benchmarks/micro_benchmarks/cuda_common.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@ if(NOT DEFINED CMAKE_CUDA_STANDARD)
set(CMAKE_CUDA_STANDARD_REQUIRED ON)
endif()

enable_language(CUDA)

if(NOT DEFINED NVCC_ARCHS_SUPPORTED)
# Reference: https://github.com/NVIDIA/cutlass/blob/0e137486498a52954eff239d874ee27ab23358e7/CMakeLists.txt#L89
set(NVCC_ARCHS_SUPPORTED "")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,9 @@
cmake_minimum_required(VERSION 3.18)
project(cudnn_benchmark LANGUAGES CXX)

include(../cuda_common.cmake)
find_package(CUDAToolkit QUIET)
if(CUDAToolkit_FOUND)
include(../cuda_common.cmake)
set(SRC "cudnn_helper.cpp" CACHE STRING "source file")
set(TARGET_NAME "cudnn_function" CACHE STRING "target name")

Expand All @@ -28,7 +28,7 @@ if(CUDAToolkit_FOUND)
add_subdirectory(${json_SOURCE_DIR} ${json_BINARY_DIR} EXCLUDE_FROM_ALL)
endif()

add_executable(cudnn_benchmark cudnn_test.cpp)
target_link_libraries(cudnn_benchmark ${TARGET_NAME} nlohmann_json::nlohmann_json CUDA::cudart ${CUDNN_LIBRARY})
add_executable(cudnn_benchmark cudnn_test.cpp)
target_link_libraries(cudnn_benchmark ${TARGET_NAME} nlohmann_json::nlohmann_json CUDA::cudart ${CUDNN_LIBRARY})
install(TARGETS cudnn_benchmark ${TARGET_NAME} RUNTIME DESTINATION bin LIBRARY DESTINATION lib)
endif()
Original file line number Diff line number Diff line change
Expand Up @@ -5,36 +5,34 @@ cmake_minimum_required(VERSION 3.18)

project(gpu_sm_copy LANGUAGES CXX)

include(../cuda_common.cmake)
find_package(CUDAToolkit QUIET)
include(../rocm_common.cmake)
find_package(HIP QUIET)

# Cuda environment
if(CUDAToolkit_FOUND)
message(STATUS "Found CUDA: " ${CUDAToolkit_VERSION})
enable_language(CUDA)

include(../cuda_common.cmake)
add_executable(gpu_sm_copy gpu_sm_copy.cu)
set_property(TARGET gpu_sm_copy PROPERTY CUDA_ARCHITECTURES ${NVCC_ARCHS_SUPPORTED})
install(TARGETS gpu_sm_copy RUNTIME DESTINATION bin)

# ROCm environment
elseif(HIP_FOUND)
message(STATUS "Found ROCm: " ${HIP_VERSION})

# Convert cuda code to hip code inplace
execute_process(COMMAND hipify-perl -inplace -print-stats gpu_sm_copy.cu
WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/)

# Add HIP targets
set_source_files_properties(gpu_sm_copy.cu PROPERTIES HIP_SOURCE_PROPERTY_FORMAT 1)
# Link with HIP
hip_add_executable(gpu_sm_copy gpu_sm_copy.cu)
# Install targets
install(TARGETS gpu_sm_copy RUNTIME DESTINATION bin)

else()
message(FATAL_ERROR "No CUDA or ROCm environment found.")
# ROCm environment
include(../rocm_common.cmake)
find_package(HIP QUIET)
if(HIP_FOUND)
message(STATUS "Found ROCm: " ${HIP_VERSION})

# Convert cuda code to hip code inplace
execute_process(COMMAND hipify-perl -inplace -print-stats gpu_sm_copy.cu
WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/)

# Add HIP targets
set_source_files_properties(gpu_sm_copy.cu PROPERTIES HIP_SOURCE_PROPERTY_FORMAT 1)
# Link with HIP
hip_add_executable(gpu_sm_copy gpu_sm_copy.cu)
# Install targets
install(TARGETS gpu_sm_copy RUNTIME DESTINATION bin)
else()
message(FATAL_ERROR "No CUDA or ROCm environment found.")
endif()
endif()

Original file line number Diff line number Diff line change
Expand Up @@ -5,36 +5,34 @@ cmake_minimum_required(VERSION 3.18)

project(kernel_launch_overhead LANGUAGES CXX)

include(../cuda_common.cmake)
find_package(CUDAToolkit QUIET)
include(../rocm_common.cmake)
find_package(HIP QUIET)

# Cuda environment
if(CUDAToolkit_FOUND)
message(STATUS "Found CUDA: " ${CUDAToolkit_VERSION})
enable_language(CUDA)

include(../cuda_common.cmake)
add_executable(kernel_launch_overhead kernel_launch.cu)
set_property(TARGET kernel_launch_overhead PROPERTY CUDA_ARCHITECTURES ${NVCC_ARCHS_SUPPORTED})
install(TARGETS kernel_launch_overhead RUNTIME DESTINATION bin)

# ROCm environment
elseif(HIP_FOUND)
message(STATUS "Found HIP: " ${HIP_VERSION})

# Convert cuda code to hip code inplace
execute_process(COMMAND hipify-perl -inplace -print-stats kernel_launch.cu
WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/)

# Add HIP targets
set_source_files_properties(kernel_launch.cu PROPERTIES HIP_SOURCE_PROPERTY_FORMAT 1)
# Link with HIP
hip_add_executable(kernel_launch_overhead kernel_launch.cu)
# Install targets
install(TARGETS kernel_launch_overhead RUNTIME DESTINATION bin)

else()
message(FATAL_ERROR "No CUDA or ROCm environment found.")
# ROCm environment
include(../rocm_common.cmake)
find_package(HIP QUIET)
if(HIP_FOUND)
message(STATUS "Found HIP: " ${HIP_VERSION})

# Convert cuda code to hip code inplace
execute_process(COMMAND hipify-perl -inplace -print-stats kernel_launch.cu
WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/)

# Add HIP targets
set_source_files_properties(kernel_launch.cu PROPERTIES HIP_SOURCE_PROPERTY_FORMAT 1)
# Link with HIP
hip_add_executable(kernel_launch_overhead kernel_launch.cu)
# Install targets
install(TARGETS kernel_launch_overhead RUNTIME DESTINATION bin)
else()
message(FATAL_ERROR "No CUDA or ROCm environment found.")
endif()
endif()

0 comments on commit dfbd70b

Please sign in to comment.