Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Release - SuperBench v0.3.0 #212

Merged
merged 18 commits into from
Sep 26, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
18 commits
Select commit Hold shift + click to select a range
fbfb58f
Bug - Fix Bug : fix bug of error param operations to operation in rcc…
Sep 9, 2021
3553381
Bug - Revise 'docker run' in sb deploy (#195)
Sep 13, 2021
c9fb724
Bug: Fix Bug - Add barrier before 'destroy_process_group' in model be…
Sep 13, 2021
6da800f
Benchmarks: Build Pipeline - Restore rocblas build logic (#197)
Sep 13, 2021
2c281ba
Bug: Fix bug - fix bug of hipBusBandwidth build (#193)
Sep 13, 2021
3b9edee
CI/CD - Add ROCm image build in GitHub Actions (#194)
abuccts Sep 13, 2021
2cf1331
Benchmarks: Code Revision - Revise CMake files for microbenchmarks. (…
guoshzhao Sep 14, 2021
11c0ba3
CLI - Integrate system info for node (#199)
abuccts Sep 16, 2021
e3266da
Bug - Fix torch.distributed command for single node (#201)
abuccts Sep 17, 2021
2c2cad0
CI/CD - Push images in GitHub Action (#202)
abuccts Sep 17, 2021
c6f76ce
Tool: Fix bug - Fix function naming issue in system info (#200)
Sep 18, 2021
43da0dd
Benchmark: Fix Bug - fix error message of communication-computation-o…
Sep 18, 2021
b5349ef
CI/CD - Fix bug in build image for push event (#205)
abuccts Sep 18, 2021
42465b0
Benchmarks: Fix Bug - Fix wrong parameters for gpu-sm-copy-bw in conf…
yzygitzh Sep 18, 2021
031be6a
CI/CD - Update GitHub Action VM (#211)
abuccts Sep 23, 2021
ddb0fd2
Benchmarks: Update - Update benchmarks in configuration file (#208)
Sep 23, 2021
d6cc73a
Benchmarks: Build Pipeline - Update rccl-test git submodule to dc1ad4…
Sep 23, 2021
b875c44
Docs - Upgrade version and release note (#209)
abuccts Sep 24, 2021
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
57 changes: 47 additions & 10 deletions .github/workflows/build-image.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,15 +4,32 @@ on:
push:
branches:
- main
- release/*
pull_request:
branches:
- main
- release/*
release:
types:
- published
workflow_dispatch:

jobs:
docker:
name: Docker build
name: Docker build ${{ matrix.name }}
runs-on: ubuntu-latest
permissions:
contents: read
packages: write
strategy:
matrix:
include:
- name: cuda11.1.1
tags: superbench/main:cuda11.1.1,superbench/superbench:latest
- name: rocm4.2-pytorch1.7.0
tags: superbench/main:rocm4.2-pytorch1.7.0
- name: rocm4.0-pytorch1.7.0
tags: superbench/main:rocm4.0-pytorch1.7.0
steps:
- name: Checkout
uses: actions/checkout@v2
Expand All @@ -26,43 +43,63 @@ jobs:
done
sudo apt-get clean
df -h
echo 'nproc: '$(nproc)
- name: Prepare metadata
id: metadata
run: |
DOCKER_IMAGE=superbench/superbench
IMAGE_TAG=latest
TAGS=${{ matrix.tags }}
if [[ "${{ github.event_name }}" == "push" ]] && [[ "${{ github.ref }}" == "refs/heads/release/"* ]]; then
TAGS=$(sed "s/main:/release:${GITHUB_REF##*/}-/g" <<< ${TAGS})
fi
if [[ "${{ github.event_name }}" == "pull_request" ]] && [[ "${{ github.base_ref }}" == "release/"* ]]; then
TAGS=$(sed "s/main:/release:${GITHUB_BASE_REF##*/}-/g" <<< ${TAGS})
fi
if [[ "${{ github.event_name }}" == "release" ]]; then
TAGS=$(sed "s/main:/superbench:${GITHUB_REF##*/}-/g" <<< ${TAGS})
GHCR_TAG=$(cut -d, -f1 <<< ${TAGS} | sed "s#superbench/superbench#ghcr.io/${{ github.repository }}/superbench#g")
TAGS="${TAGS},${GHCR_TAG}"
fi
if [[ "${{ github.event_name }}" == "workflow_dispatch" ]]; then
TAGS=$(sed "s/main:/dev:/g" <<< ${TAGS})
fi
DOCKERFILE=dockerfile/${{ matrix.name }}.dockerfile

DOCKERFILE=dockerfile/cuda11.1.1.dockerfile
TAGS="${DOCKER_IMAGE}:${IMAGE_TAG}"
CACHE_FROM="type=registry,ref=${DOCKER_IMAGE}:${IMAGE_TAG}"
CACHE_FROM="type=registry,ref=$(cut -d, -f1 <<< ${TAGS})"
CACHE_TO=""
if [ "${{ github.event_name }}" = "push" ]; then
if [[ "${{ github.event_name }}" != "pull_request" ]]; then
CACHE_TO="type=inline,mode=max"
fi

echo ::set-output name=dockerfile::${DOCKERFILE}
echo ::set-output name=tags::${TAGS}
echo ::set-output name=cache_from::${CACHE_FROM}
echo ::set-output name=cache_to::${CACHE_TO}
- name: Echo image tag
run: echo ${{ steps.metadata.outputs.tags }}
- name: Set up QEMU
uses: docker/setup-qemu-action@v1
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v1
- name: Login to Docker Hub
uses: docker/login-action@v1
if: ${{ github.event_name == 'push' }}
if: ${{ github.event_name != 'pull_request' }}
with:
username: ${{ secrets.DOCKERHUB_USERNAME }}
password: ${{ secrets.DOCKERHUB_TOKEN }}
- name: Login to the GitHub Container Registry
uses: docker/login-action@v1
if: ${{ github.event_name == 'release' }}
with:
registry: ghcr.io
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}
- name: Build and push
id: docker_build
uses: docker/build-push-action@v2
with:
platforms: linux/amd64
context: .
file: ${{ steps.metadata.outputs.dockerfile }}
push: ${{ github.event_name == 'push' }}
push: ${{ github.event_name != 'pull_request' }}
tags: ${{ steps.metadata.outputs.tags }}
cache-from: ${{ steps.metadata.outputs.cache_from }}
cache-to: ${{ steps.metadata.outputs.cache_to }}
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/lint.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ on:
jobs:
spelling:
name: Spelling check
runs-on: ubuntu-16.04
runs-on: ubuntu-latest
steps:
- name: Checkout
uses: actions/checkout@v2
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@

__SuperBench__ is a validation and profiling tool for AI infrastructure.

📢 [v0.2.1](https://github.com/microsoft/superbenchmark/releases/tag/v0.2.1) has been released!
📢 [v0.3.0](https://github.com/microsoft/superbenchmark/releases/tag/v0.3.0) has been released!

## _Check [aka.ms/superbench](https://aka.ms/superbench) for more details._

Expand Down
2 changes: 1 addition & 1 deletion dockerfile/rocm4.0-pytorch1.7.0.dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ ENV PATH="${PATH}" \
WORKDIR ${SB_HOME}

ADD third_party third_party
RUN ROCM_VERSION=rocm-4.0.0 make -j -C third_party rocm
RUN ROCM_VERSION=rocm-4.0.0 make -j -C third_party -o rocm_rocblas rocm

# Workaround for image having package installed in user path
RUN mv /root/.local/bin/* /opt/conda/bin/ && \
Expand Down
5 changes: 4 additions & 1 deletion docs/developer-guides/using-docker.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,10 @@ docker buildx build \
<TabItem value='rocm'>

```bash
# coming soon
export DOCKER_BUILDKIT=1
docker buildx build \
--platform linux/amd64 --cache-to type=inline,mode=max \
--tag superbench-dev --file dockerfile/rocm4.2-pytorch1.7.0.dockerfile .
```

</TabItem>
Expand Down
2 changes: 1 addition & 1 deletion docs/getting-started/installation.md
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ You can clone the source from GitHub and build it.
:::note Note
You should checkout corresponding tag to use release version, for example,

`git clone -b v0.2.1 https://github.com/microsoft/superbenchmark`
`git clone -b v0.3.0 https://github.com/microsoft/superbenchmark`
:::

```bash
Expand Down
2 changes: 1 addition & 1 deletion docs/getting-started/run-superbench.md
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ sb deploy -f remote.ini --host-password [password]
:::note Note
You should deploy corresponding Docker image to use release version, for example,

`sb deploy -f local.ini -i superbench/superbench:v0.2.1-cuda11.1.1`
`sb deploy -f local.ini -i superbench/superbench:v0.3.0-cuda11.1.1`
:::

## Run
Expand Down
2 changes: 1 addition & 1 deletion docs/superbench-config.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ superbench:
<TabItem value='example'>

```yaml
version: v0.2
version: v0.3
superbench:
enable: benchmark_1
var:
Expand Down
6 changes: 5 additions & 1 deletion docs/tutorial/container-images.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -29,13 +29,17 @@ available tags are listed below for all stable versions.

| Tag | Description |
| ----------------- | ---------------------------------- |
| v0.3.0-cuda11.1.1 | SuperBench v0.3.0 with CUDA 11.1.1 |
| v0.2.1-cuda11.1.1 | SuperBench v0.2.1 with CUDA 11.1.1 |
| v0.2.0-cuda11.1.1 | SuperBench v0.2.0 with CUDA 11.1.1 |

</TabItem>
<TabItem value='rocm'>

Coming soon.
| Tag | Description |
| --------------------------- | ---------------------------------------------- |
| v0.3.0-rocm4.2-pytorch1.7.0 | SuperBench v0.3.0 with ROCm 4.2, PyTorch 1.7.0 |
| v0.3.0-rocm4.0-pytorch1.7.0 | SuperBench v0.3.0 with ROCm 4.0, PyTorch 1.7.0 |

</TabItem>
</Tabs>
2 changes: 1 addition & 1 deletion superbench/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,5 +6,5 @@
Provide hardware and software benchmarks for AI systems.
"""

__version__ = '0.2.1'
__version__ = '0.3.0'
__author__ = 'Microsoft'
4 changes: 3 additions & 1 deletion superbench/benchmarks/build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
# Copyright (c) Microsoft Corporation - All rights reserved
# Licensed under the MIT License

set -e

SB_MICRO_PATH="${SB_MICRO_PATH:-/usr/local}"

Expand All @@ -12,6 +13,7 @@ for dir in micro_benchmarks/*/ ; do
BUILD_ROOT=$dir/build
mkdir -p $BUILD_ROOT
cmake -DCMAKE_INSTALL_PREFIX=$SB_MICRO_PATH -DCMAKE_BUILD_TYPE=Release -S $SOURCE_DIR -B $BUILD_ROOT
cmake --build $BUILD_ROOT --target install
cmake --build $BUILD_ROOT
cmake --install $BUILD_ROOT
fi
done
Original file line number Diff line number Diff line change
Expand Up @@ -264,11 +264,7 @@ def _postprocess(self):
torch.distributed.destroy_process_group()
except BaseException as e:
self._result.set_return_code(ReturnCode.DISTRIBUTED_SETTING_DESTROY_FAILURE)
logger.error(
'Post process failed - benchmark: {}, mode: {}, message: {}.'.format(
self._name, self._args.mode, str(e)
)
)
logger.error('Post process failed - benchmark: {}, message: {}.'.format(self._name, str(e)))
return False

return True
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,9 @@
cmake_minimum_required(VERSION 3.18)
project(cublas_benchmark LANGUAGES CXX)

include(../cuda_common.cmake)
find_package(CUDAToolkit QUIET)
if(CUDAToolkit_FOUND)
include(../cuda_common.cmake)
set(SRC "cublas_helper.cpp" CACHE STRING "source file")
set(TARGET_NAME "cublas_function" CACHE STRING "target name")

Expand All @@ -25,8 +25,8 @@ if(CUDAToolkit_FOUND)
add_subdirectory(${json_SOURCE_DIR} ${json_BINARY_DIR} EXCLUDE_FROM_ALL)
endif()

add_executable(cublas_benchmark cublas_test.cpp)
target_link_libraries(cublas_benchmark ${TARGET_NAME} nlohmann_json::nlohmann_json CUDA::cudart CUDA::cublas)
add_executable(cublas_benchmark cublas_test.cpp)
target_link_libraries(cublas_benchmark ${TARGET_NAME} nlohmann_json::nlohmann_json CUDA::cudart CUDA::cublas)

install(TARGETS cublas_benchmark ${TARGET_NAME} RUNTIME DESTINATION bin LIBRARY DESTINATION lib)
endif()
2 changes: 2 additions & 0 deletions superbench/benchmarks/micro_benchmarks/cuda_common.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@ if(NOT DEFINED CMAKE_CUDA_STANDARD)
set(CMAKE_CUDA_STANDARD_REQUIRED ON)
endif()

enable_language(CUDA)

if(NOT DEFINED NVCC_ARCHS_SUPPORTED)
# Reference: https://github.com/NVIDIA/cutlass/blob/0e137486498a52954eff239d874ee27ab23358e7/CMakeLists.txt#L89
set(NVCC_ARCHS_SUPPORTED "")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,9 @@
cmake_minimum_required(VERSION 3.18)
project(cudnn_benchmark LANGUAGES CXX)

include(../cuda_common.cmake)
find_package(CUDAToolkit QUIET)
if(CUDAToolkit_FOUND)
include(../cuda_common.cmake)
set(SRC "cudnn_helper.cpp" CACHE STRING "source file")
set(TARGET_NAME "cudnn_function" CACHE STRING "target name")

Expand All @@ -28,7 +28,7 @@ if(CUDAToolkit_FOUND)
add_subdirectory(${json_SOURCE_DIR} ${json_BINARY_DIR} EXCLUDE_FROM_ALL)
endif()

add_executable(cudnn_benchmark cudnn_test.cpp)
target_link_libraries(cudnn_benchmark ${TARGET_NAME} nlohmann_json::nlohmann_json CUDA::cudart ${CUDNN_LIBRARY})
add_executable(cudnn_benchmark cudnn_test.cpp)
target_link_libraries(cudnn_benchmark ${TARGET_NAME} nlohmann_json::nlohmann_json CUDA::cudart ${CUDNN_LIBRARY})
install(TARGETS cudnn_benchmark ${TARGET_NAME} RUNTIME DESTINATION bin LIBRARY DESTINATION lib)
endif()
Original file line number Diff line number Diff line change
Expand Up @@ -5,36 +5,34 @@ cmake_minimum_required(VERSION 3.18)

project(gpu_sm_copy LANGUAGES CXX)

include(../cuda_common.cmake)
find_package(CUDAToolkit QUIET)
include(../rocm_common.cmake)
find_package(HIP QUIET)

# Cuda environment
if(CUDAToolkit_FOUND)
message(STATUS "Found CUDA: " ${CUDAToolkit_VERSION})
enable_language(CUDA)

include(../cuda_common.cmake)
add_executable(gpu_sm_copy gpu_sm_copy.cu)
set_property(TARGET gpu_sm_copy PROPERTY CUDA_ARCHITECTURES ${NVCC_ARCHS_SUPPORTED})
install(TARGETS gpu_sm_copy RUNTIME DESTINATION bin)

# ROCm environment
elseif(HIP_FOUND)
message(STATUS "Found ROCm: " ${HIP_VERSION})

# Convert cuda code to hip code inplace
execute_process(COMMAND hipify-perl -inplace -print-stats gpu_sm_copy.cu
WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/)

# Add HIP targets
set_source_files_properties(gpu_sm_copy.cu PROPERTIES HIP_SOURCE_PROPERTY_FORMAT 1)
# Link with HIP
hip_add_executable(gpu_sm_copy gpu_sm_copy.cu)
# Install targets
install(TARGETS gpu_sm_copy RUNTIME DESTINATION bin)

else()
message(FATAL_ERROR "No CUDA or ROCm environment found.")
# ROCm environment
include(../rocm_common.cmake)
find_package(HIP QUIET)
if(HIP_FOUND)
message(STATUS "Found ROCm: " ${HIP_VERSION})

# Convert cuda code to hip code inplace
execute_process(COMMAND hipify-perl -inplace -print-stats gpu_sm_copy.cu
WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/)

# Add HIP targets
set_source_files_properties(gpu_sm_copy.cu PROPERTIES HIP_SOURCE_PROPERTY_FORMAT 1)
# Link with HIP
hip_add_executable(gpu_sm_copy gpu_sm_copy.cu)
# Install targets
install(TARGETS gpu_sm_copy RUNTIME DESTINATION bin)
else()
message(FATAL_ERROR "No CUDA or ROCm environment found.")
endif()
endif()

Original file line number Diff line number Diff line change
Expand Up @@ -5,36 +5,34 @@ cmake_minimum_required(VERSION 3.18)

project(kernel_launch_overhead LANGUAGES CXX)

include(../cuda_common.cmake)
find_package(CUDAToolkit QUIET)
include(../rocm_common.cmake)
find_package(HIP QUIET)

# Cuda environment
if(CUDAToolkit_FOUND)
message(STATUS "Found CUDA: " ${CUDAToolkit_VERSION})
enable_language(CUDA)

include(../cuda_common.cmake)
add_executable(kernel_launch_overhead kernel_launch.cu)
set_property(TARGET kernel_launch_overhead PROPERTY CUDA_ARCHITECTURES ${NVCC_ARCHS_SUPPORTED})
install(TARGETS kernel_launch_overhead RUNTIME DESTINATION bin)

# ROCm environment
elseif(HIP_FOUND)
message(STATUS "Found HIP: " ${HIP_VERSION})

# Convert cuda code to hip code inplace
execute_process(COMMAND hipify-perl -inplace -print-stats kernel_launch.cu
WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/)

# Add HIP targets
set_source_files_properties(kernel_launch.cu PROPERTIES HIP_SOURCE_PROPERTY_FORMAT 1)
# Link with HIP
hip_add_executable(kernel_launch_overhead kernel_launch.cu)
# Install targets
install(TARGETS kernel_launch_overhead RUNTIME DESTINATION bin)

else()
message(FATAL_ERROR "No CUDA or ROCm environment found.")
# ROCm environment
include(../rocm_common.cmake)
find_package(HIP QUIET)
if(HIP_FOUND)
message(STATUS "Found HIP: " ${HIP_VERSION})

# Convert cuda code to hip code inplace
execute_process(COMMAND hipify-perl -inplace -print-stats kernel_launch.cu
WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/)

# Add HIP targets
set_source_files_properties(kernel_launch.cu PROPERTIES HIP_SOURCE_PROPERTY_FORMAT 1)
# Link with HIP
hip_add_executable(kernel_launch_overhead kernel_launch.cu)
# Install targets
install(TARGETS kernel_launch_overhead RUNTIME DESTINATION bin)
else()
message(FATAL_ERROR "No CUDA or ROCm environment found.")
endif()
endif()

Loading