diff --git a/superbench/benchmarks/model_benchmarks/pytorch_base.py b/superbench/benchmarks/model_benchmarks/pytorch_base.py index d99218460..0497dd750 100644 --- a/superbench/benchmarks/model_benchmarks/pytorch_base.py +++ b/superbench/benchmarks/model_benchmarks/pytorch_base.py @@ -174,6 +174,7 @@ def _postprocess(self): try: if self._args.distributed_impl == DistributedImpl.DDP: + torch.distributed.barrier() torch.distributed.destroy_process_group() except BaseException as e: self._result.set_return_code(ReturnCode.DISTRIBUTED_SETTING_DESTROY_FAILURE) diff --git a/superbench/runner/playbooks/deploy.yaml b/superbench/runner/playbooks/deploy.yaml index 0eec00ace..3437b9366 100644 --- a/superbench/runner/playbooks/deploy.yaml +++ b/superbench/runner/playbooks/deploy.yaml @@ -101,7 +101,7 @@ {{ '--security-opt seccomp=unconfined --group-add video' if amd_gpu_exist else '' }} \ -w /root -v {{ workspace }}:/root -v /mnt:/mnt \ -v /var/run/docker.sock:/var/run/docker.sock \ - {{ docker_image }} bash && \ + --entrypoint /bin/bash {{ docker_image }} && \ docker exec {{ container }} bash -c \ "chown -R root:root ~ && \ sed -i 's/[# ]*Port.*/Port {{ ssh_port }}/g' /etc/ssh/sshd_config && \ diff --git a/third_party/Makefile b/third_party/Makefile index b9e0a5523..ee243a92a 100755 --- a/third_party/Makefile +++ b/third_party/Makefile @@ -8,7 +8,6 @@ MPI_HOME ?= /usr/local/mpi HIP_HOME ?= /opt/rocm/hip RCCL_HOME ?= /opt/rocm/rccl ROCM_VERSION ?= rocm-$(shell dpkg -l | grep 'rocm-dev ' | awk '{print $$3}' | cut -d '.' -f1-3) -ROCM_ARCH ?= $(shell rocminfo | grep " gfx" | uniq | awk '{print $$2}') .PHONY: all cuda rocm common cuda_cutlass cuda_bandwidthTest cuda_nccl_tests cuda_perftest rocm_perftest fio rocm_rccl_tests rocm_rocblas rocm_bandwidthTest @@ -81,12 +80,7 @@ rocm_rocblas: sb_micro_path ifeq (, $(wildcard $(SB_MICRO_PATH)/bin/rocblas-bench)) if [ -d rocBLAS ]; then rm -rf rocBLAS; fi git clone -b ${ROCM_VERSION} https://github.com/ROCmSoftwarePlatform/rocBLAS.git ./rocBLAS -ifeq (${ROCM_VERSION}, rocm-4.0.0) - sed -i '/CMAKE_MATCH_1/a\ get_filename_component(HIP_CLANG_ROOT "$${HIP_CLANG_ROOT}" DIRECTORY)' /opt/rocm/hip/lib/cmake/hip/hip-config.cmake - cd ./rocBLAS && HIPCC_COMPILE_FLAGS_APPEND="-D_OPENMP=201811 -O3 -Wno-format-nonliteral -DCMAKE_HAVE_LIBC_PTHREAD -parallel-jobs=2" HIPCC_LINK_FLAGS_APPEND="-lpthread -O3 -parallel-jobs=2" ./install.sh -idc -a ${ROCM_ARCH} -else - cd ./rocBLAS && ./install.sh -idc -endif + cd ./rocBLAS && ./install.sh --dependencies --clients-only cp -v ./rocBLAS/build/release/clients/staging/rocblas-bench $(SB_MICRO_PATH)/bin/ endif