Update on "Prioritize raising error message about unused parameters w…
Browse files Browse the repository at this point in the history
…hen rebuild_buckets fails"

Occasionally users run DDP with models that have unused parameters; in that
case we would like to surface an error message telling them to run with
find_unused_parameters=True. However, a recent change to the rebuild_buckets
logic (#44798) made it so that we raise a size mismatch error when this
happens, even though the unused-parameters message is more useful and unused
parameters are the more likely cause of failure. Prefer raising that error
over the subsequent size mismatch errors.
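
For illustration, a minimal sketch of the failure mode described above: a model with a parameter that never receives a gradient, wrapped in DDP. The single-process gloo setup is only for demonstration; real jobs would use a proper launcher.

import os
import torch
import torch.distributed as dist
import torch.nn as nn

class Net(nn.Module):
    def __init__(self):
        super().__init__()
        self.used = nn.Linear(4, 4)
        self.unused = nn.Linear(4, 4)  # never called in forward, so it gets no gradient

    def forward(self, x):
        return self.used(x)

if __name__ == "__main__":
    os.environ.setdefault("MASTER_ADDR", "127.0.0.1")
    os.environ.setdefault("MASTER_PORT", "29500")
    dist.init_process_group("gloo", rank=0, world_size=1)
    # Without find_unused_parameters=True, the reducer would wait on
    # self.unused; after this change, the resulting error tells the user
    # to enable the flag instead of surfacing a bucket size mismatch.
    model = nn.parallel.DistributedDataParallel(Net(), find_unused_parameters=True)
    out = model(torch.randn(2, 4))
    out.sum().backward()
    dist.destroy_process_group()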

Differential Revision: [D24151256](https://our.internmc.facebook.com/intern/diff/D24151256/)

[ghstack-poisoned]
rohan-varma committed Oct 8, 2020
2 parents d1b71c8 + 1197a38 commit ec9826a
Showing 139 changed files with 7,375 additions and 2,496 deletions.
1 change: 1 addition & 0 deletions .circleci/cimodel/data/pytorch_build_definitions.py
@@ -288,6 +288,7 @@ def instantiate_configs():
rocm_version = None
if compiler_name == "cuda":
cuda_version = fc.find_prop("compiler_version")
restrict_phases = ["build", "test1", "test2"]

elif compiler_name == "rocm":
rocm_version = fc.find_prop("compiler_version")
78 changes: 68 additions & 10 deletions .circleci/config.yml
@@ -6668,7 +6668,7 @@ workflows:
build_environment: "pytorch-linux-xenial-cuda9.2-cudnn7-py3-gcc7-build"
docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-xenial-cuda9.2-cudnn7-py3-gcc7"
- pytorch_linux_test:
name: pytorch_linux_xenial_cuda9_2_cudnn7_py3_gcc7_test
name: pytorch_linux_xenial_cuda9_2_cudnn7_py3_gcc7_test1
requires:
- pytorch_linux_xenial_cuda9_2_cudnn7_py3_gcc7_build
filters:
@@ -6677,7 +6677,21 @@
- master
- /ci-all\/.*/
- /release\/.*/
build_environment: "pytorch-linux-xenial-cuda9.2-cudnn7-py3-gcc7-test"
build_environment: "pytorch-linux-xenial-cuda9.2-cudnn7-py3-gcc7-test1"
docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-xenial-cuda9.2-cudnn7-py3-gcc7"
use_cuda_docker_runtime: "1"
resource_class: gpu.medium
- pytorch_linux_test:
name: pytorch_linux_xenial_cuda9_2_cudnn7_py3_gcc7_test2
requires:
- pytorch_linux_xenial_cuda9_2_cudnn7_py3_gcc7_build
filters:
branches:
only:
- master
- /ci-all\/.*/
- /release\/.*/
build_environment: "pytorch-linux-xenial-cuda9.2-cudnn7-py3-gcc7-test2"
docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-xenial-cuda9.2-cudnn7-py3-gcc7"
use_cuda_docker_runtime: "1"
resource_class: gpu.medium
@@ -6706,10 +6720,18 @@ workflows:
build_environment: "pytorch-linux-xenial-cuda10.2-cudnn7-py3-gcc7-build"
docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-xenial-cuda10.2-cudnn7-py3-gcc7"
- pytorch_linux_test:
name: pytorch_linux_xenial_cuda10_2_cudnn7_py3_gcc7_test
name: pytorch_linux_xenial_cuda10_2_cudnn7_py3_gcc7_test1
requires:
- pytorch_linux_xenial_cuda10_2_cudnn7_py3_gcc7_build
build_environment: "pytorch-linux-xenial-cuda10.2-cudnn7-py3-gcc7-test"
build_environment: "pytorch-linux-xenial-cuda10.2-cudnn7-py3-gcc7-test1"
docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-xenial-cuda10.2-cudnn7-py3-gcc7"
use_cuda_docker_runtime: "1"
resource_class: gpu.medium
- pytorch_linux_test:
name: pytorch_linux_xenial_cuda10_2_cudnn7_py3_gcc7_test2
requires:
- pytorch_linux_xenial_cuda10_2_cudnn7_py3_gcc7_build
build_environment: "pytorch-linux-xenial-cuda10.2-cudnn7-py3-gcc7-test2"
docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-xenial-cuda10.2-cudnn7-py3-gcc7"
use_cuda_docker_runtime: "1"
resource_class: gpu.medium
@@ -6780,7 +6802,21 @@ workflows:
build_environment: "pytorch-libtorch-linux-xenial-cuda10.2-cudnn7-py3-gcc7-build"
docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-xenial-cuda10.2-cudnn7-py3-gcc7"
- pytorch_linux_test:
name: pytorch_libtorch_linux_xenial_cuda10_2_cudnn7_py3_gcc7_test
name: pytorch_libtorch_linux_xenial_cuda10_2_cudnn7_py3_gcc7_test1
requires:
- pytorch_libtorch_linux_xenial_cuda10_2_cudnn7_py3_gcc7_build
filters:
branches:
only:
- master
- /ci-all\/.*/
- /release\/.*/
build_environment: "pytorch-libtorch-linux-xenial-cuda10.2-cudnn7-py3-gcc7-test1"
docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-xenial-cuda10.2-cudnn7-py3-gcc7"
use_cuda_docker_runtime: "1"
resource_class: gpu.medium
- pytorch_linux_test:
name: pytorch_libtorch_linux_xenial_cuda10_2_cudnn7_py3_gcc7_test2
requires:
- pytorch_libtorch_linux_xenial_cuda10_2_cudnn7_py3_gcc7_build
filters:
@@ -6789,7 +6825,7 @@
- master
- /ci-all\/.*/
- /release\/.*/
build_environment: "pytorch-libtorch-linux-xenial-cuda10.2-cudnn7-py3-gcc7-test"
build_environment: "pytorch-libtorch-linux-xenial-cuda10.2-cudnn7-py3-gcc7-test2"
docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-xenial-cuda10.2-cudnn7-py3-gcc7"
use_cuda_docker_runtime: "1"
resource_class: gpu.medium
@@ -6806,7 +6842,21 @@
build_environment: "pytorch-linux-xenial-cuda11.0-cudnn8-py3-gcc7-build"
docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-xenial-cuda11.0-cudnn8-py3-gcc7"
- pytorch_linux_test:
name: pytorch_linux_xenial_cuda11_0_cudnn8_py3_gcc7_test
name: pytorch_linux_xenial_cuda11_0_cudnn8_py3_gcc7_test1
requires:
- pytorch_linux_xenial_cuda11_0_cudnn8_py3_gcc7_build
filters:
branches:
only:
- master
- /ci-all\/.*/
- /release\/.*/
build_environment: "pytorch-linux-xenial-cuda11.0-cudnn8-py3-gcc7-test1"
docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-xenial-cuda11.0-cudnn8-py3-gcc7"
use_cuda_docker_runtime: "1"
resource_class: gpu.medium
- pytorch_linux_test:
name: pytorch_linux_xenial_cuda11_0_cudnn8_py3_gcc7_test2
requires:
- pytorch_linux_xenial_cuda11_0_cudnn8_py3_gcc7_build
filters:
@@ -6815,7 +6865,7 @@
- master
- /ci-all\/.*/
- /release\/.*/
build_environment: "pytorch-linux-xenial-cuda11.0-cudnn8-py3-gcc7-test"
build_environment: "pytorch-linux-xenial-cuda11.0-cudnn8-py3-gcc7-test2"
docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-xenial-cuda11.0-cudnn8-py3-gcc7"
use_cuda_docker_runtime: "1"
resource_class: gpu.medium
@@ -6826,10 +6876,18 @@
build_environment: "pytorch-libtorch-linux-xenial-cuda11.0-cudnn8-py3-gcc7-build"
docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-xenial-cuda11.0-cudnn8-py3-gcc7"
- pytorch_linux_test:
name: pytorch_libtorch_linux_xenial_cuda11_0_cudnn8_py3_gcc7_test
name: pytorch_libtorch_linux_xenial_cuda11_0_cudnn8_py3_gcc7_test1
requires:
- pytorch_libtorch_linux_xenial_cuda11_0_cudnn8_py3_gcc7_build
build_environment: "pytorch-libtorch-linux-xenial-cuda11.0-cudnn8-py3-gcc7-test1"
docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-xenial-cuda11.0-cudnn8-py3-gcc7"
use_cuda_docker_runtime: "1"
resource_class: gpu.medium
- pytorch_linux_test:
name: pytorch_libtorch_linux_xenial_cuda11_0_cudnn8_py3_gcc7_test2
requires:
- pytorch_libtorch_linux_xenial_cuda11_0_cudnn8_py3_gcc7_build
build_environment: "pytorch-libtorch-linux-xenial-cuda11.0-cudnn8-py3-gcc7-test"
build_environment: "pytorch-libtorch-linux-xenial-cuda11.0-cudnn8-py3-gcc7-test2"
docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-xenial-cuda11.0-cudnn8-py3-gcc7"
use_cuda_docker_runtime: "1"
resource_class: gpu.medium
4 changes: 2 additions & 2 deletions .jenkins/pytorch/win-test-helpers/build_pytorch.bat
@@ -95,15 +95,15 @@ if "%USE_CUDA%"=="1" (
copy %TMP_DIR_WIN%\bin\sccache.exe %TMP_DIR_WIN%\bin\nvcc.exe

:: randomtemp is used to resolve the intermittent build error related to CUDA.
:: code: https://github.com/peterjc123/randomtemp
:: code: https://github.com/peterjc123/randomtemp-rust
:: issue: https://github.com/pytorch/pytorch/issues/25393
::
:: Previously, CMake used CUDA_NVC_EXECUTABLE to find nvcc, and the
:: calls were then redirected to sccache. sccache looks for the actual
:: nvcc in PATH and then passes the arguments to it.
:: Currently, randomtemp is placed before sccache (%TMP_DIR_WIN%\bin\nvcc),
:: so we are actually wrapping sccache instead of nvcc itself.
curl -kL https://github.com/peterjc123/randomtemp/releases/download/v0.3/randomtemp.exe --output %TMP_DIR_WIN%\bin\randomtemp.exe
curl -kL https://github.com/peterjc123/randomtemp-rust/releases/download/v0.2/randomtemp.exe --output %TMP_DIR_WIN%\bin\randomtemp.exe
set RANDOMTEMP_EXECUTABLE=%TMP_DIR_WIN%\bin\nvcc.exe
set CUDA_NVCC_EXECUTABLE=%TMP_DIR_WIN%\bin\randomtemp.exe
set RANDOMTEMP_BASEDIR=%TMP_DIR_WIN%\bin
@@ -12,7 +12,7 @@ call %CONDA_PARENT_DIR%\Miniconda3\Scripts\activate.bat %CONDA_PARENT_DIR%\Minic
if "%REBUILD%"=="" (
call conda install -y -q python=%PYTHON_VERSION% numpy cffi pyyaml boto3
call conda install -y -q -c conda-forge cmake
call conda install -y -q -c rdonnelly libuv
call conda install -y -q -c conda-forge libuv=1.39
)

:: Get installed libuv path
18 changes: 18 additions & 0 deletions aten/src/ATen/BatchingRegistrations.cpp
@@ -172,6 +172,22 @@ std::vector<Tensor> chunk_batching_rule(const Tensor& self, int64_t chunks, int6
return result;
}

std::vector<Tensor> tensor_split_sections_batching_rule(const Tensor& self, int64_t sections, int64_t dim) {
auto self_physical = MultiBatchVmapTransform::logicalToPhysical(self);
auto dim_physical = self_physical.getPhysicalDim(dim);
auto result = at::tensor_split(self_physical.tensor(), sections, dim_physical);
self_physical.makeLogicalFromPhysicalListInplace(result);
return result;
}

std::vector<Tensor> tensor_split_indices_batching_rule(const Tensor& self, IntArrayRef indices, int64_t dim) {
auto self_physical = MultiBatchVmapTransform::logicalToPhysical(self);
auto dim_physical = self_physical.getPhysicalDim(dim);
auto result = at::tensor_split(self_physical.tensor(), indices, dim_physical);
self_physical.makeLogicalFromPhysicalListInplace(result);
return result;
}

Tensor unsqueeze_batching_rule(const Tensor& self, int64_t dim) {
auto self_physical = MultiBatchVmapTransform::logicalToPhysical(self);
// NB: unsqueeze has some special handling of its `dim` argument so we can't call
@@ -527,6 +543,8 @@ TORCH_LIBRARY_IMPL(aten, Batched, m) {

// view operations
m.impl("chunk", chunk_batching_rule);
m.impl("tensor_split.sections", tensor_split_sections_batching_rule);
m.impl("tensor_split.indices", tensor_split_indices_batching_rule);
m.impl("diagonal", diagonal_batching_rule);
m.impl("expand", expand_batching_rule);
m.impl("expand_as", native::expand_as); // composite wrt autograd
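
For context, a brief sketch of the semantics behind the tensor_split batching rules above. torch.tensor_split follows numpy.array_split semantics (uneven sections allowed), and the new rules let it run under the prototype torch.vmap of this era; the commented-out vmap call is an assumption about that prototype API.

import torch

x = torch.arange(8)
# sections overload: 8 elements into 3 sections of sizes 3, 3, 2
print(torch.tensor_split(x, 3))
# indices overload: split before indices 2 and 5 -> sizes 2, 3, 3
print(torch.tensor_split(x, (2, 5)))

# With the batching rules registered above, the same op can be mapped
# over a leading batch dimension, e.g.:
# batched = torch.arange(24).reshape(3, 8)
# first_chunks = torch.vmap(lambda t: torch.tensor_split(t, 3)[0])(batched)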
7 changes: 7 additions & 0 deletions aten/src/ATen/core/NamedRegistrations.cpp
@@ -317,6 +317,11 @@ TORCH_LIBRARY_IMPL(aten, Named, m) {
m.impl("median.dim_values", CppFunction::makeFallthrough());
m.impl("median.names_dim", CppFunction::makeFallthrough());
m.impl("median.names_dim_values", CppFunction::makeFallthrough());
m.impl("nanmedian", CppFunction::makeFallthrough());
m.impl("nanmedian.dim", CppFunction::makeFallthrough());
m.impl("nanmedian.dim_values", CppFunction::makeFallthrough());
m.impl("nanmedian.names_dim", CppFunction::makeFallthrough());
m.impl("nanmedian.names_dim_values", CppFunction::makeFallthrough());
m.impl("min", CppFunction::makeFallthrough());
m.impl("min.dim", CppFunction::makeFallthrough());
m.impl("min.dim_min", CppFunction::makeFallthrough());
@@ -453,6 +458,8 @@ TORCH_LIBRARY_IMPL(aten, Named, m) {
m.impl("tanh", CppFunction::makeFallthrough());
m.impl("tanh.out", CppFunction::makeFallthrough());
m.impl("tanh_", CppFunction::makeFallthrough());
m.impl("tensor_split.indices", CppFunction::makeFallthrough());
m.impl("tensor_split.sections", CppFunction::makeFallthrough());
m.impl("threshold", CppFunction::makeFallthrough());
m.impl("threshold.out", CppFunction::makeFallthrough());
m.impl("threshold_", CppFunction::makeFallthrough());
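
For reference, a short sketch of the nanmedian behavior these named-tensor fallthroughs expose; the named-tensor call at the end is an assumption about what the fallthrough permits.

import torch

x = torch.tensor([1.0, float('nan'), 3.0, 2.0])
print(torch.median(x))     # nan: median propagates NaN
print(torch.nanmedian(x))  # tensor(2.): NaN is skipped, median of [1, 2, 3]

# With the fallthroughs registered above, the op should also accept
# named tensors (illustrative; names simply fall through):
named = torch.tensor([[1.0, float('nan')], [3.0, 2.0]], names=('N', 'C'))
print(torch.nanmedian(named))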
4 changes: 3 additions & 1 deletion aten/src/ATen/core/aten_interned_strings.h
@@ -137,7 +137,6 @@ _(aten, _th_baddbmm) \
_(aten, _th_bmm) \
_(aten, _th_get_device) \
_(aten, _th_kthvalue) \
_(aten, _th_median) \
_(aten, _th_mode) \
_(aten, _th_prod) \
_(aten, _th_sigmoid) \
@@ -463,6 +462,7 @@ _(aten, max_unpool3d_forward) \
_(aten, max_values) \
_(aten, mean) \
_(aten, median) \
_(aten, nanmedian) \
_(aten, meshgrid) \
_(aten, min) \
_(aten, min_values) \
@@ -664,6 +664,7 @@ _(aten, tan) \
_(aten, tanh) \
_(aten, tensor) \
_(aten, tensordot) \
_(aten, tensor_split) \
_(aten, th_addmm) \
_(aten, th_clone) \
_(aten, th_norm) \
@@ -901,6 +902,7 @@ _(attr, maxnorm) \
_(attr, maximum) \
_(attr, mean) \
_(attr, median) \
_(attr, nanmedian) \
_(attr, min) \
_(attr, min_indices) \
_(attr, min_val) \
34 changes: 18 additions & 16 deletions aten/src/ATen/cuda/nvrtc_stub/ATenNVRTC.h
@@ -77,22 +77,24 @@ namespace at { namespace cuda {
#define HIPOCCUPANCYMAXACTIVEBLOCKSPERMULTIPROCESSOR cuOccupancyMaxActiveBlocksPerMultiprocessor
#endif

#define AT_FORALL_NVRTC(_) \
_(nvrtcVersion) \
_(nvrtcCreateProgram) \
_(nvrtcDestroyProgram) \
_(nvrtcGetPTXSize) \
_(nvrtcGetPTX) \
_(cuModuleLoadData) \
_(cuModuleGetFunction) \
_(HIPOCCUPANCYMAXACTIVEBLOCKSPERMULTIPROCESSOR)\
_(nvrtcGetErrorString) \
_(nvrtcGetProgramLogSize) \
_(nvrtcGetProgramLog) \
_(cuLaunchKernel) \
_(nvrtcCompileProgram) \
_(cuCtxGetCurrent) \
_(cuModuleUnload) \
#define AT_FORALL_NVRTC(_) \
_(nvrtcVersion) \
_(nvrtcCreateProgram) \
_(nvrtcAddNameExpression) \
_(nvrtcDestroyProgram) \
_(nvrtcGetPTXSize) \
_(nvrtcGetPTX) \
_(cuModuleLoadData) \
_(cuModuleGetFunction) \
_(HIPOCCUPANCYMAXACTIVEBLOCKSPERMULTIPROCESSOR) \
_(nvrtcGetErrorString) \
_(nvrtcGetProgramLogSize) \
_(nvrtcGetProgramLog) \
_(cuLaunchKernel) \
_(nvrtcCompileProgram) \
_(cuCtxGetCurrent) \
_(nvrtcGetLoweredName) \
_(cuModuleUnload) \
_(cuDevicePrimaryCtxGetState)

#endif