Skip to content

Commit

Permalink
Merge remote-tracking branch 'origin/master' into quantized-conv
Browse files Browse the repository at this point in the history
  • Loading branch information
ezyang committed Jan 7, 2021
2 parents 288fb06 + 321b988 commit 2f41a83
Show file tree
Hide file tree
Showing 625 changed files with 8,613 additions and 6,203 deletions.
4 changes: 4 additions & 0 deletions .circleci/config.yml
Expand Up @@ -11,6 +11,9 @@ parameters:
run_binary_tests:
type: boolean
default: false
run_build:
type: boolean
default: true

docker_config_defaults: &docker_config_defaults
user: jenkins
Expand Down Expand Up @@ -9762,6 +9765,7 @@ workflows:
only:
- postnightly
executor: windows-with-nvidia-gpu
when: << pipeline.parameters.run_build >>
ecr_gc:
triggers:
- schedule:
Expand Down
5 changes: 4 additions & 1 deletion .circleci/generate_config_yml.py
Expand Up @@ -112,7 +112,10 @@ def gen_build_workflows_tree():
"when": r"<< pipeline.parameters.run_binary_tests >>",
"jobs": [f() for f in binary_build_functions],
},
"build": {"jobs": [f() for f in build_workflows_functions]},
"build": {
"when": r"<< pipeline.parameters.run_build >>",
"jobs": [f() for f in build_workflows_functions]
},
}
}

Expand Down
9 changes: 8 additions & 1 deletion .circleci/scripts/binary_linux_test.sh
Expand Up @@ -51,7 +51,14 @@ if [[ "$PACKAGE_TYPE" == conda ]]; then
else
cu_ver="${DESIRED_CUDA:2:2}.${DESIRED_CUDA:4}"
fi
retry conda install \${EXTRA_CONDA_FLAGS} -yq -c nvidia -c pytorch "cudatoolkit=\${cu_ver}"
(
# For some reason conda likes to re-activate the conda environment when attempting this install
# which means that a deactivate is run and some variables might not exist when that happens,
# namely CONDA_MKL_INTERFACE_LAYER_BACKUP from libblas so let's just ignore unbound variables when
# it comes to the conda installation commands
set +u
retry conda install \${EXTRA_CONDA_FLAGS} -yq -c nvidia -c pytorch "cudatoolkit=\${cu_ver}"
)
fi
elif [[ "$PACKAGE_TYPE" != libtorch ]]; then
pip install "\$pkg"
Expand Down
3 changes: 3 additions & 0 deletions .circleci/verbatim-sources/header-section.yml
Expand Up @@ -11,6 +11,9 @@ parameters:
run_binary_tests:
type: boolean
default: false
run_build:
type: boolean
default: true

docker_config_defaults: &docker_config_defaults
user: jenkins
Expand Down
2 changes: 2 additions & 0 deletions .github/pytorch-circleci-labels.yml
Expand Up @@ -9,3 +9,5 @@ labels_to_circle_params:
- release/.*
tags:
- v[0-9]+(\.[0-9]+)*-rc[0-9]+
set_to_false:
- run_build
1 change: 1 addition & 0 deletions .github/workflows/update_s3_htmls.yml
Expand Up @@ -9,6 +9,7 @@ on:
jobs:
update-html:
runs-on: ubuntu-latest
if: ${{ github.repository_owner == 'pytorch' }}
strategy:
matrix:
prefix: ["whl", "whl/test", "whl/nightly"]
Expand Down
6 changes: 3 additions & 3 deletions .jenkins/pytorch/README.md
Expand Up @@ -10,9 +10,9 @@ it is very easy to run these tests yourself:
``registry.pytorch.org/pytorch/pytorch-$BUILD_ENVIRONMENT:$DOCKER_VERSION``,
where ``$BUILD_ENVIRONMENT`` is one of the build environments
enumerated in
[pytorch-dockerfiles](https://github.com/pietern/pytorch-dockerfiles/blob/master/build.sh)
[pytorch-dockerfiles](https://github.com/pytorch/pytorch/blob/master/.circleci/docker/build.sh). The dockerfile used by jenkins can be found under the `.circle` [directory](https://github.com/pytorch/pytorch/blob/master/.circleci/docker)

2. Run ``docker -it -u jenkins $DOCKER_IMAGE``, clone PyTorch and
2. Run ``docker run -it -u jenkins $DOCKER_IMAGE``, clone PyTorch and
run one of the scripts in this directory.

The Docker images are designed so that any "reasonable" build commands
Expand All @@ -38,5 +38,5 @@ mechanisms we use:
build scripts.

- We reroute well known paths like `/usr/bin/gcc` to alternate
implementations with `update-alternatives, instead of setting
implementations with `update-alternatives`, instead of setting
`CC` and `CXX` in our implementations.
7 changes: 0 additions & 7 deletions .jenkins/pytorch/codegen-test.sh
Expand Up @@ -48,13 +48,6 @@ python -m tools.autograd.gen_autograd \
"$OUT"/autograd \
tools/autograd

# unboxing_wrappers codegen (called by torch codegen but can run independently)
mkdir -p "$OUT"/unboxing_wrappers
python -m tools.jit.gen_unboxing_wrappers \
"$OUT"/torch/share/ATen/Declarations.yaml \
"$OUT"/unboxing_wrappers \
tools/jit/templates

# annotated_fn_args codegen (called by torch codegen but can run independently)
mkdir -p "$OUT"/annotated_fn_args
python -m tools.autograd.gen_annotated_fn_args \
Expand Down
5 changes: 0 additions & 5 deletions .jenkins/pytorch/macos-test.sh
Expand Up @@ -9,11 +9,6 @@ pip install -q hypothesis "librosa>=0.6.2" "numba<=0.49.1" psutil
# TODO move this to docker
pip install unittest-xml-reporting pytest

# faulthandler become built-in since 3.3
if [[ ! $(python -c "import sys; print(int(sys.version_info >= (3, 3)))") == "1" ]]; then
pip install -q faulthandler
fi

if [ -z "${IN_CI}" ]; then
rm -rf ${WORKSPACE_DIR}/miniconda3/lib/python3.6/site-packages/torch*
fi
Expand Down
2 changes: 0 additions & 2 deletions .jenkins/pytorch/win-test-helpers/setup_pytorch_env.bat
Expand Up @@ -41,8 +41,6 @@ popd
:: The version is fixed to avoid flakiness: https://github.com/pytorch/pytorch/issues/31136
pip install "ninja==1.10.0.post1" future "hypothesis==4.53.2" "librosa>=0.6.2" psutil pillow unittest-xml-reporting pytest coverage
if %errorlevel% neq 0 ( exit /b %errorlevel% )
:: No need to install faulthandler since we only test Python >= 3.6 on Windows
:: faulthandler is builtin since Python 3.3

set DISTUTILS_USE_SDK=1

Expand Down
3 changes: 0 additions & 3 deletions BUILD.bazel
Expand Up @@ -193,9 +193,6 @@ libtorch_cpp_generated_sources = [
"torch/csrc/autograd/generated/Functions.h",
"torch/csrc/autograd/generated/Functions.cpp",
"torch/csrc/autograd/generated/variable_factories.h",
"torch/csrc/jit/generated/generated_unboxing_wrappers_0.cpp",
"torch/csrc/jit/generated/generated_unboxing_wrappers_1.cpp",
"torch/csrc/jit/generated/generated_unboxing_wrappers_2.cpp",
]

libtorch_python_generated_sources = [
Expand Down
4 changes: 3 additions & 1 deletion CMakeLists.txt
Expand Up @@ -173,6 +173,8 @@ option(USE_NATIVE_ARCH "Use -march=native" OFF)
cmake_dependent_option(
USE_NCCL "Use NCCL" ON
"USE_CUDA OR USE_ROCM;UNIX;NOT APPLE" OFF)
cmake_dependent_option(USE_RCCL "Use RCCL" ON
USE_NCCL OFF)
cmake_dependent_option(
USE_STATIC_NCCL "Use static NCCL" OFF
"USE_NCCL" OFF)
Expand Down Expand Up @@ -316,7 +318,7 @@ set(OP_DEPENDENCY "" CACHE STRING
# symbol lookup error: miniconda3/envs/pytorch-py3.7/lib/libmkl_intel_lp64.so: undefined symbol: mkl_blas_dsyrk
# https://software.intel.com/en-us/articles/symbol-lookup-error-when-linking-intel-mkl-with-gcc-on-ubuntu
if(LINUX)
set(CMAKE_SHARED_LINKER_FLAGS "-Wl,--no-as-needed")
set(CMAKE_SHARED_LINKER_FLAGS "-Wl,--no-as-needed ${CMAKE_SHARED_LINKER_FLAGS}")
endif()

if(MSVC)
Expand Down
6 changes: 6 additions & 0 deletions android/test_app/app/src/main/AndroidManifest.xml
Expand Up @@ -18,4 +18,10 @@
</application>

<uses-permission android:name="android.permission.CAMERA" />

<!--
Permissions required by the Snapdragon Profiler to collect GPU metrics.
-->
<uses-permission android:name="android.permission.INTERNET" />
<uses-permission android:name="android.permission.WRITE_EXTERNAL_STORAGE" />
</manifest>
2 changes: 1 addition & 1 deletion aten/conda/meta.yaml
Expand Up @@ -24,7 +24,7 @@ requirements:
- mkl # [not osx]

about:
home: https://github.com/zdevito/ATen
home: https://github.com/pytorch/pytorch
license: BSD
summary: A TENsor library for C++14

Expand Down
1 change: 1 addition & 0 deletions aten/src/ATen/ATen.h
Expand Up @@ -31,3 +31,4 @@
#include <c10/util/Exception.h>
#include <ATen/core/UnsafeFromTH.h>
#include <ATen/core/ivalue.h>
#include <ATen/core/jit_type.h>
30 changes: 29 additions & 1 deletion aten/src/ATen/BatchingRegistrations.cpp
Expand Up @@ -287,6 +287,25 @@ Tensor squeeze_dim_batching_rule(const Tensor& self, int64_t dim) {
return self_physical.getPhysicalToLogicalMap().apply(result);
}

// vmap batching rule for at::trace.
// at::trace is only defined for a 2-D matrix; under vmap the logical 2-D input
// is physically a batch of matrices, so we compute a per-matrix trace by
// viewing the batched diagonal (last two dims) and summing along it.
Tensor trace_batching_rule(const Tensor& self) {
// Convert the logical (batched) tensor to its physical representation,
// where the vmapped dimensions are ordinary leading dimensions.
auto self_physical = MultiBatchVmapTransform::logicalToPhysical(self);
// Batched Diagonal View
auto self_diag = at::diagonal(self_physical.tensor(), /*offset*/0, /*dim1*/-2, /*dim2*/-1);
// Summing the diagonal along its last dim yields one trace per batch entry.
auto result = at::sum(self_diag, -1);
// Map the physical result back to a logical tensor carrying the batch dims.
return self_physical.getPhysicalToLogicalMap().apply(result);
}

// vmap batching rule for trace_backward.
// The gradient of trace w.r.t. a matrix input is a zero matrix with the
// (scalar) upstream gradient written along its diagonal; here that is done
// per batch entry by copying the batched grad into a batched diagonal view.
Tensor trace_backward_batching_rule(const Tensor& grad, IntArrayRef input_sizes) {
// Move vmapped dims of the incoming gradient to physical leading dims.
auto grad_physical = MultiBatchVmapTransform::logicalToPhysical(grad);
// Allocate a zero-filled gradient with the batch dims prepended to input_sizes.
auto grad_input = at::zeros(grad_physical.getPhysicalShape(input_sizes), grad.options());
// Batched Diagonal View
auto grad_input_diag = at::diagonal(grad_input, /*offset*/0, /*dim1*/-2, /*dim2*/-1);
// Append a dimension of size one to the grad output
// so it broadcasts against the diagonal view during the copy below.
auto grad_physical_tensor = grad_physical.tensor().unsqueeze(-1);
// Writing through the diagonal view fills each batch entry's diagonal in-place.
grad_input_diag.copy_(grad_physical_tensor);
// Map the physical gradient back to a logical (batched) tensor.
return grad_physical.getPhysicalToLogicalMap().apply(grad_input);
}

Tensor transpose_int_batching_rule(const Tensor& self, int64_t dim0, int64_t dim1) {
// PyTorch has a special case where scalar_tensor.transpose(dim0, dim1) works
// for dim0, dim1 in {0, -1} and returns the scalar tensor. If the following happens:
Expand Down Expand Up @@ -996,7 +1015,7 @@ TORCH_LIBRARY_IMPL(aten, Batched, m) {
m.impl("_add_batch_dim", native::_add_batch_dim);
m.impl("_remove_batch_dim", native::_remove_batch_dim);

m.impl_UNBOXED("sum.dim_IntList", sum_batching_rule);
m.impl("sum.dim_IntList", sum_batching_rule);
m.impl("is_complex", native::is_complex);
m.impl("conj", native::conj);

Expand Down Expand Up @@ -1029,6 +1048,7 @@ TORCH_LIBRARY_IMPL(aten, Batched, m) {
m.impl("squeeze", squeeze_batching_rule);
m.impl("squeeze.dim", squeeze_dim_batching_rule);
m.impl("t", native::t); // composite wrt autograd
m.impl("trace", trace_batching_rule);
m.impl("transpose.int", transpose_int_batching_rule);
m.impl("unbind.int", unbind_batching_rule);
m.impl("unfold", unfold_batching_rule);
Expand Down Expand Up @@ -1089,6 +1109,7 @@ TORCH_LIBRARY_IMPL(aten, Batched, m) {
#undef TO_BATCHING_RULE
m.impl("clone", clone_batching_rule);

using TensorTensorScalarType = Tensor (*)(const Tensor&, const Tensor&, Scalar);
using TensorTensorType = Tensor (*)(const Tensor&, const Tensor&);
using TensorScalarType = Tensor (*)(const Tensor&, Scalar);

Expand All @@ -1115,6 +1136,12 @@ TORCH_LIBRARY_IMPL(aten, Batched, m) {
m.impl("pow.Scalar", pow_scalar_Tensor_batching_rule);

m.impl("sigmoid_backward", binary_pointwise_batching_rule<TensorTensorType, at::sigmoid_backward>);
m.impl(
"threshold_backward",
binary_pointwise_batching_rule<
TensorTensorScalarType,
at::threshold_backward,
Scalar>);

// for at::result_type, call the native::result_type implementation.
// We don't have to do anything special because native::result_type operates
Expand Down Expand Up @@ -1150,6 +1177,7 @@ TORCH_LIBRARY_IMPL(aten, Batched, m) {
// backward operators
m.impl("select_backward", select_backward_batching_rule);
m.impl("slice_backward", slice_backward_batching_rule);
m.impl("trace_backward", trace_backward_batching_rule);
m.impl("diagonal_backward", diagonal_backward_batching_rule);

// Tensor.new_* operators
Expand Down
2 changes: 1 addition & 1 deletion aten/src/ATen/CMakeLists.txt
Expand Up @@ -72,7 +72,7 @@ file(GLOB metal_h "metal/*.h")
file(GLOB metal_cpp "metal/*.cpp")
file(GLOB_RECURSE native_metal_h "native/metal/*.h")
file(GLOB metal_test_srcs "native/metal/mpscnn/tests/*.mm")
file(GLOB_RECURSE native_metal_srcs "native/metal/*.mm", "native/metal/*.cpp")
file(GLOB_RECURSE native_metal_srcs "native/metal/*.mm" "native/metal/*.cpp")
EXCLUDE(native_metal_srcs "${native_metal_srcs}" ${metal_test_srcs})
file(GLOB metal_prepack_h "native/metal/MetalPrepackOpContext.h")
file(GLOB metal_prepack_cpp "native/metal/MetalPrepackOpRegister.cpp")
Expand Down

0 comments on commit 2f41a83

Please sign in to comment.