Commit
Update base for Update on "Remove hacky double registration of to_here op in reg_distributed_ops"

This was added as a part of #38590, but we can use default arguments here.
We use fmt::format to bind the default value to the RPC timeout at runtime.

Differential Revision: [D21912719](https://our.internmc.facebook.com/intern/diff/D21912719/)

[ghstack-poisoned]
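A minimal sketch of the fmt::format technique the message describes. The schema string, argument name, and timeout constant below are hypothetical stand-ins, not the actual reg_distributed_ops registration:

// Illustrative only: splice a runtime-configured default into an operator
// schema string with fmt::format. Schema text and constant name are made up.
#include <fmt/format.h>

#include <iostream>
#include <string>

int main() {
  const double kRpcTimeoutSeconds = 60;  // hypothetical runtime default
  const std::string schema =
      fmt::format("to_here(float timeout={}) -> Tensor", kRpcTimeoutSeconds);
  std::cout << schema << '\n';  // prints: to_here(float timeout=60) -> Tensor
  return 0;
}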
rohan-varma committed Jun 16, 2020
2 parents 132a4c9 + f13be5f commit 5ef5fa9
Showing 59 changed files with 1,540 additions and 430 deletions.
63 changes: 0 additions & 63 deletions .jenkins/caffe2/common.sh
@@ -6,69 +6,6 @@ TEST_DIR="$ROOT_DIR/caffe2_tests"
gtest_reports_dir="${TEST_DIR}/cpp"
pytest_reports_dir="${TEST_DIR}/python"

-# This is needed to work around ROCm using old docker images until
-# the transition to new images is complete.
-# TODO: Remove once ROCm CI is using new images.
-if [[ $BUILD_ENVIRONMENT == py3.6-devtoolset7-rocmrpm-centos* ]]; then
-  # This file is sourced multiple times, only install conda the first time.
-  # We must install conda where we have write access.
-  CONDA_DIR="$ROOT_DIR/conda"
-  if [[ ! -d $CONDA_DIR ]]; then
-    ANACONDA_PYTHON_VERSION=3.6
-    BASE_URL="https://repo.anaconda.com/miniconda"
-    CONDA_FILE="Miniconda3-latest-Linux-x86_64.sh"
-    mkdir $CONDA_DIR
-    pushd /tmp
-    wget -q "${BASE_URL}/${CONDA_FILE}"
-    chmod +x "${CONDA_FILE}"
-    ./"${CONDA_FILE}" -b -f -p "$CONDA_DIR"
-    popd
-    export PATH="$CONDA_DIR/bin:$PATH"
-    # Ensure we run conda in a directory that jenkins has write access to
-    pushd $CONDA_DIR
-    # Track latest conda update
-    conda update -n base conda
-    # Install correct Python version
-    conda install python="$ANACONDA_PYTHON_VERSION"
-
-    conda_install() {
-      # Ensure that the install command doesn't upgrade/downgrade Python.
-      # This should be called as
-      #   conda_install pkg1 pkg2 ... [-c channel]
-      conda install -q -y python="$ANACONDA_PYTHON_VERSION" $*
-    }
-
-    # Install PyTorch conda deps, as per https://github.com/pytorch/pytorch README
-    conda_install numpy pyyaml mkl mkl-include setuptools cffi typing future six
-
-    # TODO: This isn't working atm
-    conda_install nnpack -c killeent
-
-    # Install some other packages
-
-    # Need networkx 2.0 because bellman_ford was moved in 2.1. scikit-image by
-    # default installs the most recent networkx version, so we install this lower
-    # version explicitly before scikit-image pulls it in as a dependency.
-    pip install networkx==2.0
-
-    # TODO: Why is scipy pinned
-    # numba & llvmlite are pinned because of https://github.com/numba/numba/issues/4368
-    # scikit-learn is pinned because of
-    # https://github.com/scikit-learn/scikit-learn/issues/14485 (affects gcc 5.5
-    # only)
-    pip install --progress-bar off pytest scipy==1.1.0 scikit-learn==0.20.3 scikit-image librosa>=0.6.2 psutil numba==0.46.0 llvmlite==0.30.0
-
-    # click - onnx
-    # hypothesis - tests
-    # jupyter - for tutorials
-    pip install --progress-bar off click hypothesis jupyter protobuf tabulate virtualenv mock typing-extensions
-
-    popd
-  else
-    export PATH="$CONDA_DIR/bin:$PATH"
-  fi
-fi

# Figure out which Python to use
PYTHON="$(which python)"
if [[ "${BUILD_ENVIRONMENT}" =~ py((2|3)\.?[0-9]?\.?[0-9]?) ]]; then
25 changes: 21 additions & 4 deletions .jenkins/caffe2/test.sh
@@ -11,6 +11,15 @@ if [[ "${BUILD_ENVIRONMENT}" == *-rocm* ]]; then
# temporary to locate some kernel issues on the CI nodes
export HSAKMT_DEBUG_LEVEL=4
fi
+# These additional packages are needed for circleci ROCm builds.
+if [[ $BUILD_ENVIRONMENT == pytorch-linux-xenial-rocm* ]]; then
+  # Need networkx 2.0 because bellman_ford was moved in 2.1. scikit-image by
+  # default installs the most recent networkx version, so we install this lower
+  # version explicitly before scikit-image pulls it in as a dependency.
+  pip install networkx==2.0
+  # click - onnx
+  pip install --progress-bar off click protobuf tabulate virtualenv mock typing-extensions
+fi

# Find where cpp tests and Caffe2 itself are installed
if [[ "$BUILD_ENVIRONMENT" == *cmake* ]]; then
@@ -71,16 +80,24 @@ if [[ "$BUILD_ENVIRONMENT" == *cmake* ]]; then
exit 0
fi

+# If pip is installed as root, we must use sudo.
+# CircleCI docker images may install conda as the jenkins user, or use the OS's python packages.
+PIP=$(which pip)
+PIP_USER=$(stat --format '%U' $PIP)
+if [[ "$PIP_USER" = root ]]; then
+  MAYBE_SUDO=sudo
+fi

# if [[ "$BUILD_ENVIRONMENT" == *ubuntu14.04* ]]; then
# Hotfix, use hypothesis 3.44.6 on Ubuntu 14.04
# See comments on
# https://github.com/HypothesisWorks/hypothesis-python/commit/eadd62e467d6cee6216e71b391951ec25b4f5830
-sudo pip -q uninstall -y hypothesis
+$MAYBE_SUDO pip -q uninstall -y hypothesis
# "pip install hypothesis==3.44.6" from official server is unreliable on
# CircleCI, so we host a copy on S3 instead
-sudo pip -q install attrs==18.1.0 -f https://s3.amazonaws.com/ossci-linux/wheels/attrs-18.1.0-py2.py3-none-any.whl
-sudo pip -q install coverage==4.5.1 -f https://s3.amazonaws.com/ossci-linux/wheels/coverage-4.5.1-cp36-cp36m-macosx_10_12_x86_64.whl
-sudo pip -q install hypothesis==3.44.6 -f https://s3.amazonaws.com/ossci-linux/wheels/hypothesis-3.44.6-py3-none-any.whl
+$MAYBE_SUDO pip -q install attrs==18.1.0 -f https://s3.amazonaws.com/ossci-linux/wheels/attrs-18.1.0-py2.py3-none-any.whl
+$MAYBE_SUDO pip -q install coverage==4.5.1 -f https://s3.amazonaws.com/ossci-linux/wheels/coverage-4.5.1-cp36-cp36m-macosx_10_12_x86_64.whl
+$MAYBE_SUDO pip -q install hypothesis==3.44.6 -f https://s3.amazonaws.com/ossci-linux/wheels/hypothesis-3.44.6-py3-none-any.whl
# else
# pip install --user --no-cache-dir hypothesis==3.59.0
# fi
51 changes: 0 additions & 51 deletions android/libs/fbjni_local/build.gradle

This file was deleted.

4 changes: 0 additions & 4 deletions android/libs/fbjni_local/gradle.properties

This file was deleted.

3 changes: 1 addition & 2 deletions android/settings.gradle
@@ -1,6 +1,5 @@
-include ':app', ':pytorch_android', ':fbjni', ':pytorch_android_torchvision', ':pytorch_host', ':test_app'
+include ':app', ':pytorch_android', ':pytorch_android_torchvision', ':pytorch_host', ':test_app'

-project(':fbjni').projectDir = file('libs/fbjni_local')
project(':pytorch_android_torchvision').projectDir = file('pytorch_android_torchvision')

project(':pytorch_host').projectDir = file('pytorch_android/host')
15 changes: 0 additions & 15 deletions aten/src/ATen/CPUApplyUtils.h
@@ -341,21 +341,6 @@ apply_op(int64_t numel, int64_t offset, const Op& op, Args... iters) {
b_val[0] * c_val[0]; };
*/

-template <typename scalar1, typename Op>
-inline void CPU_tensor_apply1(Tensor tensor1, const Op op) {
-  if (!_apply_preamble({tensor1}))
-    return;
-  if (tensor1.ndimension() < 8) {
-    apply_op(
-        tensor1.numel(),
-        0,
-        op,
-        strided_tensor_iter_fixed<scalar1, 8>(tensor1, true));
-  } else {
-    apply_op(tensor1.numel(), 0, op, strided_tensor_iter<scalar1>(tensor1));
-  }
-}

template <typename scalar1, typename scalar2, typename Op>
inline void CPU_tensor_apply2(Tensor tensor1, Tensor tensor2, const Op op) {
if (!_apply_preamble({tensor1, tensor2}))
6 changes: 5 additions & 1 deletion aten/src/ATen/ParallelOpenMP.h
@@ -26,8 +26,12 @@ inline void parallel_for(
#ifdef _OPENMP
std::atomic_flag err_flag = ATOMIC_FLAG_INIT;
std::exception_ptr eptr;
+  // Work around memory leak when using 1 thread in nested "omp parallel",
+  // caused by some buggy OpenMP versions and the fact that omp_in_parallel()
+  // returns false when omp_get_max_threads() == 1 inside nested "omp parallel".
+  // See issue gh-32284.

-#pragma omp parallel if (!omp_in_parallel() && ((end - begin) > grain_size))
+#pragma omp parallel if (omp_get_max_threads() > 1 && !omp_in_parallel() && ((end - begin) > grain_size))
{
// choose number of tasks based on grain size and number of threads
// can't use num_threads clause due to bugs in GOMP's thread pool (See #32008)
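A standalone sketch of the guard condition introduced above, assuming an OpenMP-enabled build; parallel_fill and its chunking are made-up stand-ins for ATen's parallel_for, shown only to exercise the if-clause:

// Sketch of the guarded "omp parallel" pattern from the change above. The
// if-clause keeps the region serial when only one thread is available, when
// we are already inside a parallel region, or when the range is below the
// grain size -- so the leaky nested one-thread path is never entered.
#include <omp.h>

#include <algorithm>
#include <cstdint>
#include <vector>

void parallel_fill(std::vector<int>& data, int64_t grain_size) {
  const int64_t begin = 0;
  const int64_t end = static_cast<int64_t>(data.size());
  #pragma omp parallel if (omp_get_max_threads() > 1 && !omp_in_parallel() && ((end - begin) > grain_size))
  {
    // Inside a serialized (inactive) region these report a team of one.
    const int64_t num_threads = omp_get_num_threads();
    const int64_t tid = omp_get_thread_num();
    const int64_t chunk = (end - begin + num_threads - 1) / num_threads;
    const int64_t lo = begin + tid * chunk;
    const int64_t hi = std::min(end, lo + chunk);
    for (int64_t i = lo; i < hi; ++i) {
      data[i] = static_cast<int>(i);
    }
  }
}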
89 changes: 69 additions & 20 deletions aten/src/ATen/core/DistributionsHelper.h
@@ -126,6 +126,67 @@ struct uniform_real_distribution {
T to_;
};

+// The SFINAE checks introduced in #39816 look overcomplicated and must be
+// revisited: https://github.com/pytorch/pytorch/issues/40052
+#define DISTRIBUTION_HELPER_GENERATE_HAS_MEMBER(member) \
+template <typename T> \
+struct has_member_##member \
+{ \
+    typedef char yes; \
+    typedef long no; \
+    template <typename U> static yes test(decltype(&U::member)); \
+    template <typename U> static no test(...); \
+    static constexpr bool value = sizeof(test<T>(0)) == sizeof(yes); \
+}
+
+DISTRIBUTION_HELPER_GENERATE_HAS_MEMBER(next_double_normal_sample);
+DISTRIBUTION_HELPER_GENERATE_HAS_MEMBER(set_next_double_normal_sample);
+DISTRIBUTION_HELPER_GENERATE_HAS_MEMBER(next_float_normal_sample);
+DISTRIBUTION_HELPER_GENERATE_HAS_MEMBER(set_next_float_normal_sample);
+
+#define DISTRIBUTION_HELPER_GENERATE_NEXT_NORMAL_METHODS(TYPE) \
+ \
+template <typename RNG, typename ret_type, \
+          typename std::enable_if_t<( \
+            has_member_next_##TYPE##_normal_sample<RNG>::value && \
+            has_member_set_next_##TYPE##_normal_sample<RNG>::value \
+          ), int> = 0> \
+C10_HOST_DEVICE inline bool maybe_get_next_##TYPE##_normal_sample(RNG* generator, ret_type* ret) { \
+  if (generator->next_##TYPE##_normal_sample()) { \
+    *ret = *(generator->next_##TYPE##_normal_sample()); \
+    generator->set_next_##TYPE##_normal_sample(c10::optional<TYPE>()); \
+    return true; \
+  } \
+  return false; \
+} \
+ \
+template <typename RNG, typename ret_type, \
+          typename std::enable_if_t<( \
+            !has_member_next_##TYPE##_normal_sample<RNG>::value || \
+            !has_member_set_next_##TYPE##_normal_sample<RNG>::value \
+          ), int> = 0> \
+C10_HOST_DEVICE inline bool maybe_get_next_##TYPE##_normal_sample(RNG* generator, ret_type* ret) { \
+  return false; \
+} \
+ \
+template <typename RNG, typename ret_type, \
+          typename std::enable_if_t<( \
+            has_member_set_next_##TYPE##_normal_sample<RNG>::value \
+          ), int> = 0> \
+C10_HOST_DEVICE inline void maybe_set_next_##TYPE##_normal_sample(RNG* generator, ret_type cache) { \
+  generator->set_next_##TYPE##_normal_sample(cache); \
+} \
+ \
+template <typename RNG, typename ret_type, \
+          typename std::enable_if_t<( \
+            !has_member_set_next_##TYPE##_normal_sample<RNG>::value \
+          ), int> = 0> \
+C10_HOST_DEVICE inline void maybe_set_next_##TYPE##_normal_sample(RNG* generator, ret_type cache) { \
+}
+
+DISTRIBUTION_HELPER_GENERATE_NEXT_NORMAL_METHODS(double);
+DISTRIBUTION_HELPER_GENERATE_NEXT_NORMAL_METHODS(float);
+
/**
* Samples a normal distribution using the Box-Muller method
* Takes mean and standard deviation as inputs
@@ -144,41 +205,29 @@ struct normal_distribution {
template <typename RNG>
C10_HOST_DEVICE inline dist_acctype<T> operator()(RNG generator){
dist_acctype<T> ret;
-#if !defined(__CUDACC__) && !defined(__HIPCC__)
// return cached values if available
if (std::is_same<T, double>::value) {
-      if (generator->next_double_normal_sample()) {
-        ret = *(generator->next_double_normal_sample()) * stdv + mean;
-        // reset c10::optional to null
-        generator->set_next_double_normal_sample(c10::optional<double>());
-        return ret;
+      if (maybe_get_next_double_normal_sample(generator, &ret)) {
+        return transformation::normal(ret, mean, stdv);
}
} else {
-      if (generator->next_float_normal_sample()) {
-        ret = *(generator->next_float_normal_sample()) * stdv + mean;
-        // reset c10::optional to null
-        generator->set_next_float_normal_sample(c10::optional<float>());
-        return ret;
+      if (maybe_get_next_float_normal_sample(generator, &ret)) {
+        return transformation::normal(ret, mean, stdv);
}
}
-#endif
// otherwise generate new normal values
uniform_real_distribution<T> uniform(0.0, 1.0);
const dist_acctype<T> u1 = uniform(generator);
const dist_acctype<T> u2 = uniform(generator);
const dist_acctype<T> r = ::sqrt(static_cast<T>(-2.0) * ::log(static_cast<T>(1.0)-u2));
const dist_acctype<T> theta = static_cast<T>(2.0) * static_cast<T>(M_PI) * u1;
-#if !defined(__CUDACC__) && !defined(__HIPCC__)
if (std::is_same<T, double>::value) {
-      dist_acctype<double> cache = r * ::sin(theta);
-      generator->set_next_double_normal_sample(c10::optional<double>(cache));
+      maybe_set_next_double_normal_sample(generator, r * ::sin(theta));
} else {
-      dist_acctype<float> cache = r * ::sin(theta);
-      generator->set_next_float_normal_sample(c10::optional<float>(cache));
+      maybe_set_next_float_normal_sample(generator, r * ::sin(theta));
}
-#endif
-    ret = transformation::normal(r * ::cos(theta), mean, stdv);
-    return ret;
+    ret = r * ::cos(theta);
+    return transformation::normal(ret, mean, stdv);
}

private:
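The DISTRIBUTION_HELPER_GENERATE_HAS_MEMBER macro above expands to the classic sizeof-based member-detection idiom. A self-contained sketch of one expansion follows; CachingRNG and StatelessRNG are hypothetical stand-ins, not real ATen generators:

// Standalone expansion of the detection idiom used in the macro above.
template <typename T>
struct has_member_next_double_normal_sample {
  typedef char yes;  // sizeof(yes) == 1
  typedef long no;   // sizeof(no) != 1
  // Preferred overload when &U::next_double_normal_sample is well-formed;
  // otherwise SFINAE discards it and the variadic fallback is chosen.
  template <typename U>
  static yes test(decltype(&U::next_double_normal_sample));
  template <typename U>
  static no test(...);
  static constexpr bool value = sizeof(test<T>(0)) == sizeof(yes);
};

struct CachingRNG {  // hypothetical: has the member, so detection yields true
  double next_double_normal_sample() { return 0.0; }
};
struct StatelessRNG {};  // hypothetical: lacks the member, so yields false

static_assert(has_member_next_double_normal_sample<CachingRNG>::value,
              "CachingRNG exposes the cached-sample accessor");
static_assert(!has_member_next_double_normal_sample<StatelessRNG>::value,
              "StatelessRNG does not");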
@@ -51,7 +51,11 @@ using supported_primitive_arg_types = guts::typelist::typelist<
/* everything is ok, this is a primitive type */
}, /* else */ [] {
auto tmap = c10::getCustomClassTypeMap();
-    TORCH_CHECK(c10::isCustomClassRegistered<T>(), "Tried to use undefined class as input argument");
+    TORCH_CHECK(
+        c10::isCustomClassRegistered<T>(),
+        "Tried to use undefined class ",
+        c10::util::get_fully_qualified_type_name<T>(),
+        " as input argument");
});
}
};
@@ -140,7 +144,7 @@ using supported_primitive_arg_types = guts::typelist::typelist<
/* everything is ok, this is a primitive type */
}, /* else */ [] {
auto tmap = getCustomClassTypeMap();
-    TORCH_CHECK(c10::isCustomClassRegistered<T>(), "Tried to use undefined class as output");
+    TORCH_CHECK(c10::isCustomClassRegistered<T>(), "Tried to use undefined class ", c10::util::get_fully_qualified_type_name<T>(), " as output");
});
}
};
6 changes: 6 additions & 0 deletions aten/src/ATen/core/ivalue_inl.h
@@ -318,6 +318,7 @@ struct C10_EXPORT ivalue::Future final : c10::intrusive_ptr_target {
const IValue& constValue() {
std::unique_lock<std::mutex> lock(mutex_);
AT_ASSERT(completed());
+    AT_ASSERT(!error_);
return value_;
}

@@ -366,6 +367,11 @@ struct C10_EXPORT ivalue::Future final : c10::intrusive_ptr_target {
return completed_;
}

+  bool hasValue() const {
+    std::unique_lock<std::mutex> lock(mutex_);
+    return completed_ && !error_;
+  }

bool hasError() const {
std::unique_lock<std::mutex> lock(mutex_);
return error_ ? true : false;
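The new hasValue() accessor makes the Future's terminal states explicit: once completed, exactly one of hasValue() and hasError() holds, and constValue() now asserts the error-free case. A toy model of that bookkeeping, assuming an int payload in place of an IValue (ToyFuture is hypothetical, not the real ivalue::Future):

// Toy model of the completed/error state behind the accessors above.
#include <cassert>
#include <exception>
#include <mutex>
#include <utility>

class ToyFuture {
 public:
  void markCompleted(int value) {
    std::unique_lock<std::mutex> lock(mutex_);
    value_ = value;
    completed_ = true;
  }
  void setError(std::exception_ptr eptr) {
    std::unique_lock<std::mutex> lock(mutex_);
    error_ = std::move(eptr);
    completed_ = true;
  }
  bool hasValue() const {
    std::unique_lock<std::mutex> lock(mutex_);
    return completed_ && !error_;  // mirrors the accessor added above
  }
  bool hasError() const {
    std::unique_lock<std::mutex> lock(mutex_);
    return error_ ? true : false;
  }
  const int& constValue() {
    std::unique_lock<std::mutex> lock(mutex_);
    assert(completed_ && !error_);  // the spirit of the new AT_ASSERT(!error_)
    return value_;
  }

 private:
  mutable std::mutex mutex_;
  bool completed_ = false;
  int value_ = 0;
  std::exception_ptr error_;
};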
