Merge remote-tracking branch 'upstream/master' into allocator_reserve

pytorch · Jan 26, 2021 · 91d7920 · 91d7920
2 parents fe3ae60 + 5748410
commit 91d7920
Show file tree

Hide file tree

Showing 614 changed files with 20,631 additions and 6,094 deletions.
diff --git a/.circleci/docker/common/install_conda.sh b/.circleci/docker/common/install_conda.sh
@@ -92,6 +92,8 @@ if [ -n "$ANACONDA_PYTHON_VERSION" ]; then
     conda_install magma-cuda110 -c pytorch
   elif [[ "$CUDA_VERSION" == 11.1* ]]; then
     conda_install magma-cuda111 -c pytorch
+  elif [[ "$CUDA_VERSION" == 11.2* ]]; then
+    conda_install magma-cuda112 -c pytorch
   fi
 
   # TODO: This isn't working atm

diff --git a/.circleci/scripts/binary_linux_test.sh b/.circleci/scripts/binary_linux_test.sh
@@ -39,27 +39,27 @@ fi
 #   conda build scripts themselves. These should really be consolidated
 pkg="/final_pkgs/\$(ls /final_pkgs)"
 if [[ "$PACKAGE_TYPE" == conda ]]; then
-  conda install \${EXTRA_CONDA_FLAGS} -y "\$pkg" --offline
-  if [[ "$DESIRED_CUDA" == 'cpu' ]]; then
-    retry conda install \${EXTRA_CONDA_FLAGS} -y cpuonly -c pytorch
-  fi
-  retry conda install \${EXTRA_CONDA_FLAGS} -yq future numpy protobuf six
-  if [[ "$DESIRED_CUDA" != 'cpu' ]]; then
-    # DESIRED_CUDA is in format cu90 or cu102
-    if [[ "${#DESIRED_CUDA}" == 4 ]]; then
-      cu_ver="${DESIRED_CUDA:2:1}.${DESIRED_CUDA:3}"
-    else
-      cu_ver="${DESIRED_CUDA:2:2}.${DESIRED_CUDA:4}"
+  (
+    # For some reason conda likes to re-activate the conda environment when attempting this install
+    # which means that a deactivate is run and some variables might not exist when that happens,
+    # namely CONDA_MKL_INTERFACE_LAYER_BACKUP from libblas so let's just ignore unbound variables when
+    # it comes to the conda installation commands
+    set +u
+    conda install \${EXTRA_CONDA_FLAGS} -y "\$pkg" --offline
+    if [[ "$DESIRED_CUDA" == 'cpu' ]]; then
+      retry conda install \${EXTRA_CONDA_FLAGS} -y cpuonly -c pytorch
     fi
-    (
-      # For some reason conda likes to re-activate the conda environment when attempting this install
-      # which means that a deactivate is run and some variables might not exist when that happens,
-      # namely CONDA_MKL_INTERFACE_LAYER_BACKUP from libblas so let's just ignore unbound variables when
-      # it comes to the conda installation commands
-      set +u
+    retry conda install \${EXTRA_CONDA_FLAGS} -yq future numpy protobuf six
+    if [[ "$DESIRED_CUDA" != 'cpu' ]]; then
+      # DESIRED_CUDA is in format cu90 or cu102
+      if [[ "${#DESIRED_CUDA}" == 4 ]]; then
+        cu_ver="${DESIRED_CUDA:2:1}.${DESIRED_CUDA:3}"
+      else
+        cu_ver="${DESIRED_CUDA:2:2}.${DESIRED_CUDA:4}"
+      fi
       retry conda install \${EXTRA_CONDA_FLAGS} -yq -c nvidia -c pytorch "cudatoolkit=\${cu_ver}"
-    )
-  fi
+    fi
+  )
 elif [[ "$PACKAGE_TYPE" != libtorch ]]; then
   pip install "\$pkg"
   retry pip install -q future numpy protobuf six

diff --git a/.gitignore b/.gitignore
@@ -38,20 +38,23 @@ docs/cpp/source/html/
 docs/cpp/source/latex/
 docs/source/generated/
 log
+test-reports/
 test/.coverage
 test/.hypothesis/
 test/cpp/api/mnist
 test/custom_operator/model.pt
+test/jit_hooks/*.pt
 test/data/legacy_modules.t7
 test/data/*.pt
 test/backward_compatibility/nightly_schemas.txt
 dropout_model.pt
 test/generated_type_hints_smoketest.py
 test/htmlcov
 test/cpp_extensions/install/
-test/test-reports/
 third_party/build/
 tools/shared/_utils_internal.py
+tools/fast_nvcc/wrap_nvcc.sh
+tools/fast_nvcc/tmp/
 torch.egg-info/
 torch/_C/__init__.pyi
 torch/_C/_nn.pyi

diff --git a/.gitmodules b/.gitmodules
@@ -1,7 +1,7 @@
 [submodule "third_party/pybind11"]
     ignore = dirty
     path = third_party/pybind11
-    url = https://github.com/seemethere/pybind11.git
+    url = https://github.com/pybind/pybind11.git
 [submodule "third_party/cub"]
     ignore = dirty
     path = third_party/cub

diff --git a/.jenkins/pytorch/build.sh b/.jenkins/pytorch/build.sh
@@ -126,7 +126,7 @@ if [[ "$BUILD_ENVIRONMENT" == *rocm* ]]; then
   fi
 
   python tools/amd_build/build_amd.py
-  python setup.py install --user
+  python setup.py install
 
   # remove sccache wrappers post-build; runtime compilation of MIOpen kernels does not yet fully support them
   sudo rm -f /opt/cache/bin/cc
@@ -223,6 +223,18 @@ else
     popd
     assert_git_not_dirty
 
+    # Build jit hook tests
+    JIT_HOOK_BUILD="$PWD/../jit-hook-build"
+    JIT_HOOK_TEST="$PWD/test/jit_hooks"
+    python --version
+    SITE_PACKAGES="$(python -c 'from distutils.sysconfig import get_python_lib; print(get_python_lib())')"
+    mkdir "$JIT_HOOK_BUILD"
+    pushd "$JIT_HOOK_BUILD"
+    cmake "$JIT_HOOK_TEST" -DCMAKE_PREFIX_PATH="$SITE_PACKAGES/torch" -DPYTHON_EXECUTABLE="$(which python)"
+    make VERBOSE=1
+    popd
+    assert_git_not_dirty
+
     # Build custom backend tests.
     CUSTOM_BACKEND_BUILD="$PWD/../custom-backend-build"
     CUSTOM_BACKEND_TEST="$PWD/test/custom_backend"
@@ -261,6 +273,7 @@ if [[ "${BUILD_ENVIRONMENT}" == *xla* ]]; then
   # TODO: Move this to Dockerfile.
 
   pip_install lark-parser
+  pip_install cloud-tpu-client
 
   sudo apt-get -qq update
   sudo apt-get -qq install npm nodejs

diff --git a/.jenkins/pytorch/macos-test.sh b/.jenkins/pytorch/macos-test.sh
@@ -134,18 +134,39 @@ test_custom_script_ops() {
   assert_git_not_dirty
 }
 
+test_jit_hooks() {
+  echo "Testing jit hooks in cpp"
+  pushd test/jit_hooks
+  # Build the custom operator library.
+  rm -rf build && mkdir build
+  pushd build
+  SITE_PACKAGES="$(python -c 'from distutils.sysconfig import get_python_lib; print(get_python_lib())')"
+  CMAKE_PREFIX_PATH="$SITE_PACKAGES/torch" cmake ..
+  make VERBOSE=1
+  popd
+
+  # Run tests Python-side and export a script module.
+  python model.py --export-script-module=model
+  # Run tests C++-side and load the exported script module.
+  build/test_jit_hooks ./model
+  popd
+  assert_git_not_dirty
+}
+
 
 if [ -z "${BUILD_ENVIRONMENT}" ] || [[ "${BUILD_ENVIRONMENT}" == *-test ]]; then
   test_python_all
   test_libtorch
   test_custom_script_ops
+  test_jit_hooks
   test_custom_backend
 else
   if [[ "${BUILD_ENVIRONMENT}" == *-test1 ]]; then
     test_python_all
   elif [[ "${BUILD_ENVIRONMENT}" == *-test2 ]]; then
     test_libtorch
     test_custom_script_ops
+    test_jit_hooks
     test_custom_backend
   fi
 fi
diff --git a/.jenkins/pytorch/test.sh b/.jenkins/pytorch/test.sh
@@ -242,6 +242,21 @@ test_custom_script_ops() {
   fi
 }
 
+test_jit_hooks() {
+  if [[ "$BUILD_ENVIRONMENT" != *rocm* ]] && [[ "$BUILD_ENVIRONMENT" != *asan* ]] ; then
+    echo "Testing jit hooks in cpp"
+    HOOK_BUILD="$PWD/../jit-hook-build"
+    pushd test/jit_hooks
+    cp -a "$HOOK_BUILD" build
+    # Run tests Python-side and export the script modules with hooks
+    python model.py --export-script-module=model
+    # Run tests C++-side and load the exported script modules
+    build/test_jit_hooks ./model
+    popd
+    assert_git_not_dirty
+  fi
+}
+
 test_torch_function_benchmark() {
   echo "Testing __torch_function__ benchmarks"
   pushd benchmarks/overrides_benchmark

diff --git a/.jenkins/pytorch/win-test-helpers/test_python_all_except_nn.bat b/.jenkins/pytorch/win-test-helpers/test_python_all_except_nn.bat
diff --git a/.jenkins/pytorch/win-test-helpers/test_python_nn.bat b/.jenkins/pytorch/win-test-helpers/test_python_first_shard.bat
@@ -12,9 +12,7 @@ if ERRORLEVEL 1 exit /b 1
 if ERRORLEVEL 1 exit /b 1
 
 echo Run nn tests
-python run_test.py --include test_nn --verbose --determine-from="%1"
+python run_test.py --exclude-jit-executor --shard 1 2 --verbose --determine-from="%1"
 if ERRORLEVEL 1 exit /b 1
 
 popd
-
-
diff --git a/.jenkins/pytorch/win-test-helpers/test_python_second_shard.bat b/.jenkins/pytorch/win-test-helpers/test_python_second_shard.bat
@@ -0,0 +1,3 @@
+call %SCRIPT_HELPERS_DIR%\setup_pytorch_env.bat
+cd test && python run_test.py --exclude-jit-executor --shard 2 2 --verbose --determine-from="%1" && cd ..
+if ERRORLEVEL 1 exit /b 1
diff --git a/.jenkins/pytorch/win-test.sh b/.jenkins/pytorch/win-test.sh
@@ -48,15 +48,15 @@ run_tests() {
         $SCRIPT_HELPERS_DIR/test_custom_backend.bat
         $SCRIPT_HELPERS_DIR/test_libtorch.bat
     else
+        export PYTORCH_COLLECT_COVERAGE=1
         if [[ "${JOB_BASE_NAME}" == *-test1 ]]; then
-            export PYTORCH_COLLECT_COVERAGE=1
-            $SCRIPT_HELPERS_DIR/test_python_nn.bat "$DETERMINE_FROM"
+            $SCRIPT_HELPERS_DIR/test_python_first_shard.bat "$DETERMINE_FROM"
             $SCRIPT_HELPERS_DIR/test_libtorch.bat
             if [[ "${USE_CUDA}" == "1" ]]; then
               $SCRIPT_HELPERS_DIR/test_python_jit_legacy.bat "$DETERMINE_FROM"
             fi
         elif [[ "${JOB_BASE_NAME}" == *-test2 ]]; then
-            $SCRIPT_HELPERS_DIR/test_python_all_except_nn.bat "$DETERMINE_FROM"
+            $SCRIPT_HELPERS_DIR/test_python_second_shard.bat "$DETERMINE_FROM"
             $SCRIPT_HELPERS_DIR/test_custom_backend.bat
             $SCRIPT_HELPERS_DIR/test_custom_script_ops.bat
         fi
@@ -67,7 +67,7 @@ run_tests
 assert_git_not_dirty
 echo "TEST PASSED"
 
-if [[ "${BUILD_ENVIRONMENT}" == "pytorch-win-vs2019-cuda10-cudnn7-py3" ]] && [[ "${JOB_BASE_NAME}" == *-test1 ]]; then
+if [[ "${BUILD_ENVIRONMENT}" == "pytorch-win-vs2019-cuda10-cudnn7-py3" ]]; then
   pushd $TEST_DIR
   python -mpip install coverage
   echo "Generating XML coverage report"

diff --git a/BUILD.bazel b/BUILD.bazel
@@ -134,6 +134,8 @@ genrule(
         "aten/src/ATen/RegisterMeta.cpp",
         "aten/src/ATen/RegisterDefaultBackend.cpp",
         "aten/src/ATen/RegisterSchema.cpp",
+        "aten/src/ATen/CPUFunctions.h",
+        "aten/src/ATen/CUDAFunctions.h",
         "aten/src/ATen/Functions.h",
         "aten/src/ATen/Functions.cpp",
         "aten/src/ATen/NativeFunctions.h",

diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -30,8 +30,16 @@ endif()
 
 set(CMAKE_INSTALL_MESSAGE NEVER)
 
+# check and set CMAKE_CXX_STANDARD
+string(FIND "${CMAKE_CXX_FLAGS}" "-std=c++" env_cxx_standard)
+if(env_cxx_standard GREATER -1)
+  message(
+      WARNING "C++ standard version definition detected in environment variable."
+      "PyTorch requires -std=c++14. Please remove -std=c++ settings in your environment.")
+endif()
 set(CMAKE_CXX_STANDARD 14)
 set(CMAKE_C_STANDARD 11)
+
 if(DEFINED GLIBCXX_USE_CXX11_ABI)
   if(${GLIBCXX_USE_CXX11_ABI} EQUAL 1)
     set(CXX_STANDARD_REQUIRED ON)
@@ -153,6 +161,7 @@ option(COLORIZE_OUTPUT "Colorize output during compilation" ON)
 option(USE_ASAN "Use Address Sanitizer" OFF)
 option(USE_TSAN "Use Thread Sanitizer" OFF)
 option(USE_CUDA "Use CUDA" ON)
+option(USE_FAST_NVCC "Use parallel NVCC build" OFF)
 option(USE_ROCM "Use ROCm" ON)
 option(CAFFE2_STATIC_LINK_CUDA "Statically link CUDA libraries" OFF)
 cmake_dependent_option(
@@ -162,7 +171,8 @@ cmake_dependent_option(
     USE_STATIC_CUDNN "Use cuDNN static libraries" OFF
     "USE_CUDNN" OFF)
 option(USE_FBGEMM "Use FBGEMM (quantized 8-bit server operators)" ON)
-option(USE_KINETO "Use Kineto profiling library" OFF)
+option(USE_KINETO "Use Kineto profiling library" ON)
+option(USE_CUPTI_SO "Use CUPTI as a shared library" OFF)
 option(USE_FAKELOWP "Use FakeLowp operators" OFF)
 option(USE_FFMPEG "Use ffmpeg" OFF)
 option(USE_GFLAGS "Use GFLAGS" OFF)
@@ -220,8 +230,8 @@ option(USE_ZSTD "Use ZSTD" OFF)
 # Ensure that an MKLDNN build is the default for x86 CPUs
 # but optional for AArch64 (dependent on -DUSE_MKLDNN).
 cmake_dependent_option(
-  USE_MKLDNN "Use MKLDNN. Only available on x86, x86_64, and AArch64." ON
-  "CPU_INTEL OR CPU_AARCH64 AND USE_MKLDNN" OFF)
+  USE_MKLDNN "Use MKLDNN. Only available on x86, x86_64, and AArch64." "${CPU_INTEL}"
+  "CPU_INTEL OR CPU_AARCH64" OFF)
 set(MKLDNN_ENABLE_CONCURRENT_EXEC ${USE_MKLDNN})
 cmake_dependent_option(
     USE_MKLDNN_CBLAS "Use CBLAS in MKLDNN" OFF
@@ -239,6 +249,7 @@ cmake_dependent_option(
 option(USE_TBB "Use TBB" OFF)
 option(ONNX_ML "Enable traditional ONNX ML API." ON)
 option(HAVE_SOVERSION "Whether to add SOVERSION to the shared objects" OFF)
+option(USE_DEPLOY "Enable torch::deploy embedded python interpreter" OFF)
 
 # Since TensorPipe does not support Windows, set it to OFF when WIN32 detected
 # On Windows platform, if user does not install libuv in build conda env and
@@ -536,31 +547,12 @@ if(USE_FBGEMM AND ((CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" AND CMAKE_SIZEOF_VO
   set(USE_FBGEMM OFF)
 endif()
 
-if(USE_KINETO AND INTERN_BUILD_MOBILE)
-  message(STATUS "Not using libkineto in a mobile build.")
-  set(USE_KINETO OFF)
-endif()
-
-if(USE_KINETO AND (NOT USE_CUDA))
-  message(STATUS "Not using libkineto in a non-CUDA build.")
-  set(USE_KINETO OFF)
-endif()
-
-if(USE_KINETO AND MSVC)
-  message(STATUS "Not using libkineto in a Windows build.")
-  set(USE_KINETO OFF)
-endif()
-
 include(cmake/Dependencies.cmake)
 
 if(USE_FBGEMM)
   string(APPEND CMAKE_CXX_FLAGS " -DUSE_FBGEMM")
 endif()
 
-if(USE_KINETO)
-  string(APPEND CMAKE_CXX_FLAGS " -DUSE_KINETO")
-endif()
-
 if(USE_QNNPACK)
   string(APPEND CMAKE_CXX_FLAGS " -DUSE_QNNPACK")
 endif()

diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
@@ -1078,3 +1078,7 @@ following steps:
 4. Now you can find the pytorch working directory, which could be
    `~/workspace` or `~/project`, and run commands locally to debug
    the failure.
+
+For certain Windows failures, it may be useful to have a full [Remote
+Desktop](https://docs.microsoft.com/en-us/windows-server/remote/remote-desktop-services/clients/remote-desktop-clients) connection. See detailed instructions [here](https://github.com/pytorch/pytorch/wiki/Debugging-Windows-with-Remote-Desktop-or-CDB-(CLI-windbg)-on-CircleCI)
+for how to set that up after rerunning the job.
diff --git a/android/pytorch_android_torchvision/src/main/cpp/pytorch_vision_jni.cpp b/android/pytorch_android_torchvision/src/main/cpp/pytorch_vision_jni.cpp
@@ -4,8 +4,6 @@
 
 #include "jni.h"
 
-#define clamp0255(x) x > 255 ? 255 : x < 0 ? 0 : x
-
 namespace pytorch_vision_jni {
 
 static void imageYUV420CenterCropToFloatBuffer(
@@ -65,7 +63,7 @@ static void imageYUV420CenterCropToFloatBuffer(
   const uint8_t* vData = (uint8_t*)jniEnv->GetDirectBufferAddress(vBuffer);
 
   float scale = cropWidthAfterRtn / tensorWidth;
-  int uvRowStride = uRowStride >> 1;
+  int uvRowStride = uRowStride;
   int cropXMult = 1;
   int cropYMult = 1;
   int cropXAdd = offsetX;
@@ -91,7 +89,7 @@ static void imageYUV420CenterCropToFloatBuffer(
   float normStdBm255 = 255 * normStdRGB[2];
 
   int xBeforeRtn, yBeforeRtn;
-  int yIdx, uvIdx, ui, vi, a0, ri, gi, bi;
+  int yi, yIdx, uvIdx, ui, vi, a0, ri, gi, bi;
   int channelSize = tensorWidth * tensorHeight;
   int wr = outOffset;
   int wg = wr + channelSize;
@@ -101,16 +99,23 @@ static void imageYUV420CenterCropToFloatBuffer(
       xBeforeRtn = cropXAdd + cropXMult * (int)(x * scale);
       yBeforeRtn = cropYAdd + cropYMult * (int)(y * scale);
       yIdx = yBeforeRtn * yRowStride + xBeforeRtn * yPixelStride;
-      uvIdx = (yBeforeRtn >> 1) * uvRowStride + xBeforeRtn * uvPixelStride;
+      uvIdx = (yBeforeRtn >> 1) * uvRowStride + (xBeforeRtn >> 1) * uvPixelStride;
       ui = uData[uvIdx];
       vi = vData[uvIdx];
-      a0 = 1192 * (yData[yIdx] - 16);
-      ri = (a0 + 1634 * (vi - 128)) >> 10;
-      gi = (a0 - 832 * (vi - 128) - 400 * (ui - 128)) >> 10;
-      bi = (a0 + 2066 * (ui - 128)) >> 10;
-      outData[wr++] = (clamp0255(ri) - normMeanRm255) / normStdRm255;
-      outData[wg++] = (clamp0255(gi) - normMeanGm255) / normStdGm255;
-      outData[wb++] = (clamp0255(bi) - normMeanBm255) / normStdBm255;
+      yi = yData[yIdx];
+      yi = (yi - 16) < 0 ? 0 : (yi - 16);
+      ui -= 128;
+      vi -= 128;
+      a0 = 1192 * yi;
+      ri = (a0 + 1634 * vi) >> 10;
+      gi = (a0 - 833 * vi - 400 * ui) >> 10;
+      bi = (a0 + 2066 * ui) >> 10;
+      ri = ri > 255 ? 255 : ri < 0 ? 0 : ri;
+      gi = gi > 255 ? 255 : gi < 0 ? 0 : gi;
+      bi = bi > 255 ? 255 : bi < 0 ? 0 : bi;
+      outData[wr++] = (ri - normMeanRm255) / normStdRm255;
+      outData[wg++] = (gi - normMeanGm255) / normStdGm255;
+      outData[wb++] = (bi - normMeanBm255) / normStdBm255;
     }
   }
 }