From c5900760a80f3a2196a45df30ba9ce04694f9b4a Mon Sep 17 00:00:00 2001 From: Will Feng Date: Tue, 6 Aug 2019 18:12:41 -0400 Subject: [PATCH 01/33] [WIP] --- check_binary.sh | 71 ++++++++++++++++++++++----------------- manywheel/build.sh | 7 ++++ manywheel/build_common.sh | 4 +++ update_compiler.sh | 1 + 4 files changed, 53 insertions(+), 30 deletions(-) diff --git a/check_binary.sh b/check_binary.sh index 7298f21b5..25d41cf68 100755 --- a/check_binary.sh +++ b/check_binary.sh @@ -13,7 +13,7 @@ set -eux -o pipefail # 8. CuDNN is available for CUDA builds # # This script needs the env variables DESIRED_PYTHON, DESIRED_CUDA, -# DESIRED_DEVTOOLSET and PACKAGE_TYPE +# DESIRED_DEVTOOLSET, PACKAGE_TYPE and CXX_ABI_VARIANT # # This script expects PyTorch to be installed into the active Python (the # Python returned by `which python`). Or, if this is testing a libtorch @@ -37,21 +37,14 @@ fi echo "Checking that the gcc ABI is what we expect" if [[ "$(uname)" != 'Darwin' ]]; then function is_expected() { - # This commented out logic is what you'd expect if 'devtoolset7' actually - # built with the new GCC ABI, but it doesn't; it always builds with ABI=0. - # When a compiler is added that does build with new ABI, then replace - # devtoolset7 (and the DESIRED_DEVTOOLSET variable) with your new compiler - #if [[ "$DESIRED_DEVTOOLSET" == 'devtoolset7' ]]; then - # if [[ "$1" -gt 0 || "$1" == "ON" ]]; then - # echo 1 - # fi - #else - # if [[ -z "$1" || "$1" == 0 || "$1" == "OFF" ]]; then - # echo 1 - # fi - #fi - if [[ -z "$1" || "$1" == 0 || "$1" == "OFF" ]]; then - echo 1 + if [[ "$CXX_ABI_VARIANT" == 'cxx11-abi' ]]; then + if [[ "$1" -gt 0 || "$1" == "ON" ]]; then + echo 1 + fi + else + if [[ -z "$1" || "$1" == 0 || "$1" == "OFF" ]]; then + echo 1 + fi fi } @@ -90,26 +83,44 @@ if [[ "$(uname)" != 'Darwin' ]]; then fi # We also check that there are [not] cxx11 symbols in libtorch + # yf225 TODO: fix comment here # TODO this doesn't catch everything. Even when building with the old ABI # there are 44 symbols in the new ABI in the libtorch library, making this # check return true. This should actually check that the number of new ABI # symbols is sufficiently large. # Also, this is wrong on the old ABI, since there are some cxx11 symbols with # devtoolset7. - #echo "Checking that symbols in libtorch.so have the right gcc abi" - #libtorch="${install_root}/lib/libtorch.so" - #cxx11_symbols="$(nm "$libtorch" | c++filt | grep __cxx11 | wc -l)" || true - #if [[ "$(is_expected $cxx11_symbols)" != 1 ]]; then - # if [[ "$cxx11_symbols" == 0 ]]; then - # echo "No cxx11 symbols found, but there should be." - # else - # echo "Found cxx11 symbols but there shouldn't be. Dumping symbols" - # nm "$libtorch" | c++filt | grep __cxx11 - # fi - # exit 1 - #else - # echo "cxx11 symbols seem to be in order" - #fi + echo "Checking that symbols in libtorch.so have the right gcc abi" + # Function to retry functions that sometimes timeout or have flaky failures + check_lib_symbols_for_abi_correctness () { + lib=$1 + echo "lib: ", $lib + if [[ "$CXX_ABI_VARIANT" == 'cxx11-abi' ]]; then + pre_cxx11_symbols=$(($(nm "$lib" | c++filt | grep std::basic_string | wc -l) + $(nm "$lib" | c++filt | grep std::list | wc -l))) + echo "pre_cxx11_symbols: ", $pre_cxx11_symbols + if [[ "$pre_cxx11_symbols" -gt 0 ]]; then + echo "Found pre-cxx11 symbols but there shouldn't be. Dumping symbols" + nm "$lib" | c++filt | grep std::basic_string + nm "$lib" | c++filt | grep std::list + exit 1 + fi + else + cxx11_symbols=$(($(nm "$lib" | c++filt | grep std::__cxx11::basic_string | wc -l) + $(nm "$lib" | c++filt | grep std::__cxx11::list | wc -l))) + echo "cxx11_symbols: ", $cxx11_symbols + if [[ "$cxx11_symbols" -gt 0 ]]; then + echo "Found cxx11 symbols but there shouldn't be. Dumping symbols" + nm "$lib" | c++filt | grep std::__cxx11::basic_string + nm "$lib" | c++filt | grep std::__cxx11::list + exit 1 + fi + fi + } + libc10="${install_root}/lib/libc10.so" + libtorch="${install_root}/lib/libtorch.so" + check_lib_symbols_for_abi_correctness $libc10 + check_lib_symbols_for_abi_correctness $libtorch + + echo "cxx11 symbols seem to be in order" fi # if on Darwin ############################################################################### diff --git a/manywheel/build.sh b/manywheel/build.sh index a7c2e474e..a9e6bfbc2 100755 --- a/manywheel/build.sh +++ b/manywheel/build.sh @@ -11,6 +11,9 @@ export ATEN_STATIC_CUDA=1 export USE_CUDA_STATIC_LINK=1 export INSTALL_TEST=0 # dont install test binaries into site-packages +# yf225 TODO debug +echo "manywheel/build.sh: CXX_ABI_VARIANT: ", $CXX_ABI_VARIANT + # Keep an array of cmake variables to add to if [[ -z "$CMAKE_ARGS" ]]; then # These are passed to tools/build_pytorch_libs.sh::build() @@ -21,6 +24,10 @@ if [[ -z "$EXTRA_CAFFE2_CMAKE_FLAGS" ]]; then EXTRA_CAFFE2_CMAKE_FLAGS=() fi +if [[ "$CXX_ABI_VARIANT" == 'cxx11-abi' ]]; then + CMAKE_ARGS+=("-D_GLIBCXX_USE_CXX11_ABI=$GLIBCXX_USE_CXX11_ABI") +fi + # Determine CUDA version and architectures to build for CUDA_VERSION=$(nvcc --version|tail -n1|cut -f5 -d" "|cut -f1 -d",") echo "CUDA $CUDA_VERSION Detected" diff --git a/manywheel/build_common.sh b/manywheel/build_common.sh index 60f91be84..720afde29 100644 --- a/manywheel/build_common.sh +++ b/manywheel/build_common.sh @@ -116,6 +116,10 @@ if [[ -n "$BUILD_PYTHONLESS" ]]; then mkdir -p build pushd build echo "Calling tools/build_libtorch.py at $(date)" + + # yf225 TODO debug + echo "manywheel/build_common.sh: CMAKE_ARGS: ", ${CMAKE_ARGS[@]} + time CMAKE_ARGS=${CMAKE_ARGS[@]} \ EXTRA_CAFFE2_CMAKE_FLAGS="${EXTRA_CAFFE2_CMAKE_FLAGS[@]} $STATIC_CMAKE_FLAG" \ python ../tools/build_libtorch.py diff --git a/update_compiler.sh b/update_compiler.sh index 22f7721e1..a4dad453d 100755 --- a/update_compiler.sh +++ b/update_compiler.sh @@ -10,6 +10,7 @@ set -ex # Why does this file exist? Why not just update the compiler on the base docker # images? # +# yf225 TODO: need to update comment here! # So, all the nightlies used to be built on devtoolset3 with the old gcc ABI. # These packages worked well for most people, but could not be linked against # by client c++ libraries that were compiled using the new gcc ABI. Since both From 53a21aa01ce9ecc56d7c9a5edaa07892b5e84b16 Mon Sep 17 00:00:00 2001 From: Will Feng Date: Tue, 6 Aug 2019 20:20:25 -0400 Subject: [PATCH 02/33] Move zip openssl installation out of build_common.sh --- manywheel/build_common.sh | 3 --- 1 file changed, 3 deletions(-) diff --git a/manywheel/build_common.sh b/manywheel/build_common.sh index 720afde29..c1a58d500 100644 --- a/manywheel/build_common.sh +++ b/manywheel/build_common.sh @@ -19,9 +19,6 @@ retry () { $* || (sleep 1 && $*) || (sleep 2 && $*) || (sleep 4 && $*) || (sleep 8 && $*) } -# TODO move this into the Docker images -retry yum install -q -y zip openssl - # We use the package name to test the package by passing this to 'pip install' # This is the env variable that setup.py uses to name the package. Note that # pip 'normalizes' the name first by changing all - to _ From 585c1a769a62a23ae578da5873c2e1635bbca4ac Mon Sep 17 00:00:00 2001 From: Will Feng Date: Tue, 6 Aug 2019 21:56:29 -0400 Subject: [PATCH 03/33] fix libgomp --- manywheel/build_cpu.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/manywheel/build_cpu.sh b/manywheel/build_cpu.sh index cfd3b21d2..0ec2b20ab 100755 --- a/manywheel/build_cpu.sh +++ b/manywheel/build_cpu.sh @@ -28,7 +28,7 @@ mkdir -p "$PYTORCH_FINAL_PACKAGE_DIR" || true DEPS_LIST=( - "/usr/lib64/libgomp.so.1" + "/usr/lib/gcc/x86_64-linux-gnu/5/libgomp.so" ) DEPS_SONAME=( From 1af709c4d3259706aa5139de8b2ac799aa511850 Mon Sep 17 00:00:00 2001 From: Will Feng Date: Wed, 7 Aug 2019 10:19:34 -0400 Subject: [PATCH 04/33] try to fix --- manywheel/build.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/manywheel/build.sh b/manywheel/build.sh index a9e6bfbc2..a763cec85 100755 --- a/manywheel/build.sh +++ b/manywheel/build.sh @@ -24,7 +24,7 @@ if [[ -z "$EXTRA_CAFFE2_CMAKE_FLAGS" ]]; then EXTRA_CAFFE2_CMAKE_FLAGS=() fi -if [[ "$CXX_ABI_VARIANT" == 'cxx11-abi' ]]; then +if [[ "$CXX_ABI_VARIANT" == "cxx11-abi" ]]; then CMAKE_ARGS+=("-D_GLIBCXX_USE_CXX11_ABI=$GLIBCXX_USE_CXX11_ABI") fi From 55baf54973617d1975eb1c62ff1acb2eaf890a0c Mon Sep 17 00:00:00 2001 From: Will Feng Date: Wed, 7 Aug 2019 10:42:05 -0400 Subject: [PATCH 05/33] improve build_cpu.sh as well --- manywheel/build_cpu.sh | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/manywheel/build_cpu.sh b/manywheel/build_cpu.sh index 0ec2b20ab..96de3b58f 100755 --- a/manywheel/build_cpu.sh +++ b/manywheel/build_cpu.sh @@ -5,6 +5,9 @@ set -ex export TH_BINARY_BUILD=1 export USE_CUDA=0 +# yf225 TODO debug +echo "manywheel/build.sh: CXX_ABI_VARIANT: ", $CXX_ABI_VARIANT + # Keep an array of cmake variables to add to if [[ -z "$CMAKE_ARGS" ]]; then # These are passed to tools/build_pytorch_libs.sh::build() @@ -15,6 +18,10 @@ if [[ -z "$EXTRA_CAFFE2_CMAKE_FLAGS" ]]; then EXTRA_CAFFE2_CMAKE_FLAGS=() fi +if [[ "$CXX_ABI_VARIANT" == "cxx11-abi" ]]; then + CMAKE_ARGS+=("-D_GLIBCXX_USE_CXX11_ABI=$GLIBCXX_USE_CXX11_ABI") +fi + WHEELHOUSE_DIR="wheelhousecpu" LIBTORCH_HOUSE_DIR="libtorch_housecpu" if [[ -z "$PYTORCH_FINAL_PACKAGE_DIR" ]]; then From e22df100d433ce37c32ead4b357f6e823199bdb2 Mon Sep 17 00:00:00 2001 From: Will Feng Date: Wed, 7 Aug 2019 11:09:05 -0400 Subject: [PATCH 06/33] combine GLIBCXX_USE_CXX11_ABI setting --- manywheel/build.sh | 7 ------- manywheel/build_common.sh | 8 +++++++- manywheel/build_cpu.sh | 7 ------- 3 files changed, 7 insertions(+), 15 deletions(-) diff --git a/manywheel/build.sh b/manywheel/build.sh index a763cec85..a7c2e474e 100755 --- a/manywheel/build.sh +++ b/manywheel/build.sh @@ -11,9 +11,6 @@ export ATEN_STATIC_CUDA=1 export USE_CUDA_STATIC_LINK=1 export INSTALL_TEST=0 # dont install test binaries into site-packages -# yf225 TODO debug -echo "manywheel/build.sh: CXX_ABI_VARIANT: ", $CXX_ABI_VARIANT - # Keep an array of cmake variables to add to if [[ -z "$CMAKE_ARGS" ]]; then # These are passed to tools/build_pytorch_libs.sh::build() @@ -24,10 +21,6 @@ if [[ -z "$EXTRA_CAFFE2_CMAKE_FLAGS" ]]; then EXTRA_CAFFE2_CMAKE_FLAGS=() fi -if [[ "$CXX_ABI_VARIANT" == "cxx11-abi" ]]; then - CMAKE_ARGS+=("-D_GLIBCXX_USE_CXX11_ABI=$GLIBCXX_USE_CXX11_ABI") -fi - # Determine CUDA version and architectures to build for CUDA_VERSION=$(nvcc --version|tail -n1|cut -f5 -d" "|cut -f1 -d",") echo "CUDA $CUDA_VERSION Detected" diff --git a/manywheel/build_common.sh b/manywheel/build_common.sh index c1a58d500..843757698 100644 --- a/manywheel/build_common.sh +++ b/manywheel/build_common.sh @@ -115,7 +115,13 @@ if [[ -n "$BUILD_PYTHONLESS" ]]; then echo "Calling tools/build_libtorch.py at $(date)" # yf225 TODO debug - echo "manywheel/build_common.sh: CMAKE_ARGS: ", ${CMAKE_ARGS[@]} + echo "manywheel/build_common.sh: CXX_ABI_VARIANT: ", $CXX_ABI_VARIANT + + if [[ "$CXX_ABI_VARIANT" == "cxx11-abi" ]]; then + export GLIBCXX_USE_CXX11_ABI=1 + else + export GLIBCXX_USE_CXX11_ABI=0 + fi time CMAKE_ARGS=${CMAKE_ARGS[@]} \ EXTRA_CAFFE2_CMAKE_FLAGS="${EXTRA_CAFFE2_CMAKE_FLAGS[@]} $STATIC_CMAKE_FLAG" \ diff --git a/manywheel/build_cpu.sh b/manywheel/build_cpu.sh index 96de3b58f..0ec2b20ab 100755 --- a/manywheel/build_cpu.sh +++ b/manywheel/build_cpu.sh @@ -5,9 +5,6 @@ set -ex export TH_BINARY_BUILD=1 export USE_CUDA=0 -# yf225 TODO debug -echo "manywheel/build.sh: CXX_ABI_VARIANT: ", $CXX_ABI_VARIANT - # Keep an array of cmake variables to add to if [[ -z "$CMAKE_ARGS" ]]; then # These are passed to tools/build_pytorch_libs.sh::build() @@ -18,10 +15,6 @@ if [[ -z "$EXTRA_CAFFE2_CMAKE_FLAGS" ]]; then EXTRA_CAFFE2_CMAKE_FLAGS=() fi -if [[ "$CXX_ABI_VARIANT" == "cxx11-abi" ]]; then - CMAKE_ARGS+=("-D_GLIBCXX_USE_CXX11_ABI=$GLIBCXX_USE_CXX11_ABI") -fi - WHEELHOUSE_DIR="wheelhousecpu" LIBTORCH_HOUSE_DIR="libtorch_housecpu" if [[ -z "$PYTORCH_FINAL_PACKAGE_DIR" ]]; then From aa580b7a4063170bb93a52a77d8ce9b0200ab4b8 Mon Sep 17 00:00:00 2001 From: Will Feng Date: Wed, 7 Aug 2019 11:10:42 -0400 Subject: [PATCH 07/33] add comment --- manywheel/build_cpu.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/manywheel/build_cpu.sh b/manywheel/build_cpu.sh index 0ec2b20ab..e647c452a 100755 --- a/manywheel/build_cpu.sh +++ b/manywheel/build_cpu.sh @@ -28,6 +28,7 @@ mkdir -p "$PYTORCH_FINAL_PACKAGE_DIR" || true DEPS_LIST=( + # yf225 TODO: this should be dependent on OS name (CentOS vs. Ubuntu) "/usr/lib/gcc/x86_64-linux-gnu/5/libgomp.so" ) From 0736d7b3896ec0c85ef42dc5048089886195fa3c Mon Sep 17 00:00:00 2001 From: Will Feng Date: Wed, 7 Aug 2019 12:08:18 -0400 Subject: [PATCH 08/33] fix _GLIBCXX_USE_CXX11_ABI flag passing --- manywheel/build_common.sh | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/manywheel/build_common.sh b/manywheel/build_common.sh index 843757698..e6c9d9eb2 100644 --- a/manywheel/build_common.sh +++ b/manywheel/build_common.sh @@ -94,6 +94,16 @@ if [[ "$DESIRED_PYTHON" == "cp37-cp37m" ]]; then else retry pip install -q numpy==1.11 fi + +# yf225 TODO debug +echo "manywheel/build_common.sh: CXX_ABI_VARIANT: ", $CXX_ABI_VARIANT + +if [[ "$CXX_ABI_VARIANT" == "cxx11-abi" ]]; then + export _GLIBCXX_USE_CXX11_ABI=1 +else + export _GLIBCXX_USE_CXX11_ABI=0 +fi + echo "Calling setup.py bdist at $(date)" time CMAKE_ARGS=${CMAKE_ARGS[@]} \ EXTRA_CAFFE2_CMAKE_FLAGS=${EXTRA_CAFFE2_CMAKE_FLAGS[@]} \ @@ -114,15 +124,6 @@ if [[ -n "$BUILD_PYTHONLESS" ]]; then pushd build echo "Calling tools/build_libtorch.py at $(date)" - # yf225 TODO debug - echo "manywheel/build_common.sh: CXX_ABI_VARIANT: ", $CXX_ABI_VARIANT - - if [[ "$CXX_ABI_VARIANT" == "cxx11-abi" ]]; then - export GLIBCXX_USE_CXX11_ABI=1 - else - export GLIBCXX_USE_CXX11_ABI=0 - fi - time CMAKE_ARGS=${CMAKE_ARGS[@]} \ EXTRA_CAFFE2_CMAKE_FLAGS="${EXTRA_CAFFE2_CMAKE_FLAGS[@]} $STATIC_CMAKE_FLAG" \ python ../tools/build_libtorch.py From 623eb8f0eeea75bd05ddfbf59d25237474619a15 Mon Sep 17 00:00:00 2001 From: Will Feng Date: Wed, 7 Aug 2019 12:13:22 -0400 Subject: [PATCH 09/33] fix PWD --- check_binary.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/check_binary.sh b/check_binary.sh index 25d41cf68..3a1ee84ab 100755 --- a/check_binary.sh +++ b/check_binary.sh @@ -24,7 +24,8 @@ set -eux -o pipefail # The install root depends on both the package type and the os # All MacOS packages use conda, even for the wheel packages. if [[ "$PACKAGE_TYPE" == libtorch ]]; then - install_root="$pwd" + # NOTE: Only $PWD works on both CentOS and Ubuntu + install_root="$PWD" else py_dot="${DESIRED_PYTHON:0:3}" install_root="$(dirname $(which python))/../lib/python${py_dot}/site-packages/torch/" From 719564ea0a7d83ce81eeb349b4c9eceb2c09bc80 Mon Sep 17 00:00:00 2001 From: Will Feng Date: Wed, 7 Aug 2019 12:19:53 -0400 Subject: [PATCH 10/33] further test comment --- check_binary.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/check_binary.sh b/check_binary.sh index 3a1ee84ab..037c8115f 100755 --- a/check_binary.sh +++ b/check_binary.sh @@ -172,6 +172,7 @@ fi ############################################################################### if [[ "$PACKAGE_TYPE" == 'libtorch' ]]; then # For libtorch testing is done. All further tests require Python + # TODO: We should run those further tests for libtorch as well exit 0 fi python -c 'import torch' From 7fe08a4ba611383789f85f16cc9980800a2564f4 Mon Sep 17 00:00:00 2001 From: Will Feng Date: Wed, 7 Aug 2019 12:26:15 -0400 Subject: [PATCH 11/33] fix comments --- check_binary.sh | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/check_binary.sh b/check_binary.sh index 037c8115f..e8cdac5c1 100755 --- a/check_binary.sh +++ b/check_binary.sh @@ -84,6 +84,12 @@ if [[ "$(uname)" != 'Darwin' ]]; then fi # We also check that there are [not] cxx11 symbols in libtorch + # To check whether it is using cxx11 ABI, check non-existence of symbol: + # - std::basic_string + # - std::list + # To check whether it is using pre-cxx11 ABI, check non-existence of symbol: + # - std::__cxx11::basic_string + # - std::__cxx11::list # yf225 TODO: fix comment here # TODO this doesn't catch everything. Even when building with the old ABI # there are 44 symbols in the new ABI in the libtorch library, making this @@ -92,13 +98,12 @@ if [[ "$(uname)" != 'Darwin' ]]; then # Also, this is wrong on the old ABI, since there are some cxx11 symbols with # devtoolset7. echo "Checking that symbols in libtorch.so have the right gcc abi" - # Function to retry functions that sometimes timeout or have flaky failures check_lib_symbols_for_abi_correctness () { lib=$1 - echo "lib: ", $lib + echo "lib: " $lib if [[ "$CXX_ABI_VARIANT" == 'cxx11-abi' ]]; then pre_cxx11_symbols=$(($(nm "$lib" | c++filt | grep std::basic_string | wc -l) + $(nm "$lib" | c++filt | grep std::list | wc -l))) - echo "pre_cxx11_symbols: ", $pre_cxx11_symbols + echo "pre_cxx11_symbols: " $pre_cxx11_symbols if [[ "$pre_cxx11_symbols" -gt 0 ]]; then echo "Found pre-cxx11 symbols but there shouldn't be. Dumping symbols" nm "$lib" | c++filt | grep std::basic_string @@ -107,7 +112,7 @@ if [[ "$(uname)" != 'Darwin' ]]; then fi else cxx11_symbols=$(($(nm "$lib" | c++filt | grep std::__cxx11::basic_string | wc -l) + $(nm "$lib" | c++filt | grep std::__cxx11::list | wc -l))) - echo "cxx11_symbols: ", $cxx11_symbols + echo "cxx11_symbols: " $cxx11_symbols if [[ "$cxx11_symbols" -gt 0 ]]; then echo "Found cxx11 symbols but there shouldn't be. Dumping symbols" nm "$lib" | c++filt | grep std::__cxx11::basic_string From f1a408791d35cb61432205fddedc361dcb3f42fb Mon Sep 17 00:00:00 2001 From: Will Feng Date: Wed, 7 Aug 2019 12:50:44 -0400 Subject: [PATCH 12/33] fix abi check --- check_binary.sh | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/check_binary.sh b/check_binary.sh index e8cdac5c1..8f44291ee 100755 --- a/check_binary.sh +++ b/check_binary.sh @@ -90,19 +90,12 @@ if [[ "$(uname)" != 'Darwin' ]]; then # To check whether it is using pre-cxx11 ABI, check non-existence of symbol: # - std::__cxx11::basic_string # - std::__cxx11::list - # yf225 TODO: fix comment here - # TODO this doesn't catch everything. Even when building with the old ABI - # there are 44 symbols in the new ABI in the libtorch library, making this - # check return true. This should actually check that the number of new ABI - # symbols is sufficiently large. - # Also, this is wrong on the old ABI, since there are some cxx11 symbols with - # devtoolset7. echo "Checking that symbols in libtorch.so have the right gcc abi" check_lib_symbols_for_abi_correctness () { lib=$1 echo "lib: " $lib if [[ "$CXX_ABI_VARIANT" == 'cxx11-abi' ]]; then - pre_cxx11_symbols=$(($(nm "$lib" | c++filt | grep std::basic_string | wc -l) + $(nm "$lib" | c++filt | grep std::list | wc -l))) + pre_cxx11_symbols=$(($(nm "$lib" | c++filt | grep std::basic_string | wc -l) + $(nm "$lib" | c++filt | grep std::list | wc -l))) || true echo "pre_cxx11_symbols: " $pre_cxx11_symbols if [[ "$pre_cxx11_symbols" -gt 0 ]]; then echo "Found pre-cxx11 symbols but there shouldn't be. Dumping symbols" @@ -111,7 +104,7 @@ if [[ "$(uname)" != 'Darwin' ]]; then exit 1 fi else - cxx11_symbols=$(($(nm "$lib" | c++filt | grep std::__cxx11::basic_string | wc -l) + $(nm "$lib" | c++filt | grep std::__cxx11::list | wc -l))) + cxx11_symbols=$(($(nm "$lib" | c++filt | grep std::__cxx11::basic_string | wc -l) + $(nm "$lib" | c++filt | grep std::__cxx11::list | wc -l))) || true echo "cxx11_symbols: " $cxx11_symbols if [[ "$cxx11_symbols" -gt 0 ]]; then echo "Found cxx11 symbols but there shouldn't be. Dumping symbols" From 7646fc5dda01313d89a2704dce8cec2ee6376dfa Mon Sep 17 00:00:00 2001 From: Will Feng Date: Wed, 7 Aug 2019 14:20:00 -0400 Subject: [PATCH 13/33] update comments --- check_binary.sh | 10 ++++++++++ manywheel/build_common.sh | 4 ---- update_compiler.sh | 38 ++++++++++++-------------------------- 3 files changed, 22 insertions(+), 30 deletions(-) diff --git a/check_binary.sh b/check_binary.sh index 8f44291ee..e26cdd007 100755 --- a/check_binary.sh +++ b/check_binary.sh @@ -35,6 +35,16 @@ fi ############################################################################### # Check GCC ABI ############################################################################### + +# NOTE [ Building libtorch with old vs. new gcc ABI ] +# +# Packages built with one version of ABI could not be linked against by client +# C++ libraries that were compiled using the other version of ABI. Since both +# gcc ABIs are still common in the wild, we need to support both ABIs. Currently: +# +# - All the nightlies built on CentOS 7 + devtoolset7 use the old gcc ABI. +# - All the nightlies built on Ubuntu 16.04 + gcc 5.4 use the new gcc ABI. + echo "Checking that the gcc ABI is what we expect" if [[ "$(uname)" != 'Darwin' ]]; then function is_expected() { diff --git a/manywheel/build_common.sh b/manywheel/build_common.sh index e6c9d9eb2..a7d7970a3 100644 --- a/manywheel/build_common.sh +++ b/manywheel/build_common.sh @@ -95,9 +95,6 @@ else retry pip install -q numpy==1.11 fi -# yf225 TODO debug -echo "manywheel/build_common.sh: CXX_ABI_VARIANT: ", $CXX_ABI_VARIANT - if [[ "$CXX_ABI_VARIANT" == "cxx11-abi" ]]; then export _GLIBCXX_USE_CXX11_ABI=1 else @@ -123,7 +120,6 @@ if [[ -n "$BUILD_PYTHONLESS" ]]; then mkdir -p build pushd build echo "Calling tools/build_libtorch.py at $(date)" - time CMAKE_ARGS=${CMAKE_ARGS[@]} \ EXTRA_CAFFE2_CMAKE_FLAGS="${EXTRA_CAFFE2_CMAKE_FLAGS[@]} $STATIC_CMAKE_FLAG" \ python ../tools/build_libtorch.py diff --git a/update_compiler.sh b/update_compiler.sh index a4dad453d..d04fa1517 100755 --- a/update_compiler.sh +++ b/update_compiler.sh @@ -1,6 +1,8 @@ #!/bin/bash set -ex +# NOTE: This script is called by default on all nightlies. + # Expected to be run on a Docker image built from # https://github.com/pytorch/builder/blob/master/conda/Dockerfile (or the # manywheel equivalent) @@ -10,35 +12,19 @@ set -ex # Why does this file exist? Why not just update the compiler on the base docker # images? # -# yf225 TODO: need to update comment here! -# So, all the nightlies used to be built on devtoolset3 with the old gcc ABI. -# These packages worked well for most people, but could not be linked against -# by client c++ libraries that were compiled using the new gcc ABI. Since both -# gcc ABIs are still common in the wild, we should be able to support both -# ABIs. Hence, we need a script to alter the compiler on the root docker images -# to configure which ABI we want to build with. +# Answer: Yes we should just update the compiler to devtoolset7 on all the CentOS +# base docker images. There's no reason to keep around devtoolset3 because it's +# not used anymore. # -# So then this script was written to change from devtoolset3 to devtoolset7. It -# turns out that this doesn't actually fix the problem, since devtoolset7 is -# incapable of building with the new gcc ABI. BUT, devtoolset7 /is/ able to +# We use devtoolset7 instead of devtoolset3 because devtoolset7 /is/ able to # build with avx512 instructions, which are needed for fbgemm to get good -# performance. So now this script is called by default on all nightlies. -# -# But we still don't have the new gcc ABI. So what should happen next is -# - Upgrade the compiler on all the base docker images to be devtoolset7. -# There's no reason to keep around devtoolset3. It will never be used. -# - Alter this script to install another compiler toolchain, not a devtoolset#, -# which can build with the new gcc ABI. Then use this script as intended, in -# a parallel suite of new-gcc-ABI nightlies. +# performance. # -# When this script is finally changed to build with the new gcc ABI, then we'll -# need to set this variable manually because -# https://github.com/pytorch/pytorch/blob/master/torch/abi-check.cpp sets the -# ABI to 0 by default. -# ``` export _GLIBCXX_USE_CXX11_ABI=1 ``` -# Note that this probably needs to get set in the .circleci infra that's -# running this, since env variables set in this file are probably discarded. -# ~~~ +# Note that devtoolset7 still *cannot* build with the new gcc ABI +# (see https://bugzilla.redhat.com/show_bug.cgi?id=1546704). Instead, we use +# Ubuntu 16.04 + gcc 5.4 to build with the new gcc ABI, using an Ubuntu 16.04 +# base docker image. +# For details, see NOTE [ Building libtorch with old vs. new gcc ABI ]. # The gcc version should be 4.9.2 right now echo "Initial gcc version is $(gcc --version)" From ead3c34bae933db0f4ee2d84fa98410b67d4c055 Mon Sep 17 00:00:00 2001 From: Will Feng Date: Wed, 7 Aug 2019 14:24:38 -0400 Subject: [PATCH 14/33] improve DEPS_LIST --- manywheel/build_cpu.sh | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/manywheel/build_cpu.sh b/manywheel/build_cpu.sh index e647c452a..a9fbe104f 100755 --- a/manywheel/build_cpu.sh +++ b/manywheel/build_cpu.sh @@ -26,11 +26,16 @@ if [[ -z "$PYTORCH_FINAL_PACKAGE_DIR" ]]; then fi mkdir -p "$PYTORCH_FINAL_PACKAGE_DIR" || true - -DEPS_LIST=( - # yf225 TODO: this should be dependent on OS name (CentOS vs. Ubuntu) - "/usr/lib/gcc/x86_64-linux-gnu/5/libgomp.so" -) +OS_NAME=`awk -F= '/^NAME/{print $2}' /etc/os-release` +if [[ "$OS_NAME" == 'CentOS Linux' ]]; then + DEPS_LIST=( + "/usr/lib64/libgomp.so.1" + ) +elif [[ "$OS_NAME" == 'Ubuntu' ]]; then + DEPS_LIST=( + "/usr/lib/gcc/x86_64-linux-gnu/5/libgomp.so" + ) +fi DEPS_SONAME=( "libgomp.so.1" From 2c8cc1a30cb8a940e9de5ffff34c082466aba8de Mon Sep 17 00:00:00 2001 From: Will Feng Date: Wed, 7 Aug 2019 16:27:05 -0400 Subject: [PATCH 15/33] remove CXX_ABI_VARIANT --- check_binary.sh | 6 +++--- manywheel/build_common.sh | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/check_binary.sh b/check_binary.sh index e26cdd007..75f508e08 100755 --- a/check_binary.sh +++ b/check_binary.sh @@ -13,7 +13,7 @@ set -eux -o pipefail # 8. CuDNN is available for CUDA builds # # This script needs the env variables DESIRED_PYTHON, DESIRED_CUDA, -# DESIRED_DEVTOOLSET, PACKAGE_TYPE and CXX_ABI_VARIANT +# DESIRED_DEVTOOLSET and PACKAGE_TYPE # # This script expects PyTorch to be installed into the active Python (the # Python returned by `which python`). Or, if this is testing a libtorch @@ -48,7 +48,7 @@ fi echo "Checking that the gcc ABI is what we expect" if [[ "$(uname)" != 'Darwin' ]]; then function is_expected() { - if [[ "$CXX_ABI_VARIANT" == 'cxx11-abi' ]]; then + if [[ "$DESIRED_DEVTOOLSET" == *"cxx11-abi"* ]]; then if [[ "$1" -gt 0 || "$1" == "ON" ]]; then echo 1 fi @@ -104,7 +104,7 @@ if [[ "$(uname)" != 'Darwin' ]]; then check_lib_symbols_for_abi_correctness () { lib=$1 echo "lib: " $lib - if [[ "$CXX_ABI_VARIANT" == 'cxx11-abi' ]]; then + if [[ "$DESIRED_DEVTOOLSET" == *"cxx11-abi"* ]]; then pre_cxx11_symbols=$(($(nm "$lib" | c++filt | grep std::basic_string | wc -l) + $(nm "$lib" | c++filt | grep std::list | wc -l))) || true echo "pre_cxx11_symbols: " $pre_cxx11_symbols if [[ "$pre_cxx11_symbols" -gt 0 ]]; then diff --git a/manywheel/build_common.sh b/manywheel/build_common.sh index a7d7970a3..ceda8cee7 100644 --- a/manywheel/build_common.sh +++ b/manywheel/build_common.sh @@ -95,7 +95,7 @@ else retry pip install -q numpy==1.11 fi -if [[ "$CXX_ABI_VARIANT" == "cxx11-abi" ]]; then +if [[ "$DESIRED_DEVTOOLSET" == *"cxx11-abi"* ]]; then export _GLIBCXX_USE_CXX11_ABI=1 else export _GLIBCXX_USE_CXX11_ABI=0 From d741af2c1670a61492dc13a1760f9daa5569b6ec Mon Sep 17 00:00:00 2001 From: Will Feng Date: Wed, 7 Aug 2019 17:55:52 -0400 Subject: [PATCH 16/33] add identifier to new ABI binary zip --- manywheel/build_common.sh | 13 ++++++++++--- smoke_test.sh | 7 ++++++- 2 files changed, 16 insertions(+), 4 deletions(-) diff --git a/manywheel/build_common.sh b/manywheel/build_common.sh index ceda8cee7..87d217f52 100644 --- a/manywheel/build_common.sh +++ b/manywheel/build_common.sh @@ -146,9 +146,16 @@ if [[ -n "$BUILD_PYTHONLESS" ]]; then echo "$(pushd $pytorch_rootdir && git rev-parse HEAD)" > libtorch/build-hash mkdir -p /tmp/$LIBTORCH_HOUSE_DIR - zip -rq /tmp/$LIBTORCH_HOUSE_DIR/libtorch-$LIBTORCH_VARIANT-$PYTORCH_BUILD_VERSION.zip libtorch - cp /tmp/$LIBTORCH_HOUSE_DIR/libtorch-$LIBTORCH_VARIANT-$PYTORCH_BUILD_VERSION.zip \ - /tmp/$LIBTORCH_HOUSE_DIR/libtorch-$LIBTORCH_VARIANT-latest.zip + + if [[ "$DESIRED_DEVTOOLSET" == *"cxx11-abi"* ]]; then + LIBTORCH_ABI="cxx11-abi-" + else + LIBTORCH_ABI= + fi + + zip -rq /tmp/$LIBTORCH_HOUSE_DIR/libtorch-$LIBTORCH_ABI$LIBTORCH_VARIANT-$PYTORCH_BUILD_VERSION.zip libtorch + cp /tmp/$LIBTORCH_HOUSE_DIR/libtorch-$LIBTORCH_ABI$LIBTORCH_VARIANT-$PYTORCH_BUILD_VERSION.zip \ + /tmp/$LIBTORCH_HOUSE_DIR/libtorch-$LIBTORCH_ABI$LIBTORCH_VARIANT-latest.zip fi popd diff --git a/smoke_test.sh b/smoke_test.sh index 7d035fce1..9f207de72 100755 --- a/smoke_test.sh +++ b/smoke_test.sh @@ -48,7 +48,12 @@ if [[ "$PACKAGE_TYPE" == 'libtorch' ]]; then else libtorch_variant="$LIBTORCH_VARIANT" fi - package_name="libtorch-$libtorch_variant-${NIGHTLIES_DATE_PREAMBLE}${DATE}.zip" + if [[ "$DESIRED_DEVTOOLSET" == *"cxx11-abi"* ]]; then + LIBTORCH_ABI="cxx11-abi-" + else + LIBTORCH_ABI= + fi + package_name="libtorch-$LIBTORCH_ABI$libtorch_variant-${NIGHTLIES_DATE_PREAMBLE}${DATE}.zip" elif [[ "$PACKAGE_TYPE" == *wheel ]]; then package_name='torch' elif [[ "$DESIRED_CUDA" == 'cpu' && "$(uname)" != 'Darwin' ]]; then From 7993e172ccc2fcd3e730b602c59cb6620d1a3785 Mon Sep 17 00:00:00 2001 From: Will Feng Date: Wed, 7 Aug 2019 18:52:31 -0400 Subject: [PATCH 17/33] only check gcc ABI for libtorch builds --- check_binary.sh | 168 ++++++++++++++++++++++++------------------------ 1 file changed, 85 insertions(+), 83 deletions(-) diff --git a/check_binary.sh b/check_binary.sh index 75f508e08..cb87bffef 100755 --- a/check_binary.sh +++ b/check_binary.sh @@ -45,92 +45,94 @@ fi # - All the nightlies built on CentOS 7 + devtoolset7 use the old gcc ABI. # - All the nightlies built on Ubuntu 16.04 + gcc 5.4 use the new gcc ABI. -echo "Checking that the gcc ABI is what we expect" -if [[ "$(uname)" != 'Darwin' ]]; then - function is_expected() { - if [[ "$DESIRED_DEVTOOLSET" == *"cxx11-abi"* ]]; then - if [[ "$1" -gt 0 || "$1" == "ON" ]]; then - echo 1 - fi - else - if [[ -z "$1" || "$1" == 0 || "$1" == "OFF" ]]; then - echo 1 +if [[ "$PACKAGE_TYPE" == libtorch ]]; then + echo "Checking that the gcc ABI is what we expect" + if [[ "$(uname)" != 'Darwin' ]]; then + function is_expected() { + if [[ "$DESIRED_DEVTOOLSET" == *"cxx11-abi"* ]]; then + if [[ "$1" -gt 0 || "$1" == "ON" ]]; then + echo 1 + fi + else + if [[ -z "$1" || "$1" == 0 || "$1" == "OFF" ]]; then + echo 1 + fi fi + } + + # First we check that the env var in TorchConfig.cmake is correct + + # We search for D_GLIBCXX_USE_CXX11_ABI=1 in torch/TorchConfig.cmake + torch_config="${install_root}/share/cmake/Torch/TorchConfig.cmake" + if [[ ! -f "$torch_config" ]]; then + echo "No TorchConfig.cmake found!" + ls -lah "$install_root/share/cmake/Torch" + exit 1 fi - } - - # First we check that the env var in TorchConfig.cmake is correct - - # We search for D_GLIBCXX_USE_CXX11_ABI=1 in torch/TorchConfig.cmake - torch_config="${install_root}/share/cmake/Torch/TorchConfig.cmake" - if [[ ! -f "$torch_config" ]]; then - echo "No TorchConfig.cmake found!" - ls -lah "$install_root/share/cmake/Torch" - exit 1 - fi - echo "Checking the TorchConfig.cmake" - cat "$torch_config" - - # The sed call below is - # don't print lines by default (only print the line we want) - # -n - # execute the following expression - # e - # replace lines that match with the first capture group and print - # s/.*D_GLIBCXX_USE_CXX11_ABI=\(.\)".*/\1/p - # any characters, D_GLIBCXX_USE_CXX11_ABI=, exactly one any character, a - # quote, any characters - # Note the exactly one single character after the '='. In the case that the - # variable is not set the '=' will be followed by a '"' immediately and the - # line will fail the match and nothing will be printed; this is what we - # want. Otherwise it will capture the 0 or 1 after the '='. - # /.*D_GLIBCXX_USE_CXX11_ABI=\(.\)".*/ - # replace the matched line with the capture group and print - # /\1/p - actual_gcc_abi="$(sed -ne 's/.*D_GLIBCXX_USE_CXX11_ABI=\(.\)".*/\1/p' < "$torch_config")" - if [[ "$(is_expected "$actual_gcc_abi")" != 1 ]]; then - echo "gcc ABI $actual_gcc_abi not as expected." - exit 1 - fi - - # We also check that there are [not] cxx11 symbols in libtorch - # To check whether it is using cxx11 ABI, check non-existence of symbol: - # - std::basic_string - # - std::list - # To check whether it is using pre-cxx11 ABI, check non-existence of symbol: - # - std::__cxx11::basic_string - # - std::__cxx11::list - echo "Checking that symbols in libtorch.so have the right gcc abi" - check_lib_symbols_for_abi_correctness () { - lib=$1 - echo "lib: " $lib - if [[ "$DESIRED_DEVTOOLSET" == *"cxx11-abi"* ]]; then - pre_cxx11_symbols=$(($(nm "$lib" | c++filt | grep std::basic_string | wc -l) + $(nm "$lib" | c++filt | grep std::list | wc -l))) || true - echo "pre_cxx11_symbols: " $pre_cxx11_symbols - if [[ "$pre_cxx11_symbols" -gt 0 ]]; then - echo "Found pre-cxx11 symbols but there shouldn't be. Dumping symbols" - nm "$lib" | c++filt | grep std::basic_string - nm "$lib" | c++filt | grep std::list - exit 1 - fi - else - cxx11_symbols=$(($(nm "$lib" | c++filt | grep std::__cxx11::basic_string | wc -l) + $(nm "$lib" | c++filt | grep std::__cxx11::list | wc -l))) || true - echo "cxx11_symbols: " $cxx11_symbols - if [[ "$cxx11_symbols" -gt 0 ]]; then - echo "Found cxx11 symbols but there shouldn't be. Dumping symbols" - nm "$lib" | c++filt | grep std::__cxx11::basic_string - nm "$lib" | c++filt | grep std::__cxx11::list - exit 1 - fi + echo "Checking the TorchConfig.cmake" + cat "$torch_config" + + # The sed call below is + # don't print lines by default (only print the line we want) + # -n + # execute the following expression + # e + # replace lines that match with the first capture group and print + # s/.*D_GLIBCXX_USE_CXX11_ABI=\(.\)".*/\1/p + # any characters, D_GLIBCXX_USE_CXX11_ABI=, exactly one any character, a + # quote, any characters + # Note the exactly one single character after the '='. In the case that the + # variable is not set the '=' will be followed by a '"' immediately and the + # line will fail the match and nothing will be printed; this is what we + # want. Otherwise it will capture the 0 or 1 after the '='. + # /.*D_GLIBCXX_USE_CXX11_ABI=\(.\)".*/ + # replace the matched line with the capture group and print + # /\1/p + actual_gcc_abi="$(sed -ne 's/.*D_GLIBCXX_USE_CXX11_ABI=\(.\)".*/\1/p' < "$torch_config")" + if [[ "$(is_expected "$actual_gcc_abi")" != 1 ]]; then + echo "gcc ABI $actual_gcc_abi not as expected." + exit 1 fi - } - libc10="${install_root}/lib/libc10.so" - libtorch="${install_root}/lib/libtorch.so" - check_lib_symbols_for_abi_correctness $libc10 - check_lib_symbols_for_abi_correctness $libtorch - - echo "cxx11 symbols seem to be in order" -fi # if on Darwin + + # We also check that there are [not] cxx11 symbols in libtorch + # To check whether it is using cxx11 ABI, check non-existence of symbol: + # - std::basic_string + # - std::list + # To check whether it is using pre-cxx11 ABI, check non-existence of symbol: + # - std::__cxx11::basic_string + # - std::__cxx11::list + echo "Checking that symbols in libtorch.so have the right gcc abi" + check_lib_symbols_for_abi_correctness () { + lib=$1 + echo "lib: " $lib + if [[ "$DESIRED_DEVTOOLSET" == *"cxx11-abi"* ]]; then + pre_cxx11_symbols=$(($(nm "$lib" | c++filt | grep std::basic_string | wc -l) + $(nm "$lib" | c++filt | grep std::list | wc -l))) || true + echo "pre_cxx11_symbols: " $pre_cxx11_symbols + if [[ "$pre_cxx11_symbols" -gt 0 ]]; then + echo "Found pre-cxx11 symbols but there shouldn't be. Dumping symbols" + nm "$lib" | c++filt | grep std::basic_string + nm "$lib" | c++filt | grep std::list + exit 1 + fi + else + cxx11_symbols=$(($(nm "$lib" | c++filt | grep std::__cxx11::basic_string | wc -l) + $(nm "$lib" | c++filt | grep std::__cxx11::list | wc -l))) || true + echo "cxx11_symbols: " $cxx11_symbols + if [[ "$cxx11_symbols" -gt 0 ]]; then + echo "Found cxx11 symbols but there shouldn't be. Dumping symbols" + nm "$lib" | c++filt | grep std::__cxx11::basic_string + nm "$lib" | c++filt | grep std::__cxx11::list + exit 1 + fi + fi + } + libc10="${install_root}/lib/libc10.so" + libtorch="${install_root}/lib/libtorch.so" + check_lib_symbols_for_abi_correctness $libc10 + check_lib_symbols_for_abi_correctness $libtorch + + echo "cxx11 symbols seem to be in order" + fi # if on Darwin +fi # if libtorch ############################################################################### # Check for no OpenBLAS From d0b4e7c43c1e4360bfcb8abbf15ec5a8ccaa3f11 Mon Sep 17 00:00:00 2001 From: Will Feng Date: Thu, 8 Aug 2019 11:32:56 -0400 Subject: [PATCH 18/33] skip ABI test for devtoolset7 --- check_binary.sh | 67 ++++++++++++++++++++++++++---------------------- conda/.DS_Store | Bin 0 -> 6148 bytes 2 files changed, 37 insertions(+), 30 deletions(-) create mode 100644 conda/.DS_Store diff --git a/check_binary.sh b/check_binary.sh index cb87bffef..1108df5c9 100755 --- a/check_binary.sh +++ b/check_binary.sh @@ -50,7 +50,7 @@ if [[ "$PACKAGE_TYPE" == libtorch ]]; then if [[ "$(uname)" != 'Darwin' ]]; then function is_expected() { if [[ "$DESIRED_DEVTOOLSET" == *"cxx11-abi"* ]]; then - if [[ "$1" -gt 0 || "$1" == "ON" ]]; then + if [[ "$1" -gt 0 || "$1" == "ON " ]]; then echo 1 fi else @@ -101,36 +101,43 @@ if [[ "$PACKAGE_TYPE" == libtorch ]]; then # To check whether it is using pre-cxx11 ABI, check non-existence of symbol: # - std::__cxx11::basic_string # - std::__cxx11::list - echo "Checking that symbols in libtorch.so have the right gcc abi" - check_lib_symbols_for_abi_correctness () { - lib=$1 - echo "lib: " $lib - if [[ "$DESIRED_DEVTOOLSET" == *"cxx11-abi"* ]]; then - pre_cxx11_symbols=$(($(nm "$lib" | c++filt | grep std::basic_string | wc -l) + $(nm "$lib" | c++filt | grep std::list | wc -l))) || true - echo "pre_cxx11_symbols: " $pre_cxx11_symbols - if [[ "$pre_cxx11_symbols" -gt 0 ]]; then - echo "Found pre-cxx11 symbols but there shouldn't be. Dumping symbols" - nm "$lib" | c++filt | grep std::basic_string - nm "$lib" | c++filt | grep std::list - exit 1 - fi - else - cxx11_symbols=$(($(nm "$lib" | c++filt | grep std::__cxx11::basic_string | wc -l) + $(nm "$lib" | c++filt | grep std::__cxx11::list | wc -l))) || true - echo "cxx11_symbols: " $cxx11_symbols - if [[ "$cxx11_symbols" -gt 0 ]]; then - echo "Found cxx11 symbols but there shouldn't be. Dumping symbols" - nm "$lib" | c++filt | grep std::__cxx11::basic_string - nm "$lib" | c++filt | grep std::__cxx11::list - exit 1 + # + # NOTE: There are some cxx11 symbols with devtoolset7 even if we build with + # old ABI, so this test doesn't work. On the other hand, since it is known + # that devtoolset7 on CentOS can *only* build with old ABI (https://bugzilla.redhat.com/show_bug.cgi?id=1546704), + # we don't need to test for cxx11 symbols here. + if [[ "$DESIRED_DEVTOOLSET" != "devtoolset7" ]]; then + echo "Checking that symbols in libtorch.so have the right gcc abi" + check_lib_symbols_for_abi_correctness () { + lib=$1 + echo "lib: " $lib + if [[ "$DESIRED_DEVTOOLSET" == *"cxx11-abi"* ]]; then + pre_cxx11_symbols=$(($(nm "$lib" | c++filt | grep std::basic_string | wc -l) + $(nm "$lib" | c++filt | grep std::list | wc -l))) || true + echo "pre_cxx11_symbols: " $pre_cxx11_symbols + if [[ "$pre_cxx11_symbols" -gt 0 ]]; then + echo "Found pre-cxx11 symbols but there shouldn't be. Dumping symbols" + nm "$lib" | c++filt | grep std::basic_string + nm "$lib" | c++filt | grep std::list + exit 1 + fi + else + cxx11_symbols=$(($(nm "$lib" | c++filt | grep std::__cxx11::basic_string | wc -l) + $(nm "$lib" | c++filt | grep std::__cxx11::list | wc -l))) || true + echo "cxx11_symbols: " $cxx11_symbols + if [[ "$cxx11_symbols" -gt 0 ]]; then + echo "Found cxx11 symbols but there shouldn't be. Dumping symbols" + nm "$lib" | c++filt | grep std::__cxx11::basic_string + nm "$lib" | c++filt | grep std::__cxx11::list + exit 1 + fi fi - fi - } - libc10="${install_root}/lib/libc10.so" - libtorch="${install_root}/lib/libtorch.so" - check_lib_symbols_for_abi_correctness $libc10 - check_lib_symbols_for_abi_correctness $libtorch - - echo "cxx11 symbols seem to be in order" + } + libc10="${install_root}/lib/libc10.so" + libtorch="${install_root}/lib/libtorch.so" + check_lib_symbols_for_abi_correctness $libc10 + check_lib_symbols_for_abi_correctness $libtorch + + echo "cxx11 symbols seem to be in order" + fi # if devtoolset7 fi # if on Darwin fi # if libtorch diff --git a/conda/.DS_Store b/conda/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..5008ddfcf53c02e82d7eee2e57c38e5672ef89f6 GIT binary patch literal 6148 zcmeH~Jr2S!425mzP>H1@V-^m;4Wg<&0T*E43hX&L&p$$qDprKhvt+--jT7}7np#A3 zem<@ulZcFPQ@L2!n>{z**++&mCkOWA81W14cNZlEfg7;MkzE(HCqgga^y>{tEnwC%0;vJ&^%eQ zLs35+`xjp>T0 Date: Thu, 8 Aug 2019 12:08:03 -0400 Subject: [PATCH 19/33] DEBUG: CUDA version --- manywheel/build.sh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/manywheel/build.sh b/manywheel/build.sh index a7c2e474e..fb780518a 100755 --- a/manywheel/build.sh +++ b/manywheel/build.sh @@ -22,6 +22,8 @@ if [[ -z "$EXTRA_CAFFE2_CMAKE_FLAGS" ]]; then fi # Determine CUDA version and architectures to build for +echo "yf225 TODO: DESIRED_CUDA: " $DESIRED_CUDA +echo "yf225 TODO: CUDA_VERSION: " $CUDA_VERSION CUDA_VERSION=$(nvcc --version|tail -n1|cut -f5 -d" "|cut -f1 -d",") echo "CUDA $CUDA_VERSION Detected" From 6f2bfeb54ea0583f511fadb846419185035a6eda Mon Sep 17 00:00:00 2001 From: Will Feng Date: Thu, 8 Aug 2019 12:30:14 -0400 Subject: [PATCH 20/33] improve CUDA version checking --- manywheel/build.sh | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/manywheel/build.sh b/manywheel/build.sh index fb780518a..83206474e 100755 --- a/manywheel/build.sh +++ b/manywheel/build.sh @@ -22,9 +22,20 @@ if [[ -z "$EXTRA_CAFFE2_CMAKE_FLAGS" ]]; then fi # Determine CUDA version and architectures to build for -echo "yf225 TODO: DESIRED_CUDA: " $DESIRED_CUDA -echo "yf225 TODO: CUDA_VERSION: " $CUDA_VERSION -CUDA_VERSION=$(nvcc --version|tail -n1|cut -f5 -d" "|cut -f1 -d",") +# +# NOTE: We should first check `DESIRED_CUDA` when determining `CUDA_VERSION`, +# because in some cases a single Docker image can have multiple CUDA versions +# on it, and `nvcc --version` might not show the CUDA version we want. +if [[ -n "$DESIRED_CUDA" ]]; then + # cu90, cu92, cu100, cu101 + if [[ ${#DESIRED_CUDA} -eq 4 ]]; then + CUDA_VERSION="${DESIRED_CUDA:2:1}.${DESIRED_CUDA:3:1}" + elif [[ ${#DESIRED_CUDA} -eq 5 ]]; then + CUDA_VERSION="${DESIRED_CUDA:2:2}.${DESIRED_CUDA:4:1}" + fi +else + CUDA_VERSION=$(nvcc --version|tail -n1|cut -f5 -d" "|cut -f1 -d",") +fi echo "CUDA $CUDA_VERSION Detected" export TORCH_CUDA_ARCH_LIST="3.5;5.0+PTX" From 049e679acacc415063f9f5b47d3de3910f1adf8d Mon Sep 17 00:00:00 2001 From: Will Feng Date: Thu, 8 Aug 2019 12:39:38 -0400 Subject: [PATCH 21/33] better message --- manywheel/build.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/manywheel/build.sh b/manywheel/build.sh index 83206474e..4c31c070e 100755 --- a/manywheel/build.sh +++ b/manywheel/build.sh @@ -33,10 +33,11 @@ if [[ -n "$DESIRED_CUDA" ]]; then elif [[ ${#DESIRED_CUDA} -eq 5 ]]; then CUDA_VERSION="${DESIRED_CUDA:2:2}.${DESIRED_CUDA:4:1}" fi + echo "Using CUDA $CUDA_VERSION as determined by DESIRED_CUDA" else CUDA_VERSION=$(nvcc --version|tail -n1|cut -f5 -d" "|cut -f1 -d",") + echo "CUDA $CUDA_VERSION Detected" fi -echo "CUDA $CUDA_VERSION Detected" export TORCH_CUDA_ARCH_LIST="3.5;5.0+PTX" if [[ $CUDA_VERSION == "9.0" ]]; then From 2aa44b5db3991686c6ce1b9027e9dd5f89adb79d Mon Sep 17 00:00:00 2001 From: Will Feng Date: Thu, 8 Aug 2019 14:19:15 -0400 Subject: [PATCH 22/33] Switch /usr/local/cuda to point to correct CUDA version --- manywheel/build.sh | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/manywheel/build.sh b/manywheel/build.sh index 4c31c070e..69b00904a 100755 --- a/manywheel/build.sh +++ b/manywheel/build.sh @@ -146,6 +146,12 @@ fi # builder/test.sh requires DESIRED_CUDA to know what tests to exclude export DESIRED_CUDA="$cuda_version_nodot" +# Switch `/usr/local/cuda` to the desired CUDA version +rm -rf /usr/local/cuda || true +ln -s "/usr/local/cuda-${CUDA_VERSION}" /usr/local/cuda +export CUDA_VERSION=$(ls /usr/local/cuda/lib64/libcudart.so.*|sort|tac | head -1 | rev | cut -d"." -f -3 | rev) # 10.0.130 +export CUDA_VERSION_SHORT=$(ls /usr/local/cuda/lib64/libcudart.so.*|sort|tac | head -1 | rev | cut -d"." -f -3 | rev | cut -f1,2 -d".") # 10.0 +export CUDNN_VERSION=$(ls /usr/local/cuda/lib64/libcudnn.so.*|sort|tac | head -1 | rev | cut -d"." -f -3 | rev) SCRIPTPATH="$( cd "$(dirname "$0")" ; pwd -P )" source $SCRIPTPATH/build_common.sh From 066dcdf991f32d86054f613476561664732d70fc Mon Sep 17 00:00:00 2001 From: Will Feng Date: Thu, 8 Aug 2019 16:45:49 -0400 Subject: [PATCH 23/33] fix libgomp path for CUDA --- manywheel/build.sh | 15 +++++++++++---- manywheel/build_cpu.sh | 16 ++++++++-------- 2 files changed, 19 insertions(+), 12 deletions(-) diff --git a/manywheel/build.sh b/manywheel/build.sh index 69b00904a..f00542d86 100755 --- a/manywheel/build.sh +++ b/manywheel/build.sh @@ -74,13 +74,20 @@ if [[ -z "$PYTORCH_FINAL_PACKAGE_DIR" ]]; then fi mkdir -p "$PYTORCH_FINAL_PACKAGE_DIR" || true +OS_NAME=`awk -F= '/^NAME/{print $2}' /etc/os-release` +if [[ "$OS_NAME" == *"CentOS Linux"* ]]; then + LIBGOMP_PATH="/usr/lib64/libgomp.so.1" +elif [[ "$OS_NAME" == *"Ubuntu"* ]]; then + LIBGOMP_PATH="/usr/lib/gcc/x86_64-linux-gnu/5/libgomp.so" +fi + if [[ $CUDA_VERSION == "9.0" ]]; then DEPS_LIST=( "/usr/local/cuda/lib64/libcudart.so.9.0" "/usr/local/cuda/lib64/libnvToolsExt.so.1" "/usr/local/cuda/lib64/libnvrtc.so.9.0" "/usr/local/cuda/lib64/libnvrtc-builtins.so" - "/usr/lib64/libgomp.so.1" + "$LIBGOMP_PATH" ) DEPS_SONAME=( @@ -96,7 +103,7 @@ DEPS_LIST=( "/usr/local/cuda/lib64/libnvToolsExt.so.1" "/usr/local/cuda/lib64/libnvrtc.so.9.2" "/usr/local/cuda/lib64/libnvrtc-builtins.so" - "/usr/lib64/libgomp.so.1" + "$LIBGOMP_PATH" ) DEPS_SONAME=( @@ -112,7 +119,7 @@ DEPS_LIST=( "/usr/local/cuda/lib64/libnvToolsExt.so.1" "/usr/local/cuda/lib64/libnvrtc.so.10.0" "/usr/local/cuda/lib64/libnvrtc-builtins.so" - "/usr/lib64/libgomp.so.1" + "$LIBGOMP_PATH" ) DEPS_SONAME=( @@ -128,7 +135,7 @@ DEPS_LIST=( "/usr/local/cuda/lib64/libnvToolsExt.so.1" "/usr/local/cuda/lib64/libnvrtc.so.10.1" "/usr/local/cuda/lib64/libnvrtc-builtins.so" - "/usr/lib64/libgomp.so.1" + "$LIBGOMP_PATH" ) DEPS_SONAME=( diff --git a/manywheel/build_cpu.sh b/manywheel/build_cpu.sh index a9fbe104f..8fb407dbf 100755 --- a/manywheel/build_cpu.sh +++ b/manywheel/build_cpu.sh @@ -27,16 +27,16 @@ fi mkdir -p "$PYTORCH_FINAL_PACKAGE_DIR" || true OS_NAME=`awk -F= '/^NAME/{print $2}' /etc/os-release` -if [[ "$OS_NAME" == 'CentOS Linux' ]]; then - DEPS_LIST=( - "/usr/lib64/libgomp.so.1" - ) -elif [[ "$OS_NAME" == 'Ubuntu' ]]; then - DEPS_LIST=( - "/usr/lib/gcc/x86_64-linux-gnu/5/libgomp.so" - ) +if [[ "$OS_NAME" == *"CentOS Linux"* ]]; then + LIBGOMP_PATH="/usr/lib64/libgomp.so.1" +elif [[ "$OS_NAME" == *"Ubuntu"* ]]; then + LIBGOMP_PATH="/usr/lib/gcc/x86_64-linux-gnu/5/libgomp.so" fi +DEPS_LIST=( + "$LIBGOMP_PATH" +) + DEPS_SONAME=( "libgomp.so.1" ) From cbbe3bc79cf80bedf4e36a652f68836b23d97f4c Mon Sep 17 00:00:00 2001 From: Will Feng Date: Thu, 8 Aug 2019 19:42:47 -0400 Subject: [PATCH 24/33] fix abi symbol checking --- check_binary.sh | 96 ++++++++++++++++++++++++++++--------------------- 1 file changed, 56 insertions(+), 40 deletions(-) diff --git a/check_binary.sh b/check_binary.sh index 1108df5c9..1249447bb 100755 --- a/check_binary.sh +++ b/check_binary.sh @@ -95,49 +95,65 @@ if [[ "$PACKAGE_TYPE" == libtorch ]]; then fi # We also check that there are [not] cxx11 symbols in libtorch + # # To check whether it is using cxx11 ABI, check non-existence of symbol: - # - std::basic_string - # - std::list + PRE_CXX11_SYMBOLS=( + "std::basic_string" + "std::list" + ) # To check whether it is using pre-cxx11 ABI, check non-existence of symbol: - # - std::__cxx11::basic_string - # - std::__cxx11::list - # - # NOTE: There are some cxx11 symbols with devtoolset7 even if we build with - # old ABI, so this test doesn't work. On the other hand, since it is known - # that devtoolset7 on CentOS can *only* build with old ABI (https://bugzilla.redhat.com/show_bug.cgi?id=1546704), - # we don't need to test for cxx11 symbols here. - if [[ "$DESIRED_DEVTOOLSET" != "devtoolset7" ]]; then - echo "Checking that symbols in libtorch.so have the right gcc abi" - check_lib_symbols_for_abi_correctness () { - lib=$1 - echo "lib: " $lib - if [[ "$DESIRED_DEVTOOLSET" == *"cxx11-abi"* ]]; then - pre_cxx11_symbols=$(($(nm "$lib" | c++filt | grep std::basic_string | wc -l) + $(nm "$lib" | c++filt | grep std::list | wc -l))) || true - echo "pre_cxx11_symbols: " $pre_cxx11_symbols - if [[ "$pre_cxx11_symbols" -gt 0 ]]; then - echo "Found pre-cxx11 symbols but there shouldn't be. Dumping symbols" - nm "$lib" | c++filt | grep std::basic_string - nm "$lib" | c++filt | grep std::list - exit 1 - fi - else - cxx11_symbols=$(($(nm "$lib" | c++filt | grep std::__cxx11::basic_string | wc -l) + $(nm "$lib" | c++filt | grep std::__cxx11::list | wc -l))) || true - echo "cxx11_symbols: " $cxx11_symbols - if [[ "$cxx11_symbols" -gt 0 ]]; then - echo "Found cxx11 symbols but there shouldn't be. Dumping symbols" - nm "$lib" | c++filt | grep std::__cxx11::basic_string - nm "$lib" | c++filt | grep std::__cxx11::list - exit 1 - fi + CXX11_SYMBOLS=( + "std::__cxx11::basic_string" + "std::__cxx11::list" + ) + # NOTE: Checking the above symbols in all namespaces doesn't work, because + # devtoolset7 always produces some cxx11 symbols even if we build with old ABI, + # and CuDNN always has pre-cxx11 symbols even if we build with new ABI using gcc 5.4. + # Instead, we *only* check the above symbols in the following namespaces: + LIBTORCH_NAMESPACE_LIST=( + "c10::" + "at::" + "caffe2::" + "torch::" + ) + echo "Checking that symbols in libtorch.so have the right gcc abi" + grep_symbols () { + symbols=("$@") + for namespace in "${LIBTORCH_NAMESPACE_LIST[@]}" + do + for symbol in "${symbols[@]}" + do + nm "$lib" | c++filt | grep $namespace.*$symbol + done + done + } + check_lib_symbols_for_abi_correctness () { + lib=$1 + echo "lib: " $lib + if [[ "$DESIRED_DEVTOOLSET" == *"cxx11-abi"* ]]; then + num_pre_cxx11_symbols=$(grep_symbols "${PRE_CXX11_SYMBOLS[@]}" | wc -l) || true + echo "num_pre_cxx11_symbols: " $num_pre_cxx11_symbols + if [[ "$num_pre_cxx11_symbols" -gt 0 ]]; then + echo "Found pre-cxx11 symbols but there shouldn't be. Dumping symbols" + grep_symbols "${PRE_CXX11_SYMBOLS[@]}" + exit 1 fi - } - libc10="${install_root}/lib/libc10.so" - libtorch="${install_root}/lib/libtorch.so" - check_lib_symbols_for_abi_correctness $libc10 - check_lib_symbols_for_abi_correctness $libtorch - - echo "cxx11 symbols seem to be in order" - fi # if devtoolset7 + else + num_cxx11_symbols=$(grep_symbols "${CXX11_SYMBOLS[@]}" | wc -l) || true + echo "num_cxx11_symbols: " $num_cxx11_symbols + if [[ "$num_cxx11_symbols" -gt 0 ]]; then + echo "Found cxx11 symbols but there shouldn't be. Dumping symbols" + grep_symbols "${CXX11_SYMBOLS[@]}" + exit 1 + fi + fi + } + libc10="${install_root}/lib/libc10.so" + libtorch="${install_root}/lib/libtorch.so" + check_lib_symbols_for_abi_correctness $libc10 + check_lib_symbols_for_abi_correctness $libtorch + + echo "cxx11 symbols seem to be in order" fi # if on Darwin fi # if libtorch From c7c1e551afe32fb4f9fa5383bd3535f2abbd26ba Mon Sep 17 00:00:00 2001 From: Will Feng Date: Thu, 8 Aug 2019 22:04:04 -0400 Subject: [PATCH 25/33] check number of correct ABI symbols as well --- check_binary.sh | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/check_binary.sh b/check_binary.sh index 1249447bb..0d4676188 100755 --- a/check_binary.sh +++ b/check_binary.sh @@ -111,10 +111,10 @@ if [[ "$PACKAGE_TYPE" == libtorch ]]; then # and CuDNN always has pre-cxx11 symbols even if we build with new ABI using gcc 5.4. # Instead, we *only* check the above symbols in the following namespaces: LIBTORCH_NAMESPACE_LIST=( - "c10::" - "at::" - "caffe2::" - "torch::" + "\sc10::" + "\sat::" + "\scaffe2::" + "\storch::" ) echo "Checking that symbols in libtorch.so have the right gcc abi" grep_symbols () { @@ -138,6 +138,12 @@ if [[ "$PACKAGE_TYPE" == libtorch ]]; then grep_symbols "${PRE_CXX11_SYMBOLS[@]}" exit 1 fi + num_cxx11_symbols=$(grep_symbols "${CXX11_SYMBOLS[@]}" | wc -l) || true + echo "num_cxx11_symbols: " $num_cxx11_symbols + if [[ "$num_cxx11_symbols" -lt 1000 ]]; then + echo "Didn't find enough cxx11 symbols. Aborting." + exit 1 + fi else num_cxx11_symbols=$(grep_symbols "${CXX11_SYMBOLS[@]}" | wc -l) || true echo "num_cxx11_symbols: " $num_cxx11_symbols @@ -146,6 +152,12 @@ if [[ "$PACKAGE_TYPE" == libtorch ]]; then grep_symbols "${CXX11_SYMBOLS[@]}" exit 1 fi + num_pre_cxx11_symbols=$(grep_symbols "${PRE_CXX11_SYMBOLS[@]}" | wc -l) || true + echo "num_pre_cxx11_symbols: " $num_pre_cxx11_symbols + if [[ "$num_pre_cxx11_symbols" -lt 1000 ]]; then + echo "Didn't find enough pre-cxx11 symbols. Aborting." + exit 1 + fi fi } libc10="${install_root}/lib/libc10.so" From 71e7c6f7968f5c702d216c91028ed979d9962a36 Mon Sep 17 00:00:00 2001 From: Will Feng Date: Thu, 8 Aug 2019 23:41:07 -0400 Subject: [PATCH 26/33] no need to check c10 library for ABI symbols --- check_binary.sh | 2 -- 1 file changed, 2 deletions(-) diff --git a/check_binary.sh b/check_binary.sh index 0d4676188..d40b02302 100755 --- a/check_binary.sh +++ b/check_binary.sh @@ -160,9 +160,7 @@ if [[ "$PACKAGE_TYPE" == libtorch ]]; then fi fi } - libc10="${install_root}/lib/libc10.so" libtorch="${install_root}/lib/libtorch.so" - check_lib_symbols_for_abi_correctness $libc10 check_lib_symbols_for_abi_correctness $libtorch echo "cxx11 symbols seem to be in order" From 49c461fe11a1d129149be249f207d830d58bfee9 Mon Sep 17 00:00:00 2001 From: Will Feng Date: Fri, 9 Aug 2019 12:25:35 -0400 Subject: [PATCH 27/33] improve LIBTORCH_NAMESPACE_LIST --- check_binary.sh | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/check_binary.sh b/check_binary.sh index d40b02302..9c2ed5ed0 100755 --- a/check_binary.sh +++ b/check_binary.sh @@ -111,10 +111,10 @@ if [[ "$PACKAGE_TYPE" == libtorch ]]; then # and CuDNN always has pre-cxx11 symbols even if we build with new ABI using gcc 5.4. # Instead, we *only* check the above symbols in the following namespaces: LIBTORCH_NAMESPACE_LIST=( - "\sc10::" - "\sat::" - "\scaffe2::" - "\storch::" + "c10::" + "at::" + "caffe2::" + "torch::" ) echo "Checking that symbols in libtorch.so have the right gcc abi" grep_symbols () { @@ -123,7 +123,7 @@ if [[ "$PACKAGE_TYPE" == libtorch ]]; then do for symbol in "${symbols[@]}" do - nm "$lib" | c++filt | grep $namespace.*$symbol + nm "$lib" | c++filt | grep " $namespace".*$symbol done done } From 19e0e8b7c8a7532545beb254a81b1a15e4bbef2a Mon Sep 17 00:00:00 2001 From: Will Feng Date: Fri, 9 Aug 2019 12:32:18 -0400 Subject: [PATCH 28/33] run ABI check on all binaries --- check_binary.sh | 222 ++++++++++++++++++++++++------------------------ 1 file changed, 110 insertions(+), 112 deletions(-) diff --git a/check_binary.sh b/check_binary.sh index 9c2ed5ed0..15de28194 100755 --- a/check_binary.sh +++ b/check_binary.sh @@ -45,127 +45,125 @@ fi # - All the nightlies built on CentOS 7 + devtoolset7 use the old gcc ABI. # - All the nightlies built on Ubuntu 16.04 + gcc 5.4 use the new gcc ABI. -if [[ "$PACKAGE_TYPE" == libtorch ]]; then - echo "Checking that the gcc ABI is what we expect" - if [[ "$(uname)" != 'Darwin' ]]; then - function is_expected() { - if [[ "$DESIRED_DEVTOOLSET" == *"cxx11-abi"* ]]; then - if [[ "$1" -gt 0 || "$1" == "ON " ]]; then - echo 1 - fi - else - if [[ -z "$1" || "$1" == 0 || "$1" == "OFF" ]]; then - echo 1 - fi +echo "Checking that the gcc ABI is what we expect" +if [[ "$(uname)" != 'Darwin' ]]; then + function is_expected() { + if [[ "$DESIRED_DEVTOOLSET" == *"cxx11-abi"* ]]; then + if [[ "$1" -gt 0 || "$1" == "ON " ]]; then + echo 1 + fi + else + if [[ -z "$1" || "$1" == 0 || "$1" == "OFF" ]]; then + echo 1 fi - } + fi + } - # First we check that the env var in TorchConfig.cmake is correct + # First we check that the env var in TorchConfig.cmake is correct - # We search for D_GLIBCXX_USE_CXX11_ABI=1 in torch/TorchConfig.cmake - torch_config="${install_root}/share/cmake/Torch/TorchConfig.cmake" - if [[ ! -f "$torch_config" ]]; then - echo "No TorchConfig.cmake found!" - ls -lah "$install_root/share/cmake/Torch" - exit 1 - fi - echo "Checking the TorchConfig.cmake" - cat "$torch_config" + # We search for D_GLIBCXX_USE_CXX11_ABI=1 in torch/TorchConfig.cmake + torch_config="${install_root}/share/cmake/Torch/TorchConfig.cmake" + if [[ ! -f "$torch_config" ]]; then + echo "No TorchConfig.cmake found!" + ls -lah "$install_root/share/cmake/Torch" + exit 1 + fi + echo "Checking the TorchConfig.cmake" + cat "$torch_config" - # The sed call below is - # don't print lines by default (only print the line we want) - # -n - # execute the following expression - # e - # replace lines that match with the first capture group and print - # s/.*D_GLIBCXX_USE_CXX11_ABI=\(.\)".*/\1/p - # any characters, D_GLIBCXX_USE_CXX11_ABI=, exactly one any character, a - # quote, any characters - # Note the exactly one single character after the '='. In the case that the - # variable is not set the '=' will be followed by a '"' immediately and the - # line will fail the match and nothing will be printed; this is what we - # want. Otherwise it will capture the 0 or 1 after the '='. - # /.*D_GLIBCXX_USE_CXX11_ABI=\(.\)".*/ - # replace the matched line with the capture group and print - # /\1/p - actual_gcc_abi="$(sed -ne 's/.*D_GLIBCXX_USE_CXX11_ABI=\(.\)".*/\1/p' < "$torch_config")" - if [[ "$(is_expected "$actual_gcc_abi")" != 1 ]]; then - echo "gcc ABI $actual_gcc_abi not as expected." - exit 1 - fi + # The sed call below is + # don't print lines by default (only print the line we want) + # -n + # execute the following expression + # e + # replace lines that match with the first capture group and print + # s/.*D_GLIBCXX_USE_CXX11_ABI=\(.\)".*/\1/p + # any characters, D_GLIBCXX_USE_CXX11_ABI=, exactly one any character, a + # quote, any characters + # Note the exactly one single character after the '='. In the case that the + # variable is not set the '=' will be followed by a '"' immediately and the + # line will fail the match and nothing will be printed; this is what we + # want. Otherwise it will capture the 0 or 1 after the '='. + # /.*D_GLIBCXX_USE_CXX11_ABI=\(.\)".*/ + # replace the matched line with the capture group and print + # /\1/p + actual_gcc_abi="$(sed -ne 's/.*D_GLIBCXX_USE_CXX11_ABI=\(.\)".*/\1/p' < "$torch_config")" + if [[ "$(is_expected "$actual_gcc_abi")" != 1 ]]; then + echo "gcc ABI $actual_gcc_abi not as expected." + exit 1 + fi - # We also check that there are [not] cxx11 symbols in libtorch - # - # To check whether it is using cxx11 ABI, check non-existence of symbol: - PRE_CXX11_SYMBOLS=( - "std::basic_string" - "std::list" - ) - # To check whether it is using pre-cxx11 ABI, check non-existence of symbol: - CXX11_SYMBOLS=( - "std::__cxx11::basic_string" - "std::__cxx11::list" - ) - # NOTE: Checking the above symbols in all namespaces doesn't work, because - # devtoolset7 always produces some cxx11 symbols even if we build with old ABI, - # and CuDNN always has pre-cxx11 symbols even if we build with new ABI using gcc 5.4. - # Instead, we *only* check the above symbols in the following namespaces: - LIBTORCH_NAMESPACE_LIST=( - "c10::" - "at::" - "caffe2::" - "torch::" - ) - echo "Checking that symbols in libtorch.so have the right gcc abi" - grep_symbols () { - symbols=("$@") - for namespace in "${LIBTORCH_NAMESPACE_LIST[@]}" + # We also check that there are [not] cxx11 symbols in libtorch + # + # To check whether it is using cxx11 ABI, check non-existence of symbol: + PRE_CXX11_SYMBOLS=( + "std::basic_string" + "std::list" + ) + # To check whether it is using pre-cxx11 ABI, check non-existence of symbol: + CXX11_SYMBOLS=( + "std::__cxx11::basic_string" + "std::__cxx11::list" + ) + # NOTE: Checking the above symbols in all namespaces doesn't work, because + # devtoolset7 always produces some cxx11 symbols even if we build with old ABI, + # and CuDNN always has pre-cxx11 symbols even if we build with new ABI using gcc 5.4. + # Instead, we *only* check the above symbols in the following namespaces: + LIBTORCH_NAMESPACE_LIST=( + "c10::" + "at::" + "caffe2::" + "torch::" + ) + echo "Checking that symbols in libtorch.so have the right gcc abi" + grep_symbols () { + symbols=("$@") + for namespace in "${LIBTORCH_NAMESPACE_LIST[@]}" + do + for symbol in "${symbols[@]}" do - for symbol in "${symbols[@]}" - do - nm "$lib" | c++filt | grep " $namespace".*$symbol - done + nm "$lib" | c++filt | grep " $namespace".*$symbol done - } - check_lib_symbols_for_abi_correctness () { - lib=$1 - echo "lib: " $lib - if [[ "$DESIRED_DEVTOOLSET" == *"cxx11-abi"* ]]; then - num_pre_cxx11_symbols=$(grep_symbols "${PRE_CXX11_SYMBOLS[@]}" | wc -l) || true - echo "num_pre_cxx11_symbols: " $num_pre_cxx11_symbols - if [[ "$num_pre_cxx11_symbols" -gt 0 ]]; then - echo "Found pre-cxx11 symbols but there shouldn't be. Dumping symbols" - grep_symbols "${PRE_CXX11_SYMBOLS[@]}" - exit 1 - fi - num_cxx11_symbols=$(grep_symbols "${CXX11_SYMBOLS[@]}" | wc -l) || true - echo "num_cxx11_symbols: " $num_cxx11_symbols - if [[ "$num_cxx11_symbols" -lt 1000 ]]; then - echo "Didn't find enough cxx11 symbols. Aborting." - exit 1 - fi - else - num_cxx11_symbols=$(grep_symbols "${CXX11_SYMBOLS[@]}" | wc -l) || true - echo "num_cxx11_symbols: " $num_cxx11_symbols - if [[ "$num_cxx11_symbols" -gt 0 ]]; then - echo "Found cxx11 symbols but there shouldn't be. Dumping symbols" - grep_symbols "${CXX11_SYMBOLS[@]}" - exit 1 - fi - num_pre_cxx11_symbols=$(grep_symbols "${PRE_CXX11_SYMBOLS[@]}" | wc -l) || true - echo "num_pre_cxx11_symbols: " $num_pre_cxx11_symbols - if [[ "$num_pre_cxx11_symbols" -lt 1000 ]]; then - echo "Didn't find enough pre-cxx11 symbols. Aborting." - exit 1 - fi + done + } + check_lib_symbols_for_abi_correctness () { + lib=$1 + echo "lib: " $lib + if [[ "$DESIRED_DEVTOOLSET" == *"cxx11-abi"* ]]; then + num_pre_cxx11_symbols=$(grep_symbols "${PRE_CXX11_SYMBOLS[@]}" | wc -l) || true + echo "num_pre_cxx11_symbols: " $num_pre_cxx11_symbols + if [[ "$num_pre_cxx11_symbols" -gt 0 ]]; then + echo "Found pre-cxx11 symbols but there shouldn't be. Dumping symbols" + grep_symbols "${PRE_CXX11_SYMBOLS[@]}" + exit 1 + fi + num_cxx11_symbols=$(grep_symbols "${CXX11_SYMBOLS[@]}" | wc -l) || true + echo "num_cxx11_symbols: " $num_cxx11_symbols + if [[ "$num_cxx11_symbols" -lt 1000 ]]; then + echo "Didn't find enough cxx11 symbols. Aborting." + exit 1 + fi + else + num_cxx11_symbols=$(grep_symbols "${CXX11_SYMBOLS[@]}" | wc -l) || true + echo "num_cxx11_symbols: " $num_cxx11_symbols + if [[ "$num_cxx11_symbols" -gt 0 ]]; then + echo "Found cxx11 symbols but there shouldn't be. Dumping symbols" + grep_symbols "${CXX11_SYMBOLS[@]}" + exit 1 fi - } - libtorch="${install_root}/lib/libtorch.so" - check_lib_symbols_for_abi_correctness $libtorch + num_pre_cxx11_symbols=$(grep_symbols "${PRE_CXX11_SYMBOLS[@]}" | wc -l) || true + echo "num_pre_cxx11_symbols: " $num_pre_cxx11_symbols + if [[ "$num_pre_cxx11_symbols" -lt 1000 ]]; then + echo "Didn't find enough pre-cxx11 symbols. Aborting." + exit 1 + fi + fi + } + libtorch="${install_root}/lib/libtorch.so" + check_lib_symbols_for_abi_correctness $libtorch - echo "cxx11 symbols seem to be in order" - fi # if on Darwin -fi # if libtorch + echo "cxx11 symbols seem to be in order" +fi # if on Darwin ############################################################################### # Check for no OpenBLAS From df05ed1409211ac6a65b8ea3513c909bfdee1ed1 Mon Sep 17 00:00:00 2001 From: Will Feng Date: Sun, 11 Aug 2019 18:17:56 -0400 Subject: [PATCH 29/33] DEBUG --- conda/build_pytorch.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conda/build_pytorch.sh b/conda/build_pytorch.sh index 2d1fc570d..bd2fe09f4 100755 --- a/conda/build_pytorch.sh +++ b/conda/build_pytorch.sh @@ -296,7 +296,7 @@ for py_ver in "${DESIRED_PYTHON[@]}"; do --python "$py_ver" \ --output-folder "$output_folder" \ --no-test \ - "$build_folder" + "$build_folder" || echo $PWD && ls && ls "$build_folder"/ && ls "$output_folder"/ && cat ""$build_folder"/caffe2/CMakeFiles/caffe2_pybind11_state.dir/build.make" echo "Finished conda-build at $(date)" # Create a new environment to test in From e13bb2b2ab9fe831d2e660c2e4582131f5998fac Mon Sep 17 00:00:00 2001 From: Will Feng Date: Mon, 12 Aug 2019 11:29:26 -0400 Subject: [PATCH 30/33] DEBUG python version problem --- conda/build_pytorch.sh | 3 +++ 1 file changed, 3 insertions(+) diff --git a/conda/build_pytorch.sh b/conda/build_pytorch.sh index bd2fe09f4..1871b78e5 100755 --- a/conda/build_pytorch.sh +++ b/conda/build_pytorch.sh @@ -283,6 +283,9 @@ for py_ver in "${DESIRED_PYTHON[@]}"; do cat "$meta_yaml" # Build the package + if [[ "$py_ver" == "3.7" ]]; then # yf225 TODO debug + py_ver="3.7.3" + fi echo "Build $build_folder for Python version $py_ver" conda config --set anaconda_upload no echo "Calling conda-build at $(date)" From 26b9582319181d35cf621d6f3988122d008e8d5e Mon Sep 17 00:00:00 2001 From: Will Feng Date: Mon, 12 Aug 2019 11:30:21 -0400 Subject: [PATCH 31/33] Revert "DEBUG" This reverts commit df05ed1409211ac6a65b8ea3513c909bfdee1ed1. --- conda/build_pytorch.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conda/build_pytorch.sh b/conda/build_pytorch.sh index 1871b78e5..2f6d6f56f 100755 --- a/conda/build_pytorch.sh +++ b/conda/build_pytorch.sh @@ -299,7 +299,7 @@ for py_ver in "${DESIRED_PYTHON[@]}"; do --python "$py_ver" \ --output-folder "$output_folder" \ --no-test \ - "$build_folder" || echo $PWD && ls && ls "$build_folder"/ && ls "$output_folder"/ && cat ""$build_folder"/caffe2/CMakeFiles/caffe2_pybind11_state.dir/build.make" + "$build_folder" echo "Finished conda-build at $(date)" # Create a new environment to test in From dd4ceb4ef04e3f88a8d8e58554087a26fff0c090 Mon Sep 17 00:00:00 2001 From: Will Feng Date: Mon, 12 Aug 2019 14:19:28 -0400 Subject: [PATCH 32/33] Revert "DEBUG python version problem" This reverts commit e13bb2b2ab9fe831d2e660c2e4582131f5998fac. --- conda/build_pytorch.sh | 3 --- 1 file changed, 3 deletions(-) diff --git a/conda/build_pytorch.sh b/conda/build_pytorch.sh index 2f6d6f56f..2d1fc570d 100755 --- a/conda/build_pytorch.sh +++ b/conda/build_pytorch.sh @@ -283,9 +283,6 @@ for py_ver in "${DESIRED_PYTHON[@]}"; do cat "$meta_yaml" # Build the package - if [[ "$py_ver" == "3.7" ]]; then # yf225 TODO debug - py_ver="3.7.3" - fi echo "Build $build_folder for Python version $py_ver" conda config --set anaconda_upload no echo "Calling conda-build at $(date)" From c917ae9c9d233cd815b7155499730a065bf537f8 Mon Sep 17 00:00:00 2001 From: Will Feng Date: Tue, 13 Aug 2019 11:31:47 -0400 Subject: [PATCH 33/33] add yum install -q -y zip openssl back --- manywheel/build_common.sh | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/manywheel/build_common.sh b/manywheel/build_common.sh index 87d217f52..74d7bb7e8 100644 --- a/manywheel/build_common.sh +++ b/manywheel/build_common.sh @@ -19,6 +19,15 @@ retry () { $* || (sleep 1 && $*) || (sleep 2 && $*) || (sleep 4 && $*) || (sleep 8 && $*) } +# TODO move this into the Docker images +OS_NAME=`awk -F= '/^NAME/{print $2}' /etc/os-release` +if [[ "$OS_NAME" == *"CentOS Linux"* ]]; then + retry yum install -q -y zip openssl +elif [[ "$OS_NAME" == *"Ubuntu"* ]]; then + retry apt-get update + retry apt-get -y install zip openssl +fi + # We use the package name to test the package by passing this to 'pip install' # This is the env variable that setup.py uses to name the package. Note that # pip 'normalizes' the name first by changing all - to _