diff --git a/check_binary.sh b/check_binary.sh
index a8e8d7946..2f68c2b24 100755
--- a/check_binary.sh
+++ b/check_binary.sh
@@ -360,3 +360,27 @@ if [[ "$DESIRED_CUDA" != 'cpu' && "$DESIRED_CUDA" != *"rocm"* ]]; then
     popd
   fi # if libtorch
 fi # if cuda
+
+###############################################################################
+# Check PyTorch supports TCP_TLS gloo transport
+###############################################################################
+
+# Initialise a one-process gloo group with GLOO_DEVICE_TRANSPORT=TCP_TLS and
+# fail if the captured output contains "gloo transport is not supported".
+# Only runs on Linux and for non-libtorch packages.
+if [[ "$(uname)" == 'Linux' && "$PACKAGE_TYPE" != 'libtorch' ]]; then
+  # A RuntimeError is printed rather than raised so its text lands in RESULT.
+  GLOO_CHECK="import torch.distributed as dist
+try:
+    dist.init_process_group('gloo', rank=0, world_size=1)
+except RuntimeError as e:
+    print(e)
+"
+  RESULT=$(GLOO_DEVICE_TRANSPORT=TCP_TLS MASTER_ADDR=localhost MASTER_PORT=63945 python -c "$GLOO_CHECK")
+  GLOO_TRANSPORT_IS_NOT_SUPPORTED='gloo transport is not supported'
+  # The quoted right-hand side of =~ is matched as a literal string.
+  if [[ "$RESULT" =~ "$GLOO_TRANSPORT_IS_NOT_SUPPORTED" ]]; then
+    echo "PyTorch doesn't support TCP_TLS transport, please build with USE_GLOO_WITH_OPENSSL=1"
+    exit 1
+  fi
+fi
diff --git a/conda/build_pytorch.sh b/conda/build_pytorch.sh
index b0751ea37..f89d8fdc2 100755
--- a/conda/build_pytorch.sh
+++ b/conda/build_pytorch.sh
@@ -313,6 +313,11 @@ else
     export CONDA_BUILD_EXTRA_ARGS=""
 fi
 
+# Build PyTorch with Gloo's TCP_TLS transport
+if [[ "$(uname)" == 'Linux' ]]; then
+    export USE_GLOO_WITH_OPENSSL=1
+fi
+
 # Loop through all Python versions to build a package for each
 for py_ver in "${DESIRED_PYTHON[@]}"; do
     build_string="py${py_ver}_${build_string_suffix}"
diff --git a/conda/pytorch-nightly/meta.yaml b/conda/pytorch-nightly/meta.yaml
index 3f5c75b2a..f152957c9 100644
--- a/conda/pytorch-nightly/meta.yaml
+++ b/conda/pytorch-nightly/meta.yaml
@@ -53,6 +53,7 @@ build:
     - DEVELOPER_DIR
     - DEBUG
     - USE_FBGEMM
+    - USE_GLOO_WITH_OPENSSL # [unix]
     - USE_SCCACHE # [win]
     - USE_DISTRIBUTED # [unix]
     - CMAKE_OSX_ARCHITECTURES # [unix]