pytorch
diff --git a/.ci/docker/almalinux/Dockerfile b/.ci/docker/almalinux/Dockerfile
Lines changed: 2 additions & 1 deletion
diff --git a/.ci/docker/almalinux/build.sh b/.ci/docker/almalinux/build.sh
Lines changed: 6 additions & 0 deletions
diff --git a/.ci/docker/libtorch/build.sh b/.ci/docker/libtorch/build.sh
Lines changed: 5 additions & 1 deletion
diff --git a/.ci/docker/manywheel/build.sh b/.ci/docker/manywheel/build.sh
Lines changed: 5 additions & 1 deletion
diff --git a/.ci/magma-rocm/Makefile b/.ci/magma-rocm/Makefile
Lines changed: 8 additions & 2 deletions
diff --git a/.ci/magma-rocm/build_magma.sh b/.ci/magma-rocm/build_magma.sh
Lines changed: 3 additions & 3 deletions
diff --git a/.ci/pytorch/cpp_doc_push_script.sh b/.ci/pytorch/cpp_doc_push_script.sh
Lines changed: 1 addition & 1 deletion
diff --git a/.ci/pytorch/macos-build.sh b/.ci/pytorch/macos-build.sh
Lines changed: 4 additions & 3 deletions
diff --git a/.ci/pytorch/macos-test.sh b/.ci/pytorch/macos-test.sh
Lines changed: 0 additions & 4 deletions
diff --git a/.ci/wheel/build_wheel.sh b/.ci/wheel/build_wheel.sh
Lines changed: 1 addition & 2 deletions
@@ -69,7 +69,8 @@ RUN bash ./install_cuda.sh 13.0
 ENV DESIRED_CUDA=13.0
 
 FROM ${ROCM_IMAGE} as rocm
-ENV PYTORCH_ROCM_ARCH="gfx900;gfx906;gfx908;gfx90a;gfx942;gfx1030;gfx1100;gfx1101;gfx1102;gfx1200;gfx1201"
+ARG PYTORCH_ROCM_ARCH
+ENV PYTORCH_ROCM_ARCH ${PYTORCH_ROCM_ARCH}
 ADD ./common/install_mkl.sh install_mkl.sh
 RUN bash ./install_mkl.sh && rm install_mkl.sh
 ENV MKLROOT /opt/intel
 
@@ -36,6 +36,12 @@ case ${DOCKER_TAG_PREFIX} in
     ;;
   rocm*)
     BASE_TARGET=rocm
+    PYTORCH_ROCM_ARCH="gfx900;gfx906;gfx908;gfx90a;gfx942;gfx1030;gfx1100;gfx1101;gfx1102;gfx1200;gfx1201"
+    # add gfx950 conditionally starting in ROCm 7.0
+    if [[ "$ROCM_VERSION" == *"7.0"* ]]; then
+        PYTORCH_ROCM_ARCH="${PYTORCH_ROCM_ARCH};gfx950"
+    fi
+    EXTRA_BUILD_ARGS="${EXTRA_BUILD_ARGS} --build-arg PYTORCH_ROCM_ARCH=${PYTORCH_ROCM_ARCH}"
     ;;
   *)
     echo "ERROR: Unknown docker tag ${DOCKER_TAG_PREFIX}"
 
@@ -40,12 +40,16 @@ case ${DOCKER_TAG_PREFIX} in
         ;;
     rocm*)
         # we want the patch version of 6.4 instead
-        if [[ $(ver $GPU_ARCH_VERSION) -eq $(ver 6.4) ]]; then
+        if [[ "$GPU_ARCH_VERSION" == *"6.4"* ]]; then
             GPU_ARCH_VERSION="${GPU_ARCH_VERSION}.2"
         fi
         BASE_TARGET=rocm
         GPU_IMAGE=rocm/dev-ubuntu-22.04:${GPU_ARCH_VERSION}-complete
         PYTORCH_ROCM_ARCH="gfx900;gfx906;gfx908;gfx90a;gfx942;gfx1030;gfx1100;gfx1101;gfx1102;gfx1200;gfx1201"
+        # add gfx950 conditionally starting in ROCm 7.0
+        if [[ "$GPU_ARCH_VERSION" == *"7.0"* ]]; then
+            PYTORCH_ROCM_ARCH="${PYTORCH_ROCM_ARCH};gfx950"
+        fi
         DOCKER_GPU_BUILD_ARG="--build-arg PYTORCH_ROCM_ARCH=${PYTORCH_ROCM_ARCH} --build-arg ROCM_VERSION=${GPU_ARCH_VERSION}"
         ;;
     *)
 
@@ -82,14 +82,18 @@ case ${image} in
         ;;
     manylinux2_28-builder:rocm*)
         # we want the patch version of 6.4 instead
-        if [[ $(ver $GPU_ARCH_VERSION) -eq $(ver 6.4) ]]; then
+        if [[ "$GPU_ARCH_VERSION" == *"6.4"* ]]; then
             GPU_ARCH_VERSION="${GPU_ARCH_VERSION}.2"
         fi
         TARGET=rocm_final
         MANY_LINUX_VERSION="2_28"
         DEVTOOLSET_VERSION="11"
         GPU_IMAGE=rocm/dev-almalinux-8:${GPU_ARCH_VERSION}-complete
         PYTORCH_ROCM_ARCH="gfx900;gfx906;gfx908;gfx90a;gfx942;gfx1030;gfx1100;gfx1101;gfx1102;gfx1200;gfx1201"
+        # add gfx950 conditionally starting in ROCm 7.0
+        if [[ "$GPU_ARCH_VERSION" == *"7.0"* ]]; then
+            PYTORCH_ROCM_ARCH="${PYTORCH_ROCM_ARCH};gfx950"
+        fi
         DOCKER_GPU_BUILD_ARG="--build-arg ROCM_VERSION=${GPU_ARCH_VERSION} --build-arg PYTORCH_ROCM_ARCH=${PYTORCH_ROCM_ARCH} --build-arg DEVTOOLSET_VERSION=${DEVTOOLSET_VERSION}"
         ;;
     manylinux2_28-builder:xpu)
 
@@ -1,11 +1,11 @@
 SHELL=/usr/bin/env bash
 
 DOCKER_CMD ?= docker
-DESIRED_ROCM ?= 6.4
+DESIRED_ROCM ?= 7.0
 DESIRED_ROCM_SHORT = $(subst .,,$(DESIRED_ROCM))
 PACKAGE_NAME = magma-rocm
 # inherit this from underlying docker image, do not pass this env var to docker
-#PYTORCH_ROCM_ARCH ?= gfx900;gfx906;gfx908;gfx90a;gfx942;gfx1030;gfx1100;gfx1101;gfx1102;gfx1200;gfx1201
+#PYTORCH_ROCM_ARCH ?= gfx900;gfx906;gfx908;gfx90a;gfx942;gfx950;gfx1030;gfx1100;gfx1101;gfx1102;gfx1200;gfx1201
 
 DOCKER_RUN = set -eou pipefail; ${DOCKER_CMD} run --rm -i \
 	-v $(shell git rev-parse --show-toplevel)/.ci:/builder \
@@ -16,6 +16,7 @@ DOCKER_RUN = set -eou pipefail; ${DOCKER_CMD} run --rm -i \
 	magma-rocm/build_magma.sh
 
 .PHONY: all
+all: magma-rocm70
 all: magma-rocm64
 all: magma-rocm63
 
@@ -24,6 +25,11 @@ clean:
 	$(RM) -r magma-*
 	$(RM) -r output
 
+.PHONY: magma-rocm70
+magma-rocm70: DESIRED_ROCM := 7.0
+magma-rocm70:
+	$(DOCKER_RUN)
+
 .PHONY: magma-rocm64
 magma-rocm64: DESIRED_ROCM := 6.4
 magma-rocm64:
 
@@ -6,8 +6,8 @@ set -eou pipefail
 # The script expects DESIRED_CUDA and PACKAGE_NAME to be set
 ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
 
-# Version 2.7.2 + ROCm related updates
-MAGMA_VERSION=a1625ff4d9bc362906bd01f805dbbe12612953f6
+# https://github.com/icl-utk-edu/magma/pull/65
+MAGMA_VERSION=d6e4117bc88e73f06d26c6c2e14f064e8fc3d1ec
 
 # Folders for the build
 PACKAGE_FILES=${ROOT_DIR}/magma-rocm/package_files # metadata
@@ -20,7 +20,7 @@ mkdir -p ${PACKAGE_DIR} ${PACKAGE_OUTPUT}/linux-64 ${PACKAGE_BUILD} ${PACKAGE_RE
 
 # Fetch magma sources and verify checksum
 pushd ${PACKAGE_DIR}
-git clone https://bitbucket.org/icl/magma.git
+git clone https://github.com/jeffdaily/magma
 pushd magma
 git checkout ${MAGMA_VERSION}
 popd
 
@@ -58,7 +58,7 @@ time python tools/setup_helpers/generate_code.py \
 
 # Build the docs
 pushd docs/cpp
-time make VERBOSE=1 html -j
+time make VERBOSE=1 html
 
 popd
 popd
 
@@ -35,10 +35,11 @@ fi
 
 print_cmake_info
 if [[ ${BUILD_ENVIRONMENT} == *"distributed"* ]]; then
-  USE_OPENMP=1 WERROR=1 python setup.py bdist_wheel
+  # Needed for inductor benchmarks, as lots of HF networks make `torch.distributed` calls
+  USE_DISTRIBUTED=1 USE_OPENMP=1 WERROR=1 python setup.py bdist_wheel
 else
-  # NB: we always build with distributed; USE_DISTRIBUTED turns off all
-  # backends (specifically the gloo backend), so test that this case works too
+  # Explicitly set USE_DISTRIBUTED=0 to align with the default build config on mac. This also serves as the sole CI config that tests
+  # that building with USE_DISTRIBUTED=0 works at all. See https://github.com/pytorch/pytorch/issues/86448
   USE_DISTRIBUTED=0 USE_OPENMP=1 MACOSX_DEPLOYMENT_TARGET=11.0 WERROR=1 BUILD_TEST=OFF USE_PYTORCH_METAL=1 python setup.py bdist_wheel --plat-name macosx_11_0_arm64
 fi
 if which sccache > /dev/null; then
 
@@ -13,13 +13,9 @@ if [[ ! $(python -c "import torch; print(int(torch.backends.openmp.is_available(
 fi
 popd
 
-python -mpip install -r requirements.txt
-
 # enable debug asserts in serialization
 export TORCH_SERIALIZATION_DEBUG=1
 
-python -mpip install --no-input -r requirements.txt
-
 setup_test_python() {
   # The CircleCI worker hostname doesn't resolve to an address.
   # This environment variable makes ProcessGroupGloo default to
 
@@ -177,8 +177,7 @@ source ~/${desired_python}-build/bin/activate
 retry pip install "${PINNED_PACKAGES[@]}" -r "${pytorch_rootdir}/requirements.txt"
 retry brew install libomp
 
-# For USE_DISTRIBUTED=1 on macOS, this enables gloo, which needs libuv, which
-# is build as part of tensorpipe submodule
+# For USE_DISTRIBUTED=1 on macOS, libuv is needed; it is built as part of the tensorpipe submodule
 export USE_DISTRIBUTED=1
 
 export USE_MKLDNN=OFF