Skip to content

Commit

Permalink
Update
Browse files Browse the repository at this point in the history
[ghstack-poisoned]
  • Loading branch information
kadeng committed May 4, 2024
2 parents 9d7634e + e388cba commit 0c5f2bb
Show file tree
Hide file tree
Showing 169 changed files with 4,222 additions and 1,290 deletions.
4 changes: 2 additions & 2 deletions .ci/docker/build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -204,7 +204,7 @@ case "$image" in
PROTOBUF=yes
DB=yes
VISION=yes
ROCM_VERSION=5.7
ROCM_VERSION=6.0
NINJA_VERSION=1.9.0
CONDA_CMAKE=yes
TRITON=yes
Expand All @@ -215,7 +215,7 @@ case "$image" in
PROTOBUF=yes
DB=yes
VISION=yes
ROCM_VERSION=6.0
ROCM_VERSION=6.1
NINJA_VERSION=1.9.0
CONDA_CMAKE=yes
TRITON=yes
Expand Down
4 changes: 4 additions & 0 deletions .ci/docker/common/install_rocm.sh
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,10 @@ install_ubuntu() {
rocprofiler-dev \
roctracer-dev

if [[ $(ver $ROCM_VERSION) -ge $(ver 6.1) ]]; then
DEBIAN_FRONTEND=noninteractive apt-get install -y --allow-unauthenticated rocm-llvm-dev
fi

# precompiled miopen kernels added in ROCm 3.5, renamed in ROCm 5.5
# search for all unversioned packages
# if search fails it will abort this script; use true to avoid case where search fails
Expand Down
1 change: 1 addition & 0 deletions .ci/pytorch/test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -322,6 +322,7 @@ test_inductor_distributed() {
pytest test/distributed/_composable/fsdp/test_fully_shard_training.py -k test_train_parity_2d_mlp
pytest test/distributed/_composable/fsdp/test_fully_shard_training.py -k test_train_parity_hsdp
pytest test/distributed/_composable/fsdp/test_fully_shard_training.py -k test_train_parity_2d_transformer_checkpoint_resume
pytest test/distributed/_composable/fsdp/test_fully_shard_training.py -k test_gradient_accumulation
pytest test/distributed/_composable/fsdp/test_fully_shard_frozen.py
pytest test/distributed/_composable/fsdp/test_fully_shard_mixed_precision.py -k test_compute_dtype
pytest test/distributed/_composable/fsdp/test_fully_shard_mixed_precision.py -k test_reduce_dtype
Expand Down
2 changes: 1 addition & 1 deletion .github/ci_commit_pins/vision.txt
Original file line number Diff line number Diff line change
@@ -1 +1 @@
2c4665ffbb64f03f5d18016d3398af4ac4da5f03
06ad737628abc3a1e617571dc03cbdd5b36ea96a
18 changes: 9 additions & 9 deletions .github/workflows/inductor.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,28 +16,28 @@ concurrency:
permissions: read-all

jobs:
linux-focal-rocm6_0-py3_8-inductor-build:
name: rocm6.0-py3.8-inductor
linux-focal-rocm6_1-py3_8-inductor-build:
name: rocm6.1-py3.8-inductor
uses: ./.github/workflows/_linux-build.yml
with:
build-environment: linux-focal-rocm6.0-py3.8
build-environment: linux-focal-rocm6.1-py3.8
docker-image-name: pytorch-linux-focal-rocm-n-py3
test-matrix: |
{ include: [
{ config: "inductor", shard: 1, num_shards: 1, runner: "linux.rocm.gpu.2" },
]}
linux-focal-rocm6_0-py3_8-inductor-test:
linux-focal-rocm6_1-py3_8-inductor-test:
permissions:
id-token: write
contents: read
name: rocm6.0-py3.8-inductor
name: rocm6.1-py3.8-inductor
uses: ./.github/workflows/_rocm-test.yml
needs: linux-focal-rocm6_0-py3_8-inductor-build
needs: linux-focal-rocm6_1-py3_8-inductor-build
with:
build-environment: linux-focal-rocm6.0-py3.8
docker-image: ${{ needs.linux-focal-rocm6_0-py3_8-inductor-build.outputs.docker-image }}
test-matrix: ${{ needs.linux-focal-rocm6_0-py3_8-inductor-build.outputs.test-matrix }}
build-environment: linux-focal-rocm6.1-py3.8
docker-image: ${{ needs.linux-focal-rocm6_1-py3_8-inductor-build.outputs.docker-image }}
test-matrix: ${{ needs.linux-focal-rocm6_1-py3_8-inductor-build.outputs.test-matrix }}

linux-focal-cuda12_1-py3_10-gcc9-inductor-build:
name: cuda12.1-py3.10-gcc9-sm86
Expand Down
18 changes: 9 additions & 9 deletions .github/workflows/periodic.yml
Original file line number Diff line number Diff line change
Expand Up @@ -217,28 +217,28 @@ jobs:
docker-image: ${{ needs.linux-vulkan-focal-py3_11-clang10-build.outputs.docker-image }}
test-matrix: ${{ needs.linux-vulkan-focal-py3_11-clang10-build.outputs.test-matrix }}

linux-focal-rocm6_0-py3_8-build:
name: linux-focal-rocm6.0-py3.8
linux-focal-rocm6_1-py3_8-build:
name: linux-focal-rocm6.1-py3.8
uses: ./.github/workflows/_linux-build.yml
with:
build-environment: linux-focal-rocm6.0-py3.8
build-environment: linux-focal-rocm6.1-py3.8
docker-image-name: pytorch-linux-focal-rocm-n-py3
test-matrix: |
{ include: [
{ config: "distributed", shard: 1, num_shards: 2, runner: "linux.rocm.gpu" },
{ config: "distributed", shard: 2, num_shards: 2, runner: "linux.rocm.gpu" },
]}
linux-focal-rocm6_0-py3_8-test:
linux-focal-rocm6_1-py3_8-test:
permissions:
id-token: write
contents: read
name: linux-focal-rocm6.0-py3.8
name: linux-focal-rocm6.1-py3.8
uses: ./.github/workflows/_rocm-test.yml
needs:
- linux-focal-rocm6_0-py3_8-build
- linux-focal-rocm6_1-py3_8-build
- target-determination
with:
build-environment: linux-focal-rocm6.0-py3.8
docker-image: ${{ needs.linux-focal-rocm6_0-py3_8-build.outputs.docker-image }}
test-matrix: ${{ needs.linux-focal-rocm6_0-py3_8-build.outputs.test-matrix }}
build-environment: linux-focal-rocm6.1-py3.8
docker-image: ${{ needs.linux-focal-rocm6_1-py3_8-build.outputs.docker-image }}
test-matrix: ${{ needs.linux-focal-rocm6_1-py3_8-build.outputs.test-matrix }}
6 changes: 3 additions & 3 deletions .github/workflows/pull.yml
Original file line number Diff line number Diff line change
Expand Up @@ -414,13 +414,13 @@ jobs:
{ config: "default", shard: 1, num_shards: 1 },
]}
linux-focal-rocm6_0-py3_8-build:
linux-focal-rocm6_1-py3_8-build:
# don't run build twice on main
if: github.event_name == 'pull_request'
name: linux-focal-rocm6.0-py3.8
name: linux-focal-rocm6.1-py3.8
uses: ./.github/workflows/_linux-build-label.yml
with:
build-environment: linux-focal-rocm6.0-py3.8
build-environment: linux-focal-rocm6.1-py3.8
docker-image-name: pytorch-linux-focal-rocm-n-py3
sync-tag: rocm-build
test-matrix: |
Expand Down
18 changes: 9 additions & 9 deletions .github/workflows/rocm.yml
Original file line number Diff line number Diff line change
Expand Up @@ -25,11 +25,11 @@ jobs:
id-token: write
contents: read

linux-focal-rocm6_0-py3_8-build:
name: linux-focal-rocm6.0-py3.8
linux-focal-rocm6_1-py3_8-build:
name: linux-focal-rocm6.1-py3.8
uses: ./.github/workflows/_linux-build-label.yml
with:
build-environment: linux-focal-rocm6.0-py3.8
build-environment: linux-focal-rocm6.1-py3.8
docker-image-name: pytorch-linux-focal-rocm-n-py3
sync-tag: rocm-build
test-matrix: |
Expand All @@ -42,16 +42,16 @@ jobs:
{ config: "default", shard: 6, num_shards: 6, runner: "linux.rocm.gpu.2" },
]}
linux-focal-rocm6_0-py3_8-test:
linux-focal-rocm6_1-py3_8-test:
permissions:
id-token: write
contents: read
name: linux-focal-rocm6.0-py3.8
name: linux-focal-rocm6.1-py3.8
uses: ./.github/workflows/_rocm-test.yml
needs:
- linux-focal-rocm6_0-py3_8-build
- linux-focal-rocm6_1-py3_8-build
- target-determination
with:
build-environment: linux-focal-rocm6.0-py3.8
docker-image: ${{ needs.linux-focal-rocm6_0-py3_8-build.outputs.docker-image }}
test-matrix: ${{ needs.linux-focal-rocm6_0-py3_8-build.outputs.test-matrix }}
build-environment: linux-focal-rocm6.1-py3.8
docker-image: ${{ needs.linux-focal-rocm6_1-py3_8-build.outputs.docker-image }}
test-matrix: ${{ needs.linux-focal-rocm6_1-py3_8-build.outputs.test-matrix }}
18 changes: 9 additions & 9 deletions .github/workflows/slow.yml
Original file line number Diff line number Diff line change
Expand Up @@ -111,30 +111,30 @@ jobs:
docker-image: ${{ needs.linux-focal-py3_8-clang10-build.outputs.docker-image }}
test-matrix: ${{ needs.linux-focal-py3_8-clang10-build.outputs.test-matrix }}

linux-focal-rocm6_0-py3_8-build:
name: linux-focal-rocm6.0-py3.8
linux-focal-rocm6_1-py3_8-build:
name: linux-focal-rocm6.1-py3.8
uses: ./.github/workflows/_linux-build.yml
with:
build-environment: linux-focal-rocm6.0-py3.8
build-environment: linux-focal-rocm6.1-py3.8
docker-image-name: pytorch-linux-focal-rocm-n-py3
test-matrix: |
{ include: [
{ config: "slow", shard: 1, num_shards: 1, runner: "linux.rocm.gpu" },
]}
linux-focal-rocm6_0-py3_8-test:
linux-focal-rocm6_1-py3_8-test:
permissions:
id-token: write
contents: read
name: linux-focal-rocm6.0-py3.8
name: linux-focal-rocm6.1-py3.8
uses: ./.github/workflows/_rocm-test.yml
needs:
- linux-focal-rocm6_0-py3_8-build
- linux-focal-rocm6_1-py3_8-build
- target-determination
with:
build-environment: linux-focal-rocm6.0-py3.8
docker-image: ${{ needs.linux-focal-rocm6_0-py3_8-build.outputs.docker-image }}
test-matrix: ${{ needs.linux-focal-rocm6_0-py3_8-build.outputs.test-matrix }}
build-environment: linux-focal-rocm6.1-py3.8
docker-image: ${{ needs.linux-focal-rocm6_1-py3_8-build.outputs.docker-image }}
test-matrix: ${{ needs.linux-focal-rocm6_1-py3_8-build.outputs.test-matrix }}

linux-jammy-py3_10-clang15-asan-build:
name: linux-jammy-py3.10-clang15-asan
Expand Down
18 changes: 9 additions & 9 deletions .github/workflows/trunk.yml
Original file line number Diff line number Diff line change
Expand Up @@ -198,29 +198,29 @@ jobs:
{ config: "force_on_cpu", shard: 1, num_shards: 1, runner: "windows.4xlarge.nonephemeral" },
]}
linux-focal-rocm6_0-py3_8-build:
name: linux-focal-rocm6.0-py3.8
linux-focal-rocm6_1-py3_8-build:
name: linux-focal-rocm6.1-py3.8
uses: ./.github/workflows/_linux-build-label.yml
with:
build-environment: linux-focal-rocm6.0-py3.8
build-environment: linux-focal-rocm6.1-py3.8
docker-image-name: pytorch-linux-focal-rocm-n-py3
sync-tag: rocm-build
test-matrix: |
{ include: [
{ config: "default", shard: 1, num_shards: 1, runner: "linux.rocm.gpu" },
]}
linux-focal-rocm6_0-py3_8-test:
linux-focal-rocm6_1-py3_8-test:
permissions:
id-token: write
contents: read
name: linux-focal-rocm6.0-py3.8
name: linux-focal-rocm6.1-py3.8
uses: ./.github/workflows/_rocm-test.yml
needs:
- linux-focal-rocm6_0-py3_8-build
- linux-focal-rocm6_1-py3_8-build
- target-determination
with:
build-environment: linux-focal-rocm6.0-py3.8
docker-image: ${{ needs.linux-focal-rocm6_0-py3_8-build.outputs.docker-image }}
test-matrix: ${{ needs.linux-focal-rocm6_0-py3_8-build.outputs.test-matrix }}
build-environment: linux-focal-rocm6.1-py3.8
docker-image: ${{ needs.linux-focal-rocm6_1-py3_8-build.outputs.docker-image }}
test-matrix: ${{ needs.linux-focal-rocm6_1-py3_8-build.outputs.test-matrix }}
tests-to-include: "test_nn test_torch test_cuda test_ops test_unary_ufuncs test_binary_ufuncs test_autograd inductor/test_torchinductor"
11 changes: 11 additions & 0 deletions CODEOWNERS
Validating CODEOWNERS rules …
Original file line number Diff line number Diff line change
Expand Up @@ -144,3 +144,14 @@ caffe2/utils/hip @jeffdaily @jithunnair-amd
/torch/csrc/Storage* @mikaylagawarecki
# subscribing for PyTorchFileWriter/PyTorchFileReader changes
/torch/csrc/jit/python/init.cpp @mikaylagawarecki

# CUDA and CUDA math libraries
aten/src/ATen/cuda/ @eqy
aten/src/ATen/cudnn/ @eqy
aten/src/ATen/native/cuda/ @eqy
aten/src/ATen/native/cudnn/ @eqy
c10/cuda @eqy
torch/cuda/ @eqy
torch/csrc/cuda/ @eqy
torch/backends/cuda/ @eqy
torch/backends/cudnn/ @eqy
4 changes: 0 additions & 4 deletions aten/src/ATen/NumericUtils.h
Original file line number Diff line number Diff line change
Expand Up @@ -38,11 +38,7 @@ inline C10_HOST_DEVICE bool _isnan(T val) {

// Complex overload of _isnan: the value is reported as NaN when either its
// real part or its imaginary part is NaN.
template <typename T, std::enable_if_t<c10::is_complex<T>::value, int> = 0>
inline C10_HOST_DEVICE bool _isnan(T val) {
#if !defined(__CUDACC__) && !defined(__HIPCC__)
  // Neither __CUDACC__ nor __HIPCC__ is defined: use the std:: overloads.
  return std::isnan(val.real()) || std::isnan(val.imag());
#else
  // __CUDACC__ or __HIPCC__ is defined: use the global-namespace isnan.
  return ::isnan(val.real()) || ::isnan(val.imag());
#endif
}

template <typename T, std::enable_if_t<std::is_same_v<T, at::Half>, int> = 0>
Expand Down
3 changes: 1 addition & 2 deletions aten/src/ATen/core/ivalue_inl.h
Original file line number Diff line number Diff line change
Expand Up @@ -2321,8 +2321,7 @@ IValue::IValue(c10::intrusive_ptr<T> custom_class) : tag(Tag::Object) {
} catch (const c10::Error&) {
throw c10::Error(
"Trying to instantiate a class that isn't a registered custom class: " +
std::string(c10::util::get_fully_qualified_type_name<T>()),
"");
std::string(c10::util::get_fully_qualified_type_name<T>()));
}
}();
auto ivalue_obj = c10::ivalue::Object::create(std::move(classType), /* numSlots */1);
Expand Down
14 changes: 0 additions & 14 deletions aten/src/ATen/cpu/vec/vec256/zarch/vec256_zarch.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,6 @@
#include <ATen/cpu/vec/vec_base.h>
#include <c10/util/complex.h>

// While this macro is defined, Vectorized<T>::sin(), cos() and tan() in this
// header return mapOrdinary(std::sin / std::cos / std::tan) instead of calling
// the Sleef kernels through mapSleef.
// NOTE(review): the memory issue being worked around is not described here.
#define SLEEF_MEMORY_WORKAROUND

namespace at {
namespace vec {

Expand Down Expand Up @@ -1148,32 +1146,20 @@ struct Vectorized<T, std::enable_if_t<is_zarch_implemented<T>()>> {
}

// Sine. With SLEEF_MEMORY_WORKAROUND defined, std::sin is applied through
// mapOrdinary; otherwise the Sleef sin kernels are passed to mapSleef.
Vectorized<T> sin() const {
#ifdef SLEEF_MEMORY_WORKAROUND
  return mapOrdinary(std::sin);
#else
  return mapSleef(Sleef_sinf4_u10, Sleef_sind2_u10);
#endif
}
// Hyperbolic sine: always passes the Sleef sinh kernels to mapSleef; unlike
// sin(), there is no SLEEF_MEMORY_WORKAROUND branch here.
Vectorized<T> sinh() const {
return mapSleef(Sleef_sinhf4_u10, Sleef_sinhd2_u10);
}
// Cosine. With SLEEF_MEMORY_WORKAROUND defined, std::cos is applied through
// mapOrdinary; otherwise the Sleef cos kernels are passed to mapSleef.
Vectorized<T> cos() const {
#ifdef SLEEF_MEMORY_WORKAROUND
  return mapOrdinary(std::cos);
#else
  return mapSleef(Sleef_cosf4_u10, Sleef_cosd2_u10);
#endif
}
// Hyperbolic cosine: always passes the Sleef cosh kernels to mapSleef; unlike
// cos(), there is no SLEEF_MEMORY_WORKAROUND branch here.
Vectorized<T> cosh() const {
return mapSleef(Sleef_coshf4_u10, Sleef_coshd2_u10);
}

// Tangent. With SLEEF_MEMORY_WORKAROUND defined, std::tan is applied through
// mapOrdinary; otherwise the Sleef tan kernels are passed to mapSleef.
Vectorized<T> tan() const {
#ifdef SLEEF_MEMORY_WORKAROUND
  return mapOrdinary(std::tan);
#else
  return mapSleef(Sleef_tanf4_u10, Sleef_tand2_u10);
#endif
}
Vectorized<T> tanh() const {
return mapSleef(Sleef_tanhf4_u10, Sleef_tanhd2_u10);
Expand Down

0 comments on commit 0c5f2bb

Please sign in to comment.