diff --git a/.github/workflows/trunk.yml b/.github/workflows/trunk.yml index f277ff8eeb7e4..b26611e5f0759 100644 --- a/.github/workflows/trunk.yml +++ b/.github/workflows/trunk.yml @@ -63,7 +63,7 @@ jobs: { config: "default", shard: 1, num_shards: 2, runner: "linux.4xlarge.nvidia.gpu" }, { config: "default", shard: 2, num_shards: 2, runner: "linux.4xlarge.nvidia.gpu" }, { config: "slow", shard: 1, num_shards: 1, runner: "linux.4xlarge.nvidia.gpu" }, - { config: "nogpu_NO_AVX", shard: 1, num_shards: 1, runner: "linux.2xlarge" }, + { config: "nogpu_AVX512", shard: 1, num_shards: 1, runner: "linux.2xlarge" }, { config: "nogpu_NO_AVX2", shard: 1, num_shards: 1, runner: "linux.2xlarge" }, { config: "jit_legacy", shard: 1, num_shards: 1, runner: "linux.4xlarge.nvidia.gpu" }, { config: "distributed", shard: 1, num_shards: 2, runner: "linux.8xlarge.nvidia.gpu" }, diff --git a/.jenkins/pytorch/test.sh b/.jenkins/pytorch/test.sh index 8f30bd6effe82..3ba170509e35a 100755 --- a/.jenkins/pytorch/test.sh +++ b/.jenkins/pytorch/test.sh @@ -152,11 +152,9 @@ if [[ "$BUILD_ENVIRONMENT" == *asan* ]]; then (cd test && ! 
get_exit_code python -c "import torch; torch._C._crash_if_aten_asan(3)") fi -if [[ "${BUILD_ENVIRONMENT}" == *-NO_AVX-* || $TEST_CONFIG == 'nogpu_NO_AVX' ]]; then +if [[ "${BUILD_ENVIRONMENT}" == *-NO_AVX2-* || $TEST_CONFIG == 'nogpu_NO_AVX2' ]]; then export ATEN_CPU_CAPABILITY=default -elif [[ "${BUILD_ENVIRONMENT}" == *-NO_AVX2-* || $TEST_CONFIG == 'nogpu_NO_AVX2' ]]; then - export ATEN_CPU_CAPABILITY=default -elif [[ "${BUILD_ENVIRONMENT}" == *-NO_AVX512-* || $TEST_CONFIG == 'nogpu_NO_AVX512' ]]; then +elif [[ "${BUILD_ENVIRONMENT}" == *-AVX512-* || $TEST_CONFIG == 'nogpu_AVX512' ]]; then export ATEN_CPU_CAPABILITY=avx2 fi diff --git a/aten/src/ATen/native/DispatchStub.cpp b/aten/src/ATen/native/DispatchStub.cpp index 327d7def16d74..39e7ac6967164 100644 --- a/aten/src/ATen/native/DispatchStub.cpp +++ b/aten/src/ATen/native/DispatchStub.cpp @@ -39,7 +39,9 @@ static CPUCapability compute_cpu_capability() { #if !defined(__powerpc__) && !defined(__s390x__) if (cpuinfo_initialize()) { -#ifdef HAVE_AVX512_CPU_DEFINITION + // AVX512 can be slower than AVX2, so let's keep it as opt-in + // see https://github.com/pytorch/pytorch/issues/80252 +#if defined(HAVE_AVX512_CPU_DEFINITION) && false // GCC supports some AVX512 intrinsics such as _mm512_set_epi16 only in // versions 9 & beyond. So, we want to ensure that only releases built with // supported compilers on supported hardware return CPU Capability AVX512,