Commit 0ff1305

Merge branch 'master' into master
ynonaolga committed Nov 15, 2022
2 parents 00f1a76 + 5314af5
Showing 52 changed files with 1,182 additions and 655 deletions.
2 changes: 1 addition & 1 deletion .github/ci_commit_pins/vision.txt
@@ -1 +1 @@
-deba056203d009fec6b58afb9fa211f6ee3328c8
+b1f6c9e271368cd84837522af39e68dd4b5768a7
27 changes: 25 additions & 2 deletions .github/scripts/filter_test_configs.py
@@ -34,6 +34,13 @@
     "xla",
 }}
 
+# Supported modes when running periodically
+SUPPORTED_PERIODICAL_MODES = {
+    "mem_leak_check",
+    "rerun_disabled_tests",
+}
+
 
 def parse_args() -> Any:
     from argparse import ArgumentParser
     parser = ArgumentParser("Filter all test configurations and keep only requested ones")
@@ -109,6 +116,23 @@ def filter(test_matrix: Dict[str, List[Any]], labels: Set[str]) -> Dict[str, List[Any]]:
     return filtered_test_matrix
 
 
+def set_periodic_modes(test_matrix: Dict[str, List[Any]]) -> Dict[str, List[Any]]:
+    """
+    Apply all periodic modes when running under a schedule
+    """
+    scheduled_test_matrix: Dict[str, List[Any]] = {
+        "include": [],
+    }
+
+    for config in test_matrix.get("include", []):
+        for mode in SUPPORTED_PERIODICAL_MODES:
+            cfg = config.copy()
+            cfg[mode] = mode
+            scheduled_test_matrix["include"].append(cfg)
+
+    return scheduled_test_matrix
+
+
 def set_output(name: str, val: Any) -> None:
     if os.getenv("GITHUB_OUTPUT"):
         with open(str(os.getenv("GITHUB_OUTPUT")), "a") as env:
@@ -163,8 +187,7 @@ def main() -> None:
         filtered_test_matrix = test_matrix
 
     if args.event_name == "schedule":
-        for config in filtered_test_matrix.get("include", []):
-            config["mem_leak_check"] = "mem_leak_check"
+        filtered_test_matrix = set_periodic_modes(filtered_test_matrix)
 
     # Set the filtered test matrix as the output
     set_output("test-matrix", json.dumps(filtered_test_matrix))
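For illustration, a minimal sketch of what set_periodic_modes produces for a one-entry matrix (the matrix contents below are hypothetical; only the function and mode names come from this diff, and because SUPPORTED_PERIODICAL_MODES is a set, the order of the expanded entries is unspecified):

from filter_test_configs import set_periodic_modes  # assumes .github/scripts is on PYTHONPATH

# Hypothetical one-config matrix.
matrix = {"include": [{"config": "default", "runner": "linux"}]}
expanded = set_periodic_modes(matrix)

# Each config is duplicated once per supported periodic mode, so a scheduled
# run gets both a mem_leak_check copy and a rerun_disabled_tests copy:
#   {"config": "default", "runner": "linux", "mem_leak_check": "mem_leak_check"}
#   {"config": "default", "runner": "linux", "rerun_disabled_tests": "rerun_disabled_tests"}
assert len(expanded["include"]) == 2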
30 changes: 29 additions & 1 deletion .github/scripts/test_filter_test_configs.py
@@ -4,7 +4,14 @@
 import yaml
 import json
 from unittest import TestCase, main, mock
-from filter_test_configs import get_labels, filter, PREFIX, VALID_TEST_CONFIG_LABELS
+from filter_test_configs import (
+    get_labels,
+    filter,
+    set_periodic_modes,
+    PREFIX,
+    VALID_TEST_CONFIG_LABELS,
+    SUPPORTED_PERIODICAL_MODES
+)
 import requests
 from requests.models import Response
 from typing import Any, Dict
@@ -86,5 +93,26 @@ def test_filter_with_valid_label(self) -> None:
             self.assertEqual(case["expected"], json.dumps(filtered_test_matrix))
 
+    def test_set_periodic_modes(self) -> None:
+        testcases = [
+            {
+                "test_matrix": "{include: []}",
+                "description": "Empty test matrix",
+            },
+            {
+                "test_matrix": '{include: [{config: "default", runner: "linux"}, {config: "cfg", runner: "macos"}]}',
+                "description": "Replicate each periodic mode in a different config",
+            },
+        ]
+
+        for case in testcases:
+            test_matrix = yaml.safe_load(case["test_matrix"])
+            scheduled_test_matrix = set_periodic_modes(test_matrix)
+            self.assertEqual(
+                len(test_matrix["include"]) * len(SUPPORTED_PERIODICAL_MODES),
+                len(scheduled_test_matrix["include"])
+            )
+
 
 if __name__ == '__main__':
     main()
4 changes: 3 additions & 1 deletion .github/workflows/_linux-test.yml
@@ -115,7 +115,8 @@ jobs:
           DOCKER_IMAGE: ${{ inputs.docker-image }}
           XLA_CUDA: ${{ contains(inputs.build-environment, 'xla') && '0' || '' }}
           XLA_CLANG_CACHE_S3_BUCKET_NAME: ossci-compiler-clang-cache-circleci-xla
-          PYTORCH_TEST_CUDA_MEM_LEAK_CHECK: ${{ matrix.mem_leak_check && '1' || '0'}}
+          PYTORCH_TEST_CUDA_MEM_LEAK_CHECK: ${{ matrix.mem_leak_check && '1' || '0' }}
+          PYTORCH_TEST_RERUN_DISABLED_TESTS: ${{ matrix.rerun_disabled_tests && '1' || '0' }}
         timeout-minutes: 240
         run: |
           set -x
@@ -170,6 +171,7 @@ jobs:
           -e XLA_CUDA \
           -e XLA_CLANG_CACHE_S3_BUCKET_NAME \
           -e PYTORCH_TEST_CUDA_MEM_LEAK_CHECK \
+          -e PYTORCH_TEST_RERUN_DISABLED_TESTS \
           --env-file="/tmp/github_env_${GITHUB_RUN_ID}" \
           --ulimit stack=10485760:83886080 \
           --security-opt seccomp=unconfined \
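The workflow above only exports the two PYTORCH_TEST_* variables into the container; consuming them is left to the Python test harness. A hedged sketch of the expected consumption pattern (the variable names come from the workflow, but the consuming code is illustrative and not part of this diff):

import os

# The expression `${{ matrix.rerun_disabled_tests && '1' || '0' }}` evaluates to
# the string "1" when the matrix entry carries the key and "0" otherwise, so a
# plain string comparison suffices on the Python side.
MEM_LEAK_CHECK = os.getenv("PYTORCH_TEST_CUDA_MEM_LEAK_CHECK", "0") == "1"
RERUN_DISABLED_TESTS = os.getenv("PYTORCH_TEST_RERUN_DISABLED_TESTS", "0") == "1"

if RERUN_DISABLED_TESTS:
    print("periodic run: re-running currently disabled tests")
if MEM_LEAK_CHECK:
    print("periodic run: CUDA memory leak checking enabled")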
15 changes: 10 additions & 5 deletions .github/workflows/_mac-build.yml
@@ -109,12 +109,17 @@ jobs:
           brew link --force libomp
       - name: Install sccache (only for non-forked PRs, and pushes to trunk)
+        uses: nick-fields/retry@v2.8.2
         if: ${{ github.event_name == 'push' || github.event.pull_request.head.repo.full_name == github.repository }}
-        run: |
-          sudo curl --retry 3 https://s3.amazonaws.com/ossci-macos/sccache_v2.15 --output /usr/local/bin/sccache
-          sudo chmod +x /usr/local/bin/sccache
-          echo "SCCACHE_BUCKET=ossci-compiler-cache-circleci-v2" >> "${GITHUB_ENV}"
-          echo "SCCACHE_S3_KEY_PREFIX=${GITHUB_WORKFLOW}" >> "${GITHUB_ENV}"
+        with:
+          timeout_minutes: 5
+          max_attempts: 3
+          retry_wait_seconds: 90
+          command: |
+            sudo curl --retry 3 https://s3.amazonaws.com/ossci-macos/sccache_v2.15 --output /usr/local/bin/sccache
+            sudo chmod +x /usr/local/bin/sccache
+            echo "SCCACHE_BUCKET=ossci-compiler-cache-circleci-v2" >> "${GITHUB_ENV}"
+            echo "SCCACHE_S3_KEY_PREFIX=${GITHUB_WORKFLOW}" >> "${GITHUB_ENV}"
       - name: Get workflow job id
         id: get-job-id
3 changes: 2 additions & 1 deletion .github/workflows/_mac-test.yml
@@ -129,7 +129,8 @@ jobs:
       - name: Test
         id: test
         env:
-          PYTORCH_TEST_CUDA_MEM_LEAK_CHECK: ${{ matrix.mem_leak_check && '1' || '0'}}
+          PYTORCH_TEST_CUDA_MEM_LEAK_CHECK: ${{ matrix.mem_leak_check && '1' || '0' }}
+          PYTORCH_TEST_RERUN_DISABLED_TESTS: ${{ matrix.rerun_disabled_tests && '1' || '0' }}
         run: |
           COMMIT_MESSAGES=$(git cherry -v "origin/${GIT_DEFAULT_BRANCH:-master}")
4 changes: 3 additions & 1 deletion .github/workflows/_rocm-test.yml
@@ -97,7 +97,8 @@ jobs:
           DOCKER_IMAGE: ${{ inputs.docker-image }}
           XLA_CLANG_CACHE_S3_BUCKET_NAME: ossci-compiler-clang-cache-circleci-xla
           PYTORCH_JIT_ENABLE_NVFUSER: 1
-          PYTORCH_TEST_CUDA_MEM_LEAK_CHECK: ${{ matrix.mem_leak_check && '1' || '0'}}
+          PYTORCH_TEST_CUDA_MEM_LEAK_CHECK: ${{ matrix.mem_leak_check && '1' || '0' }}
+          PYTORCH_TEST_RERUN_DISABLED_TESTS: ${{ matrix.rerun_disabled_tests && '1' || '0' }}
         timeout-minutes: 270
         run: |
           set -x
@@ -148,6 +149,7 @@ jobs:
           -e SCCACHE_BUCKET \
           -e XLA_CLANG_CACHE_S3_BUCKET_NAME \
           -e PYTORCH_TEST_CUDA_MEM_LEAK_CHECK \
+          -e PYTORCH_TEST_RERUN_DISABLED_TESTS \
           --env-file="/tmp/github_env_${GITHUB_RUN_ID}" \
           --ulimit stack=10485760:83886080 \
           --security-opt seccomp=unconfined \
3 changes: 2 additions & 1 deletion .github/workflows/_win-test.yml
@@ -124,7 +124,8 @@ jobs:
           TEST_CONFIG: ${{ matrix.config }}
           PR_BODY: ${{ github.event.pull_request.body }}
           TORCH_CUDA_ARCH_LIST: "7.0"
-          PYTORCH_TEST_CUDA_MEM_LEAK_CHECK: ${{ matrix.mem_leak_check && '1' || '0'}}
+          PYTORCH_TEST_CUDA_MEM_LEAK_CHECK: ${{ matrix.mem_leak_check && '1' || '0' }}
+          PYTORCH_TEST_RERUN_DISABLED_TESTS: ${{ matrix.rerun_disabled_tests && '1' || '0' }}
         run: |
           COMMIT_MESSAGES=$(git cherry -v "origin/${GIT_DEFAULT_BRANCH:-master}")
14 changes: 14 additions & 0 deletions .github/workflows/docker-release.yml
@@ -91,6 +91,20 @@ jobs:
         # WITH_PUSH is used here to determine whether or not to add the --push flag
         run: |
           make -f docker.Makefile "${BUILD_IMAGE_TYPE}-image"
+      - name: Push nightly tags
+        if: ${{ github.event.ref == 'refs/heads/nightly' && matrix.image_type == 'runtime' }}
+        run: |
+          PYTORCH_DOCKER_TAG="${PYTORCH_VERSION}-runtime"
+          CUDA_VERSION=$(python3 -c "import re;print(re.search('CUDA_VERSION\s+=\s+([0-9\.]+)',open('docker.Makefile').read())[1],end='')")
+          PYTORCH_NIGHTLY_COMMIT=$(docker run ghcr.io/pytorch/pytorch-nightly:"${PYTORCH_DOCKER_TAG}" \
+            python -c 'import torch; print(torch.version.git_version[:7],end="")')
+          docker tag ghcr.io/pytorch/pytorch-nightly:"${PYTORCH_DOCKER_TAG}" \
+            ghcr.io/pytorch/pytorch-nightly:"${PYTORCH_NIGHTLY_COMMIT}-cu${CUDA_VERSION}"
+          docker push ghcr.io/pytorch/pytorch-nightly:"${PYTORCH_NIGHTLY_COMMIT}-cu${CUDA_VERSION}"
+          docker tag ghcr.io/pytorch/pytorch-nightly:"${PYTORCH_NIGHTLY_COMMIT}-cu${CUDA_VERSION}" \
+            ghcr.io/pytorch/pytorch-nightly:latest
+          docker push ghcr.io/pytorch/pytorch-nightly:latest
       - name: Teardown Linux
         uses: pytorch/test-infra/.github/actions/teardown-linux@main
         if: always()
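The CUDA_VERSION extraction in the "Push nightly tags" step above is a dense one-liner; unpacked into plain Python it reads as follows (the Makefile line shown is a hypothetical stand-in, assumed to look like "CUDA_VERSION = 11.7"):

import re

# Stand-in for open('docker.Makefile').read(); the real file lives in the repo root.
makefile_text = "CUDA_VERSION = 11.7\n"

match = re.search(r"CUDA_VERSION\s+=\s+([0-9.]+)", makefile_text)
cuda_version = match[1]  # [1] is the first capture group of the Match object
print(cuda_version)      # -> "11.7", used to build a tag such as "<commit>-cu11.7"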
5 changes: 5 additions & 0 deletions aten/src/ATen/native/native_functions.yaml
@@ -13287,6 +13287,11 @@
   dispatch:
     CUDA: _efficient_attention_forward
 
+- func: _efficient_attention_backward(Tensor grad, Tensor query, Tensor key, Tensor value, Tensor logsumexp, Tensor out, bool is_causal=False) -> (Tensor, Tensor, Tensor)
+  variants: function
+  dispatch:
+    CUDA: _efficient_attention_backward
+
 - func: _transformer_decoder_only_layer_fwd(Tensor src, int embed_dim, int num_heads, Tensor qkv_weight, Tensor qkv_bias, Tensor proj_weight, Tensor proj_bias, bool use_gelu, bool norm_first, float eps, Tensor norm_weight_1, Tensor norm_bias_1, Tensor norm_weight_2, Tensor norm_bias_2, Tensor ffn_weight_1, Tensor ffn_bias_1, Tensor ffn_weight_2, Tensor ffn_bias_2, Tensor? mask=None, Tensor? incr_key=None, Tensor? incr_value=None) -> (Tensor, Tensor, Tensor)
   variants: function
   dispatch:
16 changes: 10 additions & 6 deletions aten/src/ATen/native/transformers/cuda/attention.cu
@@ -746,24 +746,28 @@ std::tuple<Tensor, Tensor> flash_attention_helper_dense_unpacked(
 std::tuple<Tensor, Tensor> mem_eff_helper(
     const Tensor& query,
     const Tensor& key,
-    const Tensor& value){
+    const Tensor& value,
+    bool compute_log_sumexp,
+    bool is_causal) {
   // Query -> Query(Batch x Q_seq_len x Num_heads x Dim_per_head)
   // Key -> Key(Batch x KV_seq_len x Num_heads x Dim_per_head)
   // Value -> Value(Batch x KV_seq_len x Num_heads x Dim_per_head)
   Tensor q_t = query.transpose(1, 2);
   Tensor k_t = key.transpose(1, 2);
   Tensor v_t = value.transpose(1, 2);
 
-  Tensor attention = std::get<0>(at::_efficient_attention_forward(
+  Tensor attention, log_sumexp;
+  std::tie(attention, log_sumexp) = at::_efficient_attention_forward(
       q_t,
       k_t,
       v_t,
       c10::nullopt,
       c10::nullopt,
       c10::nullopt,
-      false,
-      false)).transpose(1,2);
-  return std::make_tuple(attention, Tensor());
+      compute_log_sumexp,
+      is_causal);
+  attention = attention.transpose(1,2);
+  return std::make_tuple(std::move(attention), Tensor());
 }
 
 std::tuple<Tensor, Tensor> _scaled_dot_product_attention_forward_cuda(
@@ -776,7 +780,7 @@ std::tuple<Tensor, Tensor> _scaled_dot_product_attention_forward_cuda(
     case sdp::SDPBackend::flash_attention:
       return flash_attention_helper_dense_unpacked(query_, key, value, dropout_p, need_attn_weights, is_causal);
     case sdp::SDPBackend::efficient_attention:
-      return mem_eff_helper(query_, key , value);
+      return mem_eff_helper(query_, key , value, need_attn_weights, is_causal);
     case sdp::SDPBackend::math:
       return at::_scaled_dot_product_attention_math(query_, key, value, attn_mask_, dropout_p, need_attn_weights, is_causal);
     default:
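To make the layout comments in mem_eff_helper concrete, a small PyTorch sketch of the same transposes (sizes are hypothetical; the efficient-attention kernel itself is not invoked here):

import torch

B, H, Q_len, KV_len, D = 2, 4, 8, 16, 32  # hypothetical sizes

# Inputs to _scaled_dot_product_attention arrive as
# (Batch, Num_heads, Seq_len, Dim_per_head).
query = torch.randn(B, H, Q_len, D)
key = torch.randn(B, H, KV_len, D)
value = torch.randn(B, H, KV_len, D)

# mem_eff_helper's transpose(1, 2) produces the layout the kernel expects,
# (Batch, Seq_len, Num_heads, Dim_per_head)...
q_t = query.transpose(1, 2)
assert q_t.shape == (B, Q_len, H, D)

# ...and the final transpose(1, 2) on the attention output restores
# (Batch, Num_heads, Q_seq_len, Dim_per_head) for the caller.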
