Commit 0ff1305

Merge branch 'master' into master
ynonaolga committed Nov 15, 2022
2 parents 00f1a76 + 5314af5
Showing 52 changed files with 1,182 additions and 655 deletions.
2 changes: 1 addition & 1 deletion .github/ci_commit_pins/vision.txt
@@ -1 +1 @@
-deba056203d009fec6b58afb9fa211f6ee3328c8
+b1f6c9e271368cd84837522af39e68dd4b5768a7
27 changes: 25 additions & 2 deletions .github/scripts/filter_test_configs.py
@@ -34,6 +34,13 @@
     "xla",
 }}
 
+# Supported modes when running periodically
+SUPPORTED_PERIODICAL_MODES = {
+    "mem_leak_check",
+    "rerun_disabled_tests",
+}
+
 
 def parse_args() -> Any:
     from argparse import ArgumentParser
     parser = ArgumentParser("Filter all test configurations and keep only requested ones")
@@ -109,6 +116,23 @@ def filter(test_matrix: Dict[str, List[Any]], labels: Set[str]) -> Dict[str, List[Any]]:
     return filtered_test_matrix
 
 
+def set_periodic_modes(test_matrix: Dict[str, List[Any]]) -> Dict[str, List[Any]]:
+    """
+    Apply all periodic modes when running under a schedule
+    """
+    scheduled_test_matrix: Dict[str, List[Any]] = {
+        "include": [],
+    }
+
+    for config in test_matrix.get("include", []):
+        for mode in SUPPORTED_PERIODICAL_MODES:
+            cfg = config.copy()
+            cfg[mode] = mode
+            scheduled_test_matrix["include"].append(cfg)
+
+    return scheduled_test_matrix
+
+
 def set_output(name: str, val: Any) -> None:
     if os.getenv("GITHUB_OUTPUT"):
         with open(str(os.getenv("GITHUB_OUTPUT")), "a") as env:
@@ -163,8 +187,7 @@ def main() -> None:
         filtered_test_matrix = test_matrix
 
     if args.event_name == "schedule":
-        for config in filtered_test_matrix.get("include", []):
-            config["mem_leak_check"] = "mem_leak_check"
+        filtered_test_matrix = set_periodic_modes(filtered_test_matrix)
 
     # Set the filtered test matrix as the output
     set_output("test-matrix", json.dumps(filtered_test_matrix))
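For illustration, a minimal sketch of what set_periodic_modes produces for a one-entry matrix (the matrix contents below are hypothetical; only the function and mode names come from this diff, and because SUPPORTED_PERIODICAL_MODES is a set, the order of the expanded entries is unspecified):

from filter_test_configs import set_periodic_modes  # assumes .github/scripts is on PYTHONPATH

# Hypothetical one-config matrix.
matrix = {"include": [{"config": "default", "runner": "linux"}]}
expanded = set_periodic_modes(matrix)

# Each config is duplicated once per supported periodic mode, so a scheduled
# run gets both a mem_leak_check copy and a rerun_disabled_tests copy:
#   {"config": "default", "runner": "linux", "mem_leak_check": "mem_leak_check"}
#   {"config": "default", "runner": "linux", "rerun_disabled_tests": "rerun_disabled_tests"}
assert len(expanded["include"]) == 2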
30 changes: 29 additions & 1 deletion .github/scripts/test_filter_test_configs.py
@@ -4,7 +4,14 @@
 import yaml
 import json
 from unittest import TestCase, main, mock
-from filter_test_configs import get_labels, filter, PREFIX, VALID_TEST_CONFIG_LABELS
+from filter_test_configs import (
+    get_labels,
+    filter,
+    set_periodic_modes,
+    PREFIX,
+    VALID_TEST_CONFIG_LABELS,
+    SUPPORTED_PERIODICAL_MODES
+)
 import requests
 from requests.models import Response
 from typing import Any, Dict
@@ -86,5 +93,26 @@ def test_filter_with_valid_label(self) -> None:
             self.assertEqual(case["expected"], json.dumps(filtered_test_matrix))
 
+    def test_set_periodic_modes(self) -> None:
+        testcases = [
+            {
+                "test_matrix": "{include: []}",
+                "description": "Empty test matrix",
+            },
+            {
+                "test_matrix": '{include: [{config: "default", runner: "linux"}, {config: "cfg", runner: "macos"}]}',
+                "description": "Replicate each periodic mode in a different config",
+            },
+        ]
+
+        for case in testcases:
+            test_matrix = yaml.safe_load(case["test_matrix"])
+            scheduled_test_matrix = set_periodic_modes(test_matrix)
+            self.assertEqual(
+                len(test_matrix["include"]) * len(SUPPORTED_PERIODICAL_MODES),
+                len(scheduled_test_matrix["include"])
+            )
+
 
 if __name__ == '__main__':
     main()
4 changes: 3 additions & 1 deletion .github/workflows/_linux-test.yml
@@ -115,7 +115,8 @@ jobs:
           DOCKER_IMAGE: ${{ inputs.docker-image }}
           XLA_CUDA: ${{ contains(inputs.build-environment, 'xla') && '0' || '' }}
           XLA_CLANG_CACHE_S3_BUCKET_NAME: ossci-compiler-clang-cache-circleci-xla
-          PYTORCH_TEST_CUDA_MEM_LEAK_CHECK: ${{ matrix.mem_leak_check && '1' || '0'}}
+          PYTORCH_TEST_CUDA_MEM_LEAK_CHECK: ${{ matrix.mem_leak_check && '1' || '0' }}
+          PYTORCH_TEST_RERUN_DISABLED_TESTS: ${{ matrix.rerun_disabled_tests && '1' || '0' }}
         timeout-minutes: 240
         run: |
           set -x
@@ -170,6 +171,7 @@ jobs:
           -e XLA_CUDA \
           -e XLA_CLANG_CACHE_S3_BUCKET_NAME \
           -e PYTORCH_TEST_CUDA_MEM_LEAK_CHECK \
+          -e PYTORCH_TEST_RERUN_DISABLED_TESTS \
           --env-file="/tmp/github_env_${GITHUB_RUN_ID}" \
           --ulimit stack=10485760:83886080 \
           --security-opt seccomp=unconfined \
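The workflow above only exports the two PYTORCH_TEST_* variables into the container; consuming them is left to the Python test harness. A hedged sketch of the expected consumption pattern (the variable names come from the workflow, but the consuming code is illustrative and not part of this diff):

import os

# The expression `${{ matrix.rerun_disabled_tests && '1' || '0' }}` evaluates to
# the string "1" when the matrix entry carries the key and "0" otherwise, so a
# plain string comparison suffices on the Python side.
MEM_LEAK_CHECK = os.getenv("PYTORCH_TEST_CUDA_MEM_LEAK_CHECK", "0") == "1"
RERUN_DISABLED_TESTS = os.getenv("PYTORCH_TEST_RERUN_DISABLED_TESTS", "0") == "1"

if RERUN_DISABLED_TESTS:
    print("periodic run: re-running currently disabled tests")
if MEM_LEAK_CHECK:
    print("periodic run: CUDA memory leak checking enabled")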
15 changes: 10 additions & 5 deletions .github/workflows/_mac-build.yml
@@ -109,12 +109,17 @@ jobs:
           brew link --force libomp
       - name: Install sccache (only for non-forked PRs, and pushes to trunk)
+        uses: nick-fields/retry@v2.8.2
         if: ${{ github.event_name == 'push' || github.event.pull_request.head.repo.full_name == github.repository }}
-        run: |
-          sudo curl --retry 3 https://s3.amazonaws.com/ossci-macos/sccache_v2.15 --output /usr/local/bin/sccache
-          sudo chmod +x /usr/local/bin/sccache
-          echo "SCCACHE_BUCKET=ossci-compiler-cache-circleci-v2" >> "${GITHUB_ENV}"
-          echo "SCCACHE_S3_KEY_PREFIX=${GITHUB_WORKFLOW}" >> "${GITHUB_ENV}"
+        with:
+          timeout_minutes: 5
+          max_attempts: 3
+          retry_wait_seconds: 90
+          command: |
+            sudo curl --retry 3 https://s3.amazonaws.com/ossci-macos/sccache_v2.15 --output /usr/local/bin/sccache
+            sudo chmod +x /usr/local/bin/sccache
+            echo "SCCACHE_BUCKET=ossci-compiler-cache-circleci-v2" >> "${GITHUB_ENV}"
+            echo "SCCACHE_S3_KEY_PREFIX=${GITHUB_WORKFLOW}" >> "${GITHUB_ENV}"
       - name: Get workflow job id
         id: get-job-id
3 changes: 2 additions & 1 deletion .github/workflows/_mac-test.yml
@@ -129,7 +129,8 @@ jobs:
       - name: Test
         id: test
         env:
-          PYTORCH_TEST_CUDA_MEM_LEAK_CHECK: ${{ matrix.mem_leak_check && '1' || '0'}}
+          PYTORCH_TEST_CUDA_MEM_LEAK_CHECK: ${{ matrix.mem_leak_check && '1' || '0' }}
+          PYTORCH_TEST_RERUN_DISABLED_TESTS: ${{ matrix.rerun_disabled_tests && '1' || '0' }}
         run: |
           COMMIT_MESSAGES=$(git cherry -v "origin/${GIT_DEFAULT_BRANCH:-master}")
4 changes: 3 additions & 1 deletion .github/workflows/_rocm-test.yml
@@ -97,7 +97,8 @@ jobs:
           DOCKER_IMAGE: ${{ inputs.docker-image }}
           XLA_CLANG_CACHE_S3_BUCKET_NAME: ossci-compiler-clang-cache-circleci-xla
           PYTORCH_JIT_ENABLE_NVFUSER: 1
-          PYTORCH_TEST_CUDA_MEM_LEAK_CHECK: ${{ matrix.mem_leak_check && '1' || '0'}}
+          PYTORCH_TEST_CUDA_MEM_LEAK_CHECK: ${{ matrix.mem_leak_check && '1' || '0' }}
+          PYTORCH_TEST_RERUN_DISABLED_TESTS: ${{ matrix.rerun_disabled_tests && '1' || '0' }}
         timeout-minutes: 270
         run: |
           set -x
@@ -148,6 +149,7 @@ jobs:
           -e SCCACHE_BUCKET \
           -e XLA_CLANG_CACHE_S3_BUCKET_NAME \
           -e PYTORCH_TEST_CUDA_MEM_LEAK_CHECK \
+          -e PYTORCH_TEST_RERUN_DISABLED_TESTS \
           --env-file="/tmp/github_env_${GITHUB_RUN_ID}" \
           --ulimit stack=10485760:83886080 \
           --security-opt seccomp=unconfined \
3 changes: 2 additions & 1 deletion .github/workflows/_win-test.yml
@@ -124,7 +124,8 @@ jobs:
           TEST_CONFIG: ${{ matrix.config }}
           PR_BODY: ${{ github.event.pull_request.body }}
           TORCH_CUDA_ARCH_LIST: "7.0"
-          PYTORCH_TEST_CUDA_MEM_LEAK_CHECK: ${{ matrix.mem_leak_check && '1' || '0'}}
+          PYTORCH_TEST_CUDA_MEM_LEAK_CHECK: ${{ matrix.mem_leak_check && '1' || '0' }}
+          PYTORCH_TEST_RERUN_DISABLED_TESTS: ${{ matrix.rerun_disabled_tests && '1' || '0' }}
         run: |
           COMMIT_MESSAGES=$(git cherry -v "origin/${GIT_DEFAULT_BRANCH:-master}")
14 changes: 14 additions & 0 deletions .github/workflows/docker-release.yml
@@ -91,6 +91,20 @@ jobs:
         # WITH_PUSH is used here to determine whether or not to add the --push flag
         run: |
           make -f docker.Makefile "${BUILD_IMAGE_TYPE}-image"
+      - name: Push nightly tags
+        if: ${{ github.event.ref == 'refs/heads/nightly' && matrix.image_type == 'runtime' }}
+        run: |
+          PYTORCH_DOCKER_TAG="${PYTORCH_VERSION}-runtime"
+          CUDA_VERSION=$(python3 -c "import re;print(re.search('CUDA_VERSION\s+=\s+([0-9\.]+)',open('docker.Makefile').read())[1],end='')")
+          PYTORCH_NIGHTLY_COMMIT=$(docker run ghcr.io/pytorch/pytorch-nightly:"${PYTORCH_DOCKER_TAG}" \
+            python -c 'import torch; print(torch.version.git_version[:7],end="")')
+          docker tag ghcr.io/pytorch/pytorch-nightly:"${PYTORCH_DOCKER_TAG}" \
+            ghcr.io/pytorch/pytorch-nightly:"${PYTORCH_NIGHTLY_COMMIT}-cu${CUDA_VERSION}"
+          docker push ghcr.io/pytorch/pytorch-nightly:"${PYTORCH_NIGHTLY_COMMIT}-cu${CUDA_VERSION}"
+          docker tag ghcr.io/pytorch/pytorch-nightly:"${PYTORCH_NIGHTLY_COMMIT}-cu${CUDA_VERSION}" \
+            ghcr.io/pytorch/pytorch-nightly:latest
+          docker push ghcr.io/pytorch/pytorch-nightly:latest
       - name: Teardown Linux
         uses: pytorch/test-infra/.github/actions/teardown-linux@main
         if: always()
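The CUDA_VERSION extraction in the "Push nightly tags" step above is a dense one-liner; unpacked into plain Python it reads as follows (the Makefile line shown is a hypothetical stand-in, assumed to look like "CUDA_VERSION = 11.7"):

import re

# Stand-in for open('docker.Makefile').read(); the real file lives in the repo root.
makefile_text = "CUDA_VERSION = 11.7\n"

match = re.search(r"CUDA_VERSION\s+=\s+([0-9.]+)", makefile_text)
cuda_version = match[1]  # [1] is the first capture group of the Match object
print(cuda_version)      # -> "11.7", used to build a tag such as "<commit>-cu11.7"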
5 changes: 5 additions & 0 deletions aten/src/ATen/native/native_functions.yaml
@@ -13287,6 +13287,11 @@
   dispatch:
     CUDA: _efficient_attention_forward
 
+- func: _efficient_attention_backward(Tensor grad, Tensor query, Tensor key, Tensor value, Tensor logsumexp, Tensor out, bool is_causal=False) -> (Tensor, Tensor, Tensor)
+  variants: function
+  dispatch:
+    CUDA: _efficient_attention_backward
+
 - func: _transformer_decoder_only_layer_fwd(Tensor src, int embed_dim, int num_heads, Tensor qkv_weight, Tensor qkv_bias, Tensor proj_weight, Tensor proj_bias, bool use_gelu, bool norm_first, float eps, Tensor norm_weight_1, Tensor norm_bias_1, Tensor norm_weight_2, Tensor norm_bias_2, Tensor ffn_weight_1, Tensor ffn_bias_1, Tensor ffn_weight_2, Tensor ffn_bias_2, Tensor? mask=None, Tensor? incr_key=None, Tensor? incr_value=None) -> (Tensor, Tensor, Tensor)
   variants: function
   dispatch:
16 changes: 10 additions & 6 deletions aten/src/ATen/native/transformers/cuda/attention.cu
@@ -746,24 +746,28 @@ std::tuple<Tensor, Tensor> flash_attention_helper_dense_unpacked(
 std::tuple<Tensor, Tensor> mem_eff_helper(
     const Tensor& query,
     const Tensor& key,
-    const Tensor& value){
+    const Tensor& value,
+    bool compute_log_sumexp,
+    bool is_causal) {
   // Query -> Query(Batch x Q_seq_len x Num_heads x Dim_per_head)
   // Key -> Key(Batch x KV_seq_len x Num_heads x Dim_per_head)
   // Value -> Value(Batch x KV_seq_len x Num_heads x Dim_per_head)
   Tensor q_t = query.transpose(1, 2);
   Tensor k_t = key.transpose(1, 2);
   Tensor v_t = value.transpose(1, 2);
 
-  Tensor attention = std::get<0>(at::_efficient_attention_forward(
+  Tensor attention, log_sumexp;
+  std::tie(attention, log_sumexp) = at::_efficient_attention_forward(
       q_t,
       k_t,
       v_t,
       c10::nullopt,
       c10::nullopt,
       c10::nullopt,
-      false,
-      false)).transpose(1,2);
-  return std::make_tuple(attention, Tensor());
+      compute_log_sumexp,
+      is_causal);
+  attention = attention.transpose(1,2);
+  return std::make_tuple(std::move(attention), Tensor());
 }
 
 std::tuple<Tensor, Tensor> _scaled_dot_product_attention_forward_cuda(
@@ -776,7 +780,7 @@ std::tuple<Tensor, Tensor> _scaled_dot_product_attention_forward_cuda(
     case sdp::SDPBackend::flash_attention:
       return flash_attention_helper_dense_unpacked(query_, key, value, dropout_p, need_attn_weights, is_causal);
     case sdp::SDPBackend::efficient_attention:
-      return mem_eff_helper(query_, key , value);
+      return mem_eff_helper(query_, key , value, need_attn_weights, is_causal);
     case sdp::SDPBackend::math:
       return at::_scaled_dot_product_attention_math(query_, key, value, attn_mask_, dropout_p, need_attn_weights, is_causal);
     default:
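To make the layout comments in mem_eff_helper concrete, a small PyTorch sketch of the same transposes (sizes are hypothetical; the efficient-attention kernel itself is not invoked here):

import torch

B, H, Q_len, KV_len, D = 2, 4, 8, 16, 32  # hypothetical sizes

# Inputs to _scaled_dot_product_attention arrive as
# (Batch, Num_heads, Seq_len, Dim_per_head).
query = torch.randn(B, H, Q_len, D)
key = torch.randn(B, H, KV_len, D)
value = torch.randn(B, H, KV_len, D)

# mem_eff_helper's transpose(1, 2) produces the layout the kernel expects,
# (Batch, Seq_len, Num_heads, Dim_per_head)...
q_t = query.transpose(1, 2)
assert q_t.shape == (B, Q_len, H, D)

# ...and the final transpose(1, 2) on the attention output restores
# (Batch, Num_heads, Q_seq_len, Dim_per_head) for the caller.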
