diff --git a/.circleci/docker/build.sh b/.circleci/docker/build.sh index b7fef829b798..d8772329379c 100755 --- a/.circleci/docker/build.sh +++ b/.circleci/docker/build.sh @@ -379,7 +379,7 @@ docker build \ --build-arg "NINJA_VERSION=${NINJA_VERSION:-}" \ --build-arg "KATEX=${KATEX:-}" \ --build-arg "ROCM_VERSION=${ROCM_VERSION:-}" \ - --build-arg "PYTORCH_ROCM_ARCH=${PYTORCH_ROCM_ARCH:-gfx900;gfx906}" \ + --build-arg "PYTORCH_ROCM_ARCH=${PYTORCH_ROCM_ARCH:-gfx906}" \ --build-arg "IMAGE_NAME=${IMAGE_NAME}" \ --build-arg "UCX_COMMIT=${UCX_COMMIT}" \ --build-arg "UCC_COMMIT=${UCC_COMMIT}" \ diff --git a/.circleci/docker/common/install_cudnn.sh b/.circleci/docker/common/install_cudnn.sh index 1f1c34ea200d..4a8829b1cba1 100644 --- a/.circleci/docker/common/install_cudnn.sh +++ b/.circleci/docker/common/install_cudnn.sh @@ -4,7 +4,13 @@ if [[ ${CUDNN_VERSION} == 8 ]]; then # cuDNN license: https://developer.nvidia.com/cudnn/license_agreement mkdir tmp_cudnn && cd tmp_cudnn CUDNN_NAME="cudnn-linux-x86_64-8.3.2.44_cuda11.5-archive" - curl -OLs https://developer.download.nvidia.com/compute/redist/cudnn/v8.3.2/local_installers/11.5/${CUDNN_NAME}.tar.xz + if [[ ${CUDA_VERSION:0:4} == "11.7" ]]; then + CUDNN_NAME="cudnn-linux-x86_64-8.5.0.96_cuda11-archive" + curl -OLs https://ossci-linux.s3.amazonaws.com/${CUDNN_NAME}.tar.xz + else + curl -OLs https://developer.download.nvidia.com/compute/redist/cudnn/v8.3.2/local_installers/11.5/${CUDNN_NAME}.tar.xz + fi + tar xf ${CUDNN_NAME}.tar.xz cp -a ${CUDNN_NAME}/include/* /usr/include/ cp -a ${CUDNN_NAME}/include/* /usr/local/cuda/include/ diff --git a/.circleci/docker/common/install_ucc.sh b/.circleci/docker/common/install_ucc.sh index 4d691ebb5e9e..333e44e6f779 100755 --- a/.circleci/docker/common/install_ucc.sh +++ b/.circleci/docker/common/install_ucc.sh @@ -36,7 +36,7 @@ function install_ucc() { git submodule update --init --recursive ./autogen.sh - ./configure --prefix=$UCC_HOME --with-ucx=$UCX_HOME --with-nccl=no --with-cuda=$with_cuda + ./configure --prefix=$UCC_HOME --with-ucx=$UCX_HOME --with-cuda=$with_cuda time make -j sudo make install diff --git a/.circleci/docker/ubuntu-cuda/Dockerfile b/.circleci/docker/ubuntu-cuda/Dockerfile index 4375b612a308..53349bfec533 100644 --- a/.circleci/docker/ubuntu-cuda/Dockerfile +++ b/.circleci/docker/ubuntu-cuda/Dockerfile @@ -118,6 +118,7 @@ COPY --from=pytorch/llvm:9.0.1 /opt/llvm /opt/llvm # Install CUDNN ARG CUDNN_VERSION +ARG CUDA_VERSION COPY ./common/install_cudnn.sh install_cudnn.sh RUN if [ "${CUDNN_VERSION}" -eq 8 ]; then bash install_cudnn.sh; fi RUN rm install_cudnn.sh diff --git a/.circleci/scripts/windows_cudnn_install.sh b/.circleci/scripts/windows_cudnn_install.sh index 763bc950fc4b..c279259e8341 100644 --- a/.circleci/scripts/windows_cudnn_install.sh +++ b/.circleci/scripts/windows_cudnn_install.sh @@ -18,7 +18,7 @@ case ${CUDA_VERSION} in ;; 11.7) # Use cudnn8.3 with hard-coded cuda11.5 version - cudnn_file_name="cudnn-windows-x86_64-8.3.2.44_cuda11.5-archive" + cudnn_file_name="cudnn-windows-x86_64-8.5.0.96_cuda11-archive" ;; *) echo "CUDA_VERSION: ${CUDA_VERSION} not supported yet" diff --git a/.github/ci_commit_pins/torchdynamo.txt b/.github/ci_commit_pins/torchdynamo.txt index 992c70b96b45..945ad626f6e6 100644 --- a/.github/ci_commit_pins/torchdynamo.txt +++ b/.github/ci_commit_pins/torchdynamo.txt @@ -1 +1 @@ -fe3173f7e6c804e6330ac187ea8e4101f45ff9a2 +41c44bc1d080d6cf063419a4166732b983b84eef diff --git a/.github/ci_commit_pins/vision.txt b/.github/ci_commit_pins/vision.txt index 
2a1cd7720c6b..a0500b9f6bc1 100644 --- a/.github/ci_commit_pins/vision.txt +++ b/.github/ci_commit_pins/vision.txt @@ -1 +1 @@ -84dcf695d64c15f8a0be845ac65901bdde845429 +a4f53308b2d0f1aa9191686e326f45c26053f686 diff --git a/.github/ci_commit_pins/xla.txt b/.github/ci_commit_pins/xla.txt index 31bf7123c62e..d536a71eaf88 100644 --- a/.github/ci_commit_pins/xla.txt +++ b/.github/ci_commit_pins/xla.txt @@ -1 +1 @@ -b8688ee3c03120a15978844db6c4fa73eceb6594 +4dec902617aea14ca4013e402eea56e92701cac9 diff --git a/.github/merge_rules.yaml b/.github/merge_rules.yaml index 209f71bde842..7f091a55c62f 100644 --- a/.github/merge_rules.yaml +++ b/.github/merge_rules.yaml @@ -3,6 +3,7 @@ - .jenkins/caffe2/* - aten/src/ATen/core/interned_strings.h - docs/source/onnx.rst + - docs/source/onnx* - docs/source/scripts/onnx/** - scripts/onnx/** - test/jit/test_export_modes.py @@ -15,6 +16,8 @@ - torch/csrc/jit/serialization/onnx.* - torch/csrc/onnx/** - torch/onnx/** + - third_party/onnx + - caffe2/python/onnx/** approved_by: - BowenBao - abock @@ -323,6 +326,7 @@ - '*' approved_by: - pytorch/metamates + - mruberry mandatory_checks_name: - Facebook CLA Check - Lint diff --git a/.github/scale-config.yml b/.github/scale-config.yml deleted file mode 100644 index 1cf99b326ba8..000000000000 --- a/.github/scale-config.yml +++ /dev/null @@ -1,69 +0,0 @@ -# scale-config.yml: -# Powers what instance types are available for GHA auto-scaled -# runners. Runners listed here will be available as self hosted -# runners, configuration is directly pulled from the main branch. -# -# NOTE (Apr, 5, 2021): Linux runners are currently all an amazonlinux2 -# -# NOTE (Jan 5, 2021): Linux runners are all non-ephemeral to reduce the amount of CreateInstaces calls -# to avoid RequestLimitExceeded issues -# -# TODO: Add some documentation on how the auto-scaling works -# -# NOTE: Default values, -# -# runner_types: -# runner_label: -# instance_type: m4.large -# os: linux -# max_available: 20 -# disk_size: 50 -# is_ephemeral: true - -runner_types: - # mainly used for ciflow-should-run, not made to run any serious tests - linux.large: - instance_type: c5.large - os: linux - disk_size: 10 - is_ephemeral: false - linux.2xlarge: - instance_type: c5.2xlarge - os: linux - max_available: 1000 - disk_size: 150 - is_ephemeral: false - linux.4xlarge: # for binary-builds - instance_type: c5.4xlarge - os: linux - max_available: 500 - disk_size: 150 - is_ephemeral: false - linux.8xlarge.nvidia.gpu: - instance_type: g3.8xlarge - os: linux - max_available: 200 - disk_size: 150 - is_ephemeral: false - linux.4xlarge.nvidia.gpu: - instance_type: g3.4xlarge - os: linux - max_available: 250 - disk_size: 150 - is_ephemeral: false - linux.16xlarge.nvidia.gpu: - instance_type: g3.16xlarge - os: linux - max_available: 10 - disk_size: 150 - is_ephemeral: false - windows.4xlarge: - instance_type: c5d.4xlarge - os: windows - max_available: 200 - disk_size: 256 - windows.8xlarge.nvidia.gpu: - instance_type: p3.2xlarge - os: windows - max_available: 100 - disk_size: 256 diff --git a/.github/scripts/generate_binary_build_matrix.py b/.github/scripts/generate_binary_build_matrix.py index b1e3b46bda34..0f2693a9aa54 100644 --- a/.github/scripts/generate_binary_build_matrix.py +++ b/.github/scripts/generate_binary_build_matrix.py @@ -13,7 +13,7 @@ from typing import Dict, List, Tuple, Optional -CUDA_ARCHES = ["10.2", "11.3", "11.6", "11.7"] +CUDA_ARCHES = ["10.2", "11.6", "11.7"] ROCM_ARCHES = ["5.1.1", "5.2"] diff --git a/.github/scripts/generate_ci_workflows.py 
b/.github/scripts/generate_ci_workflows.py index 653cfeebaab7..3722f4307029 100755 --- a/.github/scripts/generate_ci_workflows.py +++ b/.github/scripts/generate_ci_workflows.py @@ -207,15 +207,6 @@ class OperatingSystem: ), ] WINDOWS_BINARY_SMOKE_WORKFLOWS = [ - BinaryBuildWorkflow( - os=OperatingSystem.WINDOWS, - package_type="wheel", - build_configs=generate_binary_build_matrix.generate_wheels_matrix( - OperatingSystem.WINDOWS, - arches=["11.3"], - python_versions=["3.7"]), - branches="master", - ), BinaryBuildWorkflow( os=OperatingSystem.WINDOWS, package_type="libtorch", diff --git a/.github/scripts/run_torchbench.py b/.github/scripts/run_torchbench.py index 51bd1e33f5df..352da69c8158 100644 --- a/.github/scripts/run_torchbench.py +++ b/.github/scripts/run_torchbench.py @@ -13,10 +13,12 @@ # 1. Does not reuse the build artifact in other CI workflows # 2. CI jobs are serialized because there is only one worker import os +import boto3  # type: ignore[import] import git  # type: ignore[import] import pathlib import argparse import subprocess +from pathlib import Path from typing import List, Tuple @@ -31,6 +33,25 @@ direction: decrease timeout: 720 tests:""" +S3_BUCKET = "ossci-metrics" +S3_PREFIX = "torchbench-pr-test" +S3_URL_BASE = f"https://{S3_BUCKET}.s3.amazonaws.com/" + +class S3Client: +    def __init__(self, bucket: str = S3_BUCKET, prefix: str = S3_PREFIX): +        self.s3 = boto3.client('s3') +        self.resource = boto3.resource('s3') +        self.bucket = bucket +        self.prefix = prefix + +    def upload_file(self, file_path: Path, filekey_prefix: str) -> None: +        assert file_path.is_file(), f"Specified file path {file_path} does not exist or is not a file." +        file_name = file_path.name +        s3_key = f"{self.prefix}/{filekey_prefix}/{file_name}" +        print(f"Uploading file {file_name} to S3 with key: {s3_key}") +        self.s3.upload_file(str(file_path), self.bucket, s3_key) +        # output the result URL +        print(f"Uploaded the result file {file_name} to {S3_URL_BASE}{s3_key}") def gen_abtest_config(control: str, treatment: str, models: List[str]) -> str: d = {} @@ -137,9 +158,21 @@ def run_userbenchmarks(pytorch_path: str, torchbench_path: str, base_sha: str, h print(f"Running torchbench userbenchmark command: {command}") subprocess.check_call(command, cwd=torchbench_path, env=env) +def process_upload_s3(result_dir: str) -> None: +    # validate result directory +    result_dir_path = Path(result_dir) +    assert result_dir_path.exists(), f"Specified result directory {result_dir} doesn't exist."
+    # upload all files to S3 bucket ossci-metrics +    files = [x for x in result_dir_path.iterdir() if x.is_file()] +    # upload each file to the S3 bucket +    s3_client: S3Client = S3Client() +    filekey_prefix = result_dir_path.name +    for f in files: +        s3_client.upload_file(f, filekey_prefix) + if __name__ == "__main__": parser = argparse.ArgumentParser(description='Run TorchBench tests based on PR') -    parser.add_argument('--pr-body', required=True, help="The file that contains body of a Pull Request") +    parser.add_argument('--pr-body', help="The file that contains body of a Pull Request") subparsers = parser.add_subparsers(dest='command') # parser for setup the torchbench branch name env @@ -151,6 +184,9 @@ def run_userbenchmarks(pytorch_path: str, torchbench_path: str, base_sha: str, h run_parser.add_argument('--pr-head-sha', required=True, type=str, help="The Pull Request head hash") run_parser.add_argument('--pytorch-path', required=True, type=str, help="Path to pytorch repository") run_parser.add_argument('--torchbench-path', required=True, type=str, help="Path to TorchBench repository") +    # parser to upload results to S3 +    upload_parser = subparsers.add_parser("upload-s3") +    upload_parser.add_argument('--result-dir', required=True, type=str, help="Path to benchmark output") args = parser.parse_args() if args.command == 'set-torchbench-branch': @@ -181,6 +217,8 @@ def run_userbenchmarks(pytorch_path: str, torchbench_path: str, base_sha: str, h if not models and not userbenchmarks: print("Can't parse valid models or userbenchmarks from the pr body. Quit.") exit(-1) +    elif args.command == 'upload-s3': +        process_upload_s3(args.result_dir) else: print(f"The command {args.command} is not supported.") exit(-1) diff --git a/.github/scripts/trymerge.py b/.github/scripts/trymerge.py index 610c70cdc0d9..6c28b69ec692 100755 --- a/.github/scripts/trymerge.py +++ b/.github/scripts/trymerge.py @@ -912,6 +912,8 @@ def merge_into(self, repo: GitRepo, *, repo.push(self.default_branch(), dry_run) if not dry_run: +            if land_check_commit: +                self.delete_land_time_check_branch(repo) gh_add_labels(self.org, self.project, self.pr_num, ["merged"]) def merge_changes(self, @@ -962,6 +964,11 @@ def create_land_time_check_branch(self, repo.checkout(orig_branch) return commit +    def delete_land_time_check_branch(self, +                                      repo: GitRepo) -> None: +        land_check_branch = f'landchecks/{self.pr_num}' +        repo._run_git('push', 'origin', '-d', land_check_branch) + class MandatoryChecksMissingError(Exception): pass @@ -1344,7 +1351,7 @@ def merge(pr_num: int, repo: GitRepo, # here to stop the merge process right away find_matching_merge_rule(pr, repo, skip_mandatory_checks=True) -    if land_checks: +    if land_checks and not dry_run: land_check_commit = pr.create_land_time_check_branch( repo, 'viable/strict', @@ -1354,6 +1361,8 @@ def merge(pr_num: int, repo: GitRepo, gh_post_pr_comment(org, project, pr.pr_num, explainer.get_merge_message(land_check_commit)) if (datetime.utcnow() - pr.last_pushed_at()).days > stale_pr_days: +        if land_checks and not dry_run: +            pr.delete_land_time_check_branch(repo) raise RuntimeError("This PR is too stale; the last push date was more than 3 days ago.
Please rebase and try again.") start_time = time.time() @@ -1366,6 +1375,8 @@ def merge(pr_num: int, repo: GitRepo, print(f"Attempting merge of https://github.com/{org}/{project}/pull/{pr_num} ({elapsed_time / 60} minutes elapsed)") pr = GitHubPR(org, project, pr_num) if initial_commit_sha != pr.last_commit()['oid']: +            if land_checks and not dry_run: +                pr.delete_land_time_check_branch(repo) raise RuntimeError("New commits were pushed while merging. Please rerun the merge command.") try: find_matching_merge_rule(pr, repo) @@ -1400,10 +1411,16 @@ def merge(pr_num: int, repo: GitRepo, last_exception = str(ex) print(f"Merge of https://github.com/{org}/{project}/pull/{pr_num} failed due to: {ex}. Retrying in 5 min") time.sleep(5 * 60) +        except RuntimeError: +            if land_checks and not dry_run: +                pr.delete_land_time_check_branch(repo) +            raise # Finally report timeout back msg = f"Merged timed out after {timeout_minutes} minutes. Please contact the pytorch_dev_infra team." msg += f"The last exception was: {last_exception}" if not dry_run: +        if land_checks: +            pr.delete_land_time_check_branch(repo) gh_add_labels(org, project, pr_num, ["land-failed"]) raise RuntimeError(msg) diff --git a/.github/workflows/_linux-test.yml b/.github/workflows/_linux-test.yml index 4305ed04e0d2..95ed840025a7 100644 --- a/.github/workflows/_linux-test.yml +++ b/.github/workflows/_linux-test.yml @@ -117,6 +117,7 @@ jobs: NUM_TEST_SHARDS: ${{ matrix.num_shards }} PR_BODY: ${{ github.event.pull_request.body }} SCCACHE_BUCKET: ossci-compiler-cache-circleci-v2 +          SCCACHE_S3_KEY_PREFIX: ${{ github.workflow }} SHM_SIZE: ${{ contains(inputs.build-environment, 'cuda') && '2g' || '1g' }} DOCKER_IMAGE: ${{ inputs.docker-image }} XLA_CUDA: ${{ contains(inputs.build-environment, 'xla') && '0' || '' }} @@ -171,6 +172,7 @@ jobs: -e PR_LABELS \ -e MAX_JOBS="$(nproc --ignore=2)" \ -e SCCACHE_BUCKET \ +            -e SCCACHE_S3_KEY_PREFIX \ -e XLA_CUDA \ -e XLA_CLANG_CACHE_S3_BUCKET_NAME \ --env-file="/tmp/github_env_${GITHUB_RUN_ID}" \ diff --git a/.github/workflows/_mac-build.yml b/.github/workflows/_mac-build.yml index 20dae569717e..bc3f10de40f9 100644 --- a/.github/workflows/_mac-build.yml +++ b/.github/workflows/_mac-build.yml @@ -33,6 +33,21 @@ on: default: "3.8" description: | The python version to be used. Will be 3.8 by default +      test-matrix: +        required: false +        type: string +        description: | +          An optional JSON description of what test configs to run later on. This +          is moved here from the Linux test workflow so that we can apply filter +          logic using test-config labels earlier and skip unnecessary builds + +    outputs: +      test-matrix: +        value: ${{ inputs.test-matrix }} +        description: An optional JSON description of what test configs to run later on. +      build-outcome: +        value: ${{ jobs.build.outputs.build-outcome }} +        description: The outcome of the build step. This is used to influence test filtering logic later on.
secrets: MACOS_SCCACHE_S3_ACCESS_KEY_ID: @@ -52,6 +67,8 @@ jobs: AWS_ACCESS_KEY_ID: ${{ secrets.MACOS_SCCACHE_S3_ACCESS_KEY_ID }} AWS_SECRET_ACCESS_KEY: ${{ secrets.MACOS_SCCACHE_S3_SECRET_ACCESS_KEY }} BUILD_ENVIRONMENT: ${{ inputs.build-environment }} + outputs: + build-outcome: ${{ steps.build.outcome }} steps: # [see note: pytorch repo ref] - name: Checkout PyTorch @@ -90,7 +107,17 @@ jobs: with: github-token: ${{ secrets.GITHUB_TOKEN }} + # Apply the filter logic to the build step too if the test-config label is already there + - name: Select all requested test configurations (if the test matrix is available) + id: filter + uses: ./.github/actions/filter-test-configs + with: + github-token: ${{ secrets.GITHUB_TOKEN }} + test-matrix: ${{ inputs.test-matrix }} + - name: Build + if: steps.filter.outputs.is-test-matrix-empty == 'False' || inputs.test-matrix == '' + id: build env: OUR_GITHUB_JOB_ID: ${{ steps.get-job-id.outputs.job-id }} run: | @@ -98,13 +125,13 @@ jobs: ${CONDA_RUN} .jenkins/pytorch/macos-build.sh - name: Archive artifacts into zip - if: inputs.build-generates-artifacts + if: inputs.build-generates-artifacts && steps.build.outcome != 'skipped' run: | zip -1 -r artifacts.zip dist/ build/.ninja_log build/compile_commands.json .pytorch-test-times.json - name: Store PyTorch Build Artifacts on GHA uses: actions/upload-artifact@v2 - if: inputs.build-generates-artifacts + if: inputs.build-generates-artifacts && steps.build.outcome != 'skipped' with: name: ${{ env.BUILD_ENVIRONMENT }} retention-days: 14 @@ -114,7 +141,7 @@ jobs: - name: Upload sccache stats to GHA uses: actions/upload-artifact@v2 # Only if sccache is installed, see above - if: ${{ github.event_name == 'push' || github.event.pull_request.head.repo.full_name == github.repository }} + if: ${{ (github.event_name == 'push' || github.event.pull_request.head.repo.full_name == github.repository) && steps.build.outcome != 'skipped' }} with: name: sccache-stats-${{ inputs.build-environment }}-runattempt${{ github.run_attempt }}-${{ steps.get-job-id.outputs.job-id }} retention-days: 14 diff --git a/.github/workflows/_mac-test.yml b/.github/workflows/_mac-test.yml index bb711ccefb6d..4b3d25717307 100644 --- a/.github/workflows/_mac-test.yml +++ b/.github/workflows/_mac-test.yml @@ -33,16 +33,38 @@ on: description: secret acess key for test stats upload jobs: + # This needs to be run right before the test starts so that it can gather the + # latest labels from the PR + filter: + runs-on: [self-hosted, linux.large] + outputs: + test-matrix: ${{ steps.filter.outputs.test-matrix }} + is-test-matrix-empty: ${{ steps.filter.outputs.is-test-matrix-empty }} + steps: + - name: Checkout PyTorch + uses: pytorch/pytorch/.github/actions/checkout-pytorch@master + with: + fetch-depth: 1 + submodules: false + + - name: Select all requested test configurations + id: filter + uses: ./.github/actions/filter-test-configs + with: + github-token: ${{ secrets.GITHUB_TOKEN }} + test-matrix: ${{ inputs.test-matrix }} + test: - # Don't run on forked repos. 
-    if: github.repository_owner == 'pytorch' +    needs: filter +    # Don't run on forked repos or empty test matrix +    if: github.repository_owner == 'pytorch' && needs.filter.outputs.is-test-matrix-empty == 'False' # For setup-miniconda, see https://github.com/conda-incubator/setup-miniconda/issues/179 # Also ensure that we always run with the right architecture defaults: run: shell: arch -arch ${{ inputs.arch }} bash -e -l {0} strategy: -      matrix: ${{ fromJSON(inputs.test-matrix) }} +      matrix: ${{ fromJSON(needs.filter.outputs.test-matrix) }} fail-fast: false runs-on: ${{ matrix.runner }} timeout-minutes: 240 diff --git a/.github/workflows/_win-build.yml b/.github/workflows/_win-build.yml index fb2195fafce6..d4704129a141 100644 --- a/.github/workflows/_win-build.yml +++ b/.github/workflows/_win-build.yml @@ -23,6 +23,18 @@ on: description: | If this is set, our linter will use this to make sure that every other job with the same `sync-tag` is identical. +      test-matrix: +        required: false +        type: string +        description: | +          An optional JSON description of what test configs to run later on. This +          is moved here from the Linux test workflow so that we can apply filter +          logic using test-config labels earlier and skip unnecessary builds + +    outputs: +      test-matrix: +        value: ${{ inputs.test-matrix }} +        description: An optional JSON description of what test configs to run later on. env: GIT_DEFAULT_BRANCH: ${{ github.event.repository.default_branch }} @@ -61,7 +73,17 @@ jobs: with: github-token: ${{ secrets.GITHUB_TOKEN }} +      # Apply the filter logic to the build step too if the test-config label is already there +      - name: Select all requested test configurations (if the test matrix is available) +        id: filter +        uses: ./.github/actions/filter-test-configs +        with: +          github-token: ${{ secrets.GITHUB_TOKEN }} +          test-matrix: ${{ inputs.test-matrix }} + - name: Build +        if: steps.filter.outputs.is-test-matrix-empty == 'False' || inputs.test-matrix == '' +        id: build shell: bash env: PYTORCH_FINAL_PACKAGE_DIR: /c/${{ github.run_id }}/build-results/ @@ -89,6 +111,7 @@ jobs: # Upload to github so that people can click and download artifacts - name: Upload artifacts to s3 +        if: steps.build.outcome != 'skipped' uses: seemethere/upload-artifact-s3@v5 with: retention-days: 14 @@ -97,6 +120,7 @@ jobs: path: C:\${{ github.run_id }}\build-results - name: Upload sccache stats +        if: steps.build.outcome != 'skipped' uses: seemethere/upload-artifact-s3@v5 with: s3-prefix: | diff --git a/.github/workflows/_win-test.yml b/.github/workflows/_win-test.yml index 243bd7563639..6d5ae369b709 100644 --- a/.github/workflows/_win-test.yml +++ b/.github/workflows/_win-test.yml @@ -27,11 +27,33 @@ env: GIT_DEFAULT_BRANCH: ${{ github.event.repository.default_branch }} jobs: +  # This needs to be run right before the test starts so that it can gather the +  # latest labels from the PR +  filter: +    runs-on: [self-hosted, linux.large] +    outputs: +      test-matrix: ${{ steps.filter.outputs.test-matrix }} +      is-test-matrix-empty: ${{ steps.filter.outputs.is-test-matrix-empty }} +    steps: +      - name: Checkout PyTorch +        uses: pytorch/pytorch/.github/actions/checkout-pytorch@master +        with: +          fetch-depth: 1 +          submodules: false + +      - name: Select all requested test configurations +        id: filter +        uses: ./.github/actions/filter-test-configs +        with: +          github-token: ${{ secrets.GITHUB_TOKEN }} +          test-matrix: ${{ inputs.test-matrix }} + test: -    # Don't run on forked repos.
- if: github.repository_owner == 'pytorch' + needs: filter + # Don't run on forked repos or empty test matrix + if: github.repository_owner == 'pytorch' && needs.filter.outputs.is-test-matrix-empty == 'False' strategy: - matrix: ${{ fromJSON(inputs.test-matrix) }} + matrix: ${{ fromJSON(needs.filter.outputs.test-matrix) }} fail-fast: false runs-on: ${{ matrix.runner }} timeout-minutes: 300 diff --git a/.github/workflows/generated-linux-binary-conda-nightly.yml b/.github/workflows/generated-linux-binary-conda-nightly.yml index 81f779f2f014..0f3148bc28c1 100644 --- a/.github/workflows/generated-linux-binary-conda-nightly.yml +++ b/.github/workflows/generated-linux-binary-conda-nightly.yml @@ -153,66 +153,6 @@ jobs: aws-pytorch-uploader-secret-access-key: ${{ secrets.AWS_PYTORCH_UPLOADER_SECRET_ACCESS_KEY }} conda-pytorchbot-token: ${{ secrets.CONDA_PYTORCHBOT_TOKEN }} uses: ./.github/workflows/_binary-upload.yml - conda-py3_7-cuda11_3-build: - if: ${{ github.repository_owner == 'pytorch' }} - uses: ./.github/workflows/_binary-build-linux.yml - with: - PYTORCH_ROOT: /pytorch - BUILDER_ROOT: /builder - PACKAGE_TYPE: conda - # TODO: This is a legacy variable that we eventually want to get rid of in - # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu113 - GPU_ARCH_VERSION: 11.3 - GPU_ARCH_TYPE: cuda - DOCKER_IMAGE: pytorch/conda-builder:cuda11.3 - DESIRED_PYTHON: "3.7" - build_name: conda-py3_7-cuda11_3 - build_environment: linux-binary-conda - secrets: - github-token: ${{ secrets.GITHUB_TOKEN }} - - conda-py3_7-cuda11_3-test: # Testing - if: ${{ github.repository_owner == 'pytorch' }} - needs: conda-py3_7-cuda11_3-build - uses: ./.github/workflows/_binary-test-linux.yml - with: - PYTORCH_ROOT: /pytorch - BUILDER_ROOT: /builder - PACKAGE_TYPE: conda - # TODO: This is a legacy variable that we eventually want to get rid of in - # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu113 - GPU_ARCH_VERSION: 11.3 - GPU_ARCH_TYPE: cuda - DOCKER_IMAGE: pytorch/conda-builder:cuda11.3 - DESIRED_PYTHON: "3.7" - build_name: conda-py3_7-cuda11_3 - build_environment: linux-binary-conda - runs_on: linux.4xlarge.nvidia.gpu - secrets: - github-token: ${{ secrets.GITHUB_TOKEN }} - conda-py3_7-cuda11_3-upload: # Uploading - if: ${{ github.repository_owner == 'pytorch' }} - needs: conda-py3_7-cuda11_3-test - with: - PYTORCH_ROOT: /pytorch - BUILDER_ROOT: /builder - PACKAGE_TYPE: conda - # TODO: This is a legacy variable that we eventually want to get rid of in - # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu113 - GPU_ARCH_VERSION: 11.3 - GPU_ARCH_TYPE: cuda - DOCKER_IMAGE: pytorch/conda-builder:cuda11.3 - DESIRED_PYTHON: "3.7" - build_name: conda-py3_7-cuda11_3 - secrets: - github-token: ${{ secrets.GITHUB_TOKEN }} - aws-access-key-id: ${{ secrets.AWS_PYTORCH_UPLOADER_ACCESS_KEY_ID }} - aws-pytorch-uploader-secret-access-key: ${{ secrets.AWS_PYTORCH_UPLOADER_SECRET_ACCESS_KEY }} - conda-pytorchbot-token: ${{ secrets.CONDA_PYTORCHBOT_TOKEN }} - uses: ./.github/workflows/_binary-upload.yml conda-py3_7-cuda11_6-build: if: ${{ github.repository_owner == 'pytorch' }} uses: ./.github/workflows/_binary-build-linux.yml @@ -450,66 +390,6 @@ jobs: aws-pytorch-uploader-secret-access-key: ${{ secrets.AWS_PYTORCH_UPLOADER_SECRET_ACCESS_KEY }} conda-pytorchbot-token: ${{ secrets.CONDA_PYTORCHBOT_TOKEN }} uses: ./.github/workflows/_binary-upload.yml - conda-py3_8-cuda11_3-build: - if: ${{ github.repository_owner == 'pytorch' }} - uses: ./.github/workflows/_binary-build-linux.yml - with: - PYTORCH_ROOT: /pytorch - BUILDER_ROOT: /builder - 
PACKAGE_TYPE: conda - # TODO: This is a legacy variable that we eventually want to get rid of in - # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu113 - GPU_ARCH_VERSION: 11.3 - GPU_ARCH_TYPE: cuda - DOCKER_IMAGE: pytorch/conda-builder:cuda11.3 - DESIRED_PYTHON: "3.8" - build_name: conda-py3_8-cuda11_3 - build_environment: linux-binary-conda - secrets: - github-token: ${{ secrets.GITHUB_TOKEN }} - - conda-py3_8-cuda11_3-test: # Testing - if: ${{ github.repository_owner == 'pytorch' }} - needs: conda-py3_8-cuda11_3-build - uses: ./.github/workflows/_binary-test-linux.yml - with: - PYTORCH_ROOT: /pytorch - BUILDER_ROOT: /builder - PACKAGE_TYPE: conda - # TODO: This is a legacy variable that we eventually want to get rid of in - # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu113 - GPU_ARCH_VERSION: 11.3 - GPU_ARCH_TYPE: cuda - DOCKER_IMAGE: pytorch/conda-builder:cuda11.3 - DESIRED_PYTHON: "3.8" - build_name: conda-py3_8-cuda11_3 - build_environment: linux-binary-conda - runs_on: linux.4xlarge.nvidia.gpu - secrets: - github-token: ${{ secrets.GITHUB_TOKEN }} - conda-py3_8-cuda11_3-upload: # Uploading - if: ${{ github.repository_owner == 'pytorch' }} - needs: conda-py3_8-cuda11_3-test - with: - PYTORCH_ROOT: /pytorch - BUILDER_ROOT: /builder - PACKAGE_TYPE: conda - # TODO: This is a legacy variable that we eventually want to get rid of in - # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu113 - GPU_ARCH_VERSION: 11.3 - GPU_ARCH_TYPE: cuda - DOCKER_IMAGE: pytorch/conda-builder:cuda11.3 - DESIRED_PYTHON: "3.8" - build_name: conda-py3_8-cuda11_3 - secrets: - github-token: ${{ secrets.GITHUB_TOKEN }} - aws-access-key-id: ${{ secrets.AWS_PYTORCH_UPLOADER_ACCESS_KEY_ID }} - aws-pytorch-uploader-secret-access-key: ${{ secrets.AWS_PYTORCH_UPLOADER_SECRET_ACCESS_KEY }} - conda-pytorchbot-token: ${{ secrets.CONDA_PYTORCHBOT_TOKEN }} - uses: ./.github/workflows/_binary-upload.yml conda-py3_8-cuda11_6-build: if: ${{ github.repository_owner == 'pytorch' }} uses: ./.github/workflows/_binary-build-linux.yml @@ -747,66 +627,6 @@ jobs: aws-pytorch-uploader-secret-access-key: ${{ secrets.AWS_PYTORCH_UPLOADER_SECRET_ACCESS_KEY }} conda-pytorchbot-token: ${{ secrets.CONDA_PYTORCHBOT_TOKEN }} uses: ./.github/workflows/_binary-upload.yml - conda-py3_9-cuda11_3-build: - if: ${{ github.repository_owner == 'pytorch' }} - uses: ./.github/workflows/_binary-build-linux.yml - with: - PYTORCH_ROOT: /pytorch - BUILDER_ROOT: /builder - PACKAGE_TYPE: conda - # TODO: This is a legacy variable that we eventually want to get rid of in - # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu113 - GPU_ARCH_VERSION: 11.3 - GPU_ARCH_TYPE: cuda - DOCKER_IMAGE: pytorch/conda-builder:cuda11.3 - DESIRED_PYTHON: "3.9" - build_name: conda-py3_9-cuda11_3 - build_environment: linux-binary-conda - secrets: - github-token: ${{ secrets.GITHUB_TOKEN }} - - conda-py3_9-cuda11_3-test: # Testing - if: ${{ github.repository_owner == 'pytorch' }} - needs: conda-py3_9-cuda11_3-build - uses: ./.github/workflows/_binary-test-linux.yml - with: - PYTORCH_ROOT: /pytorch - BUILDER_ROOT: /builder - PACKAGE_TYPE: conda - # TODO: This is a legacy variable that we eventually want to get rid of in - # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu113 - GPU_ARCH_VERSION: 11.3 - GPU_ARCH_TYPE: cuda - DOCKER_IMAGE: pytorch/conda-builder:cuda11.3 - DESIRED_PYTHON: "3.9" - build_name: conda-py3_9-cuda11_3 - build_environment: linux-binary-conda - runs_on: linux.4xlarge.nvidia.gpu - secrets: - github-token: ${{ secrets.GITHUB_TOKEN }} - conda-py3_9-cuda11_3-upload: # Uploading - if: 
${{ github.repository_owner == 'pytorch' }} - needs: conda-py3_9-cuda11_3-test - with: - PYTORCH_ROOT: /pytorch - BUILDER_ROOT: /builder - PACKAGE_TYPE: conda - # TODO: This is a legacy variable that we eventually want to get rid of in - # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu113 - GPU_ARCH_VERSION: 11.3 - GPU_ARCH_TYPE: cuda - DOCKER_IMAGE: pytorch/conda-builder:cuda11.3 - DESIRED_PYTHON: "3.9" - build_name: conda-py3_9-cuda11_3 - secrets: - github-token: ${{ secrets.GITHUB_TOKEN }} - aws-access-key-id: ${{ secrets.AWS_PYTORCH_UPLOADER_ACCESS_KEY_ID }} - aws-pytorch-uploader-secret-access-key: ${{ secrets.AWS_PYTORCH_UPLOADER_SECRET_ACCESS_KEY }} - conda-pytorchbot-token: ${{ secrets.CONDA_PYTORCHBOT_TOKEN }} - uses: ./.github/workflows/_binary-upload.yml conda-py3_9-cuda11_6-build: if: ${{ github.repository_owner == 'pytorch' }} uses: ./.github/workflows/_binary-build-linux.yml @@ -1044,66 +864,6 @@ jobs: aws-pytorch-uploader-secret-access-key: ${{ secrets.AWS_PYTORCH_UPLOADER_SECRET_ACCESS_KEY }} conda-pytorchbot-token: ${{ secrets.CONDA_PYTORCHBOT_TOKEN }} uses: ./.github/workflows/_binary-upload.yml - conda-py3_10-cuda11_3-build: - if: ${{ github.repository_owner == 'pytorch' }} - uses: ./.github/workflows/_binary-build-linux.yml - with: - PYTORCH_ROOT: /pytorch - BUILDER_ROOT: /builder - PACKAGE_TYPE: conda - # TODO: This is a legacy variable that we eventually want to get rid of in - # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu113 - GPU_ARCH_VERSION: 11.3 - GPU_ARCH_TYPE: cuda - DOCKER_IMAGE: pytorch/conda-builder:cuda11.3 - DESIRED_PYTHON: "3.10" - build_name: conda-py3_10-cuda11_3 - build_environment: linux-binary-conda - secrets: - github-token: ${{ secrets.GITHUB_TOKEN }} - - conda-py3_10-cuda11_3-test: # Testing - if: ${{ github.repository_owner == 'pytorch' }} - needs: conda-py3_10-cuda11_3-build - uses: ./.github/workflows/_binary-test-linux.yml - with: - PYTORCH_ROOT: /pytorch - BUILDER_ROOT: /builder - PACKAGE_TYPE: conda - # TODO: This is a legacy variable that we eventually want to get rid of in - # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu113 - GPU_ARCH_VERSION: 11.3 - GPU_ARCH_TYPE: cuda - DOCKER_IMAGE: pytorch/conda-builder:cuda11.3 - DESIRED_PYTHON: "3.10" - build_name: conda-py3_10-cuda11_3 - build_environment: linux-binary-conda - runs_on: linux.4xlarge.nvidia.gpu - secrets: - github-token: ${{ secrets.GITHUB_TOKEN }} - conda-py3_10-cuda11_3-upload: # Uploading - if: ${{ github.repository_owner == 'pytorch' }} - needs: conda-py3_10-cuda11_3-test - with: - PYTORCH_ROOT: /pytorch - BUILDER_ROOT: /builder - PACKAGE_TYPE: conda - # TODO: This is a legacy variable that we eventually want to get rid of in - # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu113 - GPU_ARCH_VERSION: 11.3 - GPU_ARCH_TYPE: cuda - DOCKER_IMAGE: pytorch/conda-builder:cuda11.3 - DESIRED_PYTHON: "3.10" - build_name: conda-py3_10-cuda11_3 - secrets: - github-token: ${{ secrets.GITHUB_TOKEN }} - aws-access-key-id: ${{ secrets.AWS_PYTORCH_UPLOADER_ACCESS_KEY_ID }} - aws-pytorch-uploader-secret-access-key: ${{ secrets.AWS_PYTORCH_UPLOADER_SECRET_ACCESS_KEY }} - conda-pytorchbot-token: ${{ secrets.CONDA_PYTORCHBOT_TOKEN }} - uses: ./.github/workflows/_binary-upload.yml conda-py3_10-cuda11_6-build: if: ${{ github.repository_owner == 'pytorch' }} uses: ./.github/workflows/_binary-build-linux.yml diff --git a/.github/workflows/generated-linux-binary-libtorch-cxx11-abi-nightly.yml b/.github/workflows/generated-linux-binary-libtorch-cxx11-abi-nightly.yml index cf629ed3358a..53033239bf44 100644 --- 
a/.github/workflows/generated-linux-binary-libtorch-cxx11-abi-nightly.yml +++ b/.github/workflows/generated-linux-binary-libtorch-cxx11-abi-nightly.yml @@ -528,258 +528,6 @@ jobs: aws-pytorch-uploader-secret-access-key: ${{ secrets.AWS_PYTORCH_UPLOADER_SECRET_ACCESS_KEY }} conda-pytorchbot-token: ${{ secrets.CONDA_PYTORCHBOT_TOKEN }} uses: ./.github/workflows/_binary-upload.yml - libtorch-cuda11_3-shared-with-deps-cxx11-abi-build: - if: ${{ github.repository_owner == 'pytorch' }} - uses: ./.github/workflows/_binary-build-linux.yml - with: - PYTORCH_ROOT: /pytorch - BUILDER_ROOT: /builder - PACKAGE_TYPE: libtorch - # TODO: This is a legacy variable that we eventually want to get rid of in - # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu113 - GPU_ARCH_VERSION: 11.3 - GPU_ARCH_TYPE: cuda - DOCKER_IMAGE: pytorch/libtorch-cxx11-builder:cuda11.3 - LIBTORCH_VARIANT: shared-with-deps - DESIRED_DEVTOOLSET: cxx11-abi - build_name: libtorch-cuda11_3-shared-with-deps-cxx11-abi - build_environment: linux-binary-libtorch-cxx11-abi - secrets: - github-token: ${{ secrets.GITHUB_TOKEN }} - - libtorch-cuda11_3-shared-with-deps-cxx11-abi-test: # Testing - if: ${{ github.repository_owner == 'pytorch' }} - needs: libtorch-cuda11_3-shared-with-deps-cxx11-abi-build - uses: ./.github/workflows/_binary-test-linux.yml - with: - PYTORCH_ROOT: /pytorch - BUILDER_ROOT: /builder - PACKAGE_TYPE: libtorch - # TODO: This is a legacy variable that we eventually want to get rid of in - # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu113 - GPU_ARCH_VERSION: 11.3 - GPU_ARCH_TYPE: cuda - DOCKER_IMAGE: pytorch/libtorch-cxx11-builder:cuda11.3 - LIBTORCH_VARIANT: shared-with-deps - DESIRED_DEVTOOLSET: cxx11-abi - build_name: libtorch-cuda11_3-shared-with-deps-cxx11-abi - build_environment: linux-binary-libtorch-cxx11-abi - runs_on: linux.4xlarge.nvidia.gpu - secrets: - github-token: ${{ secrets.GITHUB_TOKEN }} - libtorch-cuda11_3-shared-with-deps-cxx11-abi-upload: # Uploading - if: ${{ github.repository_owner == 'pytorch' }} - needs: libtorch-cuda11_3-shared-with-deps-cxx11-abi-test - with: - PYTORCH_ROOT: /pytorch - BUILDER_ROOT: /builder - PACKAGE_TYPE: libtorch - # TODO: This is a legacy variable that we eventually want to get rid of in - # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu113 - GPU_ARCH_VERSION: 11.3 - GPU_ARCH_TYPE: cuda - DOCKER_IMAGE: pytorch/libtorch-cxx11-builder:cuda11.3 - LIBTORCH_VARIANT: shared-with-deps - DESIRED_DEVTOOLSET: cxx11-abi - build_name: libtorch-cuda11_3-shared-with-deps-cxx11-abi - secrets: - github-token: ${{ secrets.GITHUB_TOKEN }} - aws-access-key-id: ${{ secrets.AWS_PYTORCH_UPLOADER_ACCESS_KEY_ID }} - aws-pytorch-uploader-secret-access-key: ${{ secrets.AWS_PYTORCH_UPLOADER_SECRET_ACCESS_KEY }} - conda-pytorchbot-token: ${{ secrets.CONDA_PYTORCHBOT_TOKEN }} - uses: ./.github/workflows/_binary-upload.yml - libtorch-cuda11_3-shared-without-deps-cxx11-abi-build: - if: ${{ github.repository_owner == 'pytorch' }} - uses: ./.github/workflows/_binary-build-linux.yml - with: - PYTORCH_ROOT: /pytorch - BUILDER_ROOT: /builder - PACKAGE_TYPE: libtorch - # TODO: This is a legacy variable that we eventually want to get rid of in - # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu113 - GPU_ARCH_VERSION: 11.3 - GPU_ARCH_TYPE: cuda - DOCKER_IMAGE: pytorch/libtorch-cxx11-builder:cuda11.3 - LIBTORCH_VARIANT: shared-without-deps - DESIRED_DEVTOOLSET: cxx11-abi - build_name: libtorch-cuda11_3-shared-without-deps-cxx11-abi - build_environment: linux-binary-libtorch-cxx11-abi - secrets: - github-token: ${{ 
secrets.GITHUB_TOKEN }} - - libtorch-cuda11_3-shared-without-deps-cxx11-abi-test: # Testing - if: ${{ github.repository_owner == 'pytorch' }} - needs: libtorch-cuda11_3-shared-without-deps-cxx11-abi-build - uses: ./.github/workflows/_binary-test-linux.yml - with: - PYTORCH_ROOT: /pytorch - BUILDER_ROOT: /builder - PACKAGE_TYPE: libtorch - # TODO: This is a legacy variable that we eventually want to get rid of in - # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu113 - GPU_ARCH_VERSION: 11.3 - GPU_ARCH_TYPE: cuda - DOCKER_IMAGE: pytorch/libtorch-cxx11-builder:cuda11.3 - LIBTORCH_VARIANT: shared-without-deps - DESIRED_DEVTOOLSET: cxx11-abi - build_name: libtorch-cuda11_3-shared-without-deps-cxx11-abi - build_environment: linux-binary-libtorch-cxx11-abi - runs_on: linux.4xlarge.nvidia.gpu - secrets: - github-token: ${{ secrets.GITHUB_TOKEN }} - libtorch-cuda11_3-shared-without-deps-cxx11-abi-upload: # Uploading - if: ${{ github.repository_owner == 'pytorch' }} - needs: libtorch-cuda11_3-shared-without-deps-cxx11-abi-test - with: - PYTORCH_ROOT: /pytorch - BUILDER_ROOT: /builder - PACKAGE_TYPE: libtorch - # TODO: This is a legacy variable that we eventually want to get rid of in - # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu113 - GPU_ARCH_VERSION: 11.3 - GPU_ARCH_TYPE: cuda - DOCKER_IMAGE: pytorch/libtorch-cxx11-builder:cuda11.3 - LIBTORCH_VARIANT: shared-without-deps - DESIRED_DEVTOOLSET: cxx11-abi - build_name: libtorch-cuda11_3-shared-without-deps-cxx11-abi - secrets: - github-token: ${{ secrets.GITHUB_TOKEN }} - aws-access-key-id: ${{ secrets.AWS_PYTORCH_UPLOADER_ACCESS_KEY_ID }} - aws-pytorch-uploader-secret-access-key: ${{ secrets.AWS_PYTORCH_UPLOADER_SECRET_ACCESS_KEY }} - conda-pytorchbot-token: ${{ secrets.CONDA_PYTORCHBOT_TOKEN }} - uses: ./.github/workflows/_binary-upload.yml - libtorch-cuda11_3-static-with-deps-cxx11-abi-build: - if: ${{ github.repository_owner == 'pytorch' }} - uses: ./.github/workflows/_binary-build-linux.yml - with: - PYTORCH_ROOT: /pytorch - BUILDER_ROOT: /builder - PACKAGE_TYPE: libtorch - # TODO: This is a legacy variable that we eventually want to get rid of in - # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu113 - GPU_ARCH_VERSION: 11.3 - GPU_ARCH_TYPE: cuda - DOCKER_IMAGE: pytorch/libtorch-cxx11-builder:cuda11.3 - LIBTORCH_VARIANT: static-with-deps - DESIRED_DEVTOOLSET: cxx11-abi - build_name: libtorch-cuda11_3-static-with-deps-cxx11-abi - build_environment: linux-binary-libtorch-cxx11-abi - secrets: - github-token: ${{ secrets.GITHUB_TOKEN }} - - libtorch-cuda11_3-static-with-deps-cxx11-abi-test: # Testing - if: ${{ github.repository_owner == 'pytorch' }} - needs: libtorch-cuda11_3-static-with-deps-cxx11-abi-build - uses: ./.github/workflows/_binary-test-linux.yml - with: - PYTORCH_ROOT: /pytorch - BUILDER_ROOT: /builder - PACKAGE_TYPE: libtorch - # TODO: This is a legacy variable that we eventually want to get rid of in - # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu113 - GPU_ARCH_VERSION: 11.3 - GPU_ARCH_TYPE: cuda - DOCKER_IMAGE: pytorch/libtorch-cxx11-builder:cuda11.3 - LIBTORCH_VARIANT: static-with-deps - DESIRED_DEVTOOLSET: cxx11-abi - build_name: libtorch-cuda11_3-static-with-deps-cxx11-abi - build_environment: linux-binary-libtorch-cxx11-abi - runs_on: linux.4xlarge.nvidia.gpu - secrets: - github-token: ${{ secrets.GITHUB_TOKEN }} - libtorch-cuda11_3-static-with-deps-cxx11-abi-upload: # Uploading - if: ${{ github.repository_owner == 'pytorch' }} - needs: libtorch-cuda11_3-static-with-deps-cxx11-abi-test - with: - PYTORCH_ROOT: /pytorch - 
BUILDER_ROOT: /builder - PACKAGE_TYPE: libtorch - # TODO: This is a legacy variable that we eventually want to get rid of in - # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu113 - GPU_ARCH_VERSION: 11.3 - GPU_ARCH_TYPE: cuda - DOCKER_IMAGE: pytorch/libtorch-cxx11-builder:cuda11.3 - LIBTORCH_VARIANT: static-with-deps - DESIRED_DEVTOOLSET: cxx11-abi - build_name: libtorch-cuda11_3-static-with-deps-cxx11-abi - secrets: - github-token: ${{ secrets.GITHUB_TOKEN }} - aws-access-key-id: ${{ secrets.AWS_PYTORCH_UPLOADER_ACCESS_KEY_ID }} - aws-pytorch-uploader-secret-access-key: ${{ secrets.AWS_PYTORCH_UPLOADER_SECRET_ACCESS_KEY }} - conda-pytorchbot-token: ${{ secrets.CONDA_PYTORCHBOT_TOKEN }} - uses: ./.github/workflows/_binary-upload.yml - libtorch-cuda11_3-static-without-deps-cxx11-abi-build: - if: ${{ github.repository_owner == 'pytorch' }} - uses: ./.github/workflows/_binary-build-linux.yml - with: - PYTORCH_ROOT: /pytorch - BUILDER_ROOT: /builder - PACKAGE_TYPE: libtorch - # TODO: This is a legacy variable that we eventually want to get rid of in - # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu113 - GPU_ARCH_VERSION: 11.3 - GPU_ARCH_TYPE: cuda - DOCKER_IMAGE: pytorch/libtorch-cxx11-builder:cuda11.3 - LIBTORCH_VARIANT: static-without-deps - DESIRED_DEVTOOLSET: cxx11-abi - build_name: libtorch-cuda11_3-static-without-deps-cxx11-abi - build_environment: linux-binary-libtorch-cxx11-abi - secrets: - github-token: ${{ secrets.GITHUB_TOKEN }} - - libtorch-cuda11_3-static-without-deps-cxx11-abi-test: # Testing - if: ${{ github.repository_owner == 'pytorch' }} - needs: libtorch-cuda11_3-static-without-deps-cxx11-abi-build - uses: ./.github/workflows/_binary-test-linux.yml - with: - PYTORCH_ROOT: /pytorch - BUILDER_ROOT: /builder - PACKAGE_TYPE: libtorch - # TODO: This is a legacy variable that we eventually want to get rid of in - # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu113 - GPU_ARCH_VERSION: 11.3 - GPU_ARCH_TYPE: cuda - DOCKER_IMAGE: pytorch/libtorch-cxx11-builder:cuda11.3 - LIBTORCH_VARIANT: static-without-deps - DESIRED_DEVTOOLSET: cxx11-abi - build_name: libtorch-cuda11_3-static-without-deps-cxx11-abi - build_environment: linux-binary-libtorch-cxx11-abi - runs_on: linux.4xlarge.nvidia.gpu - secrets: - github-token: ${{ secrets.GITHUB_TOKEN }} - libtorch-cuda11_3-static-without-deps-cxx11-abi-upload: # Uploading - if: ${{ github.repository_owner == 'pytorch' }} - needs: libtorch-cuda11_3-static-without-deps-cxx11-abi-test - with: - PYTORCH_ROOT: /pytorch - BUILDER_ROOT: /builder - PACKAGE_TYPE: libtorch - # TODO: This is a legacy variable that we eventually want to get rid of in - # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu113 - GPU_ARCH_VERSION: 11.3 - GPU_ARCH_TYPE: cuda - DOCKER_IMAGE: pytorch/libtorch-cxx11-builder:cuda11.3 - LIBTORCH_VARIANT: static-without-deps - DESIRED_DEVTOOLSET: cxx11-abi - build_name: libtorch-cuda11_3-static-without-deps-cxx11-abi - secrets: - github-token: ${{ secrets.GITHUB_TOKEN }} - aws-access-key-id: ${{ secrets.AWS_PYTORCH_UPLOADER_ACCESS_KEY_ID }} - aws-pytorch-uploader-secret-access-key: ${{ secrets.AWS_PYTORCH_UPLOADER_SECRET_ACCESS_KEY }} - conda-pytorchbot-token: ${{ secrets.CONDA_PYTORCHBOT_TOKEN }} - uses: ./.github/workflows/_binary-upload.yml libtorch-cuda11_6-shared-with-deps-cxx11-abi-build: if: ${{ github.repository_owner == 'pytorch' }} uses: ./.github/workflows/_binary-build-linux.yml diff --git a/.github/workflows/generated-linux-binary-libtorch-pre-cxx11-nightly.yml b/.github/workflows/generated-linux-binary-libtorch-pre-cxx11-nightly.yml 
index 0bf3534290c2..23e39d3345ba 100644 --- a/.github/workflows/generated-linux-binary-libtorch-pre-cxx11-nightly.yml +++ b/.github/workflows/generated-linux-binary-libtorch-pre-cxx11-nightly.yml @@ -528,258 +528,6 @@ jobs: aws-pytorch-uploader-secret-access-key: ${{ secrets.AWS_PYTORCH_UPLOADER_SECRET_ACCESS_KEY }} conda-pytorchbot-token: ${{ secrets.CONDA_PYTORCHBOT_TOKEN }} uses: ./.github/workflows/_binary-upload.yml - libtorch-cuda11_3-shared-with-deps-pre-cxx11-build: - if: ${{ github.repository_owner == 'pytorch' }} - uses: ./.github/workflows/_binary-build-linux.yml - with: - PYTORCH_ROOT: /pytorch - BUILDER_ROOT: /builder - PACKAGE_TYPE: libtorch - # TODO: This is a legacy variable that we eventually want to get rid of in - # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu113 - GPU_ARCH_VERSION: 11.3 - GPU_ARCH_TYPE: cuda - DOCKER_IMAGE: pytorch/manylinux-builder:cuda11.3 - LIBTORCH_VARIANT: shared-with-deps - DESIRED_DEVTOOLSET: pre-cxx11 - build_name: libtorch-cuda11_3-shared-with-deps-pre-cxx11 - build_environment: linux-binary-libtorch-pre-cxx11 - secrets: - github-token: ${{ secrets.GITHUB_TOKEN }} - - libtorch-cuda11_3-shared-with-deps-pre-cxx11-test: # Testing - if: ${{ github.repository_owner == 'pytorch' }} - needs: libtorch-cuda11_3-shared-with-deps-pre-cxx11-build - uses: ./.github/workflows/_binary-test-linux.yml - with: - PYTORCH_ROOT: /pytorch - BUILDER_ROOT: /builder - PACKAGE_TYPE: libtorch - # TODO: This is a legacy variable that we eventually want to get rid of in - # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu113 - GPU_ARCH_VERSION: 11.3 - GPU_ARCH_TYPE: cuda - DOCKER_IMAGE: pytorch/manylinux-builder:cuda11.3 - LIBTORCH_VARIANT: shared-with-deps - DESIRED_DEVTOOLSET: pre-cxx11 - build_name: libtorch-cuda11_3-shared-with-deps-pre-cxx11 - build_environment: linux-binary-libtorch-pre-cxx11 - runs_on: linux.4xlarge.nvidia.gpu - secrets: - github-token: ${{ secrets.GITHUB_TOKEN }} - libtorch-cuda11_3-shared-with-deps-pre-cxx11-upload: # Uploading - if: ${{ github.repository_owner == 'pytorch' }} - needs: libtorch-cuda11_3-shared-with-deps-pre-cxx11-test - with: - PYTORCH_ROOT: /pytorch - BUILDER_ROOT: /builder - PACKAGE_TYPE: libtorch - # TODO: This is a legacy variable that we eventually want to get rid of in - # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu113 - GPU_ARCH_VERSION: 11.3 - GPU_ARCH_TYPE: cuda - DOCKER_IMAGE: pytorch/manylinux-builder:cuda11.3 - LIBTORCH_VARIANT: shared-with-deps - DESIRED_DEVTOOLSET: pre-cxx11 - build_name: libtorch-cuda11_3-shared-with-deps-pre-cxx11 - secrets: - github-token: ${{ secrets.GITHUB_TOKEN }} - aws-access-key-id: ${{ secrets.AWS_PYTORCH_UPLOADER_ACCESS_KEY_ID }} - aws-pytorch-uploader-secret-access-key: ${{ secrets.AWS_PYTORCH_UPLOADER_SECRET_ACCESS_KEY }} - conda-pytorchbot-token: ${{ secrets.CONDA_PYTORCHBOT_TOKEN }} - uses: ./.github/workflows/_binary-upload.yml - libtorch-cuda11_3-shared-without-deps-pre-cxx11-build: - if: ${{ github.repository_owner == 'pytorch' }} - uses: ./.github/workflows/_binary-build-linux.yml - with: - PYTORCH_ROOT: /pytorch - BUILDER_ROOT: /builder - PACKAGE_TYPE: libtorch - # TODO: This is a legacy variable that we eventually want to get rid of in - # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu113 - GPU_ARCH_VERSION: 11.3 - GPU_ARCH_TYPE: cuda - DOCKER_IMAGE: pytorch/manylinux-builder:cuda11.3 - LIBTORCH_VARIANT: shared-without-deps - DESIRED_DEVTOOLSET: pre-cxx11 - build_name: libtorch-cuda11_3-shared-without-deps-pre-cxx11 - build_environment: linux-binary-libtorch-pre-cxx11 - secrets: - 
github-token: ${{ secrets.GITHUB_TOKEN }} - - libtorch-cuda11_3-shared-without-deps-pre-cxx11-test: # Testing - if: ${{ github.repository_owner == 'pytorch' }} - needs: libtorch-cuda11_3-shared-without-deps-pre-cxx11-build - uses: ./.github/workflows/_binary-test-linux.yml - with: - PYTORCH_ROOT: /pytorch - BUILDER_ROOT: /builder - PACKAGE_TYPE: libtorch - # TODO: This is a legacy variable that we eventually want to get rid of in - # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu113 - GPU_ARCH_VERSION: 11.3 - GPU_ARCH_TYPE: cuda - DOCKER_IMAGE: pytorch/manylinux-builder:cuda11.3 - LIBTORCH_VARIANT: shared-without-deps - DESIRED_DEVTOOLSET: pre-cxx11 - build_name: libtorch-cuda11_3-shared-without-deps-pre-cxx11 - build_environment: linux-binary-libtorch-pre-cxx11 - runs_on: linux.4xlarge.nvidia.gpu - secrets: - github-token: ${{ secrets.GITHUB_TOKEN }} - libtorch-cuda11_3-shared-without-deps-pre-cxx11-upload: # Uploading - if: ${{ github.repository_owner == 'pytorch' }} - needs: libtorch-cuda11_3-shared-without-deps-pre-cxx11-test - with: - PYTORCH_ROOT: /pytorch - BUILDER_ROOT: /builder - PACKAGE_TYPE: libtorch - # TODO: This is a legacy variable that we eventually want to get rid of in - # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu113 - GPU_ARCH_VERSION: 11.3 - GPU_ARCH_TYPE: cuda - DOCKER_IMAGE: pytorch/manylinux-builder:cuda11.3 - LIBTORCH_VARIANT: shared-without-deps - DESIRED_DEVTOOLSET: pre-cxx11 - build_name: libtorch-cuda11_3-shared-without-deps-pre-cxx11 - secrets: - github-token: ${{ secrets.GITHUB_TOKEN }} - aws-access-key-id: ${{ secrets.AWS_PYTORCH_UPLOADER_ACCESS_KEY_ID }} - aws-pytorch-uploader-secret-access-key: ${{ secrets.AWS_PYTORCH_UPLOADER_SECRET_ACCESS_KEY }} - conda-pytorchbot-token: ${{ secrets.CONDA_PYTORCHBOT_TOKEN }} - uses: ./.github/workflows/_binary-upload.yml - libtorch-cuda11_3-static-with-deps-pre-cxx11-build: - if: ${{ github.repository_owner == 'pytorch' }} - uses: ./.github/workflows/_binary-build-linux.yml - with: - PYTORCH_ROOT: /pytorch - BUILDER_ROOT: /builder - PACKAGE_TYPE: libtorch - # TODO: This is a legacy variable that we eventually want to get rid of in - # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu113 - GPU_ARCH_VERSION: 11.3 - GPU_ARCH_TYPE: cuda - DOCKER_IMAGE: pytorch/manylinux-builder:cuda11.3 - LIBTORCH_VARIANT: static-with-deps - DESIRED_DEVTOOLSET: pre-cxx11 - build_name: libtorch-cuda11_3-static-with-deps-pre-cxx11 - build_environment: linux-binary-libtorch-pre-cxx11 - secrets: - github-token: ${{ secrets.GITHUB_TOKEN }} - - libtorch-cuda11_3-static-with-deps-pre-cxx11-test: # Testing - if: ${{ github.repository_owner == 'pytorch' }} - needs: libtorch-cuda11_3-static-with-deps-pre-cxx11-build - uses: ./.github/workflows/_binary-test-linux.yml - with: - PYTORCH_ROOT: /pytorch - BUILDER_ROOT: /builder - PACKAGE_TYPE: libtorch - # TODO: This is a legacy variable that we eventually want to get rid of in - # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu113 - GPU_ARCH_VERSION: 11.3 - GPU_ARCH_TYPE: cuda - DOCKER_IMAGE: pytorch/manylinux-builder:cuda11.3 - LIBTORCH_VARIANT: static-with-deps - DESIRED_DEVTOOLSET: pre-cxx11 - build_name: libtorch-cuda11_3-static-with-deps-pre-cxx11 - build_environment: linux-binary-libtorch-pre-cxx11 - runs_on: linux.4xlarge.nvidia.gpu - secrets: - github-token: ${{ secrets.GITHUB_TOKEN }} - libtorch-cuda11_3-static-with-deps-pre-cxx11-upload: # Uploading - if: ${{ github.repository_owner == 'pytorch' }} - needs: libtorch-cuda11_3-static-with-deps-pre-cxx11-test - with: - PYTORCH_ROOT: /pytorch - 
BUILDER_ROOT: /builder - PACKAGE_TYPE: libtorch - # TODO: This is a legacy variable that we eventually want to get rid of in - # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu113 - GPU_ARCH_VERSION: 11.3 - GPU_ARCH_TYPE: cuda - DOCKER_IMAGE: pytorch/manylinux-builder:cuda11.3 - LIBTORCH_VARIANT: static-with-deps - DESIRED_DEVTOOLSET: pre-cxx11 - build_name: libtorch-cuda11_3-static-with-deps-pre-cxx11 - secrets: - github-token: ${{ secrets.GITHUB_TOKEN }} - aws-access-key-id: ${{ secrets.AWS_PYTORCH_UPLOADER_ACCESS_KEY_ID }} - aws-pytorch-uploader-secret-access-key: ${{ secrets.AWS_PYTORCH_UPLOADER_SECRET_ACCESS_KEY }} - conda-pytorchbot-token: ${{ secrets.CONDA_PYTORCHBOT_TOKEN }} - uses: ./.github/workflows/_binary-upload.yml - libtorch-cuda11_3-static-without-deps-pre-cxx11-build: - if: ${{ github.repository_owner == 'pytorch' }} - uses: ./.github/workflows/_binary-build-linux.yml - with: - PYTORCH_ROOT: /pytorch - BUILDER_ROOT: /builder - PACKAGE_TYPE: libtorch - # TODO: This is a legacy variable that we eventually want to get rid of in - # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu113 - GPU_ARCH_VERSION: 11.3 - GPU_ARCH_TYPE: cuda - DOCKER_IMAGE: pytorch/manylinux-builder:cuda11.3 - LIBTORCH_VARIANT: static-without-deps - DESIRED_DEVTOOLSET: pre-cxx11 - build_name: libtorch-cuda11_3-static-without-deps-pre-cxx11 - build_environment: linux-binary-libtorch-pre-cxx11 - secrets: - github-token: ${{ secrets.GITHUB_TOKEN }} - - libtorch-cuda11_3-static-without-deps-pre-cxx11-test: # Testing - if: ${{ github.repository_owner == 'pytorch' }} - needs: libtorch-cuda11_3-static-without-deps-pre-cxx11-build - uses: ./.github/workflows/_binary-test-linux.yml - with: - PYTORCH_ROOT: /pytorch - BUILDER_ROOT: /builder - PACKAGE_TYPE: libtorch - # TODO: This is a legacy variable that we eventually want to get rid of in - # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu113 - GPU_ARCH_VERSION: 11.3 - GPU_ARCH_TYPE: cuda - DOCKER_IMAGE: pytorch/manylinux-builder:cuda11.3 - LIBTORCH_VARIANT: static-without-deps - DESIRED_DEVTOOLSET: pre-cxx11 - build_name: libtorch-cuda11_3-static-without-deps-pre-cxx11 - build_environment: linux-binary-libtorch-pre-cxx11 - runs_on: linux.4xlarge.nvidia.gpu - secrets: - github-token: ${{ secrets.GITHUB_TOKEN }} - libtorch-cuda11_3-static-without-deps-pre-cxx11-upload: # Uploading - if: ${{ github.repository_owner == 'pytorch' }} - needs: libtorch-cuda11_3-static-without-deps-pre-cxx11-test - with: - PYTORCH_ROOT: /pytorch - BUILDER_ROOT: /builder - PACKAGE_TYPE: libtorch - # TODO: This is a legacy variable that we eventually want to get rid of in - # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu113 - GPU_ARCH_VERSION: 11.3 - GPU_ARCH_TYPE: cuda - DOCKER_IMAGE: pytorch/manylinux-builder:cuda11.3 - LIBTORCH_VARIANT: static-without-deps - DESIRED_DEVTOOLSET: pre-cxx11 - build_name: libtorch-cuda11_3-static-without-deps-pre-cxx11 - secrets: - github-token: ${{ secrets.GITHUB_TOKEN }} - aws-access-key-id: ${{ secrets.AWS_PYTORCH_UPLOADER_ACCESS_KEY_ID }} - aws-pytorch-uploader-secret-access-key: ${{ secrets.AWS_PYTORCH_UPLOADER_SECRET_ACCESS_KEY }} - conda-pytorchbot-token: ${{ secrets.CONDA_PYTORCHBOT_TOKEN }} - uses: ./.github/workflows/_binary-upload.yml libtorch-cuda11_6-shared-with-deps-pre-cxx11-build: if: ${{ github.repository_owner == 'pytorch' }} uses: ./.github/workflows/_binary-build-linux.yml diff --git a/.github/workflows/generated-linux-binary-manywheel-nightly.yml b/.github/workflows/generated-linux-binary-manywheel-nightly.yml index ba912b65fd14..3c94498cc3ad 
100644 --- a/.github/workflows/generated-linux-binary-manywheel-nightly.yml +++ b/.github/workflows/generated-linux-binary-manywheel-nightly.yml @@ -153,66 +153,6 @@ jobs: aws-pytorch-uploader-secret-access-key: ${{ secrets.AWS_PYTORCH_UPLOADER_SECRET_ACCESS_KEY }} conda-pytorchbot-token: ${{ secrets.CONDA_PYTORCHBOT_TOKEN }} uses: ./.github/workflows/_binary-upload.yml - manywheel-py3_7-cuda11_3-build: - if: ${{ github.repository_owner == 'pytorch' }} - uses: ./.github/workflows/_binary-build-linux.yml - with: - PYTORCH_ROOT: /pytorch - BUILDER_ROOT: /builder - PACKAGE_TYPE: manywheel - # TODO: This is a legacy variable that we eventually want to get rid of in - # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu113 - GPU_ARCH_VERSION: 11.3 - GPU_ARCH_TYPE: cuda - DOCKER_IMAGE: pytorch/manylinux-builder:cuda11.3 - DESIRED_PYTHON: "3.7" - build_name: manywheel-py3_7-cuda11_3 - build_environment: linux-binary-manywheel - secrets: - github-token: ${{ secrets.GITHUB_TOKEN }} - - manywheel-py3_7-cuda11_3-test: # Testing - if: ${{ github.repository_owner == 'pytorch' }} - needs: manywheel-py3_7-cuda11_3-build - uses: ./.github/workflows/_binary-test-linux.yml - with: - PYTORCH_ROOT: /pytorch - BUILDER_ROOT: /builder - PACKAGE_TYPE: manywheel - # TODO: This is a legacy variable that we eventually want to get rid of in - # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu113 - GPU_ARCH_VERSION: 11.3 - GPU_ARCH_TYPE: cuda - DOCKER_IMAGE: pytorch/manylinux-builder:cuda11.3 - DESIRED_PYTHON: "3.7" - build_name: manywheel-py3_7-cuda11_3 - build_environment: linux-binary-manywheel - runs_on: linux.4xlarge.nvidia.gpu - secrets: - github-token: ${{ secrets.GITHUB_TOKEN }} - manywheel-py3_7-cuda11_3-upload: # Uploading - if: ${{ github.repository_owner == 'pytorch' }} - needs: manywheel-py3_7-cuda11_3-test - with: - PYTORCH_ROOT: /pytorch - BUILDER_ROOT: /builder - PACKAGE_TYPE: manywheel - # TODO: This is a legacy variable that we eventually want to get rid of in - # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu113 - GPU_ARCH_VERSION: 11.3 - GPU_ARCH_TYPE: cuda - DOCKER_IMAGE: pytorch/manylinux-builder:cuda11.3 - DESIRED_PYTHON: "3.7" - build_name: manywheel-py3_7-cuda11_3 - secrets: - github-token: ${{ secrets.GITHUB_TOKEN }} - aws-access-key-id: ${{ secrets.AWS_PYTORCH_UPLOADER_ACCESS_KEY_ID }} - aws-pytorch-uploader-secret-access-key: ${{ secrets.AWS_PYTORCH_UPLOADER_SECRET_ACCESS_KEY }} - conda-pytorchbot-token: ${{ secrets.CONDA_PYTORCHBOT_TOKEN }} - uses: ./.github/workflows/_binary-upload.yml manywheel-py3_7-cuda11_6-build: if: ${{ github.repository_owner == 'pytorch' }} uses: ./.github/workflows/_binary-build-linux.yml @@ -730,66 +670,6 @@ jobs: aws-pytorch-uploader-secret-access-key: ${{ secrets.AWS_PYTORCH_UPLOADER_SECRET_ACCESS_KEY }} conda-pytorchbot-token: ${{ secrets.CONDA_PYTORCHBOT_TOKEN }} uses: ./.github/workflows/_binary-upload.yml - manywheel-py3_8-cuda11_3-build: - if: ${{ github.repository_owner == 'pytorch' }} - uses: ./.github/workflows/_binary-build-linux.yml - with: - PYTORCH_ROOT: /pytorch - BUILDER_ROOT: /builder - PACKAGE_TYPE: manywheel - # TODO: This is a legacy variable that we eventually want to get rid of in - # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu113 - GPU_ARCH_VERSION: 11.3 - GPU_ARCH_TYPE: cuda - DOCKER_IMAGE: pytorch/manylinux-builder:cuda11.3 - DESIRED_PYTHON: "3.8" - build_name: manywheel-py3_8-cuda11_3 - build_environment: linux-binary-manywheel - secrets: - github-token: ${{ secrets.GITHUB_TOKEN }} - - manywheel-py3_8-cuda11_3-test: # Testing - if: ${{ 
github.repository_owner == 'pytorch' }} - needs: manywheel-py3_8-cuda11_3-build - uses: ./.github/workflows/_binary-test-linux.yml - with: - PYTORCH_ROOT: /pytorch - BUILDER_ROOT: /builder - PACKAGE_TYPE: manywheel - # TODO: This is a legacy variable that we eventually want to get rid of in - # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu113 - GPU_ARCH_VERSION: 11.3 - GPU_ARCH_TYPE: cuda - DOCKER_IMAGE: pytorch/manylinux-builder:cuda11.3 - DESIRED_PYTHON: "3.8" - build_name: manywheel-py3_8-cuda11_3 - build_environment: linux-binary-manywheel - runs_on: linux.4xlarge.nvidia.gpu - secrets: - github-token: ${{ secrets.GITHUB_TOKEN }} - manywheel-py3_8-cuda11_3-upload: # Uploading - if: ${{ github.repository_owner == 'pytorch' }} - needs: manywheel-py3_8-cuda11_3-test - with: - PYTORCH_ROOT: /pytorch - BUILDER_ROOT: /builder - PACKAGE_TYPE: manywheel - # TODO: This is a legacy variable that we eventually want to get rid of in - # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu113 - GPU_ARCH_VERSION: 11.3 - GPU_ARCH_TYPE: cuda - DOCKER_IMAGE: pytorch/manylinux-builder:cuda11.3 - DESIRED_PYTHON: "3.8" - build_name: manywheel-py3_8-cuda11_3 - secrets: - github-token: ${{ secrets.GITHUB_TOKEN }} - aws-access-key-id: ${{ secrets.AWS_PYTORCH_UPLOADER_ACCESS_KEY_ID }} - aws-pytorch-uploader-secret-access-key: ${{ secrets.AWS_PYTORCH_UPLOADER_SECRET_ACCESS_KEY }} - conda-pytorchbot-token: ${{ secrets.CONDA_PYTORCHBOT_TOKEN }} - uses: ./.github/workflows/_binary-upload.yml manywheel-py3_8-cuda11_6-build: if: ${{ github.repository_owner == 'pytorch' }} uses: ./.github/workflows/_binary-build-linux.yml @@ -1307,66 +1187,6 @@ jobs: aws-pytorch-uploader-secret-access-key: ${{ secrets.AWS_PYTORCH_UPLOADER_SECRET_ACCESS_KEY }} conda-pytorchbot-token: ${{ secrets.CONDA_PYTORCHBOT_TOKEN }} uses: ./.github/workflows/_binary-upload.yml - manywheel-py3_9-cuda11_3-build: - if: ${{ github.repository_owner == 'pytorch' }} - uses: ./.github/workflows/_binary-build-linux.yml - with: - PYTORCH_ROOT: /pytorch - BUILDER_ROOT: /builder - PACKAGE_TYPE: manywheel - # TODO: This is a legacy variable that we eventually want to get rid of in - # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu113 - GPU_ARCH_VERSION: 11.3 - GPU_ARCH_TYPE: cuda - DOCKER_IMAGE: pytorch/manylinux-builder:cuda11.3 - DESIRED_PYTHON: "3.9" - build_name: manywheel-py3_9-cuda11_3 - build_environment: linux-binary-manywheel - secrets: - github-token: ${{ secrets.GITHUB_TOKEN }} - - manywheel-py3_9-cuda11_3-test: # Testing - if: ${{ github.repository_owner == 'pytorch' }} - needs: manywheel-py3_9-cuda11_3-build - uses: ./.github/workflows/_binary-test-linux.yml - with: - PYTORCH_ROOT: /pytorch - BUILDER_ROOT: /builder - PACKAGE_TYPE: manywheel - # TODO: This is a legacy variable that we eventually want to get rid of in - # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu113 - GPU_ARCH_VERSION: 11.3 - GPU_ARCH_TYPE: cuda - DOCKER_IMAGE: pytorch/manylinux-builder:cuda11.3 - DESIRED_PYTHON: "3.9" - build_name: manywheel-py3_9-cuda11_3 - build_environment: linux-binary-manywheel - runs_on: linux.4xlarge.nvidia.gpu - secrets: - github-token: ${{ secrets.GITHUB_TOKEN }} - manywheel-py3_9-cuda11_3-upload: # Uploading - if: ${{ github.repository_owner == 'pytorch' }} - needs: manywheel-py3_9-cuda11_3-test - with: - PYTORCH_ROOT: /pytorch - BUILDER_ROOT: /builder - PACKAGE_TYPE: manywheel - # TODO: This is a legacy variable that we eventually want to get rid of in - # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu113 - GPU_ARCH_VERSION: 11.3 - GPU_ARCH_TYPE: cuda - 
DOCKER_IMAGE: pytorch/manylinux-builder:cuda11.3 - DESIRED_PYTHON: "3.9" - build_name: manywheel-py3_9-cuda11_3 - secrets: - github-token: ${{ secrets.GITHUB_TOKEN }} - aws-access-key-id: ${{ secrets.AWS_PYTORCH_UPLOADER_ACCESS_KEY_ID }} - aws-pytorch-uploader-secret-access-key: ${{ secrets.AWS_PYTORCH_UPLOADER_SECRET_ACCESS_KEY }} - conda-pytorchbot-token: ${{ secrets.CONDA_PYTORCHBOT_TOKEN }} - uses: ./.github/workflows/_binary-upload.yml manywheel-py3_9-cuda11_6-build: if: ${{ github.repository_owner == 'pytorch' }} uses: ./.github/workflows/_binary-build-linux.yml @@ -1884,66 +1704,6 @@ jobs: aws-pytorch-uploader-secret-access-key: ${{ secrets.AWS_PYTORCH_UPLOADER_SECRET_ACCESS_KEY }} conda-pytorchbot-token: ${{ secrets.CONDA_PYTORCHBOT_TOKEN }} uses: ./.github/workflows/_binary-upload.yml - manywheel-py3_10-cuda11_3-build: - if: ${{ github.repository_owner == 'pytorch' }} - uses: ./.github/workflows/_binary-build-linux.yml - with: - PYTORCH_ROOT: /pytorch - BUILDER_ROOT: /builder - PACKAGE_TYPE: manywheel - # TODO: This is a legacy variable that we eventually want to get rid of in - # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu113 - GPU_ARCH_VERSION: 11.3 - GPU_ARCH_TYPE: cuda - DOCKER_IMAGE: pytorch/manylinux-builder:cuda11.3 - DESIRED_PYTHON: "3.10" - build_name: manywheel-py3_10-cuda11_3 - build_environment: linux-binary-manywheel - secrets: - github-token: ${{ secrets.GITHUB_TOKEN }} - - manywheel-py3_10-cuda11_3-test: # Testing - if: ${{ github.repository_owner == 'pytorch' }} - needs: manywheel-py3_10-cuda11_3-build - uses: ./.github/workflows/_binary-test-linux.yml - with: - PYTORCH_ROOT: /pytorch - BUILDER_ROOT: /builder - PACKAGE_TYPE: manywheel - # TODO: This is a legacy variable that we eventually want to get rid of in - # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu113 - GPU_ARCH_VERSION: 11.3 - GPU_ARCH_TYPE: cuda - DOCKER_IMAGE: pytorch/manylinux-builder:cuda11.3 - DESIRED_PYTHON: "3.10" - build_name: manywheel-py3_10-cuda11_3 - build_environment: linux-binary-manywheel - runs_on: linux.4xlarge.nvidia.gpu - secrets: - github-token: ${{ secrets.GITHUB_TOKEN }} - manywheel-py3_10-cuda11_3-upload: # Uploading - if: ${{ github.repository_owner == 'pytorch' }} - needs: manywheel-py3_10-cuda11_3-test - with: - PYTORCH_ROOT: /pytorch - BUILDER_ROOT: /builder - PACKAGE_TYPE: manywheel - # TODO: This is a legacy variable that we eventually want to get rid of in - # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu113 - GPU_ARCH_VERSION: 11.3 - GPU_ARCH_TYPE: cuda - DOCKER_IMAGE: pytorch/manylinux-builder:cuda11.3 - DESIRED_PYTHON: "3.10" - build_name: manywheel-py3_10-cuda11_3 - secrets: - github-token: ${{ secrets.GITHUB_TOKEN }} - aws-access-key-id: ${{ secrets.AWS_PYTORCH_UPLOADER_ACCESS_KEY_ID }} - aws-pytorch-uploader-secret-access-key: ${{ secrets.AWS_PYTORCH_UPLOADER_SECRET_ACCESS_KEY }} - conda-pytorchbot-token: ${{ secrets.CONDA_PYTORCHBOT_TOKEN }} - uses: ./.github/workflows/_binary-upload.yml manywheel-py3_10-cuda11_6-build: if: ${{ github.repository_owner == 'pytorch' }} uses: ./.github/workflows/_binary-build-linux.yml @@ -2461,66 +2221,6 @@ jobs: aws-pytorch-uploader-secret-access-key: ${{ secrets.AWS_PYTORCH_UPLOADER_SECRET_ACCESS_KEY }} conda-pytorchbot-token: ${{ secrets.CONDA_PYTORCHBOT_TOKEN }} uses: ./.github/workflows/_binary-upload.yml - manywheel-py3_11-cuda11_3-build: - if: ${{ github.repository_owner == 'pytorch' }} - uses: ./.github/workflows/_binary-build-linux.yml - with: - PYTORCH_ROOT: /pytorch - BUILDER_ROOT: /builder - PACKAGE_TYPE: manywheel - # TODO: 
This is a legacy variable that we eventually want to get rid of in - # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu113 - GPU_ARCH_VERSION: 11.3 - GPU_ARCH_TYPE: cuda - DOCKER_IMAGE: pytorch/manylinux-builder:cuda11.3 - DESIRED_PYTHON: "3.11" - build_name: manywheel-py3_11-cuda11_3 - build_environment: linux-binary-manywheel - secrets: - github-token: ${{ secrets.GITHUB_TOKEN }} - - manywheel-py3_11-cuda11_3-test: # Testing - if: ${{ github.repository_owner == 'pytorch' }} - needs: manywheel-py3_11-cuda11_3-build - uses: ./.github/workflows/_binary-test-linux.yml - with: - PYTORCH_ROOT: /pytorch - BUILDER_ROOT: /builder - PACKAGE_TYPE: manywheel - # TODO: This is a legacy variable that we eventually want to get rid of in - # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu113 - GPU_ARCH_VERSION: 11.3 - GPU_ARCH_TYPE: cuda - DOCKER_IMAGE: pytorch/manylinux-builder:cuda11.3 - DESIRED_PYTHON: "3.11" - build_name: manywheel-py3_11-cuda11_3 - build_environment: linux-binary-manywheel - runs_on: linux.4xlarge.nvidia.gpu - secrets: - github-token: ${{ secrets.GITHUB_TOKEN }} - manywheel-py3_11-cuda11_3-upload: # Uploading - if: ${{ github.repository_owner == 'pytorch' }} - needs: manywheel-py3_11-cuda11_3-test - with: - PYTORCH_ROOT: /pytorch - BUILDER_ROOT: /builder - PACKAGE_TYPE: manywheel - # TODO: This is a legacy variable that we eventually want to get rid of in - # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu113 - GPU_ARCH_VERSION: 11.3 - GPU_ARCH_TYPE: cuda - DOCKER_IMAGE: pytorch/manylinux-builder:cuda11.3 - DESIRED_PYTHON: "3.11" - build_name: manywheel-py3_11-cuda11_3 - secrets: - github-token: ${{ secrets.GITHUB_TOKEN }} - aws-access-key-id: ${{ secrets.AWS_PYTORCH_UPLOADER_ACCESS_KEY_ID }} - aws-pytorch-uploader-secret-access-key: ${{ secrets.AWS_PYTORCH_UPLOADER_SECRET_ACCESS_KEY }} - conda-pytorchbot-token: ${{ secrets.CONDA_PYTORCHBOT_TOKEN }} - uses: ./.github/workflows/_binary-upload.yml manywheel-py3_11-cuda11_6-build: if: ${{ github.repository_owner == 'pytorch' }} uses: ./.github/workflows/_binary-build-linux.yml diff --git a/.github/workflows/generated-windows-binary-conda-nightly.yml b/.github/workflows/generated-windows-binary-conda-nightly.yml index 9b78d6139700..df7cc13d8a26 100644 --- a/.github/workflows/generated-windows-binary-conda-nightly.yml +++ b/.github/workflows/generated-windows-binary-conda-nightly.yml @@ -256,7 +256,7 @@ jobs: aws-pytorch-uploader-secret-access-key: ${{ secrets.AWS_PYTORCH_UPLOADER_SECRET_ACCESS_KEY }} conda-pytorchbot-token: ${{ secrets.CONDA_PYTORCHBOT_TOKEN }} uses: ./.github/workflows/_binary-upload.yml - conda-py3_7-cuda11_3-build: + conda-py3_7-cuda11_6-build: if: ${{ github.repository_owner == 'pytorch' }} runs-on: windows.4xlarge timeout-minutes: 240 @@ -266,8 +266,8 @@ jobs: PACKAGE_TYPE: conda # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu113 - GPU_ARCH_VERSION: 11.3 + DESIRED_CUDA: cu116 + GPU_ARCH_VERSION: 11.6 GPU_ARCH_TYPE: cuda SKIP_ALL_TESTS: 1 DESIRED_PYTHON: "3.7" @@ -343,7 +343,7 @@ jobs: - uses: actions/upload-artifact@v3 if: always() with: - name: conda-py3_7-cuda11_3 + name: conda-py3_7-cuda11_6 retention-days: 14 if-no-files-found: error path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}" @@ -360,9 +360,9 @@ jobs: if: always() run: | .github\scripts\kill_active_ssh_sessions.ps1 - conda-py3_7-cuda11_3-test: # Testing + conda-py3_7-cuda11_6-test: # Testing if: ${{ github.repository_owner == 'pytorch' }} - needs: conda-py3_7-cuda11_3-build + needs: 
conda-py3_7-cuda11_6-build runs-on: windows.8xlarge.nvidia.gpu timeout-minutes: 240 env: @@ -371,8 +371,8 @@ jobs: PACKAGE_TYPE: conda # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu113 - GPU_ARCH_VERSION: 11.3 + DESIRED_CUDA: cu116 + GPU_ARCH_VERSION: 11.6 GPU_ARCH_TYPE: cuda SKIP_ALL_TESTS: 1 DESIRED_PYTHON: "3.7" @@ -417,7 +417,7 @@ jobs: - uses: actions/download-artifact@v3 name: Download Build Artifacts with: - name: conda-py3_7-cuda11_3 + name: conda-py3_7-cuda11_6 path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}" - name: Checkout PyTorch uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 @@ -463,27 +463,27 @@ jobs: if: always() run: | .github\scripts\kill_active_ssh_sessions.ps1 - conda-py3_7-cuda11_3-upload: # Uploading + conda-py3_7-cuda11_6-upload: # Uploading if: ${{ github.repository_owner == 'pytorch' }} - needs: conda-py3_7-cuda11_3-test + needs: conda-py3_7-cuda11_6-test with: PYTORCH_ROOT: ${{ github.workspace }}/pytorch BUILDER_ROOT: ${{ github.workspace }}/builder PACKAGE_TYPE: conda # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu113 - GPU_ARCH_VERSION: 11.3 + DESIRED_CUDA: cu116 + GPU_ARCH_VERSION: 11.6 GPU_ARCH_TYPE: cuda DESIRED_PYTHON: "3.7" - build_name: conda-py3_7-cuda11_3 + build_name: conda-py3_7-cuda11_6 secrets: github-token: ${{ secrets.GITHUB_TOKEN }} aws-access-key-id: ${{ secrets.AWS_PYTORCH_UPLOADER_ACCESS_KEY_ID }} aws-pytorch-uploader-secret-access-key: ${{ secrets.AWS_PYTORCH_UPLOADER_SECRET_ACCESS_KEY }} conda-pytorchbot-token: ${{ secrets.CONDA_PYTORCHBOT_TOKEN }} uses: ./.github/workflows/_binary-upload.yml - conda-py3_7-cuda11_6-build: + conda-py3_7-cuda11_7-build: if: ${{ github.repository_owner == 'pytorch' }} runs-on: windows.4xlarge timeout-minutes: 240 @@ -493,8 +493,8 @@ jobs: PACKAGE_TYPE: conda # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu116 - GPU_ARCH_VERSION: 11.6 + DESIRED_CUDA: cu117 + GPU_ARCH_VERSION: 11.7 GPU_ARCH_TYPE: cuda SKIP_ALL_TESTS: 1 DESIRED_PYTHON: "3.7" @@ -570,7 +570,7 @@ jobs: - uses: actions/upload-artifact@v3 if: always() with: - name: conda-py3_7-cuda11_6 + name: conda-py3_7-cuda11_7 retention-days: 14 if-no-files-found: error path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}" @@ -587,9 +587,9 @@ jobs: if: always() run: | .github\scripts\kill_active_ssh_sessions.ps1 - conda-py3_7-cuda11_6-test: # Testing + conda-py3_7-cuda11_7-test: # Testing if: ${{ github.repository_owner == 'pytorch' }} - needs: conda-py3_7-cuda11_6-build + needs: conda-py3_7-cuda11_7-build runs-on: windows.8xlarge.nvidia.gpu timeout-minutes: 240 env: @@ -598,8 +598,8 @@ jobs: PACKAGE_TYPE: conda # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu116 - GPU_ARCH_VERSION: 11.6 + DESIRED_CUDA: cu117 + GPU_ARCH_VERSION: 11.7 GPU_ARCH_TYPE: cuda SKIP_ALL_TESTS: 1 DESIRED_PYTHON: "3.7" @@ -644,7 +644,7 @@ jobs: - uses: actions/download-artifact@v3 name: Download Build Artifacts with: - name: conda-py3_7-cuda11_6 + name: conda-py3_7-cuda11_7 path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}" - name: Checkout PyTorch uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 @@ -690,27 +690,27 @@ jobs: if: always() run: | .github\scripts\kill_active_ssh_sessions.ps1 - conda-py3_7-cuda11_6-upload: # Uploading + conda-py3_7-cuda11_7-upload: # Uploading if: ${{ 
github.repository_owner == 'pytorch' }} - needs: conda-py3_7-cuda11_6-test + needs: conda-py3_7-cuda11_7-test with: PYTORCH_ROOT: ${{ github.workspace }}/pytorch BUILDER_ROOT: ${{ github.workspace }}/builder PACKAGE_TYPE: conda # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu116 - GPU_ARCH_VERSION: 11.6 + DESIRED_CUDA: cu117 + GPU_ARCH_VERSION: 11.7 GPU_ARCH_TYPE: cuda DESIRED_PYTHON: "3.7" - build_name: conda-py3_7-cuda11_6 + build_name: conda-py3_7-cuda11_7 secrets: github-token: ${{ secrets.GITHUB_TOKEN }} aws-access-key-id: ${{ secrets.AWS_PYTORCH_UPLOADER_ACCESS_KEY_ID }} aws-pytorch-uploader-secret-access-key: ${{ secrets.AWS_PYTORCH_UPLOADER_SECRET_ACCESS_KEY }} conda-pytorchbot-token: ${{ secrets.CONDA_PYTORCHBOT_TOKEN }} uses: ./.github/workflows/_binary-upload.yml - conda-py3_7-cuda11_7-build: + conda-py3_8-cpu-build: if: ${{ github.repository_owner == 'pytorch' }} runs-on: windows.4xlarge timeout-minutes: 240 @@ -720,11 +720,10 @@ jobs: PACKAGE_TYPE: conda # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu117 - GPU_ARCH_VERSION: 11.7 - GPU_ARCH_TYPE: cuda + DESIRED_CUDA: cpu + GPU_ARCH_TYPE: cpu SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: "3.7" + DESIRED_PYTHON: "3.8" steps: - name: Display EC2 information shell: bash @@ -797,7 +796,7 @@ jobs: - uses: actions/upload-artifact@v3 if: always() with: - name: conda-py3_7-cuda11_7 + name: conda-py3_8-cpu retention-days: 14 if-no-files-found: error path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}" @@ -814,10 +813,10 @@ jobs: if: always() run: | .github\scripts\kill_active_ssh_sessions.ps1 - conda-py3_7-cuda11_7-test: # Testing + conda-py3_8-cpu-test: # Testing if: ${{ github.repository_owner == 'pytorch' }} - needs: conda-py3_7-cuda11_7-build - runs-on: windows.8xlarge.nvidia.gpu + needs: conda-py3_8-cpu-build + runs-on: windows.4xlarge timeout-minutes: 240 env: PYTORCH_ROOT: ${{ github.workspace }}/pytorch @@ -825,11 +824,10 @@ jobs: PACKAGE_TYPE: conda # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu117 - GPU_ARCH_VERSION: 11.7 - GPU_ARCH_TYPE: cuda + DESIRED_CUDA: cpu + GPU_ARCH_TYPE: cpu SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: "3.7" + DESIRED_PYTHON: "3.8" steps: - name: Display EC2 information shell: bash @@ -871,7 +869,7 @@ jobs: - uses: actions/download-artifact@v3 name: Download Build Artifacts with: - name: conda-py3_7-cuda11_7 + name: conda-py3_8-cpu path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}" - name: Checkout PyTorch uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 @@ -917,27 +915,26 @@ jobs: if: always() run: | .github\scripts\kill_active_ssh_sessions.ps1 - conda-py3_7-cuda11_7-upload: # Uploading + conda-py3_8-cpu-upload: # Uploading if: ${{ github.repository_owner == 'pytorch' }} - needs: conda-py3_7-cuda11_7-test + needs: conda-py3_8-cpu-test with: PYTORCH_ROOT: ${{ github.workspace }}/pytorch BUILDER_ROOT: ${{ github.workspace }}/builder PACKAGE_TYPE: conda # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu117 - GPU_ARCH_VERSION: 11.7 - GPU_ARCH_TYPE: cuda - DESIRED_PYTHON: "3.7" - build_name: conda-py3_7-cuda11_7 + DESIRED_CUDA: cpu + GPU_ARCH_TYPE: cpu + DESIRED_PYTHON: "3.8" + build_name: conda-py3_8-cpu secrets: github-token: ${{ secrets.GITHUB_TOKEN }} aws-access-key-id: ${{ secrets.AWS_PYTORCH_UPLOADER_ACCESS_KEY_ID }} 
aws-pytorch-uploader-secret-access-key: ${{ secrets.AWS_PYTORCH_UPLOADER_SECRET_ACCESS_KEY }} conda-pytorchbot-token: ${{ secrets.CONDA_PYTORCHBOT_TOKEN }} uses: ./.github/workflows/_binary-upload.yml - conda-py3_8-cpu-build: + conda-py3_8-cuda11_6-build: if: ${{ github.repository_owner == 'pytorch' }} runs-on: windows.4xlarge timeout-minutes: 240 @@ -947,8 +944,9 @@ jobs: PACKAGE_TYPE: conda # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cpu - GPU_ARCH_TYPE: cpu + DESIRED_CUDA: cu116 + GPU_ARCH_VERSION: 11.6 + GPU_ARCH_TYPE: cuda SKIP_ALL_TESTS: 1 DESIRED_PYTHON: "3.8" steps: @@ -1023,7 +1021,7 @@ jobs: - uses: actions/upload-artifact@v3 if: always() with: - name: conda-py3_8-cpu + name: conda-py3_8-cuda11_6 retention-days: 14 if-no-files-found: error path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}" @@ -1040,10 +1038,10 @@ jobs: if: always() run: | .github\scripts\kill_active_ssh_sessions.ps1 - conda-py3_8-cpu-test: # Testing + conda-py3_8-cuda11_6-test: # Testing if: ${{ github.repository_owner == 'pytorch' }} - needs: conda-py3_8-cpu-build - runs-on: windows.4xlarge + needs: conda-py3_8-cuda11_6-build + runs-on: windows.8xlarge.nvidia.gpu timeout-minutes: 240 env: PYTORCH_ROOT: ${{ github.workspace }}/pytorch @@ -1051,8 +1049,9 @@ jobs: PACKAGE_TYPE: conda # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cpu - GPU_ARCH_TYPE: cpu + DESIRED_CUDA: cu116 + GPU_ARCH_VERSION: 11.6 + GPU_ARCH_TYPE: cuda SKIP_ALL_TESTS: 1 DESIRED_PYTHON: "3.8" steps: @@ -1096,7 +1095,7 @@ jobs: - uses: actions/download-artifact@v3 name: Download Build Artifacts with: - name: conda-py3_8-cpu + name: conda-py3_8-cuda11_6 path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}" - name: Checkout PyTorch uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 @@ -1142,26 +1141,27 @@ jobs: if: always() run: | .github\scripts\kill_active_ssh_sessions.ps1 - conda-py3_8-cpu-upload: # Uploading + conda-py3_8-cuda11_6-upload: # Uploading if: ${{ github.repository_owner == 'pytorch' }} - needs: conda-py3_8-cpu-test + needs: conda-py3_8-cuda11_6-test with: PYTORCH_ROOT: ${{ github.workspace }}/pytorch BUILDER_ROOT: ${{ github.workspace }}/builder PACKAGE_TYPE: conda # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cpu - GPU_ARCH_TYPE: cpu + DESIRED_CUDA: cu116 + GPU_ARCH_VERSION: 11.6 + GPU_ARCH_TYPE: cuda DESIRED_PYTHON: "3.8" - build_name: conda-py3_8-cpu + build_name: conda-py3_8-cuda11_6 secrets: github-token: ${{ secrets.GITHUB_TOKEN }} aws-access-key-id: ${{ secrets.AWS_PYTORCH_UPLOADER_ACCESS_KEY_ID }} aws-pytorch-uploader-secret-access-key: ${{ secrets.AWS_PYTORCH_UPLOADER_SECRET_ACCESS_KEY }} conda-pytorchbot-token: ${{ secrets.CONDA_PYTORCHBOT_TOKEN }} uses: ./.github/workflows/_binary-upload.yml - conda-py3_8-cuda11_3-build: + conda-py3_8-cuda11_7-build: if: ${{ github.repository_owner == 'pytorch' }} runs-on: windows.4xlarge timeout-minutes: 240 @@ -1171,8 +1171,8 @@ jobs: PACKAGE_TYPE: conda # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu113 - GPU_ARCH_VERSION: 11.3 + DESIRED_CUDA: cu117 + GPU_ARCH_VERSION: 11.7 GPU_ARCH_TYPE: cuda SKIP_ALL_TESTS: 1 DESIRED_PYTHON: "3.8" @@ -1248,7 +1248,7 @@ jobs: - uses: actions/upload-artifact@v3 if: always() with: - name: conda-py3_8-cuda11_3 + name: conda-py3_8-cuda11_7 retention-days: 14 
if-no-files-found: error path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}" @@ -1265,9 +1265,9 @@ jobs: if: always() run: | .github\scripts\kill_active_ssh_sessions.ps1 - conda-py3_8-cuda11_3-test: # Testing + conda-py3_8-cuda11_7-test: # Testing if: ${{ github.repository_owner == 'pytorch' }} - needs: conda-py3_8-cuda11_3-build + needs: conda-py3_8-cuda11_7-build runs-on: windows.8xlarge.nvidia.gpu timeout-minutes: 240 env: @@ -1276,8 +1276,8 @@ jobs: PACKAGE_TYPE: conda # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu113 - GPU_ARCH_VERSION: 11.3 + DESIRED_CUDA: cu117 + GPU_ARCH_VERSION: 11.7 GPU_ARCH_TYPE: cuda SKIP_ALL_TESTS: 1 DESIRED_PYTHON: "3.8" @@ -1322,7 +1322,7 @@ jobs: - uses: actions/download-artifact@v3 name: Download Build Artifacts with: - name: conda-py3_8-cuda11_3 + name: conda-py3_8-cuda11_7 path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}" - name: Checkout PyTorch uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 @@ -1368,27 +1368,27 @@ jobs: if: always() run: | .github\scripts\kill_active_ssh_sessions.ps1 - conda-py3_8-cuda11_3-upload: # Uploading + conda-py3_8-cuda11_7-upload: # Uploading if: ${{ github.repository_owner == 'pytorch' }} - needs: conda-py3_8-cuda11_3-test + needs: conda-py3_8-cuda11_7-test with: PYTORCH_ROOT: ${{ github.workspace }}/pytorch BUILDER_ROOT: ${{ github.workspace }}/builder PACKAGE_TYPE: conda # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu113 - GPU_ARCH_VERSION: 11.3 + DESIRED_CUDA: cu117 + GPU_ARCH_VERSION: 11.7 GPU_ARCH_TYPE: cuda DESIRED_PYTHON: "3.8" - build_name: conda-py3_8-cuda11_3 + build_name: conda-py3_8-cuda11_7 secrets: github-token: ${{ secrets.GITHUB_TOKEN }} aws-access-key-id: ${{ secrets.AWS_PYTORCH_UPLOADER_ACCESS_KEY_ID }} aws-pytorch-uploader-secret-access-key: ${{ secrets.AWS_PYTORCH_UPLOADER_SECRET_ACCESS_KEY }} conda-pytorchbot-token: ${{ secrets.CONDA_PYTORCHBOT_TOKEN }} uses: ./.github/workflows/_binary-upload.yml - conda-py3_8-cuda11_6-build: + conda-py3_9-cpu-build: if: ${{ github.repository_owner == 'pytorch' }} runs-on: windows.4xlarge timeout-minutes: 240 @@ -1398,11 +1398,10 @@ jobs: PACKAGE_TYPE: conda # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu116 - GPU_ARCH_VERSION: 11.6 - GPU_ARCH_TYPE: cuda + DESIRED_CUDA: cpu + GPU_ARCH_TYPE: cpu SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: "3.8" + DESIRED_PYTHON: "3.9" steps: - name: Display EC2 information shell: bash @@ -1475,7 +1474,7 @@ jobs: - uses: actions/upload-artifact@v3 if: always() with: - name: conda-py3_8-cuda11_6 + name: conda-py3_9-cpu retention-days: 14 if-no-files-found: error path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}" @@ -1492,10 +1491,10 @@ jobs: if: always() run: | .github\scripts\kill_active_ssh_sessions.ps1 - conda-py3_8-cuda11_6-test: # Testing + conda-py3_9-cpu-test: # Testing if: ${{ github.repository_owner == 'pytorch' }} - needs: conda-py3_8-cuda11_6-build - runs-on: windows.8xlarge.nvidia.gpu + needs: conda-py3_9-cpu-build + runs-on: windows.4xlarge timeout-minutes: 240 env: PYTORCH_ROOT: ${{ github.workspace }}/pytorch @@ -1503,11 +1502,10 @@ jobs: PACKAGE_TYPE: conda # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu116 - GPU_ARCH_VERSION: 11.6 - GPU_ARCH_TYPE: cuda + DESIRED_CUDA: cpu + GPU_ARCH_TYPE: cpu SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: "3.8" + 
DESIRED_PYTHON: "3.9" steps: - name: Display EC2 information shell: bash @@ -1549,7 +1547,7 @@ jobs: - uses: actions/download-artifact@v3 name: Download Build Artifacts with: - name: conda-py3_8-cuda11_6 + name: conda-py3_9-cpu path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}" - name: Checkout PyTorch uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 @@ -1595,27 +1593,26 @@ jobs: if: always() run: | .github\scripts\kill_active_ssh_sessions.ps1 - conda-py3_8-cuda11_6-upload: # Uploading + conda-py3_9-cpu-upload: # Uploading if: ${{ github.repository_owner == 'pytorch' }} - needs: conda-py3_8-cuda11_6-test + needs: conda-py3_9-cpu-test with: PYTORCH_ROOT: ${{ github.workspace }}/pytorch BUILDER_ROOT: ${{ github.workspace }}/builder PACKAGE_TYPE: conda # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu116 - GPU_ARCH_VERSION: 11.6 - GPU_ARCH_TYPE: cuda - DESIRED_PYTHON: "3.8" - build_name: conda-py3_8-cuda11_6 + DESIRED_CUDA: cpu + GPU_ARCH_TYPE: cpu + DESIRED_PYTHON: "3.9" + build_name: conda-py3_9-cpu secrets: github-token: ${{ secrets.GITHUB_TOKEN }} aws-access-key-id: ${{ secrets.AWS_PYTORCH_UPLOADER_ACCESS_KEY_ID }} aws-pytorch-uploader-secret-access-key: ${{ secrets.AWS_PYTORCH_UPLOADER_SECRET_ACCESS_KEY }} conda-pytorchbot-token: ${{ secrets.CONDA_PYTORCHBOT_TOKEN }} uses: ./.github/workflows/_binary-upload.yml - conda-py3_8-cuda11_7-build: + conda-py3_9-cuda11_6-build: if: ${{ github.repository_owner == 'pytorch' }} runs-on: windows.4xlarge timeout-minutes: 240 @@ -1625,11 +1622,11 @@ jobs: PACKAGE_TYPE: conda # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu117 - GPU_ARCH_VERSION: 11.7 + DESIRED_CUDA: cu116 + GPU_ARCH_VERSION: 11.6 GPU_ARCH_TYPE: cuda SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: "3.8" + DESIRED_PYTHON: "3.9" steps: - name: Display EC2 information shell: bash @@ -1702,7 +1699,7 @@ jobs: - uses: actions/upload-artifact@v3 if: always() with: - name: conda-py3_8-cuda11_7 + name: conda-py3_9-cuda11_6 retention-days: 14 if-no-files-found: error path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}" @@ -1719,9 +1716,9 @@ jobs: if: always() run: | .github\scripts\kill_active_ssh_sessions.ps1 - conda-py3_8-cuda11_7-test: # Testing + conda-py3_9-cuda11_6-test: # Testing if: ${{ github.repository_owner == 'pytorch' }} - needs: conda-py3_8-cuda11_7-build + needs: conda-py3_9-cuda11_6-build runs-on: windows.8xlarge.nvidia.gpu timeout-minutes: 240 env: @@ -1730,11 +1727,11 @@ jobs: PACKAGE_TYPE: conda # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu117 - GPU_ARCH_VERSION: 11.7 + DESIRED_CUDA: cu116 + GPU_ARCH_VERSION: 11.6 GPU_ARCH_TYPE: cuda SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: "3.8" + DESIRED_PYTHON: "3.9" steps: - name: Display EC2 information shell: bash @@ -1776,7 +1773,7 @@ jobs: - uses: actions/download-artifact@v3 name: Download Build Artifacts with: - name: conda-py3_8-cuda11_7 + name: conda-py3_9-cuda11_6 path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}" - name: Checkout PyTorch uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 @@ -1822,27 +1819,27 @@ jobs: if: always() run: | .github\scripts\kill_active_ssh_sessions.ps1 - conda-py3_8-cuda11_7-upload: # Uploading + conda-py3_9-cuda11_6-upload: # Uploading if: ${{ github.repository_owner == 'pytorch' }} - needs: conda-py3_8-cuda11_7-test + needs: conda-py3_9-cuda11_6-test with: PYTORCH_ROOT: ${{ 
github.workspace }}/pytorch BUILDER_ROOT: ${{ github.workspace }}/builder PACKAGE_TYPE: conda # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu117 - GPU_ARCH_VERSION: 11.7 + DESIRED_CUDA: cu116 + GPU_ARCH_VERSION: 11.6 GPU_ARCH_TYPE: cuda - DESIRED_PYTHON: "3.8" - build_name: conda-py3_8-cuda11_7 + DESIRED_PYTHON: "3.9" + build_name: conda-py3_9-cuda11_6 secrets: github-token: ${{ secrets.GITHUB_TOKEN }} aws-access-key-id: ${{ secrets.AWS_PYTORCH_UPLOADER_ACCESS_KEY_ID }} aws-pytorch-uploader-secret-access-key: ${{ secrets.AWS_PYTORCH_UPLOADER_SECRET_ACCESS_KEY }} conda-pytorchbot-token: ${{ secrets.CONDA_PYTORCHBOT_TOKEN }} uses: ./.github/workflows/_binary-upload.yml - conda-py3_9-cpu-build: + conda-py3_9-cuda11_7-build: if: ${{ github.repository_owner == 'pytorch' }} runs-on: windows.4xlarge timeout-minutes: 240 @@ -1852,8 +1849,9 @@ jobs: PACKAGE_TYPE: conda # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cpu - GPU_ARCH_TYPE: cpu + DESIRED_CUDA: cu117 + GPU_ARCH_VERSION: 11.7 + GPU_ARCH_TYPE: cuda SKIP_ALL_TESTS: 1 DESIRED_PYTHON: "3.9" steps: @@ -1928,7 +1926,7 @@ jobs: - uses: actions/upload-artifact@v3 if: always() with: - name: conda-py3_9-cpu + name: conda-py3_9-cuda11_7 retention-days: 14 if-no-files-found: error path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}" @@ -1945,10 +1943,10 @@ jobs: if: always() run: | .github\scripts\kill_active_ssh_sessions.ps1 - conda-py3_9-cpu-test: # Testing + conda-py3_9-cuda11_7-test: # Testing if: ${{ github.repository_owner == 'pytorch' }} - needs: conda-py3_9-cpu-build - runs-on: windows.4xlarge + needs: conda-py3_9-cuda11_7-build + runs-on: windows.8xlarge.nvidia.gpu timeout-minutes: 240 env: PYTORCH_ROOT: ${{ github.workspace }}/pytorch @@ -1956,8 +1954,9 @@ jobs: PACKAGE_TYPE: conda # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cpu - GPU_ARCH_TYPE: cpu + DESIRED_CUDA: cu117 + GPU_ARCH_VERSION: 11.7 + GPU_ARCH_TYPE: cuda SKIP_ALL_TESTS: 1 DESIRED_PYTHON: "3.9" steps: @@ -2001,7 +2000,7 @@ jobs: - uses: actions/download-artifact@v3 name: Download Build Artifacts with: - name: conda-py3_9-cpu + name: conda-py3_9-cuda11_7 path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}" - name: Checkout PyTorch uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 @@ -2047,26 +2046,27 @@ jobs: if: always() run: | .github\scripts\kill_active_ssh_sessions.ps1 - conda-py3_9-cpu-upload: # Uploading + conda-py3_9-cuda11_7-upload: # Uploading if: ${{ github.repository_owner == 'pytorch' }} - needs: conda-py3_9-cpu-test + needs: conda-py3_9-cuda11_7-test with: PYTORCH_ROOT: ${{ github.workspace }}/pytorch BUILDER_ROOT: ${{ github.workspace }}/builder PACKAGE_TYPE: conda # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cpu - GPU_ARCH_TYPE: cpu + DESIRED_CUDA: cu117 + GPU_ARCH_VERSION: 11.7 + GPU_ARCH_TYPE: cuda DESIRED_PYTHON: "3.9" - build_name: conda-py3_9-cpu + build_name: conda-py3_9-cuda11_7 secrets: github-token: ${{ secrets.GITHUB_TOKEN }} aws-access-key-id: ${{ secrets.AWS_PYTORCH_UPLOADER_ACCESS_KEY_ID }} aws-pytorch-uploader-secret-access-key: ${{ secrets.AWS_PYTORCH_UPLOADER_SECRET_ACCESS_KEY }} conda-pytorchbot-token: ${{ secrets.CONDA_PYTORCHBOT_TOKEN }} uses: ./.github/workflows/_binary-upload.yml - conda-py3_9-cuda11_3-build: + conda-py3_10-cpu-build: if: ${{ 
github.repository_owner == 'pytorch' }} runs-on: windows.4xlarge timeout-minutes: 240 @@ -2076,11 +2076,10 @@ jobs: PACKAGE_TYPE: conda # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu113 - GPU_ARCH_VERSION: 11.3 - GPU_ARCH_TYPE: cuda + DESIRED_CUDA: cpu + GPU_ARCH_TYPE: cpu SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: "3.9" + DESIRED_PYTHON: "3.10" steps: - name: Display EC2 information shell: bash @@ -2153,7 +2152,7 @@ jobs: - uses: actions/upload-artifact@v3 if: always() with: - name: conda-py3_9-cuda11_3 + name: conda-py3_10-cpu retention-days: 14 if-no-files-found: error path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}" @@ -2170,10 +2169,10 @@ jobs: if: always() run: | .github\scripts\kill_active_ssh_sessions.ps1 - conda-py3_9-cuda11_3-test: # Testing + conda-py3_10-cpu-test: # Testing if: ${{ github.repository_owner == 'pytorch' }} - needs: conda-py3_9-cuda11_3-build - runs-on: windows.8xlarge.nvidia.gpu + needs: conda-py3_10-cpu-build + runs-on: windows.4xlarge timeout-minutes: 240 env: PYTORCH_ROOT: ${{ github.workspace }}/pytorch @@ -2181,11 +2180,10 @@ jobs: PACKAGE_TYPE: conda # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu113 - GPU_ARCH_VERSION: 11.3 - GPU_ARCH_TYPE: cuda + DESIRED_CUDA: cpu + GPU_ARCH_TYPE: cpu SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: "3.9" + DESIRED_PYTHON: "3.10" steps: - name: Display EC2 information shell: bash @@ -2227,7 +2225,7 @@ jobs: - uses: actions/download-artifact@v3 name: Download Build Artifacts with: - name: conda-py3_9-cuda11_3 + name: conda-py3_10-cpu path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}" - name: Checkout PyTorch uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 @@ -2273,688 +2271,9 @@ jobs: if: always() run: | .github\scripts\kill_active_ssh_sessions.ps1 - conda-py3_9-cuda11_3-upload: # Uploading + conda-py3_10-cpu-upload: # Uploading if: ${{ github.repository_owner == 'pytorch' }} - needs: conda-py3_9-cuda11_3-test - with: - PYTORCH_ROOT: ${{ github.workspace }}/pytorch - BUILDER_ROOT: ${{ github.workspace }}/builder - PACKAGE_TYPE: conda - # TODO: This is a legacy variable that we eventually want to get rid of in - # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu113 - GPU_ARCH_VERSION: 11.3 - GPU_ARCH_TYPE: cuda - DESIRED_PYTHON: "3.9" - build_name: conda-py3_9-cuda11_3 - secrets: - github-token: ${{ secrets.GITHUB_TOKEN }} - aws-access-key-id: ${{ secrets.AWS_PYTORCH_UPLOADER_ACCESS_KEY_ID }} - aws-pytorch-uploader-secret-access-key: ${{ secrets.AWS_PYTORCH_UPLOADER_SECRET_ACCESS_KEY }} - conda-pytorchbot-token: ${{ secrets.CONDA_PYTORCHBOT_TOKEN }} - uses: ./.github/workflows/_binary-upload.yml - conda-py3_9-cuda11_6-build: - if: ${{ github.repository_owner == 'pytorch' }} - runs-on: windows.4xlarge - timeout-minutes: 240 - env: - PYTORCH_ROOT: ${{ github.workspace }}/pytorch - BUILDER_ROOT: ${{ github.workspace }}/builder - PACKAGE_TYPE: conda - # TODO: This is a legacy variable that we eventually want to get rid of in - # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu116 - GPU_ARCH_VERSION: 11.6 - GPU_ARCH_TYPE: cuda - SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: "3.9" - steps: - - name: Display EC2 information - shell: bash - run: | - set -euo pipefail - function get_ec2_metadata() { - # Pulled from instance metadata endpoint for EC2 - # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html - category=$1 - curl -fsSL 
"http://169.254.169.254/latest/meta-data/${category}" - } - echo "ami-id: $(get_ec2_metadata ami-id)" - echo "instance-id: $(get_ec2_metadata instance-id)" - echo "instance-type: $(get_ec2_metadata instance-type)" - echo "system info $(uname -a)" - - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" - uses: seemethere/add-github-ssh-key@v1 - with: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - # Needed for binary builds, see: https://github.com/pytorch/pytorch/issues/73339#issuecomment-1058981560 - - name: Enable long paths on Windows - shell: powershell - run: | - Set-ItemProperty -Path "HKLM:\\SYSTEM\CurrentControlSet\Control\FileSystem" -Name "LongPathsEnabled" -Value 1 - # Since it's just a defensive command, the workflow should continue even the command fails - - name: Disables Windows Defender scheduled and real-time scanning for files in pytorch directory. - shell: powershell - run: | - Add-MpPreference -ExclusionPath $(Get-Location).tostring() -ErrorAction Ignore - # NOTE: These environment variables are put here so that they can be applied on every job equally - # They are also here because setting them at a workflow level doesn't give us access to the - # runner.temp variable, which we need. - - name: Populate binary env - shell: bash - run: | - echo "BINARY_ENV_FILE=${RUNNER_TEMP}/env" >> "${GITHUB_ENV}" - echo "PYTORCH_FINAL_PACKAGE_DIR=${RUNNER_TEMP}/artifacts" >> "${GITHUB_ENV}" - echo "WIN_PACKAGE_WORK_DIR=${RUNNER_TEMP}" - - name: Checkout PyTorch - uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 - with: - ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} - submodules: recursive - path: pytorch - - name: Clean PyTorch checkout - run: | - # Remove any artifacts from the previous checkouts - git clean -fxd - working-directory: pytorch - - name: Checkout pytorch/builder - uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 - with: - ref: main - submodules: recursive - repository: pytorch/builder - path: builder - - name: Clean pytorch/builder checkout - run: | - # Remove any artifacts from the previous checkouts - git clean -fxd - working-directory: builder - - name: Populate binary env - shell: bash - run: | - "${PYTORCH_ROOT}/.circleci/scripts/binary_populate_env.sh" - - name: Build PyTorch binary - shell: bash - run: | - "${PYTORCH_ROOT}/.circleci/scripts/binary_windows_build.sh" - - uses: actions/upload-artifact@v3 - if: always() - with: - name: conda-py3_9-cuda11_6 - retention-days: 14 - if-no-files-found: error - path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}" - - name: Wait until all sessions have drained - shell: powershell - working-directory: pytorch - if: always() - timeout-minutes: 120 - run: | - .github\scripts\wait_for_ssh_to_drain.ps1 - - name: Kill active ssh sessions if still around (Useful if workflow was cancelled) - shell: powershell - working-directory: pytorch - if: always() - run: | - .github\scripts\kill_active_ssh_sessions.ps1 - conda-py3_9-cuda11_6-test: # Testing - if: ${{ github.repository_owner == 'pytorch' }} - needs: conda-py3_9-cuda11_6-build - runs-on: windows.8xlarge.nvidia.gpu - timeout-minutes: 240 - env: - PYTORCH_ROOT: ${{ github.workspace }}/pytorch - BUILDER_ROOT: ${{ github.workspace }}/builder - PACKAGE_TYPE: conda - # TODO: This is a legacy variable that we eventually want to get rid of in - # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu116 - GPU_ARCH_VERSION: 11.6 - GPU_ARCH_TYPE: cuda - SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: "3.9" - steps: - 
- name: Display EC2 information - shell: bash - run: | - set -euo pipefail - function get_ec2_metadata() { - # Pulled from instance metadata endpoint for EC2 - # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html - category=$1 - curl -fsSL "http://169.254.169.254/latest/meta-data/${category}" - } - echo "ami-id: $(get_ec2_metadata ami-id)" - echo "instance-id: $(get_ec2_metadata instance-id)" - echo "instance-type: $(get_ec2_metadata instance-type)" - echo "system info $(uname -a)" - - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" - uses: seemethere/add-github-ssh-key@v1 - with: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - # Needed for binary builds, see: https://github.com/pytorch/pytorch/issues/73339#issuecomment-1058981560 - - name: Enable long paths on Windows - shell: powershell - run: | - Set-ItemProperty -Path "HKLM:\\SYSTEM\CurrentControlSet\Control\FileSystem" -Name "LongPathsEnabled" -Value 1 - # Since it's just a defensive command, the workflow should continue even the command fails - - name: Disables Windows Defender scheduled and real-time scanning for files in pytorch directory. - shell: powershell - run: | - Add-MpPreference -ExclusionPath $(Get-Location).tostring() -ErrorAction Ignore - # NOTE: These environment variables are put here so that they can be applied on every job equally - # They are also here because setting them at a workflow level doesn't give us access to the - # runner.temp variable, which we need. - - name: Populate binary env - shell: bash - run: | - echo "BINARY_ENV_FILE=${RUNNER_TEMP}/env" >> "${GITHUB_ENV}" - echo "PYTORCH_FINAL_PACKAGE_DIR=${RUNNER_TEMP}/artifacts" >> "${GITHUB_ENV}" - echo "WIN_PACKAGE_WORK_DIR=${RUNNER_TEMP}" - - uses: actions/download-artifact@v3 - name: Download Build Artifacts - with: - name: conda-py3_9-cuda11_6 - path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}" - - name: Checkout PyTorch - uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 - with: - ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} - submodules: recursive - path: pytorch - - name: Clean PyTorch checkout - run: | - # Remove any artifacts from the previous checkouts - git clean -fxd - working-directory: pytorch - - name: Checkout pytorch/builder - uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 - with: - ref: main - submodules: recursive - repository: pytorch/builder - path: builder - - name: Clean pytorch/builder checkout - run: | - # Remove any artifacts from the previous checkouts - git clean -fxd - working-directory: builder - - name: Populate binary env - shell: bash - run: | - "${PYTORCH_ROOT}/.circleci/scripts/binary_populate_env.sh" - - name: Test PyTorch binary - shell: bash - run: | - "${PYTORCH_ROOT}/.circleci/scripts/binary_windows_test.sh" - - name: Wait until all sessions have drained - shell: powershell - working-directory: pytorch - if: always() - timeout-minutes: 120 - run: | - .github\scripts\wait_for_ssh_to_drain.ps1 - - name: Kill active ssh sessions if still around (Useful if workflow was cancelled) - shell: powershell - working-directory: pytorch - if: always() - run: | - .github\scripts\kill_active_ssh_sessions.ps1 - conda-py3_9-cuda11_6-upload: # Uploading - if: ${{ github.repository_owner == 'pytorch' }} - needs: conda-py3_9-cuda11_6-test - with: - PYTORCH_ROOT: ${{ github.workspace }}/pytorch - BUILDER_ROOT: ${{ github.workspace }}/builder - PACKAGE_TYPE: conda - # TODO: This is a legacy variable that 
we eventually want to get rid of in - # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu116 - GPU_ARCH_VERSION: 11.6 - GPU_ARCH_TYPE: cuda - DESIRED_PYTHON: "3.9" - build_name: conda-py3_9-cuda11_6 - secrets: - github-token: ${{ secrets.GITHUB_TOKEN }} - aws-access-key-id: ${{ secrets.AWS_PYTORCH_UPLOADER_ACCESS_KEY_ID }} - aws-pytorch-uploader-secret-access-key: ${{ secrets.AWS_PYTORCH_UPLOADER_SECRET_ACCESS_KEY }} - conda-pytorchbot-token: ${{ secrets.CONDA_PYTORCHBOT_TOKEN }} - uses: ./.github/workflows/_binary-upload.yml - conda-py3_9-cuda11_7-build: - if: ${{ github.repository_owner == 'pytorch' }} - runs-on: windows.4xlarge - timeout-minutes: 240 - env: - PYTORCH_ROOT: ${{ github.workspace }}/pytorch - BUILDER_ROOT: ${{ github.workspace }}/builder - PACKAGE_TYPE: conda - # TODO: This is a legacy variable that we eventually want to get rid of in - # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu117 - GPU_ARCH_VERSION: 11.7 - GPU_ARCH_TYPE: cuda - SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: "3.9" - steps: - - name: Display EC2 information - shell: bash - run: | - set -euo pipefail - function get_ec2_metadata() { - # Pulled from instance metadata endpoint for EC2 - # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html - category=$1 - curl -fsSL "http://169.254.169.254/latest/meta-data/${category}" - } - echo "ami-id: $(get_ec2_metadata ami-id)" - echo "instance-id: $(get_ec2_metadata instance-id)" - echo "instance-type: $(get_ec2_metadata instance-type)" - echo "system info $(uname -a)" - - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" - uses: seemethere/add-github-ssh-key@v1 - with: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - # Needed for binary builds, see: https://github.com/pytorch/pytorch/issues/73339#issuecomment-1058981560 - - name: Enable long paths on Windows - shell: powershell - run: | - Set-ItemProperty -Path "HKLM:\\SYSTEM\CurrentControlSet\Control\FileSystem" -Name "LongPathsEnabled" -Value 1 - # Since it's just a defensive command, the workflow should continue even the command fails - - name: Disables Windows Defender scheduled and real-time scanning for files in pytorch directory. - shell: powershell - run: | - Add-MpPreference -ExclusionPath $(Get-Location).tostring() -ErrorAction Ignore - # NOTE: These environment variables are put here so that they can be applied on every job equally - # They are also here because setting them at a workflow level doesn't give us access to the - # runner.temp variable, which we need. 
- - name: Populate binary env - shell: bash - run: | - echo "BINARY_ENV_FILE=${RUNNER_TEMP}/env" >> "${GITHUB_ENV}" - echo "PYTORCH_FINAL_PACKAGE_DIR=${RUNNER_TEMP}/artifacts" >> "${GITHUB_ENV}" - echo "WIN_PACKAGE_WORK_DIR=${RUNNER_TEMP}" - - name: Checkout PyTorch - uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 - with: - ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} - submodules: recursive - path: pytorch - - name: Clean PyTorch checkout - run: | - # Remove any artifacts from the previous checkouts - git clean -fxd - working-directory: pytorch - - name: Checkout pytorch/builder - uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 - with: - ref: main - submodules: recursive - repository: pytorch/builder - path: builder - - name: Clean pytorch/builder checkout - run: | - # Remove any artifacts from the previous checkouts - git clean -fxd - working-directory: builder - - name: Populate binary env - shell: bash - run: | - "${PYTORCH_ROOT}/.circleci/scripts/binary_populate_env.sh" - - name: Build PyTorch binary - shell: bash - run: | - "${PYTORCH_ROOT}/.circleci/scripts/binary_windows_build.sh" - - uses: actions/upload-artifact@v3 - if: always() - with: - name: conda-py3_9-cuda11_7 - retention-days: 14 - if-no-files-found: error - path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}" - - name: Wait until all sessions have drained - shell: powershell - working-directory: pytorch - if: always() - timeout-minutes: 120 - run: | - .github\scripts\wait_for_ssh_to_drain.ps1 - - name: Kill active ssh sessions if still around (Useful if workflow was cancelled) - shell: powershell - working-directory: pytorch - if: always() - run: | - .github\scripts\kill_active_ssh_sessions.ps1 - conda-py3_9-cuda11_7-test: # Testing - if: ${{ github.repository_owner == 'pytorch' }} - needs: conda-py3_9-cuda11_7-build - runs-on: windows.8xlarge.nvidia.gpu - timeout-minutes: 240 - env: - PYTORCH_ROOT: ${{ github.workspace }}/pytorch - BUILDER_ROOT: ${{ github.workspace }}/builder - PACKAGE_TYPE: conda - # TODO: This is a legacy variable that we eventually want to get rid of in - # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu117 - GPU_ARCH_VERSION: 11.7 - GPU_ARCH_TYPE: cuda - SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: "3.9" - steps: - - name: Display EC2 information - shell: bash - run: | - set -euo pipefail - function get_ec2_metadata() { - # Pulled from instance metadata endpoint for EC2 - # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html - category=$1 - curl -fsSL "http://169.254.169.254/latest/meta-data/${category}" - } - echo "ami-id: $(get_ec2_metadata ami-id)" - echo "instance-id: $(get_ec2_metadata instance-id)" - echo "instance-type: $(get_ec2_metadata instance-type)" - echo "system info $(uname -a)" - - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" - uses: seemethere/add-github-ssh-key@v1 - with: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - # Needed for binary builds, see: https://github.com/pytorch/pytorch/issues/73339#issuecomment-1058981560 - - name: Enable long paths on Windows - shell: powershell - run: | - Set-ItemProperty -Path "HKLM:\\SYSTEM\CurrentControlSet\Control\FileSystem" -Name "LongPathsEnabled" -Value 1 - # Since it's just a defensive command, the workflow should continue even the command fails - - name: Disables Windows Defender scheduled and real-time scanning for files in pytorch directory. 
- shell: powershell - run: | - Add-MpPreference -ExclusionPath $(Get-Location).tostring() -ErrorAction Ignore - # NOTE: These environment variables are put here so that they can be applied on every job equally - # They are also here because setting them at a workflow level doesn't give us access to the - # runner.temp variable, which we need. - - name: Populate binary env - shell: bash - run: | - echo "BINARY_ENV_FILE=${RUNNER_TEMP}/env" >> "${GITHUB_ENV}" - echo "PYTORCH_FINAL_PACKAGE_DIR=${RUNNER_TEMP}/artifacts" >> "${GITHUB_ENV}" - echo "WIN_PACKAGE_WORK_DIR=${RUNNER_TEMP}" - - uses: actions/download-artifact@v3 - name: Download Build Artifacts - with: - name: conda-py3_9-cuda11_7 - path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}" - - name: Checkout PyTorch - uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 - with: - ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} - submodules: recursive - path: pytorch - - name: Clean PyTorch checkout - run: | - # Remove any artifacts from the previous checkouts - git clean -fxd - working-directory: pytorch - - name: Checkout pytorch/builder - uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 - with: - ref: main - submodules: recursive - repository: pytorch/builder - path: builder - - name: Clean pytorch/builder checkout - run: | - # Remove any artifacts from the previous checkouts - git clean -fxd - working-directory: builder - - name: Populate binary env - shell: bash - run: | - "${PYTORCH_ROOT}/.circleci/scripts/binary_populate_env.sh" - - name: Test PyTorch binary - shell: bash - run: | - "${PYTORCH_ROOT}/.circleci/scripts/binary_windows_test.sh" - - name: Wait until all sessions have drained - shell: powershell - working-directory: pytorch - if: always() - timeout-minutes: 120 - run: | - .github\scripts\wait_for_ssh_to_drain.ps1 - - name: Kill active ssh sessions if still around (Useful if workflow was cancelled) - shell: powershell - working-directory: pytorch - if: always() - run: | - .github\scripts\kill_active_ssh_sessions.ps1 - conda-py3_9-cuda11_7-upload: # Uploading - if: ${{ github.repository_owner == 'pytorch' }} - needs: conda-py3_9-cuda11_7-test - with: - PYTORCH_ROOT: ${{ github.workspace }}/pytorch - BUILDER_ROOT: ${{ github.workspace }}/builder - PACKAGE_TYPE: conda - # TODO: This is a legacy variable that we eventually want to get rid of in - # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu117 - GPU_ARCH_VERSION: 11.7 - GPU_ARCH_TYPE: cuda - DESIRED_PYTHON: "3.9" - build_name: conda-py3_9-cuda11_7 - secrets: - github-token: ${{ secrets.GITHUB_TOKEN }} - aws-access-key-id: ${{ secrets.AWS_PYTORCH_UPLOADER_ACCESS_KEY_ID }} - aws-pytorch-uploader-secret-access-key: ${{ secrets.AWS_PYTORCH_UPLOADER_SECRET_ACCESS_KEY }} - conda-pytorchbot-token: ${{ secrets.CONDA_PYTORCHBOT_TOKEN }} - uses: ./.github/workflows/_binary-upload.yml - conda-py3_10-cpu-build: - if: ${{ github.repository_owner == 'pytorch' }} - runs-on: windows.4xlarge - timeout-minutes: 240 - env: - PYTORCH_ROOT: ${{ github.workspace }}/pytorch - BUILDER_ROOT: ${{ github.workspace }}/builder - PACKAGE_TYPE: conda - # TODO: This is a legacy variable that we eventually want to get rid of in - # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cpu - GPU_ARCH_TYPE: cpu - SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: "3.10" - steps: - - name: Display EC2 information - shell: bash - run: | - set -euo pipefail - function get_ec2_metadata() { - # Pulled from instance metadata endpoint for EC2 - # see 
https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html - category=$1 - curl -fsSL "http://169.254.169.254/latest/meta-data/${category}" - } - echo "ami-id: $(get_ec2_metadata ami-id)" - echo "instance-id: $(get_ec2_metadata instance-id)" - echo "instance-type: $(get_ec2_metadata instance-type)" - echo "system info $(uname -a)" - - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" - uses: seemethere/add-github-ssh-key@v1 - with: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - # Needed for binary builds, see: https://github.com/pytorch/pytorch/issues/73339#issuecomment-1058981560 - - name: Enable long paths on Windows - shell: powershell - run: | - Set-ItemProperty -Path "HKLM:\\SYSTEM\CurrentControlSet\Control\FileSystem" -Name "LongPathsEnabled" -Value 1 - # Since it's just a defensive command, the workflow should continue even the command fails - - name: Disables Windows Defender scheduled and real-time scanning for files in pytorch directory. - shell: powershell - run: | - Add-MpPreference -ExclusionPath $(Get-Location).tostring() -ErrorAction Ignore - # NOTE: These environment variables are put here so that they can be applied on every job equally - # They are also here because setting them at a workflow level doesn't give us access to the - # runner.temp variable, which we need. - - name: Populate binary env - shell: bash - run: | - echo "BINARY_ENV_FILE=${RUNNER_TEMP}/env" >> "${GITHUB_ENV}" - echo "PYTORCH_FINAL_PACKAGE_DIR=${RUNNER_TEMP}/artifacts" >> "${GITHUB_ENV}" - echo "WIN_PACKAGE_WORK_DIR=${RUNNER_TEMP}" - - name: Checkout PyTorch - uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 - with: - ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} - submodules: recursive - path: pytorch - - name: Clean PyTorch checkout - run: | - # Remove any artifacts from the previous checkouts - git clean -fxd - working-directory: pytorch - - name: Checkout pytorch/builder - uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 - with: - ref: main - submodules: recursive - repository: pytorch/builder - path: builder - - name: Clean pytorch/builder checkout - run: | - # Remove any artifacts from the previous checkouts - git clean -fxd - working-directory: builder - - name: Populate binary env - shell: bash - run: | - "${PYTORCH_ROOT}/.circleci/scripts/binary_populate_env.sh" - - name: Build PyTorch binary - shell: bash - run: | - "${PYTORCH_ROOT}/.circleci/scripts/binary_windows_build.sh" - - uses: actions/upload-artifact@v3 - if: always() - with: - name: conda-py3_10-cpu - retention-days: 14 - if-no-files-found: error - path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}" - - name: Wait until all sessions have drained - shell: powershell - working-directory: pytorch - if: always() - timeout-minutes: 120 - run: | - .github\scripts\wait_for_ssh_to_drain.ps1 - - name: Kill active ssh sessions if still around (Useful if workflow was cancelled) - shell: powershell - working-directory: pytorch - if: always() - run: | - .github\scripts\kill_active_ssh_sessions.ps1 - conda-py3_10-cpu-test: # Testing - if: ${{ github.repository_owner == 'pytorch' }} - needs: conda-py3_10-cpu-build - runs-on: windows.4xlarge - timeout-minutes: 240 - env: - PYTORCH_ROOT: ${{ github.workspace }}/pytorch - BUILDER_ROOT: ${{ github.workspace }}/builder - PACKAGE_TYPE: conda - # TODO: This is a legacy variable that we eventually want to get rid of in - # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cpu - 
GPU_ARCH_TYPE: cpu - SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: "3.10" - steps: - - name: Display EC2 information - shell: bash - run: | - set -euo pipefail - function get_ec2_metadata() { - # Pulled from instance metadata endpoint for EC2 - # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html - category=$1 - curl -fsSL "http://169.254.169.254/latest/meta-data/${category}" - } - echo "ami-id: $(get_ec2_metadata ami-id)" - echo "instance-id: $(get_ec2_metadata instance-id)" - echo "instance-type: $(get_ec2_metadata instance-type)" - echo "system info $(uname -a)" - - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" - uses: seemethere/add-github-ssh-key@v1 - with: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - # Needed for binary builds, see: https://github.com/pytorch/pytorch/issues/73339#issuecomment-1058981560 - - name: Enable long paths on Windows - shell: powershell - run: | - Set-ItemProperty -Path "HKLM:\\SYSTEM\CurrentControlSet\Control\FileSystem" -Name "LongPathsEnabled" -Value 1 - # Since it's just a defensive command, the workflow should continue even the command fails - - name: Disables Windows Defender scheduled and real-time scanning for files in pytorch directory. - shell: powershell - run: | - Add-MpPreference -ExclusionPath $(Get-Location).tostring() -ErrorAction Ignore - # NOTE: These environment variables are put here so that they can be applied on every job equally - # They are also here because setting them at a workflow level doesn't give us access to the - # runner.temp variable, which we need. - - name: Populate binary env - shell: bash - run: | - echo "BINARY_ENV_FILE=${RUNNER_TEMP}/env" >> "${GITHUB_ENV}" - echo "PYTORCH_FINAL_PACKAGE_DIR=${RUNNER_TEMP}/artifacts" >> "${GITHUB_ENV}" - echo "WIN_PACKAGE_WORK_DIR=${RUNNER_TEMP}" - - uses: actions/download-artifact@v3 - name: Download Build Artifacts - with: - name: conda-py3_10-cpu - path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}" - - name: Checkout PyTorch - uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 - with: - ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} - submodules: recursive - path: pytorch - - name: Clean PyTorch checkout - run: | - # Remove any artifacts from the previous checkouts - git clean -fxd - working-directory: pytorch - - name: Checkout pytorch/builder - uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 - with: - ref: main - submodules: recursive - repository: pytorch/builder - path: builder - - name: Clean pytorch/builder checkout - run: | - # Remove any artifacts from the previous checkouts - git clean -fxd - working-directory: builder - - name: Populate binary env - shell: bash - run: | - "${PYTORCH_ROOT}/.circleci/scripts/binary_populate_env.sh" - - name: Test PyTorch binary - shell: bash - run: | - "${PYTORCH_ROOT}/.circleci/scripts/binary_windows_test.sh" - - name: Wait until all sessions have drained - shell: powershell - working-directory: pytorch - if: always() - timeout-minutes: 120 - run: | - .github\scripts\wait_for_ssh_to_drain.ps1 - - name: Kill active ssh sessions if still around (Useful if workflow was cancelled) - shell: powershell - working-directory: pytorch - if: always() - run: | - .github\scripts\kill_active_ssh_sessions.ps1 - conda-py3_10-cpu-upload: # Uploading - if: ${{ github.repository_owner == 'pytorch' }} - needs: conda-py3_10-cpu-test + needs: conda-py3_10-cpu-test with: PYTORCH_ROOT: ${{ github.workspace }}/pytorch BUILDER_ROOT: ${{ 
github.workspace }}/builder @@ -2971,233 +2290,6 @@ jobs: aws-pytorch-uploader-secret-access-key: ${{ secrets.AWS_PYTORCH_UPLOADER_SECRET_ACCESS_KEY }} conda-pytorchbot-token: ${{ secrets.CONDA_PYTORCHBOT_TOKEN }} uses: ./.github/workflows/_binary-upload.yml - conda-py3_10-cuda11_3-build: - if: ${{ github.repository_owner == 'pytorch' }} - runs-on: windows.4xlarge - timeout-minutes: 240 - env: - PYTORCH_ROOT: ${{ github.workspace }}/pytorch - BUILDER_ROOT: ${{ github.workspace }}/builder - PACKAGE_TYPE: conda - # TODO: This is a legacy variable that we eventually want to get rid of in - # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu113 - GPU_ARCH_VERSION: 11.3 - GPU_ARCH_TYPE: cuda - SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: "3.10" - steps: - - name: Display EC2 information - shell: bash - run: | - set -euo pipefail - function get_ec2_metadata() { - # Pulled from instance metadata endpoint for EC2 - # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html - category=$1 - curl -fsSL "http://169.254.169.254/latest/meta-data/${category}" - } - echo "ami-id: $(get_ec2_metadata ami-id)" - echo "instance-id: $(get_ec2_metadata instance-id)" - echo "instance-type: $(get_ec2_metadata instance-type)" - echo "system info $(uname -a)" - - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" - uses: seemethere/add-github-ssh-key@v1 - with: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - # Needed for binary builds, see: https://github.com/pytorch/pytorch/issues/73339#issuecomment-1058981560 - - name: Enable long paths on Windows - shell: powershell - run: | - Set-ItemProperty -Path "HKLM:\\SYSTEM\CurrentControlSet\Control\FileSystem" -Name "LongPathsEnabled" -Value 1 - # Since it's just a defensive command, the workflow should continue even the command fails - - name: Disables Windows Defender scheduled and real-time scanning for files in pytorch directory. - shell: powershell - run: | - Add-MpPreference -ExclusionPath $(Get-Location).tostring() -ErrorAction Ignore - # NOTE: These environment variables are put here so that they can be applied on every job equally - # They are also here because setting them at a workflow level doesn't give us access to the - # runner.temp variable, which we need. 
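The NOTE above is the key to how every one of these jobs is configured: GitHub Actions exposes a per-job file at $GITHUB_ENV, and each KEY=value line appended to it becomes an environment variable for all subsequent steps of the same job. Doing this in a step, rather than at workflow level, is what makes ${RUNNER_TEMP} available. A minimal sketch of the mechanism, with illustrative step names; note that in the jobs above the WIN_PACKAGE_WORK_DIR echo carries no >> "${GITHUB_ENV}" redirection, so as written it only prints to the build log, and it would need the same redirection to persist.

      - name: Populate binary env    # sketch of the pattern used above
        shell: bash
        run: |
          # Appending KEY=value to $GITHUB_ENV exports KEY to later steps.
          echo "BINARY_ENV_FILE=${RUNNER_TEMP}/env" >> "${GITHUB_ENV}"
          echo "PYTORCH_FINAL_PACKAGE_DIR=${RUNNER_TEMP}/artifacts" >> "${GITHUB_ENV}"
      - name: Consume binary env     # illustrative follow-up step
        shell: bash
        run: |
          # Visible here because the previous step wrote it to $GITHUB_ENV.
          echo "artifacts will be collected from ${PYTORCH_FINAL_PACKAGE_DIR}"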
- - name: Populate binary env - shell: bash - run: | - echo "BINARY_ENV_FILE=${RUNNER_TEMP}/env" >> "${GITHUB_ENV}" - echo "PYTORCH_FINAL_PACKAGE_DIR=${RUNNER_TEMP}/artifacts" >> "${GITHUB_ENV}" - echo "WIN_PACKAGE_WORK_DIR=${RUNNER_TEMP}" - - name: Checkout PyTorch - uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 - with: - ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} - submodules: recursive - path: pytorch - - name: Clean PyTorch checkout - run: | - # Remove any artifacts from the previous checkouts - git clean -fxd - working-directory: pytorch - - name: Checkout pytorch/builder - uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 - with: - ref: main - submodules: recursive - repository: pytorch/builder - path: builder - - name: Clean pytorch/builder checkout - run: | - # Remove any artifacts from the previous checkouts - git clean -fxd - working-directory: builder - - name: Populate binary env - shell: bash - run: | - "${PYTORCH_ROOT}/.circleci/scripts/binary_populate_env.sh" - - name: Build PyTorch binary - shell: bash - run: | - "${PYTORCH_ROOT}/.circleci/scripts/binary_windows_build.sh" - - uses: actions/upload-artifact@v3 - if: always() - with: - name: conda-py3_10-cuda11_3 - retention-days: 14 - if-no-files-found: error - path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}" - - name: Wait until all sessions have drained - shell: powershell - working-directory: pytorch - if: always() - timeout-minutes: 120 - run: | - .github\scripts\wait_for_ssh_to_drain.ps1 - - name: Kill active ssh sessions if still around (Useful if workflow was cancelled) - shell: powershell - working-directory: pytorch - if: always() - run: | - .github\scripts\kill_active_ssh_sessions.ps1 - conda-py3_10-cuda11_3-test: # Testing - if: ${{ github.repository_owner == 'pytorch' }} - needs: conda-py3_10-cuda11_3-build - runs-on: windows.8xlarge.nvidia.gpu - timeout-minutes: 240 - env: - PYTORCH_ROOT: ${{ github.workspace }}/pytorch - BUILDER_ROOT: ${{ github.workspace }}/builder - PACKAGE_TYPE: conda - # TODO: This is a legacy variable that we eventually want to get rid of in - # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu113 - GPU_ARCH_VERSION: 11.3 - GPU_ARCH_TYPE: cuda - SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: "3.10" - steps: - - name: Display EC2 information - shell: bash - run: | - set -euo pipefail - function get_ec2_metadata() { - # Pulled from instance metadata endpoint for EC2 - # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html - category=$1 - curl -fsSL "http://169.254.169.254/latest/meta-data/${category}" - } - echo "ami-id: $(get_ec2_metadata ami-id)" - echo "instance-id: $(get_ec2_metadata instance-id)" - echo "instance-type: $(get_ec2_metadata instance-type)" - echo "system info $(uname -a)" - - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" - uses: seemethere/add-github-ssh-key@v1 - with: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - # Needed for binary builds, see: https://github.com/pytorch/pytorch/issues/73339#issuecomment-1058981560 - - name: Enable long paths on Windows - shell: powershell - run: | - Set-ItemProperty -Path "HKLM:\\SYSTEM\CurrentControlSet\Control\FileSystem" -Name "LongPathsEnabled" -Value 1 - # Since it's just a defensive command, the workflow should continue even the command fails - - name: Disables Windows Defender scheduled and real-time scanning for files in pytorch directory. 
- shell: powershell - run: | - Add-MpPreference -ExclusionPath $(Get-Location).tostring() -ErrorAction Ignore - # NOTE: These environment variables are put here so that they can be applied on every job equally - # They are also here because setting them at a workflow level doesn't give us access to the - # runner.temp variable, which we need. - - name: Populate binary env - shell: bash - run: | - echo "BINARY_ENV_FILE=${RUNNER_TEMP}/env" >> "${GITHUB_ENV}" - echo "PYTORCH_FINAL_PACKAGE_DIR=${RUNNER_TEMP}/artifacts" >> "${GITHUB_ENV}" - echo "WIN_PACKAGE_WORK_DIR=${RUNNER_TEMP}" - - uses: actions/download-artifact@v3 - name: Download Build Artifacts - with: - name: conda-py3_10-cuda11_3 - path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}" - - name: Checkout PyTorch - uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 - with: - ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} - submodules: recursive - path: pytorch - - name: Clean PyTorch checkout - run: | - # Remove any artifacts from the previous checkouts - git clean -fxd - working-directory: pytorch - - name: Checkout pytorch/builder - uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 - with: - ref: main - submodules: recursive - repository: pytorch/builder - path: builder - - name: Clean pytorch/builder checkout - run: | - # Remove any artifacts from the previous checkouts - git clean -fxd - working-directory: builder - - name: Populate binary env - shell: bash - run: | - "${PYTORCH_ROOT}/.circleci/scripts/binary_populate_env.sh" - - name: Test PyTorch binary - shell: bash - run: | - "${PYTORCH_ROOT}/.circleci/scripts/binary_windows_test.sh" - - name: Wait until all sessions have drained - shell: powershell - working-directory: pytorch - if: always() - timeout-minutes: 120 - run: | - .github\scripts\wait_for_ssh_to_drain.ps1 - - name: Kill active ssh sessions if still around (Useful if workflow was cancelled) - shell: powershell - working-directory: pytorch - if: always() - run: | - .github\scripts\kill_active_ssh_sessions.ps1 - conda-py3_10-cuda11_3-upload: # Uploading - if: ${{ github.repository_owner == 'pytorch' }} - needs: conda-py3_10-cuda11_3-test - with: - PYTORCH_ROOT: ${{ github.workspace }}/pytorch - BUILDER_ROOT: ${{ github.workspace }}/builder - PACKAGE_TYPE: conda - # TODO: This is a legacy variable that we eventually want to get rid of in - # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu113 - GPU_ARCH_VERSION: 11.3 - GPU_ARCH_TYPE: cuda - DESIRED_PYTHON: "3.10" - build_name: conda-py3_10-cuda11_3 - secrets: - github-token: ${{ secrets.GITHUB_TOKEN }} - aws-access-key-id: ${{ secrets.AWS_PYTORCH_UPLOADER_ACCESS_KEY_ID }} - aws-pytorch-uploader-secret-access-key: ${{ secrets.AWS_PYTORCH_UPLOADER_SECRET_ACCESS_KEY }} - conda-pytorchbot-token: ${{ secrets.CONDA_PYTORCHBOT_TOKEN }} - uses: ./.github/workflows/_binary-upload.yml conda-py3_10-cuda11_6-build: if: ${{ github.repository_owner == 'pytorch' }} runs-on: windows.4xlarge diff --git a/.github/workflows/generated-windows-binary-libtorch-debug-nightly.yml b/.github/workflows/generated-windows-binary-libtorch-debug-nightly.yml index 5eb61291b684..c0b5ddae71fa 100644 --- a/.github/workflows/generated-windows-binary-libtorch-debug-nightly.yml +++ b/.github/workflows/generated-windows-binary-libtorch-debug-nightly.yml @@ -976,962 +976,6 @@ jobs: aws-pytorch-uploader-secret-access-key: ${{ secrets.AWS_PYTORCH_UPLOADER_SECRET_ACCESS_KEY }} conda-pytorchbot-token: ${{ secrets.CONDA_PYTORCHBOT_TOKEN }} 
uses: ./.github/workflows/_binary-upload.yml - libtorch-cuda11_3-shared-with-deps-debug-build: - if: ${{ github.repository_owner == 'pytorch' }} - runs-on: windows.4xlarge - timeout-minutes: 240 - env: - PYTORCH_ROOT: ${{ github.workspace }}/pytorch - BUILDER_ROOT: ${{ github.workspace }}/builder - PACKAGE_TYPE: libtorch - # TODO: This is a legacy variable that we eventually want to get rid of in - # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu113 - GPU_ARCH_VERSION: 11.3 - GPU_ARCH_TYPE: cuda - SKIP_ALL_TESTS: 1 - LIBTORCH_CONFIG: debug - LIBTORCH_VARIANT: shared-with-deps - # This is a dummy value for libtorch to work correctly with our batch scripts - # without this value pip does not get installed for some reason - DESIRED_PYTHON: "3.7" - steps: - - name: Display EC2 information - shell: bash - run: | - set -euo pipefail - function get_ec2_metadata() { - # Pulled from instance metadata endpoint for EC2 - # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html - category=$1 - curl -fsSL "http://169.254.169.254/latest/meta-data/${category}" - } - echo "ami-id: $(get_ec2_metadata ami-id)" - echo "instance-id: $(get_ec2_metadata instance-id)" - echo "instance-type: $(get_ec2_metadata instance-type)" - echo "system info $(uname -a)" - - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" - uses: seemethere/add-github-ssh-key@v1 - with: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - # Needed for binary builds, see: https://github.com/pytorch/pytorch/issues/73339#issuecomment-1058981560 - - name: Enable long paths on Windows - shell: powershell - run: | - Set-ItemProperty -Path "HKLM:\\SYSTEM\CurrentControlSet\Control\FileSystem" -Name "LongPathsEnabled" -Value 1 - # Since it's just a defensive command, the workflow should continue even the command fails - - name: Disables Windows Defender scheduled and real-time scanning for files in pytorch directory. - shell: powershell - run: | - Add-MpPreference -ExclusionPath $(Get-Location).tostring() -ErrorAction Ignore - # NOTE: These environment variables are put here so that they can be applied on every job equally - # They are also here because setting them at a workflow level doesn't give us access to the - # runner.temp variable, which we need. 
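The "Display EC2 information" step that each of these jobs starts with works by querying the EC2 instance metadata service, a link-local HTTP endpoint (169.254.169.254) reachable only from inside an EC2 instance; each metadata category is a path segment under /latest/meta-data/. Shown self-contained below, as it appears in the jobs above; curl -f turns an HTTP error into a command failure, so set -euo pipefail aborts the step if it is ever run off EC2.

      - name: Display EC2 information
        shell: bash
        run: |
          set -euo pipefail
          function get_ec2_metadata() {
            # category is a path under the metadata endpoint,
            # e.g. ami-id, instance-id, instance-type
            category=$1
            curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
          }
          echo "ami-id: $(get_ec2_metadata ami-id)"
          echo "instance-id: $(get_ec2_metadata instance-id)"
          echo "instance-type: $(get_ec2_metadata instance-type)"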
- - name: Populate binary env - shell: bash - run: | - echo "BINARY_ENV_FILE=${RUNNER_TEMP}/env" >> "${GITHUB_ENV}" - echo "PYTORCH_FINAL_PACKAGE_DIR=${RUNNER_TEMP}/artifacts" >> "${GITHUB_ENV}" - echo "WIN_PACKAGE_WORK_DIR=${RUNNER_TEMP}" - - name: Checkout PyTorch - uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 - with: - ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} - submodules: recursive - path: pytorch - - name: Clean PyTorch checkout - run: | - # Remove any artifacts from the previous checkouts - git clean -fxd - working-directory: pytorch - - name: Checkout pytorch/builder - uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 - with: - ref: main - submodules: recursive - repository: pytorch/builder - path: builder - - name: Clean pytorch/builder checkout - run: | - # Remove any artifacts from the previous checkouts - git clean -fxd - working-directory: builder - - name: Populate binary env - shell: bash - run: | - "${PYTORCH_ROOT}/.circleci/scripts/binary_populate_env.sh" - - name: Build PyTorch binary - shell: bash - run: | - "${PYTORCH_ROOT}/.circleci/scripts/binary_windows_build.sh" - - uses: actions/upload-artifact@v3 - if: always() - with: - name: libtorch-cuda11_3-shared-with-deps-debug - retention-days: 14 - if-no-files-found: error - path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}" - - name: Wait until all sessions have drained - shell: powershell - working-directory: pytorch - if: always() - timeout-minutes: 120 - run: | - .github\scripts\wait_for_ssh_to_drain.ps1 - - name: Kill active ssh sessions if still around (Useful if workflow was cancelled) - shell: powershell - working-directory: pytorch - if: always() - run: | - .github\scripts\kill_active_ssh_sessions.ps1 - libtorch-cuda11_3-shared-with-deps-debug-test: # Testing - if: ${{ github.repository_owner == 'pytorch' }} - needs: libtorch-cuda11_3-shared-with-deps-debug-build - runs-on: windows.8xlarge.nvidia.gpu - timeout-minutes: 240 - env: - PYTORCH_ROOT: ${{ github.workspace }}/pytorch - BUILDER_ROOT: ${{ github.workspace }}/builder - PACKAGE_TYPE: libtorch - # TODO: This is a legacy variable that we eventually want to get rid of in - # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu113 - GPU_ARCH_VERSION: 11.3 - GPU_ARCH_TYPE: cuda - SKIP_ALL_TESTS: 1 - LIBTORCH_CONFIG: debug - LIBTORCH_VARIANT: shared-with-deps - # This is a dummy value for libtorch to work correctly with our batch scripts - # without this value pip does not get installed for some reason - DESIRED_PYTHON: "3.7" - steps: - - name: Display EC2 information - shell: bash - run: | - set -euo pipefail - function get_ec2_metadata() { - # Pulled from instance metadata endpoint for EC2 - # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html - category=$1 - curl -fsSL "http://169.254.169.254/latest/meta-data/${category}" - } - echo "ami-id: $(get_ec2_metadata ami-id)" - echo "instance-id: $(get_ec2_metadata instance-id)" - echo "instance-type: $(get_ec2_metadata instance-type)" - echo "system info $(uname -a)" - - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" - uses: seemethere/add-github-ssh-key@v1 - with: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - # Needed for binary builds, see: https://github.com/pytorch/pytorch/issues/73339#issuecomment-1058981560 - - name: Enable long paths on Windows - shell: powershell - run: | - Set-ItemProperty -Path "HKLM:\\SYSTEM\CurrentControlSet\Control\FileSystem" -Name "LongPathsEnabled" 
-Value 1 - # Since it's just a defensive command, the workflow should continue even the command fails - - name: Disables Windows Defender scheduled and real-time scanning for files in pytorch directory. - shell: powershell - run: | - Add-MpPreference -ExclusionPath $(Get-Location).tostring() -ErrorAction Ignore - # NOTE: These environment variables are put here so that they can be applied on every job equally - # They are also here because setting them at a workflow level doesn't give us access to the - # runner.temp variable, which we need. - - name: Populate binary env - shell: bash - run: | - echo "BINARY_ENV_FILE=${RUNNER_TEMP}/env" >> "${GITHUB_ENV}" - echo "PYTORCH_FINAL_PACKAGE_DIR=${RUNNER_TEMP}/artifacts" >> "${GITHUB_ENV}" - echo "WIN_PACKAGE_WORK_DIR=${RUNNER_TEMP}" - - uses: actions/download-artifact@v3 - name: Download Build Artifacts - with: - name: libtorch-cuda11_3-shared-with-deps-debug - path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}" - - name: Checkout PyTorch - uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 - with: - ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} - submodules: recursive - path: pytorch - - name: Clean PyTorch checkout - run: | - # Remove any artifacts from the previous checkouts - git clean -fxd - working-directory: pytorch - - name: Checkout pytorch/builder - uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 - with: - ref: main - submodules: recursive - repository: pytorch/builder - path: builder - - name: Clean pytorch/builder checkout - run: | - # Remove any artifacts from the previous checkouts - git clean -fxd - working-directory: builder - - name: Populate binary env - shell: bash - run: | - "${PYTORCH_ROOT}/.circleci/scripts/binary_populate_env.sh" - - name: Test PyTorch binary - shell: bash - run: | - "${PYTORCH_ROOT}/.circleci/scripts/binary_windows_test.sh" - - name: Wait until all sessions have drained - shell: powershell - working-directory: pytorch - if: always() - timeout-minutes: 120 - run: | - .github\scripts\wait_for_ssh_to_drain.ps1 - - name: Kill active ssh sessions if still around (Useful if workflow was cancelled) - shell: powershell - working-directory: pytorch - if: always() - run: | - .github\scripts\kill_active_ssh_sessions.ps1 - libtorch-cuda11_3-shared-with-deps-debug-upload: # Uploading - if: ${{ github.repository_owner == 'pytorch' }} - needs: libtorch-cuda11_3-shared-with-deps-debug-test - with: - PYTORCH_ROOT: ${{ github.workspace }}/pytorch - BUILDER_ROOT: ${{ github.workspace }}/builder - PACKAGE_TYPE: libtorch - # TODO: This is a legacy variable that we eventually want to get rid of in - # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu113 - GPU_ARCH_VERSION: 11.3 - GPU_ARCH_TYPE: cuda - LIBTORCH_CONFIG: debug - LIBTORCH_VARIANT: shared-with-deps - # This is a dummy value for libtorch to work correctly with our batch scripts - # without this value pip does not get installed for some reason - DESIRED_PYTHON: "3.7" - build_name: libtorch-cuda11_3-shared-with-deps-debug - secrets: - github-token: ${{ secrets.GITHUB_TOKEN }} - aws-access-key-id: ${{ secrets.AWS_PYTORCH_UPLOADER_ACCESS_KEY_ID }} - aws-pytorch-uploader-secret-access-key: ${{ secrets.AWS_PYTORCH_UPLOADER_SECRET_ACCESS_KEY }} - conda-pytorchbot-token: ${{ secrets.CONDA_PYTORCHBOT_TOKEN }} - uses: ./.github/workflows/_binary-upload.yml - libtorch-cuda11_3-shared-without-deps-debug-build: - if: ${{ github.repository_owner == 'pytorch' }} - runs-on: windows.4xlarge - 
timeout-minutes: 240 - env: - PYTORCH_ROOT: ${{ github.workspace }}/pytorch - BUILDER_ROOT: ${{ github.workspace }}/builder - PACKAGE_TYPE: libtorch - # TODO: This is a legacy variable that we eventually want to get rid of in - # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu113 - GPU_ARCH_VERSION: 11.3 - GPU_ARCH_TYPE: cuda - SKIP_ALL_TESTS: 1 - LIBTORCH_CONFIG: debug - LIBTORCH_VARIANT: shared-without-deps - # This is a dummy value for libtorch to work correctly with our batch scripts - # without this value pip does not get installed for some reason - DESIRED_PYTHON: "3.7" - steps: - - name: Display EC2 information - shell: bash - run: | - set -euo pipefail - function get_ec2_metadata() { - # Pulled from instance metadata endpoint for EC2 - # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html - category=$1 - curl -fsSL "http://169.254.169.254/latest/meta-data/${category}" - } - echo "ami-id: $(get_ec2_metadata ami-id)" - echo "instance-id: $(get_ec2_metadata instance-id)" - echo "instance-type: $(get_ec2_metadata instance-type)" - echo "system info $(uname -a)" - - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" - uses: seemethere/add-github-ssh-key@v1 - with: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - # Needed for binary builds, see: https://github.com/pytorch/pytorch/issues/73339#issuecomment-1058981560 - - name: Enable long paths on Windows - shell: powershell - run: | - Set-ItemProperty -Path "HKLM:\\SYSTEM\CurrentControlSet\Control\FileSystem" -Name "LongPathsEnabled" -Value 1 - # Since it's just a defensive command, the workflow should continue even the command fails - - name: Disables Windows Defender scheduled and real-time scanning for files in pytorch directory. - shell: powershell - run: | - Add-MpPreference -ExclusionPath $(Get-Location).tostring() -ErrorAction Ignore - # NOTE: These environment variables are put here so that they can be applied on every job equally - # They are also here because setting them at a workflow level doesn't give us access to the - # runner.temp variable, which we need. 
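Each job then materializes two checkouts side by side: pytorch itself (at the pull-request head SHA for PR runs, otherwise the pushed SHA) and pytorch/builder at main, which supplies the packaging scripts. Because these Windows runners are not ephemeral, both trees are scrubbed with git clean -fxd so nothing survives from a previous run. In outline; the single combined clean step here is a condensation of the two separate per-repo clean steps used above:

      - name: Checkout PyTorch
        uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
        with:
          # PR runs test the PR head commit; push runs test the pushed commit.
          ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
          submodules: recursive
          path: pytorch
      - name: Checkout pytorch/builder
        uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
        with:
          ref: main
          submodules: recursive
          repository: pytorch/builder
          path: builder
      - name: Clean both checkouts
        run: |
          # -f force, -x also remove ignored files, -d recurse into directories
          git -C pytorch clean -fxd
          git -C builder clean -fxd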
- - name: Populate binary env - shell: bash - run: | - echo "BINARY_ENV_FILE=${RUNNER_TEMP}/env" >> "${GITHUB_ENV}" - echo "PYTORCH_FINAL_PACKAGE_DIR=${RUNNER_TEMP}/artifacts" >> "${GITHUB_ENV}" - echo "WIN_PACKAGE_WORK_DIR=${RUNNER_TEMP}" - - name: Checkout PyTorch - uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 - with: - ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} - submodules: recursive - path: pytorch - - name: Clean PyTorch checkout - run: | - # Remove any artifacts from the previous checkouts - git clean -fxd - working-directory: pytorch - - name: Checkout pytorch/builder - uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 - with: - ref: main - submodules: recursive - repository: pytorch/builder - path: builder - - name: Clean pytorch/builder checkout - run: | - # Remove any artifacts from the previous checkouts - git clean -fxd - working-directory: builder - - name: Populate binary env - shell: bash - run: | - "${PYTORCH_ROOT}/.circleci/scripts/binary_populate_env.sh" - - name: Build PyTorch binary - shell: bash - run: | - "${PYTORCH_ROOT}/.circleci/scripts/binary_windows_build.sh" - - uses: actions/upload-artifact@v3 - if: always() - with: - name: libtorch-cuda11_3-shared-without-deps-debug - retention-days: 14 - if-no-files-found: error - path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}" - - name: Wait until all sessions have drained - shell: powershell - working-directory: pytorch - if: always() - timeout-minutes: 120 - run: | - .github\scripts\wait_for_ssh_to_drain.ps1 - - name: Kill active ssh sessions if still around (Useful if workflow was cancelled) - shell: powershell - working-directory: pytorch - if: always() - run: | - .github\scripts\kill_active_ssh_sessions.ps1 - libtorch-cuda11_3-shared-without-deps-debug-test: # Testing - if: ${{ github.repository_owner == 'pytorch' }} - needs: libtorch-cuda11_3-shared-without-deps-debug-build - runs-on: windows.8xlarge.nvidia.gpu - timeout-minutes: 240 - env: - PYTORCH_ROOT: ${{ github.workspace }}/pytorch - BUILDER_ROOT: ${{ github.workspace }}/builder - PACKAGE_TYPE: libtorch - # TODO: This is a legacy variable that we eventually want to get rid of in - # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu113 - GPU_ARCH_VERSION: 11.3 - GPU_ARCH_TYPE: cuda - SKIP_ALL_TESTS: 1 - LIBTORCH_CONFIG: debug - LIBTORCH_VARIANT: shared-without-deps - # This is a dummy value for libtorch to work correctly with our batch scripts - # without this value pip does not get installed for some reason - DESIRED_PYTHON: "3.7" - steps: - - name: Display EC2 information - shell: bash - run: | - set -euo pipefail - function get_ec2_metadata() { - # Pulled from instance metadata endpoint for EC2 - # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html - category=$1 - curl -fsSL "http://169.254.169.254/latest/meta-data/${category}" - } - echo "ami-id: $(get_ec2_metadata ami-id)" - echo "instance-id: $(get_ec2_metadata instance-id)" - echo "instance-type: $(get_ec2_metadata instance-type)" - echo "system info $(uname -a)" - - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" - uses: seemethere/add-github-ssh-key@v1 - with: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - # Needed for binary builds, see: https://github.com/pytorch/pytorch/issues/73339#issuecomment-1058981560 - - name: Enable long paths on Windows - shell: powershell - run: | - Set-ItemProperty -Path "HKLM:\\SYSTEM\CurrentControlSet\Control\FileSystem" -Name 
"LongPathsEnabled" -Value 1 - # Since it's just a defensive command, the workflow should continue even the command fails - - name: Disables Windows Defender scheduled and real-time scanning for files in pytorch directory. - shell: powershell - run: | - Add-MpPreference -ExclusionPath $(Get-Location).tostring() -ErrorAction Ignore - # NOTE: These environment variables are put here so that they can be applied on every job equally - # They are also here because setting them at a workflow level doesn't give us access to the - # runner.temp variable, which we need. - - name: Populate binary env - shell: bash - run: | - echo "BINARY_ENV_FILE=${RUNNER_TEMP}/env" >> "${GITHUB_ENV}" - echo "PYTORCH_FINAL_PACKAGE_DIR=${RUNNER_TEMP}/artifacts" >> "${GITHUB_ENV}" - echo "WIN_PACKAGE_WORK_DIR=${RUNNER_TEMP}" - - uses: actions/download-artifact@v3 - name: Download Build Artifacts - with: - name: libtorch-cuda11_3-shared-without-deps-debug - path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}" - - name: Checkout PyTorch - uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 - with: - ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} - submodules: recursive - path: pytorch - - name: Clean PyTorch checkout - run: | - # Remove any artifacts from the previous checkouts - git clean -fxd - working-directory: pytorch - - name: Checkout pytorch/builder - uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 - with: - ref: main - submodules: recursive - repository: pytorch/builder - path: builder - - name: Clean pytorch/builder checkout - run: | - # Remove any artifacts from the previous checkouts - git clean -fxd - working-directory: builder - - name: Populate binary env - shell: bash - run: | - "${PYTORCH_ROOT}/.circleci/scripts/binary_populate_env.sh" - - name: Test PyTorch binary - shell: bash - run: | - "${PYTORCH_ROOT}/.circleci/scripts/binary_windows_test.sh" - - name: Wait until all sessions have drained - shell: powershell - working-directory: pytorch - if: always() - timeout-minutes: 120 - run: | - .github\scripts\wait_for_ssh_to_drain.ps1 - - name: Kill active ssh sessions if still around (Useful if workflow was cancelled) - shell: powershell - working-directory: pytorch - if: always() - run: | - .github\scripts\kill_active_ssh_sessions.ps1 - libtorch-cuda11_3-shared-without-deps-debug-upload: # Uploading - if: ${{ github.repository_owner == 'pytorch' }} - needs: libtorch-cuda11_3-shared-without-deps-debug-test - with: - PYTORCH_ROOT: ${{ github.workspace }}/pytorch - BUILDER_ROOT: ${{ github.workspace }}/builder - PACKAGE_TYPE: libtorch - # TODO: This is a legacy variable that we eventually want to get rid of in - # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu113 - GPU_ARCH_VERSION: 11.3 - GPU_ARCH_TYPE: cuda - LIBTORCH_CONFIG: debug - LIBTORCH_VARIANT: shared-without-deps - # This is a dummy value for libtorch to work correctly with our batch scripts - # without this value pip does not get installed for some reason - DESIRED_PYTHON: "3.7" - build_name: libtorch-cuda11_3-shared-without-deps-debug - secrets: - github-token: ${{ secrets.GITHUB_TOKEN }} - aws-access-key-id: ${{ secrets.AWS_PYTORCH_UPLOADER_ACCESS_KEY_ID }} - aws-pytorch-uploader-secret-access-key: ${{ secrets.AWS_PYTORCH_UPLOADER_SECRET_ACCESS_KEY }} - conda-pytorchbot-token: ${{ secrets.CONDA_PYTORCHBOT_TOKEN }} - uses: ./.github/workflows/_binary-upload.yml - libtorch-cuda11_3-static-with-deps-debug-build: - if: ${{ github.repository_owner == 'pytorch' }} - runs-on: 
windows.4xlarge - timeout-minutes: 240 - env: - PYTORCH_ROOT: ${{ github.workspace }}/pytorch - BUILDER_ROOT: ${{ github.workspace }}/builder - PACKAGE_TYPE: libtorch - # TODO: This is a legacy variable that we eventually want to get rid of in - # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu113 - GPU_ARCH_VERSION: 11.3 - GPU_ARCH_TYPE: cuda - SKIP_ALL_TESTS: 1 - LIBTORCH_CONFIG: debug - LIBTORCH_VARIANT: static-with-deps - # This is a dummy value for libtorch to work correctly with our batch scripts - # without this value pip does not get installed for some reason - DESIRED_PYTHON: "3.7" - steps: - - name: Display EC2 information - shell: bash - run: | - set -euo pipefail - function get_ec2_metadata() { - # Pulled from instance metadata endpoint for EC2 - # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html - category=$1 - curl -fsSL "http://169.254.169.254/latest/meta-data/${category}" - } - echo "ami-id: $(get_ec2_metadata ami-id)" - echo "instance-id: $(get_ec2_metadata instance-id)" - echo "instance-type: $(get_ec2_metadata instance-type)" - echo "system info $(uname -a)" - - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" - uses: seemethere/add-github-ssh-key@v1 - with: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - # Needed for binary builds, see: https://github.com/pytorch/pytorch/issues/73339#issuecomment-1058981560 - - name: Enable long paths on Windows - shell: powershell - run: | - Set-ItemProperty -Path "HKLM:\\SYSTEM\CurrentControlSet\Control\FileSystem" -Name "LongPathsEnabled" -Value 1 - # Since it's just a defensive command, the workflow should continue even the command fails - - name: Disables Windows Defender scheduled and real-time scanning for files in pytorch directory. - shell: powershell - run: | - Add-MpPreference -ExclusionPath $(Get-Location).tostring() -ErrorAction Ignore - # NOTE: These environment variables are put here so that they can be applied on every job equally - # They are also here because setting them at a workflow level doesn't give us access to the - # runner.temp variable, which we need. 
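The build and test halves of each configuration are stitched together through artifacts: every *-build job uploads its output under a name matching the configuration (for example libtorch-cuda11_3-static-with-deps-debug), and the corresponding *-test job, which declares needs: on the build job, downloads it by the same name into PYTORCH_FINAL_PACKAGE_DIR before running the test script. The handoff reduced to its essentials, with placeholder job and artifact names:

  example-build:                    # placeholder names throughout
    runs-on: windows.4xlarge
    steps:
      # ... build steps put binaries in PYTORCH_FINAL_PACKAGE_DIR ...
      - uses: actions/upload-artifact@v3
        if: always()                # upload even after failure, for debugging
        with:
          name: example-binary      # the test job downloads by this exact name
          retention-days: 14
          if-no-files-found: error
          path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}"
  example-test:
    needs: example-build            # guarantees the artifact exists first
    runs-on: windows.8xlarge.nvidia.gpu
    steps:
      - uses: actions/download-artifact@v3
        with:
          name: example-binary
          path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}"
      # ... test steps consume the downloaded binaries ...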
- - name: Populate binary env - shell: bash - run: | - echo "BINARY_ENV_FILE=${RUNNER_TEMP}/env" >> "${GITHUB_ENV}" - echo "PYTORCH_FINAL_PACKAGE_DIR=${RUNNER_TEMP}/artifacts" >> "${GITHUB_ENV}" - echo "WIN_PACKAGE_WORK_DIR=${RUNNER_TEMP}" - - name: Checkout PyTorch - uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 - with: - ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} - submodules: recursive - path: pytorch - - name: Clean PyTorch checkout - run: | - # Remove any artifacts from the previous checkouts - git clean -fxd - working-directory: pytorch - - name: Checkout pytorch/builder - uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 - with: - ref: main - submodules: recursive - repository: pytorch/builder - path: builder - - name: Clean pytorch/builder checkout - run: | - # Remove any artifacts from the previous checkouts - git clean -fxd - working-directory: builder - - name: Populate binary env - shell: bash - run: | - "${PYTORCH_ROOT}/.circleci/scripts/binary_populate_env.sh" - - name: Build PyTorch binary - shell: bash - run: | - "${PYTORCH_ROOT}/.circleci/scripts/binary_windows_build.sh" - - uses: actions/upload-artifact@v3 - if: always() - with: - name: libtorch-cuda11_3-static-with-deps-debug - retention-days: 14 - if-no-files-found: error - path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}" - - name: Wait until all sessions have drained - shell: powershell - working-directory: pytorch - if: always() - timeout-minutes: 120 - run: | - .github\scripts\wait_for_ssh_to_drain.ps1 - - name: Kill active ssh sessions if still around (Useful if workflow was cancelled) - shell: powershell - working-directory: pytorch - if: always() - run: | - .github\scripts\kill_active_ssh_sessions.ps1 - libtorch-cuda11_3-static-with-deps-debug-test: # Testing - if: ${{ github.repository_owner == 'pytorch' }} - needs: libtorch-cuda11_3-static-with-deps-debug-build - runs-on: windows.8xlarge.nvidia.gpu - timeout-minutes: 240 - env: - PYTORCH_ROOT: ${{ github.workspace }}/pytorch - BUILDER_ROOT: ${{ github.workspace }}/builder - PACKAGE_TYPE: libtorch - # TODO: This is a legacy variable that we eventually want to get rid of in - # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu113 - GPU_ARCH_VERSION: 11.3 - GPU_ARCH_TYPE: cuda - SKIP_ALL_TESTS: 1 - LIBTORCH_CONFIG: debug - LIBTORCH_VARIANT: static-with-deps - # This is a dummy value for libtorch to work correctly with our batch scripts - # without this value pip does not get installed for some reason - DESIRED_PYTHON: "3.7" - steps: - - name: Display EC2 information - shell: bash - run: | - set -euo pipefail - function get_ec2_metadata() { - # Pulled from instance metadata endpoint for EC2 - # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html - category=$1 - curl -fsSL "http://169.254.169.254/latest/meta-data/${category}" - } - echo "ami-id: $(get_ec2_metadata ami-id)" - echo "instance-id: $(get_ec2_metadata instance-id)" - echo "instance-type: $(get_ec2_metadata instance-type)" - echo "system info $(uname -a)" - - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" - uses: seemethere/add-github-ssh-key@v1 - with: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - # Needed for binary builds, see: https://github.com/pytorch/pytorch/issues/73339#issuecomment-1058981560 - - name: Enable long paths on Windows - shell: powershell - run: | - Set-ItemProperty -Path "HKLM:\\SYSTEM\CurrentControlSet\Control\FileSystem" -Name "LongPathsEnabled" 
-Value 1 - # Since it's just a defensive command, the workflow should continue even the command fails - - name: Disables Windows Defender scheduled and real-time scanning for files in pytorch directory. - shell: powershell - run: | - Add-MpPreference -ExclusionPath $(Get-Location).tostring() -ErrorAction Ignore - # NOTE: These environment variables are put here so that they can be applied on every job equally - # They are also here because setting them at a workflow level doesn't give us access to the - # runner.temp variable, which we need. - - name: Populate binary env - shell: bash - run: | - echo "BINARY_ENV_FILE=${RUNNER_TEMP}/env" >> "${GITHUB_ENV}" - echo "PYTORCH_FINAL_PACKAGE_DIR=${RUNNER_TEMP}/artifacts" >> "${GITHUB_ENV}" - echo "WIN_PACKAGE_WORK_DIR=${RUNNER_TEMP}" - - uses: actions/download-artifact@v3 - name: Download Build Artifacts - with: - name: libtorch-cuda11_3-static-with-deps-debug - path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}" - - name: Checkout PyTorch - uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 - with: - ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} - submodules: recursive - path: pytorch - - name: Clean PyTorch checkout - run: | - # Remove any artifacts from the previous checkouts - git clean -fxd - working-directory: pytorch - - name: Checkout pytorch/builder - uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 - with: - ref: main - submodules: recursive - repository: pytorch/builder - path: builder - - name: Clean pytorch/builder checkout - run: | - # Remove any artifacts from the previous checkouts - git clean -fxd - working-directory: builder - - name: Populate binary env - shell: bash - run: | - "${PYTORCH_ROOT}/.circleci/scripts/binary_populate_env.sh" - - name: Test PyTorch binary - shell: bash - run: | - "${PYTORCH_ROOT}/.circleci/scripts/binary_windows_test.sh" - - name: Wait until all sessions have drained - shell: powershell - working-directory: pytorch - if: always() - timeout-minutes: 120 - run: | - .github\scripts\wait_for_ssh_to_drain.ps1 - - name: Kill active ssh sessions if still around (Useful if workflow was cancelled) - shell: powershell - working-directory: pytorch - if: always() - run: | - .github\scripts\kill_active_ssh_sessions.ps1 - libtorch-cuda11_3-static-with-deps-debug-upload: # Uploading - if: ${{ github.repository_owner == 'pytorch' }} - needs: libtorch-cuda11_3-static-with-deps-debug-test - with: - PYTORCH_ROOT: ${{ github.workspace }}/pytorch - BUILDER_ROOT: ${{ github.workspace }}/builder - PACKAGE_TYPE: libtorch - # TODO: This is a legacy variable that we eventually want to get rid of in - # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu113 - GPU_ARCH_VERSION: 11.3 - GPU_ARCH_TYPE: cuda - LIBTORCH_CONFIG: debug - LIBTORCH_VARIANT: static-with-deps - # This is a dummy value for libtorch to work correctly with our batch scripts - # without this value pip does not get installed for some reason - DESIRED_PYTHON: "3.7" - build_name: libtorch-cuda11_3-static-with-deps-debug - secrets: - github-token: ${{ secrets.GITHUB_TOKEN }} - aws-access-key-id: ${{ secrets.AWS_PYTORCH_UPLOADER_ACCESS_KEY_ID }} - aws-pytorch-uploader-secret-access-key: ${{ secrets.AWS_PYTORCH_UPLOADER_SECRET_ACCESS_KEY }} - conda-pytorchbot-token: ${{ secrets.CONDA_PYTORCHBOT_TOKEN }} - uses: ./.github/workflows/_binary-upload.yml - libtorch-cuda11_3-static-without-deps-debug-build: - if: ${{ github.repository_owner == 'pytorch' }} - runs-on: windows.4xlarge - 
timeout-minutes: 240 - env: - PYTORCH_ROOT: ${{ github.workspace }}/pytorch - BUILDER_ROOT: ${{ github.workspace }}/builder - PACKAGE_TYPE: libtorch - # TODO: This is a legacy variable that we eventually want to get rid of in - # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu113 - GPU_ARCH_VERSION: 11.3 - GPU_ARCH_TYPE: cuda - SKIP_ALL_TESTS: 1 - LIBTORCH_CONFIG: debug - LIBTORCH_VARIANT: static-without-deps - # This is a dummy value for libtorch to work correctly with our batch scripts - # without this value pip does not get installed for some reason - DESIRED_PYTHON: "3.7" - steps: - - name: Display EC2 information - shell: bash - run: | - set -euo pipefail - function get_ec2_metadata() { - # Pulled from instance metadata endpoint for EC2 - # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html - category=$1 - curl -fsSL "http://169.254.169.254/latest/meta-data/${category}" - } - echo "ami-id: $(get_ec2_metadata ami-id)" - echo "instance-id: $(get_ec2_metadata instance-id)" - echo "instance-type: $(get_ec2_metadata instance-type)" - echo "system info $(uname -a)" - - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" - uses: seemethere/add-github-ssh-key@v1 - with: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - # Needed for binary builds, see: https://github.com/pytorch/pytorch/issues/73339#issuecomment-1058981560 - - name: Enable long paths on Windows - shell: powershell - run: | - Set-ItemProperty -Path "HKLM:\\SYSTEM\CurrentControlSet\Control\FileSystem" -Name "LongPathsEnabled" -Value 1 - # Since it's just a defensive command, the workflow should continue even the command fails - - name: Disables Windows Defender scheduled and real-time scanning for files in pytorch directory. - shell: powershell - run: | - Add-MpPreference -ExclusionPath $(Get-Location).tostring() -ErrorAction Ignore - # NOTE: These environment variables are put here so that they can be applied on every job equally - # They are also here because setting them at a workflow level doesn't give us access to the - # runner.temp variable, which we need. 
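The *-upload jobs, by contrast, are thin wrappers: they define no steps of their own and instead invoke the reusable workflow ./.github/workflows/_binary-upload.yml, forwarding the package configuration through with: and the AWS and conda credentials through secrets:. Stripped to the pattern, with placeholder job and build names:

  example-upload:
    if: ${{ github.repository_owner == 'pytorch' }}
    needs: example-test             # upload only after tests pass
    uses: ./.github/workflows/_binary-upload.yml
    with:
      PACKAGE_TYPE: libtorch
      build_name: example-binary    # placeholder; matches the artifact name
    secrets:
      github-token: ${{ secrets.GITHUB_TOKEN }}
      aws-access-key-id: ${{ secrets.AWS_PYTORCH_UPLOADER_ACCESS_KEY_ID }}
      aws-pytorch-uploader-secret-access-key: ${{ secrets.AWS_PYTORCH_UPLOADER_SECRET_ACCESS_KEY }}
      conda-pytorchbot-token: ${{ secrets.CONDA_PYTORCHBOT_TOKEN }}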
- - name: Populate binary env - shell: bash - run: | - echo "BINARY_ENV_FILE=${RUNNER_TEMP}/env" >> "${GITHUB_ENV}" - echo "PYTORCH_FINAL_PACKAGE_DIR=${RUNNER_TEMP}/artifacts" >> "${GITHUB_ENV}" - echo "WIN_PACKAGE_WORK_DIR=${RUNNER_TEMP}" - - name: Checkout PyTorch - uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 - with: - ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} - submodules: recursive - path: pytorch - - name: Clean PyTorch checkout - run: | - # Remove any artifacts from the previous checkouts - git clean -fxd - working-directory: pytorch - - name: Checkout pytorch/builder - uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 - with: - ref: main - submodules: recursive - repository: pytorch/builder - path: builder - - name: Clean pytorch/builder checkout - run: | - # Remove any artifacts from the previous checkouts - git clean -fxd - working-directory: builder - - name: Populate binary env - shell: bash - run: | - "${PYTORCH_ROOT}/.circleci/scripts/binary_populate_env.sh" - - name: Build PyTorch binary - shell: bash - run: | - "${PYTORCH_ROOT}/.circleci/scripts/binary_windows_build.sh" - - uses: actions/upload-artifact@v3 - if: always() - with: - name: libtorch-cuda11_3-static-without-deps-debug - retention-days: 14 - if-no-files-found: error - path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}" - - name: Wait until all sessions have drained - shell: powershell - working-directory: pytorch - if: always() - timeout-minutes: 120 - run: | - .github\scripts\wait_for_ssh_to_drain.ps1 - - name: Kill active ssh sessions if still around (Useful if workflow was cancelled) - shell: powershell - working-directory: pytorch - if: always() - run: | - .github\scripts\kill_active_ssh_sessions.ps1 - libtorch-cuda11_3-static-without-deps-debug-test: # Testing - if: ${{ github.repository_owner == 'pytorch' }} - needs: libtorch-cuda11_3-static-without-deps-debug-build - runs-on: windows.8xlarge.nvidia.gpu - timeout-minutes: 240 - env: - PYTORCH_ROOT: ${{ github.workspace }}/pytorch - BUILDER_ROOT: ${{ github.workspace }}/builder - PACKAGE_TYPE: libtorch - # TODO: This is a legacy variable that we eventually want to get rid of in - # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu113 - GPU_ARCH_VERSION: 11.3 - GPU_ARCH_TYPE: cuda - SKIP_ALL_TESTS: 1 - LIBTORCH_CONFIG: debug - LIBTORCH_VARIANT: static-without-deps - # This is a dummy value for libtorch to work correctly with our batch scripts - # without this value pip does not get installed for some reason - DESIRED_PYTHON: "3.7" - steps: - - name: Display EC2 information - shell: bash - run: | - set -euo pipefail - function get_ec2_metadata() { - # Pulled from instance metadata endpoint for EC2 - # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html - category=$1 - curl -fsSL "http://169.254.169.254/latest/meta-data/${category}" - } - echo "ami-id: $(get_ec2_metadata ami-id)" - echo "instance-id: $(get_ec2_metadata instance-id)" - echo "instance-type: $(get_ec2_metadata instance-type)" - echo "system info $(uname -a)" - - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" - uses: seemethere/add-github-ssh-key@v1 - with: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - # Needed for binary builds, see: https://github.com/pytorch/pytorch/issues/73339#issuecomment-1058981560 - - name: Enable long paths on Windows - shell: powershell - run: | - Set-ItemProperty -Path "HKLM:\\SYSTEM\CurrentControlSet\Control\FileSystem" -Name 
"LongPathsEnabled" -Value 1 - # Since it's just a defensive command, the workflow should continue even the command fails - - name: Disables Windows Defender scheduled and real-time scanning for files in pytorch directory. - shell: powershell - run: | - Add-MpPreference -ExclusionPath $(Get-Location).tostring() -ErrorAction Ignore - # NOTE: These environment variables are put here so that they can be applied on every job equally - # They are also here because setting them at a workflow level doesn't give us access to the - # runner.temp variable, which we need. - - name: Populate binary env - shell: bash - run: | - echo "BINARY_ENV_FILE=${RUNNER_TEMP}/env" >> "${GITHUB_ENV}" - echo "PYTORCH_FINAL_PACKAGE_DIR=${RUNNER_TEMP}/artifacts" >> "${GITHUB_ENV}" - echo "WIN_PACKAGE_WORK_DIR=${RUNNER_TEMP}" - - uses: actions/download-artifact@v3 - name: Download Build Artifacts - with: - name: libtorch-cuda11_3-static-without-deps-debug - path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}" - - name: Checkout PyTorch - uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 - with: - ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} - submodules: recursive - path: pytorch - - name: Clean PyTorch checkout - run: | - # Remove any artifacts from the previous checkouts - git clean -fxd - working-directory: pytorch - - name: Checkout pytorch/builder - uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 - with: - ref: main - submodules: recursive - repository: pytorch/builder - path: builder - - name: Clean pytorch/builder checkout - run: | - # Remove any artifacts from the previous checkouts - git clean -fxd - working-directory: builder - - name: Populate binary env - shell: bash - run: | - "${PYTORCH_ROOT}/.circleci/scripts/binary_populate_env.sh" - - name: Test PyTorch binary - shell: bash - run: | - "${PYTORCH_ROOT}/.circleci/scripts/binary_windows_test.sh" - - name: Wait until all sessions have drained - shell: powershell - working-directory: pytorch - if: always() - timeout-minutes: 120 - run: | - .github\scripts\wait_for_ssh_to_drain.ps1 - - name: Kill active ssh sessions if still around (Useful if workflow was cancelled) - shell: powershell - working-directory: pytorch - if: always() - run: | - .github\scripts\kill_active_ssh_sessions.ps1 - libtorch-cuda11_3-static-without-deps-debug-upload: # Uploading - if: ${{ github.repository_owner == 'pytorch' }} - needs: libtorch-cuda11_3-static-without-deps-debug-test - with: - PYTORCH_ROOT: ${{ github.workspace }}/pytorch - BUILDER_ROOT: ${{ github.workspace }}/builder - PACKAGE_TYPE: libtorch - # TODO: This is a legacy variable that we eventually want to get rid of in - # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu113 - GPU_ARCH_VERSION: 11.3 - GPU_ARCH_TYPE: cuda - LIBTORCH_CONFIG: debug - LIBTORCH_VARIANT: static-without-deps - # This is a dummy value for libtorch to work correctly with our batch scripts - # without this value pip does not get installed for some reason - DESIRED_PYTHON: "3.7" - build_name: libtorch-cuda11_3-static-without-deps-debug - secrets: - github-token: ${{ secrets.GITHUB_TOKEN }} - aws-access-key-id: ${{ secrets.AWS_PYTORCH_UPLOADER_ACCESS_KEY_ID }} - aws-pytorch-uploader-secret-access-key: ${{ secrets.AWS_PYTORCH_UPLOADER_SECRET_ACCESS_KEY }} - conda-pytorchbot-token: ${{ secrets.CONDA_PYTORCHBOT_TOKEN }} - uses: ./.github/workflows/_binary-upload.yml libtorch-cuda11_6-shared-with-deps-debug-build: if: ${{ github.repository_owner == 'pytorch' }} runs-on: 
windows.4xlarge diff --git a/.github/workflows/generated-windows-binary-libtorch-release-nightly.yml b/.github/workflows/generated-windows-binary-libtorch-release-nightly.yml index 88458e0b5df8..f2f1d3badfe3 100644 --- a/.github/workflows/generated-windows-binary-libtorch-release-nightly.yml +++ b/.github/workflows/generated-windows-binary-libtorch-release-nightly.yml @@ -976,962 +976,6 @@ jobs: aws-pytorch-uploader-secret-access-key: ${{ secrets.AWS_PYTORCH_UPLOADER_SECRET_ACCESS_KEY }} conda-pytorchbot-token: ${{ secrets.CONDA_PYTORCHBOT_TOKEN }} uses: ./.github/workflows/_binary-upload.yml - libtorch-cuda11_3-shared-with-deps-release-build: - if: ${{ github.repository_owner == 'pytorch' }} - runs-on: windows.4xlarge - timeout-minutes: 240 - env: - PYTORCH_ROOT: ${{ github.workspace }}/pytorch - BUILDER_ROOT: ${{ github.workspace }}/builder - PACKAGE_TYPE: libtorch - # TODO: This is a legacy variable that we eventually want to get rid of in - # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu113 - GPU_ARCH_VERSION: 11.3 - GPU_ARCH_TYPE: cuda - SKIP_ALL_TESTS: 1 - LIBTORCH_CONFIG: release - LIBTORCH_VARIANT: shared-with-deps - # This is a dummy value for libtorch to work correctly with our batch scripts - # without this value pip does not get installed for some reason - DESIRED_PYTHON: "3.7" - steps: - - name: Display EC2 information - shell: bash - run: | - set -euo pipefail - function get_ec2_metadata() { - # Pulled from instance metadata endpoint for EC2 - # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html - category=$1 - curl -fsSL "http://169.254.169.254/latest/meta-data/${category}" - } - echo "ami-id: $(get_ec2_metadata ami-id)" - echo "instance-id: $(get_ec2_metadata instance-id)" - echo "instance-type: $(get_ec2_metadata instance-type)" - echo "system info $(uname -a)" - - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" - uses: seemethere/add-github-ssh-key@v1 - with: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - # Needed for binary builds, see: https://github.com/pytorch/pytorch/issues/73339#issuecomment-1058981560 - - name: Enable long paths on Windows - shell: powershell - run: | - Set-ItemProperty -Path "HKLM:\\SYSTEM\CurrentControlSet\Control\FileSystem" -Name "LongPathsEnabled" -Value 1 - # Since it's just a defensive command, the workflow should continue even the command fails - - name: Disables Windows Defender scheduled and real-time scanning for files in pytorch directory. - shell: powershell - run: | - Add-MpPreference -ExclusionPath $(Get-Location).tostring() -ErrorAction Ignore - # NOTE: These environment variables are put here so that they can be applied on every job equally - # They are also here because setting them at a workflow level doesn't give us access to the - # runner.temp variable, which we need. 
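Two defensive PowerShell steps run before the checkouts on every Windows job: one sets the LongPathsEnabled registry value so deep paths in the source tree do not hit the legacy 260-character Win32 limit (the issue linked in the comments, pytorch/pytorch#73339), and one excludes the workspace from Windows Defender scanning, where -ErrorAction Ignore keeps a failure of this best-effort step from failing the job. In isolation:

      - name: Enable long paths on Windows
        shell: powershell
        run: |
          Set-ItemProperty -Path "HKLM:\\SYSTEM\CurrentControlSet\Control\FileSystem" -Name "LongPathsEnabled" -Value 1
      - name: Disable Windows Defender scanning of the workspace
        shell: powershell
        run: |
          # Best-effort: ignore errors so the workflow continues either way.
          Add-MpPreference -ExclusionPath $(Get-Location).tostring() -ErrorAction Ignore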
- - name: Populate binary env - shell: bash - run: | - echo "BINARY_ENV_FILE=${RUNNER_TEMP}/env" >> "${GITHUB_ENV}" - echo "PYTORCH_FINAL_PACKAGE_DIR=${RUNNER_TEMP}/artifacts" >> "${GITHUB_ENV}" - echo "WIN_PACKAGE_WORK_DIR=${RUNNER_TEMP}" - - name: Checkout PyTorch - uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 - with: - ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} - submodules: recursive - path: pytorch - - name: Clean PyTorch checkout - run: | - # Remove any artifacts from the previous checkouts - git clean -fxd - working-directory: pytorch - - name: Checkout pytorch/builder - uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 - with: - ref: main - submodules: recursive - repository: pytorch/builder - path: builder - - name: Clean pytorch/builder checkout - run: | - # Remove any artifacts from the previous checkouts - git clean -fxd - working-directory: builder - - name: Populate binary env - shell: bash - run: | - "${PYTORCH_ROOT}/.circleci/scripts/binary_populate_env.sh" - - name: Build PyTorch binary - shell: bash - run: | - "${PYTORCH_ROOT}/.circleci/scripts/binary_windows_build.sh" - - uses: actions/upload-artifact@v3 - if: always() - with: - name: libtorch-cuda11_3-shared-with-deps-release - retention-days: 14 - if-no-files-found: error - path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}" - - name: Wait until all sessions have drained - shell: powershell - working-directory: pytorch - if: always() - timeout-minutes: 120 - run: | - .github\scripts\wait_for_ssh_to_drain.ps1 - - name: Kill active ssh sessions if still around (Useful if workflow was cancelled) - shell: powershell - working-directory: pytorch - if: always() - run: | - .github\scripts\kill_active_ssh_sessions.ps1 - libtorch-cuda11_3-shared-with-deps-release-test: # Testing - if: ${{ github.repository_owner == 'pytorch' }} - needs: libtorch-cuda11_3-shared-with-deps-release-build - runs-on: windows.8xlarge.nvidia.gpu - timeout-minutes: 240 - env: - PYTORCH_ROOT: ${{ github.workspace }}/pytorch - BUILDER_ROOT: ${{ github.workspace }}/builder - PACKAGE_TYPE: libtorch - # TODO: This is a legacy variable that we eventually want to get rid of in - # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu113 - GPU_ARCH_VERSION: 11.3 - GPU_ARCH_TYPE: cuda - SKIP_ALL_TESTS: 1 - LIBTORCH_CONFIG: release - LIBTORCH_VARIANT: shared-with-deps - # This is a dummy value for libtorch to work correctly with our batch scripts - # without this value pip does not get installed for some reason - DESIRED_PYTHON: "3.7" - steps: - - name: Display EC2 information - shell: bash - run: | - set -euo pipefail - function get_ec2_metadata() { - # Pulled from instance metadata endpoint for EC2 - # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html - category=$1 - curl -fsSL "http://169.254.169.254/latest/meta-data/${category}" - } - echo "ami-id: $(get_ec2_metadata ami-id)" - echo "instance-id: $(get_ec2_metadata instance-id)" - echo "instance-type: $(get_ec2_metadata instance-type)" - echo "system info $(uname -a)" - - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" - uses: seemethere/add-github-ssh-key@v1 - with: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - # Needed for binary builds, see: https://github.com/pytorch/pytorch/issues/73339#issuecomment-1058981560 - - name: Enable long paths on Windows - shell: powershell - run: | - Set-ItemProperty -Path "HKLM:\\SYSTEM\CurrentControlSet\Control\FileSystem" -Name 
"LongPathsEnabled" -Value 1 - # Since it's just a defensive command, the workflow should continue even the command fails - - name: Disables Windows Defender scheduled and real-time scanning for files in pytorch directory. - shell: powershell - run: | - Add-MpPreference -ExclusionPath $(Get-Location).tostring() -ErrorAction Ignore - # NOTE: These environment variables are put here so that they can be applied on every job equally - # They are also here because setting them at a workflow level doesn't give us access to the - # runner.temp variable, which we need. - - name: Populate binary env - shell: bash - run: | - echo "BINARY_ENV_FILE=${RUNNER_TEMP}/env" >> "${GITHUB_ENV}" - echo "PYTORCH_FINAL_PACKAGE_DIR=${RUNNER_TEMP}/artifacts" >> "${GITHUB_ENV}" - echo "WIN_PACKAGE_WORK_DIR=${RUNNER_TEMP}" - - uses: actions/download-artifact@v3 - name: Download Build Artifacts - with: - name: libtorch-cuda11_3-shared-with-deps-release - path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}" - - name: Checkout PyTorch - uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 - with: - ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} - submodules: recursive - path: pytorch - - name: Clean PyTorch checkout - run: | - # Remove any artifacts from the previous checkouts - git clean -fxd - working-directory: pytorch - - name: Checkout pytorch/builder - uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 - with: - ref: main - submodules: recursive - repository: pytorch/builder - path: builder - - name: Clean pytorch/builder checkout - run: | - # Remove any artifacts from the previous checkouts - git clean -fxd - working-directory: builder - - name: Populate binary env - shell: bash - run: | - "${PYTORCH_ROOT}/.circleci/scripts/binary_populate_env.sh" - - name: Test PyTorch binary - shell: bash - run: | - "${PYTORCH_ROOT}/.circleci/scripts/binary_windows_test.sh" - - name: Wait until all sessions have drained - shell: powershell - working-directory: pytorch - if: always() - timeout-minutes: 120 - run: | - .github\scripts\wait_for_ssh_to_drain.ps1 - - name: Kill active ssh sessions if still around (Useful if workflow was cancelled) - shell: powershell - working-directory: pytorch - if: always() - run: | - .github\scripts\kill_active_ssh_sessions.ps1 - libtorch-cuda11_3-shared-with-deps-release-upload: # Uploading - if: ${{ github.repository_owner == 'pytorch' }} - needs: libtorch-cuda11_3-shared-with-deps-release-test - with: - PYTORCH_ROOT: ${{ github.workspace }}/pytorch - BUILDER_ROOT: ${{ github.workspace }}/builder - PACKAGE_TYPE: libtorch - # TODO: This is a legacy variable that we eventually want to get rid of in - # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu113 - GPU_ARCH_VERSION: 11.3 - GPU_ARCH_TYPE: cuda - LIBTORCH_CONFIG: release - LIBTORCH_VARIANT: shared-with-deps - # This is a dummy value for libtorch to work correctly with our batch scripts - # without this value pip does not get installed for some reason - DESIRED_PYTHON: "3.7" - build_name: libtorch-cuda11_3-shared-with-deps-release - secrets: - github-token: ${{ secrets.GITHUB_TOKEN }} - aws-access-key-id: ${{ secrets.AWS_PYTORCH_UPLOADER_ACCESS_KEY_ID }} - aws-pytorch-uploader-secret-access-key: ${{ secrets.AWS_PYTORCH_UPLOADER_SECRET_ACCESS_KEY }} - conda-pytorchbot-token: ${{ secrets.CONDA_PYTORCHBOT_TOKEN }} - uses: ./.github/workflows/_binary-upload.yml - libtorch-cuda11_3-shared-without-deps-release-build: - if: ${{ github.repository_owner == 'pytorch' }} - runs-on: 
windows.4xlarge - timeout-minutes: 240 - env: - PYTORCH_ROOT: ${{ github.workspace }}/pytorch - BUILDER_ROOT: ${{ github.workspace }}/builder - PACKAGE_TYPE: libtorch - # TODO: This is a legacy variable that we eventually want to get rid of in - # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu113 - GPU_ARCH_VERSION: 11.3 - GPU_ARCH_TYPE: cuda - SKIP_ALL_TESTS: 1 - LIBTORCH_CONFIG: release - LIBTORCH_VARIANT: shared-without-deps - # This is a dummy value for libtorch to work correctly with our batch scripts - # without this value pip does not get installed for some reason - DESIRED_PYTHON: "3.7" - steps: - - name: Display EC2 information - shell: bash - run: | - set -euo pipefail - function get_ec2_metadata() { - # Pulled from instance metadata endpoint for EC2 - # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html - category=$1 - curl -fsSL "http://169.254.169.254/latest/meta-data/${category}" - } - echo "ami-id: $(get_ec2_metadata ami-id)" - echo "instance-id: $(get_ec2_metadata instance-id)" - echo "instance-type: $(get_ec2_metadata instance-type)" - echo "system info $(uname -a)" - - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" - uses: seemethere/add-github-ssh-key@v1 - with: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - # Needed for binary builds, see: https://github.com/pytorch/pytorch/issues/73339#issuecomment-1058981560 - - name: Enable long paths on Windows - shell: powershell - run: | - Set-ItemProperty -Path "HKLM:\\SYSTEM\CurrentControlSet\Control\FileSystem" -Name "LongPathsEnabled" -Value 1 - # Since it's just a defensive command, the workflow should continue even the command fails - - name: Disables Windows Defender scheduled and real-time scanning for files in pytorch directory. - shell: powershell - run: | - Add-MpPreference -ExclusionPath $(Get-Location).tostring() -ErrorAction Ignore - # NOTE: These environment variables are put here so that they can be applied on every job equally - # They are also here because setting them at a workflow level doesn't give us access to the - # runner.temp variable, which we need. 
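Every build and test job closes with the same pair of cleanup steps, both guarded by if: always() so they run even when an earlier step failed or the run was cancelled: the first waits up to two hours for any interactive SSH sessions opened through the add-github-ssh-key step to drain, and the second kills whatever sessions remain. The closing pattern:

      - name: Wait until all sessions have drained
        shell: powershell
        working-directory: pytorch
        if: always()                # run on failure and cancellation too
        timeout-minutes: 120
        run: |
          .github\scripts\wait_for_ssh_to_drain.ps1
      - name: Kill active ssh sessions if still around
        shell: powershell
        working-directory: pytorch
        if: always()
        run: |
          .github\scripts\kill_active_ssh_sessions.ps1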
- - name: Populate binary env - shell: bash - run: | - echo "BINARY_ENV_FILE=${RUNNER_TEMP}/env" >> "${GITHUB_ENV}" - echo "PYTORCH_FINAL_PACKAGE_DIR=${RUNNER_TEMP}/artifacts" >> "${GITHUB_ENV}" - echo "WIN_PACKAGE_WORK_DIR=${RUNNER_TEMP}" - - name: Checkout PyTorch - uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 - with: - ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} - submodules: recursive - path: pytorch - - name: Clean PyTorch checkout - run: | - # Remove any artifacts from the previous checkouts - git clean -fxd - working-directory: pytorch - - name: Checkout pytorch/builder - uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 - with: - ref: main - submodules: recursive - repository: pytorch/builder - path: builder - - name: Clean pytorch/builder checkout - run: | - # Remove any artifacts from the previous checkouts - git clean -fxd - working-directory: builder - - name: Populate binary env - shell: bash - run: | - "${PYTORCH_ROOT}/.circleci/scripts/binary_populate_env.sh" - - name: Build PyTorch binary - shell: bash - run: | - "${PYTORCH_ROOT}/.circleci/scripts/binary_windows_build.sh" - - uses: actions/upload-artifact@v3 - if: always() - with: - name: libtorch-cuda11_3-shared-without-deps-release - retention-days: 14 - if-no-files-found: error - path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}" - - name: Wait until all sessions have drained - shell: powershell - working-directory: pytorch - if: always() - timeout-minutes: 120 - run: | - .github\scripts\wait_for_ssh_to_drain.ps1 - - name: Kill active ssh sessions if still around (Useful if workflow was cancelled) - shell: powershell - working-directory: pytorch - if: always() - run: | - .github\scripts\kill_active_ssh_sessions.ps1 - libtorch-cuda11_3-shared-without-deps-release-test: # Testing - if: ${{ github.repository_owner == 'pytorch' }} - needs: libtorch-cuda11_3-shared-without-deps-release-build - runs-on: windows.8xlarge.nvidia.gpu - timeout-minutes: 240 - env: - PYTORCH_ROOT: ${{ github.workspace }}/pytorch - BUILDER_ROOT: ${{ github.workspace }}/builder - PACKAGE_TYPE: libtorch - # TODO: This is a legacy variable that we eventually want to get rid of in - # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu113 - GPU_ARCH_VERSION: 11.3 - GPU_ARCH_TYPE: cuda - SKIP_ALL_TESTS: 1 - LIBTORCH_CONFIG: release - LIBTORCH_VARIANT: shared-without-deps - # This is a dummy value for libtorch to work correctly with our batch scripts - # without this value pip does not get installed for some reason - DESIRED_PYTHON: "3.7" - steps: - - name: Display EC2 information - shell: bash - run: | - set -euo pipefail - function get_ec2_metadata() { - # Pulled from instance metadata endpoint for EC2 - # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html - category=$1 - curl -fsSL "http://169.254.169.254/latest/meta-data/${category}" - } - echo "ami-id: $(get_ec2_metadata ami-id)" - echo "instance-id: $(get_ec2_metadata instance-id)" - echo "instance-type: $(get_ec2_metadata instance-type)" - echo "system info $(uname -a)" - - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" - uses: seemethere/add-github-ssh-key@v1 - with: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - # Needed for binary builds, see: https://github.com/pytorch/pytorch/issues/73339#issuecomment-1058981560 - - name: Enable long paths on Windows - shell: powershell - run: | - Set-ItemProperty -Path "HKLM:\\SYSTEM\CurrentControlSet\Control\FileSystem" 
-Name "LongPathsEnabled" -Value 1 - # Since it's just a defensive command, the workflow should continue even the command fails - - name: Disables Windows Defender scheduled and real-time scanning for files in pytorch directory. - shell: powershell - run: | - Add-MpPreference -ExclusionPath $(Get-Location).tostring() -ErrorAction Ignore - # NOTE: These environment variables are put here so that they can be applied on every job equally - # They are also here because setting them at a workflow level doesn't give us access to the - # runner.temp variable, which we need. - - name: Populate binary env - shell: bash - run: | - echo "BINARY_ENV_FILE=${RUNNER_TEMP}/env" >> "${GITHUB_ENV}" - echo "PYTORCH_FINAL_PACKAGE_DIR=${RUNNER_TEMP}/artifacts" >> "${GITHUB_ENV}" - echo "WIN_PACKAGE_WORK_DIR=${RUNNER_TEMP}" - - uses: actions/download-artifact@v3 - name: Download Build Artifacts - with: - name: libtorch-cuda11_3-shared-without-deps-release - path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}" - - name: Checkout PyTorch - uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 - with: - ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} - submodules: recursive - path: pytorch - - name: Clean PyTorch checkout - run: | - # Remove any artifacts from the previous checkouts - git clean -fxd - working-directory: pytorch - - name: Checkout pytorch/builder - uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 - with: - ref: main - submodules: recursive - repository: pytorch/builder - path: builder - - name: Clean pytorch/builder checkout - run: | - # Remove any artifacts from the previous checkouts - git clean -fxd - working-directory: builder - - name: Populate binary env - shell: bash - run: | - "${PYTORCH_ROOT}/.circleci/scripts/binary_populate_env.sh" - - name: Test PyTorch binary - shell: bash - run: | - "${PYTORCH_ROOT}/.circleci/scripts/binary_windows_test.sh" - - name: Wait until all sessions have drained - shell: powershell - working-directory: pytorch - if: always() - timeout-minutes: 120 - run: | - .github\scripts\wait_for_ssh_to_drain.ps1 - - name: Kill active ssh sessions if still around (Useful if workflow was cancelled) - shell: powershell - working-directory: pytorch - if: always() - run: | - .github\scripts\kill_active_ssh_sessions.ps1 - libtorch-cuda11_3-shared-without-deps-release-upload: # Uploading - if: ${{ github.repository_owner == 'pytorch' }} - needs: libtorch-cuda11_3-shared-without-deps-release-test - with: - PYTORCH_ROOT: ${{ github.workspace }}/pytorch - BUILDER_ROOT: ${{ github.workspace }}/builder - PACKAGE_TYPE: libtorch - # TODO: This is a legacy variable that we eventually want to get rid of in - # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu113 - GPU_ARCH_VERSION: 11.3 - GPU_ARCH_TYPE: cuda - LIBTORCH_CONFIG: release - LIBTORCH_VARIANT: shared-without-deps - # This is a dummy value for libtorch to work correctly with our batch scripts - # without this value pip does not get installed for some reason - DESIRED_PYTHON: "3.7" - build_name: libtorch-cuda11_3-shared-without-deps-release - secrets: - github-token: ${{ secrets.GITHUB_TOKEN }} - aws-access-key-id: ${{ secrets.AWS_PYTORCH_UPLOADER_ACCESS_KEY_ID }} - aws-pytorch-uploader-secret-access-key: ${{ secrets.AWS_PYTORCH_UPLOADER_SECRET_ACCESS_KEY }} - conda-pytorchbot-token: ${{ secrets.CONDA_PYTORCHBOT_TOKEN }} - uses: ./.github/workflows/_binary-upload.yml - libtorch-cuda11_3-static-with-deps-release-build: - if: ${{ github.repository_owner == 
'pytorch' }} - runs-on: windows.4xlarge - timeout-minutes: 240 - env: - PYTORCH_ROOT: ${{ github.workspace }}/pytorch - BUILDER_ROOT: ${{ github.workspace }}/builder - PACKAGE_TYPE: libtorch - # TODO: This is a legacy variable that we eventually want to get rid of in - # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu113 - GPU_ARCH_VERSION: 11.3 - GPU_ARCH_TYPE: cuda - SKIP_ALL_TESTS: 1 - LIBTORCH_CONFIG: release - LIBTORCH_VARIANT: static-with-deps - # This is a dummy value for libtorch to work correctly with our batch scripts - # without this value pip does not get installed for some reason - DESIRED_PYTHON: "3.7" - steps: - - name: Display EC2 information - shell: bash - run: | - set -euo pipefail - function get_ec2_metadata() { - # Pulled from instance metadata endpoint for EC2 - # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html - category=$1 - curl -fsSL "http://169.254.169.254/latest/meta-data/${category}" - } - echo "ami-id: $(get_ec2_metadata ami-id)" - echo "instance-id: $(get_ec2_metadata instance-id)" - echo "instance-type: $(get_ec2_metadata instance-type)" - echo "system info $(uname -a)" - - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" - uses: seemethere/add-github-ssh-key@v1 - with: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - # Needed for binary builds, see: https://github.com/pytorch/pytorch/issues/73339#issuecomment-1058981560 - - name: Enable long paths on Windows - shell: powershell - run: | - Set-ItemProperty -Path "HKLM:\\SYSTEM\CurrentControlSet\Control\FileSystem" -Name "LongPathsEnabled" -Value 1 - # Since it's just a defensive command, the workflow should continue even the command fails - - name: Disables Windows Defender scheduled and real-time scanning for files in pytorch directory. - shell: powershell - run: | - Add-MpPreference -ExclusionPath $(Get-Location).tostring() -ErrorAction Ignore - # NOTE: These environment variables are put here so that they can be applied on every job equally - # They are also here because setting them at a workflow level doesn't give us access to the - # runner.temp variable, which we need. 
- - name: Populate binary env - shell: bash - run: | - echo "BINARY_ENV_FILE=${RUNNER_TEMP}/env" >> "${GITHUB_ENV}" - echo "PYTORCH_FINAL_PACKAGE_DIR=${RUNNER_TEMP}/artifacts" >> "${GITHUB_ENV}" - echo "WIN_PACKAGE_WORK_DIR=${RUNNER_TEMP}" - - name: Checkout PyTorch - uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 - with: - ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} - submodules: recursive - path: pytorch - - name: Clean PyTorch checkout - run: | - # Remove any artifacts from the previous checkouts - git clean -fxd - working-directory: pytorch - - name: Checkout pytorch/builder - uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 - with: - ref: main - submodules: recursive - repository: pytorch/builder - path: builder - - name: Clean pytorch/builder checkout - run: | - # Remove any artifacts from the previous checkouts - git clean -fxd - working-directory: builder - - name: Populate binary env - shell: bash - run: | - "${PYTORCH_ROOT}/.circleci/scripts/binary_populate_env.sh" - - name: Build PyTorch binary - shell: bash - run: | - "${PYTORCH_ROOT}/.circleci/scripts/binary_windows_build.sh" - - uses: actions/upload-artifact@v3 - if: always() - with: - name: libtorch-cuda11_3-static-with-deps-release - retention-days: 14 - if-no-files-found: error - path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}" - - name: Wait until all sessions have drained - shell: powershell - working-directory: pytorch - if: always() - timeout-minutes: 120 - run: | - .github\scripts\wait_for_ssh_to_drain.ps1 - - name: Kill active ssh sessions if still around (Useful if workflow was cancelled) - shell: powershell - working-directory: pytorch - if: always() - run: | - .github\scripts\kill_active_ssh_sessions.ps1 - libtorch-cuda11_3-static-with-deps-release-test: # Testing - if: ${{ github.repository_owner == 'pytorch' }} - needs: libtorch-cuda11_3-static-with-deps-release-build - runs-on: windows.8xlarge.nvidia.gpu - timeout-minutes: 240 - env: - PYTORCH_ROOT: ${{ github.workspace }}/pytorch - BUILDER_ROOT: ${{ github.workspace }}/builder - PACKAGE_TYPE: libtorch - # TODO: This is a legacy variable that we eventually want to get rid of in - # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu113 - GPU_ARCH_VERSION: 11.3 - GPU_ARCH_TYPE: cuda - SKIP_ALL_TESTS: 1 - LIBTORCH_CONFIG: release - LIBTORCH_VARIANT: static-with-deps - # This is a dummy value for libtorch to work correctly with our batch scripts - # without this value pip does not get installed for some reason - DESIRED_PYTHON: "3.7" - steps: - - name: Display EC2 information - shell: bash - run: | - set -euo pipefail - function get_ec2_metadata() { - # Pulled from instance metadata endpoint for EC2 - # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html - category=$1 - curl -fsSL "http://169.254.169.254/latest/meta-data/${category}" - } - echo "ami-id: $(get_ec2_metadata ami-id)" - echo "instance-id: $(get_ec2_metadata instance-id)" - echo "instance-type: $(get_ec2_metadata instance-type)" - echo "system info $(uname -a)" - - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" - uses: seemethere/add-github-ssh-key@v1 - with: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - # Needed for binary builds, see: https://github.com/pytorch/pytorch/issues/73339#issuecomment-1058981560 - - name: Enable long paths on Windows - shell: powershell - run: | - Set-ItemProperty -Path "HKLM:\\SYSTEM\CurrentControlSet\Control\FileSystem" -Name 
"LongPathsEnabled" -Value 1 - # Since it's just a defensive command, the workflow should continue even the command fails - - name: Disables Windows Defender scheduled and real-time scanning for files in pytorch directory. - shell: powershell - run: | - Add-MpPreference -ExclusionPath $(Get-Location).tostring() -ErrorAction Ignore - # NOTE: These environment variables are put here so that they can be applied on every job equally - # They are also here because setting them at a workflow level doesn't give us access to the - # runner.temp variable, which we need. - - name: Populate binary env - shell: bash - run: | - echo "BINARY_ENV_FILE=${RUNNER_TEMP}/env" >> "${GITHUB_ENV}" - echo "PYTORCH_FINAL_PACKAGE_DIR=${RUNNER_TEMP}/artifacts" >> "${GITHUB_ENV}" - echo "WIN_PACKAGE_WORK_DIR=${RUNNER_TEMP}" - - uses: actions/download-artifact@v3 - name: Download Build Artifacts - with: - name: libtorch-cuda11_3-static-with-deps-release - path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}" - - name: Checkout PyTorch - uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 - with: - ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} - submodules: recursive - path: pytorch - - name: Clean PyTorch checkout - run: | - # Remove any artifacts from the previous checkouts - git clean -fxd - working-directory: pytorch - - name: Checkout pytorch/builder - uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 - with: - ref: main - submodules: recursive - repository: pytorch/builder - path: builder - - name: Clean pytorch/builder checkout - run: | - # Remove any artifacts from the previous checkouts - git clean -fxd - working-directory: builder - - name: Populate binary env - shell: bash - run: | - "${PYTORCH_ROOT}/.circleci/scripts/binary_populate_env.sh" - - name: Test PyTorch binary - shell: bash - run: | - "${PYTORCH_ROOT}/.circleci/scripts/binary_windows_test.sh" - - name: Wait until all sessions have drained - shell: powershell - working-directory: pytorch - if: always() - timeout-minutes: 120 - run: | - .github\scripts\wait_for_ssh_to_drain.ps1 - - name: Kill active ssh sessions if still around (Useful if workflow was cancelled) - shell: powershell - working-directory: pytorch - if: always() - run: | - .github\scripts\kill_active_ssh_sessions.ps1 - libtorch-cuda11_3-static-with-deps-release-upload: # Uploading - if: ${{ github.repository_owner == 'pytorch' }} - needs: libtorch-cuda11_3-static-with-deps-release-test - with: - PYTORCH_ROOT: ${{ github.workspace }}/pytorch - BUILDER_ROOT: ${{ github.workspace }}/builder - PACKAGE_TYPE: libtorch - # TODO: This is a legacy variable that we eventually want to get rid of in - # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu113 - GPU_ARCH_VERSION: 11.3 - GPU_ARCH_TYPE: cuda - LIBTORCH_CONFIG: release - LIBTORCH_VARIANT: static-with-deps - # This is a dummy value for libtorch to work correctly with our batch scripts - # without this value pip does not get installed for some reason - DESIRED_PYTHON: "3.7" - build_name: libtorch-cuda11_3-static-with-deps-release - secrets: - github-token: ${{ secrets.GITHUB_TOKEN }} - aws-access-key-id: ${{ secrets.AWS_PYTORCH_UPLOADER_ACCESS_KEY_ID }} - aws-pytorch-uploader-secret-access-key: ${{ secrets.AWS_PYTORCH_UPLOADER_SECRET_ACCESS_KEY }} - conda-pytorchbot-token: ${{ secrets.CONDA_PYTORCHBOT_TOKEN }} - uses: ./.github/workflows/_binary-upload.yml - libtorch-cuda11_3-static-without-deps-release-build: - if: ${{ github.repository_owner == 'pytorch' }} - runs-on: 
windows.4xlarge - timeout-minutes: 240 - env: - PYTORCH_ROOT: ${{ github.workspace }}/pytorch - BUILDER_ROOT: ${{ github.workspace }}/builder - PACKAGE_TYPE: libtorch - # TODO: This is a legacy variable that we eventually want to get rid of in - # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu113 - GPU_ARCH_VERSION: 11.3 - GPU_ARCH_TYPE: cuda - SKIP_ALL_TESTS: 1 - LIBTORCH_CONFIG: release - LIBTORCH_VARIANT: static-without-deps - # This is a dummy value for libtorch to work correctly with our batch scripts - # without this value pip does not get installed for some reason - DESIRED_PYTHON: "3.7" - steps: - - name: Display EC2 information - shell: bash - run: | - set -euo pipefail - function get_ec2_metadata() { - # Pulled from instance metadata endpoint for EC2 - # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html - category=$1 - curl -fsSL "http://169.254.169.254/latest/meta-data/${category}" - } - echo "ami-id: $(get_ec2_metadata ami-id)" - echo "instance-id: $(get_ec2_metadata instance-id)" - echo "instance-type: $(get_ec2_metadata instance-type)" - echo "system info $(uname -a)" - - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" - uses: seemethere/add-github-ssh-key@v1 - with: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - # Needed for binary builds, see: https://github.com/pytorch/pytorch/issues/73339#issuecomment-1058981560 - - name: Enable long paths on Windows - shell: powershell - run: | - Set-ItemProperty -Path "HKLM:\\SYSTEM\CurrentControlSet\Control\FileSystem" -Name "LongPathsEnabled" -Value 1 - # Since it's just a defensive command, the workflow should continue even the command fails - - name: Disables Windows Defender scheduled and real-time scanning for files in pytorch directory. - shell: powershell - run: | - Add-MpPreference -ExclusionPath $(Get-Location).tostring() -ErrorAction Ignore - # NOTE: These environment variables are put here so that they can be applied on every job equally - # They are also here because setting them at a workflow level doesn't give us access to the - # runner.temp variable, which we need. 
- - name: Populate binary env - shell: bash - run: | - echo "BINARY_ENV_FILE=${RUNNER_TEMP}/env" >> "${GITHUB_ENV}" - echo "PYTORCH_FINAL_PACKAGE_DIR=${RUNNER_TEMP}/artifacts" >> "${GITHUB_ENV}" - echo "WIN_PACKAGE_WORK_DIR=${RUNNER_TEMP}" - - name: Checkout PyTorch - uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 - with: - ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} - submodules: recursive - path: pytorch - - name: Clean PyTorch checkout - run: | - # Remove any artifacts from the previous checkouts - git clean -fxd - working-directory: pytorch - - name: Checkout pytorch/builder - uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 - with: - ref: main - submodules: recursive - repository: pytorch/builder - path: builder - - name: Clean pytorch/builder checkout - run: | - # Remove any artifacts from the previous checkouts - git clean -fxd - working-directory: builder - - name: Populate binary env - shell: bash - run: | - "${PYTORCH_ROOT}/.circleci/scripts/binary_populate_env.sh" - - name: Build PyTorch binary - shell: bash - run: | - "${PYTORCH_ROOT}/.circleci/scripts/binary_windows_build.sh" - - uses: actions/upload-artifact@v3 - if: always() - with: - name: libtorch-cuda11_3-static-without-deps-release - retention-days: 14 - if-no-files-found: error - path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}" - - name: Wait until all sessions have drained - shell: powershell - working-directory: pytorch - if: always() - timeout-minutes: 120 - run: | - .github\scripts\wait_for_ssh_to_drain.ps1 - - name: Kill active ssh sessions if still around (Useful if workflow was cancelled) - shell: powershell - working-directory: pytorch - if: always() - run: | - .github\scripts\kill_active_ssh_sessions.ps1 - libtorch-cuda11_3-static-without-deps-release-test: # Testing - if: ${{ github.repository_owner == 'pytorch' }} - needs: libtorch-cuda11_3-static-without-deps-release-build - runs-on: windows.8xlarge.nvidia.gpu - timeout-minutes: 240 - env: - PYTORCH_ROOT: ${{ github.workspace }}/pytorch - BUILDER_ROOT: ${{ github.workspace }}/builder - PACKAGE_TYPE: libtorch - # TODO: This is a legacy variable that we eventually want to get rid of in - # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu113 - GPU_ARCH_VERSION: 11.3 - GPU_ARCH_TYPE: cuda - SKIP_ALL_TESTS: 1 - LIBTORCH_CONFIG: release - LIBTORCH_VARIANT: static-without-deps - # This is a dummy value for libtorch to work correctly with our batch scripts - # without this value pip does not get installed for some reason - DESIRED_PYTHON: "3.7" - steps: - - name: Display EC2 information - shell: bash - run: | - set -euo pipefail - function get_ec2_metadata() { - # Pulled from instance metadata endpoint for EC2 - # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html - category=$1 - curl -fsSL "http://169.254.169.254/latest/meta-data/${category}" - } - echo "ami-id: $(get_ec2_metadata ami-id)" - echo "instance-id: $(get_ec2_metadata instance-id)" - echo "instance-type: $(get_ec2_metadata instance-type)" - echo "system info $(uname -a)" - - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" - uses: seemethere/add-github-ssh-key@v1 - with: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - # Needed for binary builds, see: https://github.com/pytorch/pytorch/issues/73339#issuecomment-1058981560 - - name: Enable long paths on Windows - shell: powershell - run: | - Set-ItemProperty -Path "HKLM:\\SYSTEM\CurrentControlSet\Control\FileSystem" 
-Name "LongPathsEnabled" -Value 1 - # Since it's just a defensive command, the workflow should continue even the command fails - - name: Disables Windows Defender scheduled and real-time scanning for files in pytorch directory. - shell: powershell - run: | - Add-MpPreference -ExclusionPath $(Get-Location).tostring() -ErrorAction Ignore - # NOTE: These environment variables are put here so that they can be applied on every job equally - # They are also here because setting them at a workflow level doesn't give us access to the - # runner.temp variable, which we need. - - name: Populate binary env - shell: bash - run: | - echo "BINARY_ENV_FILE=${RUNNER_TEMP}/env" >> "${GITHUB_ENV}" - echo "PYTORCH_FINAL_PACKAGE_DIR=${RUNNER_TEMP}/artifacts" >> "${GITHUB_ENV}" - echo "WIN_PACKAGE_WORK_DIR=${RUNNER_TEMP}" - - uses: actions/download-artifact@v3 - name: Download Build Artifacts - with: - name: libtorch-cuda11_3-static-without-deps-release - path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}" - - name: Checkout PyTorch - uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 - with: - ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} - submodules: recursive - path: pytorch - - name: Clean PyTorch checkout - run: | - # Remove any artifacts from the previous checkouts - git clean -fxd - working-directory: pytorch - - name: Checkout pytorch/builder - uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 - with: - ref: main - submodules: recursive - repository: pytorch/builder - path: builder - - name: Clean pytorch/builder checkout - run: | - # Remove any artifacts from the previous checkouts - git clean -fxd - working-directory: builder - - name: Populate binary env - shell: bash - run: | - "${PYTORCH_ROOT}/.circleci/scripts/binary_populate_env.sh" - - name: Test PyTorch binary - shell: bash - run: | - "${PYTORCH_ROOT}/.circleci/scripts/binary_windows_test.sh" - - name: Wait until all sessions have drained - shell: powershell - working-directory: pytorch - if: always() - timeout-minutes: 120 - run: | - .github\scripts\wait_for_ssh_to_drain.ps1 - - name: Kill active ssh sessions if still around (Useful if workflow was cancelled) - shell: powershell - working-directory: pytorch - if: always() - run: | - .github\scripts\kill_active_ssh_sessions.ps1 - libtorch-cuda11_3-static-without-deps-release-upload: # Uploading - if: ${{ github.repository_owner == 'pytorch' }} - needs: libtorch-cuda11_3-static-without-deps-release-test - with: - PYTORCH_ROOT: ${{ github.workspace }}/pytorch - BUILDER_ROOT: ${{ github.workspace }}/builder - PACKAGE_TYPE: libtorch - # TODO: This is a legacy variable that we eventually want to get rid of in - # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu113 - GPU_ARCH_VERSION: 11.3 - GPU_ARCH_TYPE: cuda - LIBTORCH_CONFIG: release - LIBTORCH_VARIANT: static-without-deps - # This is a dummy value for libtorch to work correctly with our batch scripts - # without this value pip does not get installed for some reason - DESIRED_PYTHON: "3.7" - build_name: libtorch-cuda11_3-static-without-deps-release - secrets: - github-token: ${{ secrets.GITHUB_TOKEN }} - aws-access-key-id: ${{ secrets.AWS_PYTORCH_UPLOADER_ACCESS_KEY_ID }} - aws-pytorch-uploader-secret-access-key: ${{ secrets.AWS_PYTORCH_UPLOADER_SECRET_ACCESS_KEY }} - conda-pytorchbot-token: ${{ secrets.CONDA_PYTORCHBOT_TOKEN }} - uses: ./.github/workflows/_binary-upload.yml libtorch-cuda11_6-shared-with-deps-release-build: if: ${{ github.repository_owner == 'pytorch' 
}} runs-on: windows.4xlarge diff --git a/.github/workflows/generated-windows-binary-wheel-master.yml b/.github/workflows/generated-windows-binary-wheel-master.yml deleted file mode 100644 index 1cff1102c50a..000000000000 --- a/.github/workflows/generated-windows-binary-wheel-master.yml +++ /dev/null @@ -1,236 +0,0 @@ -# @generated DO NOT EDIT MANUALLY - -# Template is at: .github/templates/windows_binary_build_workflow.yml.j2 -# Generation script: .github/scripts/generate_ci_workflows.py -name: windows-binary-wheel - -on: - push: - branches: - - master - tags: - - 'ciflow/trunk/*' - workflow_dispatch: - -env: - # Needed for conda builds - ALPINE_IMAGE: "308535385114.dkr.ecr.us-east-1.amazonaws.com/tool/alpine" - ANACONDA_USER: pytorch - AWS_DEFAULT_REGION: us-east-1 - BUILD_ENVIRONMENT: windows-binary-wheel - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - PR_NUMBER: ${{ github.event.pull_request.number }} - SHA1: ${{ github.event.pull_request.head.sha || github.sha }} - SKIP_ALL_TESTS: 1 -concurrency: - group: windows-binary-wheel-${{ github.event.pull_request.number || github.ref_name }}-${{ github.ref_type == 'branch' && github.sha }}-${{ github.event_name == 'workflow_dispatch' }} - cancel-in-progress: true - -jobs: - wheel-py3_7-cuda11_3-build: - if: ${{ github.repository_owner == 'pytorch' }} - runs-on: windows.4xlarge - timeout-minutes: 240 - env: - PYTORCH_ROOT: ${{ github.workspace }}/pytorch - BUILDER_ROOT: ${{ github.workspace }}/builder - PACKAGE_TYPE: wheel - # TODO: This is a legacy variable that we eventually want to get rid of in - # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu113 - GPU_ARCH_VERSION: 11.3 - GPU_ARCH_TYPE: cuda - SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: "3.7" - steps: - - name: Display EC2 information - shell: bash - run: | - set -euo pipefail - function get_ec2_metadata() { - # Pulled from instance metadata endpoint for EC2 - # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html - category=$1 - curl -fsSL "http://169.254.169.254/latest/meta-data/${category}" - } - echo "ami-id: $(get_ec2_metadata ami-id)" - echo "instance-id: $(get_ec2_metadata instance-id)" - echo "instance-type: $(get_ec2_metadata instance-type)" - echo "system info $(uname -a)" - - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" - uses: seemethere/add-github-ssh-key@v1 - with: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - # Needed for binary builds, see: https://github.com/pytorch/pytorch/issues/73339#issuecomment-1058981560 - - name: Enable long paths on Windows - shell: powershell - run: | - Set-ItemProperty -Path "HKLM:\\SYSTEM\CurrentControlSet\Control\FileSystem" -Name "LongPathsEnabled" -Value 1 - # Since it's just a defensive command, the workflow should continue even the command fails - - name: Disables Windows Defender scheduled and real-time scanning for files in pytorch directory. - shell: powershell - run: | - Add-MpPreference -ExclusionPath $(Get-Location).tostring() -ErrorAction Ignore - # NOTE: These environment variables are put here so that they can be applied on every job equally - # They are also here because setting them at a workflow level doesn't give us access to the - # runner.temp variable, which we need. 
- - name: Populate binary env - shell: bash - run: | - echo "BINARY_ENV_FILE=${RUNNER_TEMP}/env" >> "${GITHUB_ENV}" - echo "PYTORCH_FINAL_PACKAGE_DIR=${RUNNER_TEMP}/artifacts" >> "${GITHUB_ENV}" - echo "WIN_PACKAGE_WORK_DIR=${RUNNER_TEMP}" - - name: Checkout PyTorch - uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 - with: - ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} - submodules: recursive - path: pytorch - - name: Clean PyTorch checkout - run: | - # Remove any artifacts from the previous checkouts - git clean -fxd - working-directory: pytorch - - name: Checkout pytorch/builder - uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 - with: - ref: main - submodules: recursive - repository: pytorch/builder - path: builder - - name: Clean pytorch/builder checkout - run: | - # Remove any artifacts from the previous checkouts - git clean -fxd - working-directory: builder - - name: Populate binary env - shell: bash - run: | - "${PYTORCH_ROOT}/.circleci/scripts/binary_populate_env.sh" - - name: Build PyTorch binary - shell: bash - run: | - "${PYTORCH_ROOT}/.circleci/scripts/binary_windows_build.sh" - - uses: actions/upload-artifact@v3 - if: always() - with: - name: wheel-py3_7-cuda11_3 - retention-days: 14 - if-no-files-found: error - path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}" - - name: Wait until all sessions have drained - shell: powershell - working-directory: pytorch - if: always() - timeout-minutes: 120 - run: | - .github\scripts\wait_for_ssh_to_drain.ps1 - - name: Kill active ssh sessions if still around (Useful if workflow was cancelled) - shell: powershell - working-directory: pytorch - if: always() - run: | - .github\scripts\kill_active_ssh_sessions.ps1 - wheel-py3_7-cuda11_3-test: # Testing - if: ${{ github.repository_owner == 'pytorch' }} - needs: wheel-py3_7-cuda11_3-build - runs-on: windows.8xlarge.nvidia.gpu - timeout-minutes: 240 - env: - PYTORCH_ROOT: ${{ github.workspace }}/pytorch - BUILDER_ROOT: ${{ github.workspace }}/builder - PACKAGE_TYPE: wheel - # TODO: This is a legacy variable that we eventually want to get rid of in - # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu113 - GPU_ARCH_VERSION: 11.3 - GPU_ARCH_TYPE: cuda - SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: "3.7" - steps: - - name: Display EC2 information - shell: bash - run: | - set -euo pipefail - function get_ec2_metadata() { - # Pulled from instance metadata endpoint for EC2 - # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html - category=$1 - curl -fsSL "http://169.254.169.254/latest/meta-data/${category}" - } - echo "ami-id: $(get_ec2_metadata ami-id)" - echo "instance-id: $(get_ec2_metadata instance-id)" - echo "instance-type: $(get_ec2_metadata instance-type)" - echo "system info $(uname -a)" - - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" - uses: seemethere/add-github-ssh-key@v1 - with: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - # Needed for binary builds, see: https://github.com/pytorch/pytorch/issues/73339#issuecomment-1058981560 - - name: Enable long paths on Windows - shell: powershell - run: | - Set-ItemProperty -Path "HKLM:\\SYSTEM\CurrentControlSet\Control\FileSystem" -Name "LongPathsEnabled" -Value 1 - # Since it's just a defensive command, the workflow should continue even the command fails - - name: Disables Windows Defender scheduled and real-time scanning for files in pytorch directory. 
- shell: powershell - run: | - Add-MpPreference -ExclusionPath $(Get-Location).tostring() -ErrorAction Ignore - # NOTE: These environment variables are put here so that they can be applied on every job equally - # They are also here because setting them at a workflow level doesn't give us access to the - # runner.temp variable, which we need. - - name: Populate binary env - shell: bash - run: | - echo "BINARY_ENV_FILE=${RUNNER_TEMP}/env" >> "${GITHUB_ENV}" - echo "PYTORCH_FINAL_PACKAGE_DIR=${RUNNER_TEMP}/artifacts" >> "${GITHUB_ENV}" - echo "WIN_PACKAGE_WORK_DIR=${RUNNER_TEMP}" - - uses: actions/download-artifact@v3 - name: Download Build Artifacts - with: - name: wheel-py3_7-cuda11_3 - path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}" - - name: Checkout PyTorch - uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 - with: - ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} - submodules: recursive - path: pytorch - - name: Clean PyTorch checkout - run: | - # Remove any artifacts from the previous checkouts - git clean -fxd - working-directory: pytorch - - name: Checkout pytorch/builder - uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 - with: - ref: main - submodules: recursive - repository: pytorch/builder - path: builder - - name: Clean pytorch/builder checkout - run: | - # Remove any artifacts from the previous checkouts - git clean -fxd - working-directory: builder - - name: Populate binary env - shell: bash - run: | - "${PYTORCH_ROOT}/.circleci/scripts/binary_populate_env.sh" - - name: Test PyTorch binary - shell: bash - run: | - "${PYTORCH_ROOT}/.circleci/scripts/binary_windows_test.sh" - - name: Wait until all sessions have drained - shell: powershell - working-directory: pytorch - if: always() - timeout-minutes: 120 - run: | - .github\scripts\wait_for_ssh_to_drain.ps1 - - name: Kill active ssh sessions if still around (Useful if workflow was cancelled) - shell: powershell - working-directory: pytorch - if: always() - run: | - .github\scripts\kill_active_ssh_sessions.ps1 diff --git a/.github/workflows/generated-windows-binary-wheel-nightly.yml b/.github/workflows/generated-windows-binary-wheel-nightly.yml index 7dc8eb308381..026c81e6bb58 100644 --- a/.github/workflows/generated-windows-binary-wheel-nightly.yml +++ b/.github/workflows/generated-windows-binary-wheel-nightly.yml @@ -256,7 +256,7 @@ jobs: aws-pytorch-uploader-secret-access-key: ${{ secrets.AWS_PYTORCH_UPLOADER_SECRET_ACCESS_KEY }} conda-pytorchbot-token: ${{ secrets.CONDA_PYTORCHBOT_TOKEN }} uses: ./.github/workflows/_binary-upload.yml - wheel-py3_7-cuda11_3-build: + wheel-py3_7-cuda11_6-build: if: ${{ github.repository_owner == 'pytorch' }} runs-on: windows.4xlarge timeout-minutes: 240 @@ -266,8 +266,8 @@ jobs: PACKAGE_TYPE: wheel # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu113 - GPU_ARCH_VERSION: 11.3 + DESIRED_CUDA: cu116 + GPU_ARCH_VERSION: 11.6 GPU_ARCH_TYPE: cuda SKIP_ALL_TESTS: 1 DESIRED_PYTHON: "3.7" @@ -343,7 +343,7 @@ jobs: - uses: actions/upload-artifact@v3 if: always() with: - name: wheel-py3_7-cuda11_3 + name: wheel-py3_7-cuda11_6 retention-days: 14 if-no-files-found: error path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}" @@ -360,9 +360,9 @@ jobs: if: always() run: | .github\scripts\kill_active_ssh_sessions.ps1 - wheel-py3_7-cuda11_3-test: # Testing + wheel-py3_7-cuda11_6-test: # Testing if: ${{ github.repository_owner == 'pytorch' }} - needs: 
wheel-py3_7-cuda11_3-build + needs: wheel-py3_7-cuda11_6-build runs-on: windows.8xlarge.nvidia.gpu timeout-minutes: 240 env: @@ -371,8 +371,8 @@ jobs: PACKAGE_TYPE: wheel # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu113 - GPU_ARCH_VERSION: 11.3 + DESIRED_CUDA: cu116 + GPU_ARCH_VERSION: 11.6 GPU_ARCH_TYPE: cuda SKIP_ALL_TESTS: 1 DESIRED_PYTHON: "3.7" @@ -417,7 +417,7 @@ jobs: - uses: actions/download-artifact@v3 name: Download Build Artifacts with: - name: wheel-py3_7-cuda11_3 + name: wheel-py3_7-cuda11_6 path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}" - name: Checkout PyTorch uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 @@ -463,27 +463,27 @@ jobs: if: always() run: | .github\scripts\kill_active_ssh_sessions.ps1 - wheel-py3_7-cuda11_3-upload: # Uploading + wheel-py3_7-cuda11_6-upload: # Uploading if: ${{ github.repository_owner == 'pytorch' }} - needs: wheel-py3_7-cuda11_3-test + needs: wheel-py3_7-cuda11_6-test with: PYTORCH_ROOT: ${{ github.workspace }}/pytorch BUILDER_ROOT: ${{ github.workspace }}/builder PACKAGE_TYPE: wheel # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu113 - GPU_ARCH_VERSION: 11.3 + DESIRED_CUDA: cu116 + GPU_ARCH_VERSION: 11.6 GPU_ARCH_TYPE: cuda DESIRED_PYTHON: "3.7" - build_name: wheel-py3_7-cuda11_3 + build_name: wheel-py3_7-cuda11_6 secrets: github-token: ${{ secrets.GITHUB_TOKEN }} aws-access-key-id: ${{ secrets.AWS_PYTORCH_UPLOADER_ACCESS_KEY_ID }} aws-pytorch-uploader-secret-access-key: ${{ secrets.AWS_PYTORCH_UPLOADER_SECRET_ACCESS_KEY }} conda-pytorchbot-token: ${{ secrets.CONDA_PYTORCHBOT_TOKEN }} uses: ./.github/workflows/_binary-upload.yml - wheel-py3_7-cuda11_6-build: + wheel-py3_7-cuda11_7-build: if: ${{ github.repository_owner == 'pytorch' }} runs-on: windows.4xlarge timeout-minutes: 240 @@ -493,8 +493,8 @@ jobs: PACKAGE_TYPE: wheel # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu116 - GPU_ARCH_VERSION: 11.6 + DESIRED_CUDA: cu117 + GPU_ARCH_VERSION: 11.7 GPU_ARCH_TYPE: cuda SKIP_ALL_TESTS: 1 DESIRED_PYTHON: "3.7" @@ -570,7 +570,7 @@ jobs: - uses: actions/upload-artifact@v3 if: always() with: - name: wheel-py3_7-cuda11_6 + name: wheel-py3_7-cuda11_7 retention-days: 14 if-no-files-found: error path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}" @@ -587,9 +587,9 @@ jobs: if: always() run: | .github\scripts\kill_active_ssh_sessions.ps1 - wheel-py3_7-cuda11_6-test: # Testing + wheel-py3_7-cuda11_7-test: # Testing if: ${{ github.repository_owner == 'pytorch' }} - needs: wheel-py3_7-cuda11_6-build + needs: wheel-py3_7-cuda11_7-build runs-on: windows.8xlarge.nvidia.gpu timeout-minutes: 240 env: @@ -598,8 +598,8 @@ jobs: PACKAGE_TYPE: wheel # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu116 - GPU_ARCH_VERSION: 11.6 + DESIRED_CUDA: cu117 + GPU_ARCH_VERSION: 11.7 GPU_ARCH_TYPE: cuda SKIP_ALL_TESTS: 1 DESIRED_PYTHON: "3.7" @@ -644,7 +644,7 @@ jobs: - uses: actions/download-artifact@v3 name: Download Build Artifacts with: - name: wheel-py3_7-cuda11_6 + name: wheel-py3_7-cuda11_7 path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}" - name: Checkout PyTorch uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 @@ -690,27 +690,27 @@ jobs: if: always() run: | .github\scripts\kill_active_ssh_sessions.ps1 - wheel-py3_7-cuda11_6-upload: # Uploading + 
wheel-py3_7-cuda11_7-upload: # Uploading if: ${{ github.repository_owner == 'pytorch' }} - needs: wheel-py3_7-cuda11_6-test + needs: wheel-py3_7-cuda11_7-test with: PYTORCH_ROOT: ${{ github.workspace }}/pytorch BUILDER_ROOT: ${{ github.workspace }}/builder PACKAGE_TYPE: wheel # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu116 - GPU_ARCH_VERSION: 11.6 + DESIRED_CUDA: cu117 + GPU_ARCH_VERSION: 11.7 GPU_ARCH_TYPE: cuda DESIRED_PYTHON: "3.7" - build_name: wheel-py3_7-cuda11_6 + build_name: wheel-py3_7-cuda11_7 secrets: github-token: ${{ secrets.GITHUB_TOKEN }} aws-access-key-id: ${{ secrets.AWS_PYTORCH_UPLOADER_ACCESS_KEY_ID }} aws-pytorch-uploader-secret-access-key: ${{ secrets.AWS_PYTORCH_UPLOADER_SECRET_ACCESS_KEY }} conda-pytorchbot-token: ${{ secrets.CONDA_PYTORCHBOT_TOKEN }} uses: ./.github/workflows/_binary-upload.yml - wheel-py3_7-cuda11_7-build: + wheel-py3_8-cpu-build: if: ${{ github.repository_owner == 'pytorch' }} runs-on: windows.4xlarge timeout-minutes: 240 @@ -720,11 +720,10 @@ jobs: PACKAGE_TYPE: wheel # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu117 - GPU_ARCH_VERSION: 11.7 - GPU_ARCH_TYPE: cuda + DESIRED_CUDA: cpu + GPU_ARCH_TYPE: cpu SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: "3.7" + DESIRED_PYTHON: "3.8" steps: - name: Display EC2 information shell: bash @@ -797,7 +796,7 @@ jobs: - uses: actions/upload-artifact@v3 if: always() with: - name: wheel-py3_7-cuda11_7 + name: wheel-py3_8-cpu retention-days: 14 if-no-files-found: error path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}" @@ -814,10 +813,10 @@ jobs: if: always() run: | .github\scripts\kill_active_ssh_sessions.ps1 - wheel-py3_7-cuda11_7-test: # Testing + wheel-py3_8-cpu-test: # Testing if: ${{ github.repository_owner == 'pytorch' }} - needs: wheel-py3_7-cuda11_7-build - runs-on: windows.8xlarge.nvidia.gpu + needs: wheel-py3_8-cpu-build + runs-on: windows.4xlarge timeout-minutes: 240 env: PYTORCH_ROOT: ${{ github.workspace }}/pytorch @@ -825,11 +824,10 @@ jobs: PACKAGE_TYPE: wheel # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu117 - GPU_ARCH_VERSION: 11.7 - GPU_ARCH_TYPE: cuda + DESIRED_CUDA: cpu + GPU_ARCH_TYPE: cpu SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: "3.7" + DESIRED_PYTHON: "3.8" steps: - name: Display EC2 information shell: bash @@ -871,7 +869,7 @@ jobs: - uses: actions/download-artifact@v3 name: Download Build Artifacts with: - name: wheel-py3_7-cuda11_7 + name: wheel-py3_8-cpu path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}" - name: Checkout PyTorch uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 @@ -917,27 +915,26 @@ jobs: if: always() run: | .github\scripts\kill_active_ssh_sessions.ps1 - wheel-py3_7-cuda11_7-upload: # Uploading + wheel-py3_8-cpu-upload: # Uploading if: ${{ github.repository_owner == 'pytorch' }} - needs: wheel-py3_7-cuda11_7-test + needs: wheel-py3_8-cpu-test with: PYTORCH_ROOT: ${{ github.workspace }}/pytorch BUILDER_ROOT: ${{ github.workspace }}/builder PACKAGE_TYPE: wheel # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu117 - GPU_ARCH_VERSION: 11.7 - GPU_ARCH_TYPE: cuda - DESIRED_PYTHON: "3.7" - build_name: wheel-py3_7-cuda11_7 + DESIRED_CUDA: cpu + GPU_ARCH_TYPE: cpu + DESIRED_PYTHON: "3.8" + build_name: wheel-py3_8-cpu secrets: github-token: ${{ secrets.GITHUB_TOKEN }} aws-access-key-id: ${{ 
secrets.AWS_PYTORCH_UPLOADER_ACCESS_KEY_ID }} aws-pytorch-uploader-secret-access-key: ${{ secrets.AWS_PYTORCH_UPLOADER_SECRET_ACCESS_KEY }} conda-pytorchbot-token: ${{ secrets.CONDA_PYTORCHBOT_TOKEN }} uses: ./.github/workflows/_binary-upload.yml - wheel-py3_8-cpu-build: + wheel-py3_8-cuda11_6-build: if: ${{ github.repository_owner == 'pytorch' }} runs-on: windows.4xlarge timeout-minutes: 240 @@ -947,8 +944,9 @@ jobs: PACKAGE_TYPE: wheel # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cpu - GPU_ARCH_TYPE: cpu + DESIRED_CUDA: cu116 + GPU_ARCH_VERSION: 11.6 + GPU_ARCH_TYPE: cuda SKIP_ALL_TESTS: 1 DESIRED_PYTHON: "3.8" steps: @@ -1023,7 +1021,7 @@ jobs: - uses: actions/upload-artifact@v3 if: always() with: - name: wheel-py3_8-cpu + name: wheel-py3_8-cuda11_6 retention-days: 14 if-no-files-found: error path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}" @@ -1040,10 +1038,10 @@ jobs: if: always() run: | .github\scripts\kill_active_ssh_sessions.ps1 - wheel-py3_8-cpu-test: # Testing + wheel-py3_8-cuda11_6-test: # Testing if: ${{ github.repository_owner == 'pytorch' }} - needs: wheel-py3_8-cpu-build - runs-on: windows.4xlarge + needs: wheel-py3_8-cuda11_6-build + runs-on: windows.8xlarge.nvidia.gpu timeout-minutes: 240 env: PYTORCH_ROOT: ${{ github.workspace }}/pytorch @@ -1051,8 +1049,9 @@ jobs: PACKAGE_TYPE: wheel # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cpu - GPU_ARCH_TYPE: cpu + DESIRED_CUDA: cu116 + GPU_ARCH_VERSION: 11.6 + GPU_ARCH_TYPE: cuda SKIP_ALL_TESTS: 1 DESIRED_PYTHON: "3.8" steps: @@ -1096,7 +1095,7 @@ jobs: - uses: actions/download-artifact@v3 name: Download Build Artifacts with: - name: wheel-py3_8-cpu + name: wheel-py3_8-cuda11_6 path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}" - name: Checkout PyTorch uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 @@ -1142,26 +1141,27 @@ jobs: if: always() run: | .github\scripts\kill_active_ssh_sessions.ps1 - wheel-py3_8-cpu-upload: # Uploading + wheel-py3_8-cuda11_6-upload: # Uploading if: ${{ github.repository_owner == 'pytorch' }} - needs: wheel-py3_8-cpu-test + needs: wheel-py3_8-cuda11_6-test with: PYTORCH_ROOT: ${{ github.workspace }}/pytorch BUILDER_ROOT: ${{ github.workspace }}/builder PACKAGE_TYPE: wheel # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cpu - GPU_ARCH_TYPE: cpu + DESIRED_CUDA: cu116 + GPU_ARCH_VERSION: 11.6 + GPU_ARCH_TYPE: cuda DESIRED_PYTHON: "3.8" - build_name: wheel-py3_8-cpu + build_name: wheel-py3_8-cuda11_6 secrets: github-token: ${{ secrets.GITHUB_TOKEN }} aws-access-key-id: ${{ secrets.AWS_PYTORCH_UPLOADER_ACCESS_KEY_ID }} aws-pytorch-uploader-secret-access-key: ${{ secrets.AWS_PYTORCH_UPLOADER_SECRET_ACCESS_KEY }} conda-pytorchbot-token: ${{ secrets.CONDA_PYTORCHBOT_TOKEN }} uses: ./.github/workflows/_binary-upload.yml - wheel-py3_8-cuda11_3-build: + wheel-py3_8-cuda11_7-build: if: ${{ github.repository_owner == 'pytorch' }} runs-on: windows.4xlarge timeout-minutes: 240 @@ -1171,8 +1171,8 @@ jobs: PACKAGE_TYPE: wheel # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu113 - GPU_ARCH_VERSION: 11.3 + DESIRED_CUDA: cu117 + GPU_ARCH_VERSION: 11.7 GPU_ARCH_TYPE: cuda SKIP_ALL_TESTS: 1 DESIRED_PYTHON: "3.8" @@ -1248,7 +1248,7 @@ jobs: - uses: actions/upload-artifact@v3 if: always() with: - name: wheel-py3_8-cuda11_3 + name: 
wheel-py3_8-cuda11_7 retention-days: 14 if-no-files-found: error path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}" @@ -1265,9 +1265,9 @@ jobs: if: always() run: | .github\scripts\kill_active_ssh_sessions.ps1 - wheel-py3_8-cuda11_3-test: # Testing + wheel-py3_8-cuda11_7-test: # Testing if: ${{ github.repository_owner == 'pytorch' }} - needs: wheel-py3_8-cuda11_3-build + needs: wheel-py3_8-cuda11_7-build runs-on: windows.8xlarge.nvidia.gpu timeout-minutes: 240 env: @@ -1276,8 +1276,8 @@ jobs: PACKAGE_TYPE: wheel # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu113 - GPU_ARCH_VERSION: 11.3 + DESIRED_CUDA: cu117 + GPU_ARCH_VERSION: 11.7 GPU_ARCH_TYPE: cuda SKIP_ALL_TESTS: 1 DESIRED_PYTHON: "3.8" @@ -1322,7 +1322,7 @@ jobs: - uses: actions/download-artifact@v3 name: Download Build Artifacts with: - name: wheel-py3_8-cuda11_3 + name: wheel-py3_8-cuda11_7 path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}" - name: Checkout PyTorch uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 @@ -1368,27 +1368,27 @@ jobs: if: always() run: | .github\scripts\kill_active_ssh_sessions.ps1 - wheel-py3_8-cuda11_3-upload: # Uploading + wheel-py3_8-cuda11_7-upload: # Uploading if: ${{ github.repository_owner == 'pytorch' }} - needs: wheel-py3_8-cuda11_3-test + needs: wheel-py3_8-cuda11_7-test with: PYTORCH_ROOT: ${{ github.workspace }}/pytorch BUILDER_ROOT: ${{ github.workspace }}/builder PACKAGE_TYPE: wheel # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu113 - GPU_ARCH_VERSION: 11.3 + DESIRED_CUDA: cu117 + GPU_ARCH_VERSION: 11.7 GPU_ARCH_TYPE: cuda DESIRED_PYTHON: "3.8" - build_name: wheel-py3_8-cuda11_3 + build_name: wheel-py3_8-cuda11_7 secrets: github-token: ${{ secrets.GITHUB_TOKEN }} aws-access-key-id: ${{ secrets.AWS_PYTORCH_UPLOADER_ACCESS_KEY_ID }} aws-pytorch-uploader-secret-access-key: ${{ secrets.AWS_PYTORCH_UPLOADER_SECRET_ACCESS_KEY }} conda-pytorchbot-token: ${{ secrets.CONDA_PYTORCHBOT_TOKEN }} uses: ./.github/workflows/_binary-upload.yml - wheel-py3_8-cuda11_6-build: + wheel-py3_9-cpu-build: if: ${{ github.repository_owner == 'pytorch' }} runs-on: windows.4xlarge timeout-minutes: 240 @@ -1398,11 +1398,10 @@ jobs: PACKAGE_TYPE: wheel # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu116 - GPU_ARCH_VERSION: 11.6 - GPU_ARCH_TYPE: cuda + DESIRED_CUDA: cpu + GPU_ARCH_TYPE: cpu SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: "3.8" + DESIRED_PYTHON: "3.9" steps: - name: Display EC2 information shell: bash @@ -1475,7 +1474,7 @@ jobs: - uses: actions/upload-artifact@v3 if: always() with: - name: wheel-py3_8-cuda11_6 + name: wheel-py3_9-cpu retention-days: 14 if-no-files-found: error path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}" @@ -1492,10 +1491,10 @@ jobs: if: always() run: | .github\scripts\kill_active_ssh_sessions.ps1 - wheel-py3_8-cuda11_6-test: # Testing + wheel-py3_9-cpu-test: # Testing if: ${{ github.repository_owner == 'pytorch' }} - needs: wheel-py3_8-cuda11_6-build - runs-on: windows.8xlarge.nvidia.gpu + needs: wheel-py3_9-cpu-build + runs-on: windows.4xlarge timeout-minutes: 240 env: PYTORCH_ROOT: ${{ github.workspace }}/pytorch @@ -1503,11 +1502,10 @@ jobs: PACKAGE_TYPE: wheel # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu116 - GPU_ARCH_VERSION: 11.6 - GPU_ARCH_TYPE: cuda + DESIRED_CUDA: cpu + GPU_ARCH_TYPE: cpu 
SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: "3.8" + DESIRED_PYTHON: "3.9" steps: - name: Display EC2 information shell: bash @@ -1549,7 +1547,7 @@ jobs: - uses: actions/download-artifact@v3 name: Download Build Artifacts with: - name: wheel-py3_8-cuda11_6 + name: wheel-py3_9-cpu path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}" - name: Checkout PyTorch uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 @@ -1595,27 +1593,26 @@ jobs: if: always() run: | .github\scripts\kill_active_ssh_sessions.ps1 - wheel-py3_8-cuda11_6-upload: # Uploading + wheel-py3_9-cpu-upload: # Uploading if: ${{ github.repository_owner == 'pytorch' }} - needs: wheel-py3_8-cuda11_6-test + needs: wheel-py3_9-cpu-test with: PYTORCH_ROOT: ${{ github.workspace }}/pytorch BUILDER_ROOT: ${{ github.workspace }}/builder PACKAGE_TYPE: wheel # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu116 - GPU_ARCH_VERSION: 11.6 - GPU_ARCH_TYPE: cuda - DESIRED_PYTHON: "3.8" - build_name: wheel-py3_8-cuda11_6 + DESIRED_CUDA: cpu + GPU_ARCH_TYPE: cpu + DESIRED_PYTHON: "3.9" + build_name: wheel-py3_9-cpu secrets: github-token: ${{ secrets.GITHUB_TOKEN }} aws-access-key-id: ${{ secrets.AWS_PYTORCH_UPLOADER_ACCESS_KEY_ID }} aws-pytorch-uploader-secret-access-key: ${{ secrets.AWS_PYTORCH_UPLOADER_SECRET_ACCESS_KEY }} conda-pytorchbot-token: ${{ secrets.CONDA_PYTORCHBOT_TOKEN }} uses: ./.github/workflows/_binary-upload.yml - wheel-py3_8-cuda11_7-build: + wheel-py3_9-cuda11_6-build: if: ${{ github.repository_owner == 'pytorch' }} runs-on: windows.4xlarge timeout-minutes: 240 @@ -1625,11 +1622,11 @@ jobs: PACKAGE_TYPE: wheel # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu117 - GPU_ARCH_VERSION: 11.7 + DESIRED_CUDA: cu116 + GPU_ARCH_VERSION: 11.6 GPU_ARCH_TYPE: cuda SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: "3.8" + DESIRED_PYTHON: "3.9" steps: - name: Display EC2 information shell: bash @@ -1702,7 +1699,7 @@ jobs: - uses: actions/upload-artifact@v3 if: always() with: - name: wheel-py3_8-cuda11_7 + name: wheel-py3_9-cuda11_6 retention-days: 14 if-no-files-found: error path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}" @@ -1719,9 +1716,9 @@ jobs: if: always() run: | .github\scripts\kill_active_ssh_sessions.ps1 - wheel-py3_8-cuda11_7-test: # Testing + wheel-py3_9-cuda11_6-test: # Testing if: ${{ github.repository_owner == 'pytorch' }} - needs: wheel-py3_8-cuda11_7-build + needs: wheel-py3_9-cuda11_6-build runs-on: windows.8xlarge.nvidia.gpu timeout-minutes: 240 env: @@ -1730,11 +1727,11 @@ jobs: PACKAGE_TYPE: wheel # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu117 - GPU_ARCH_VERSION: 11.7 + DESIRED_CUDA: cu116 + GPU_ARCH_VERSION: 11.6 GPU_ARCH_TYPE: cuda SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: "3.8" + DESIRED_PYTHON: "3.9" steps: - name: Display EC2 information shell: bash @@ -1776,7 +1773,7 @@ jobs: - uses: actions/download-artifact@v3 name: Download Build Artifacts with: - name: wheel-py3_8-cuda11_7 + name: wheel-py3_9-cuda11_6 path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}" - name: Checkout PyTorch uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 @@ -1822,27 +1819,27 @@ jobs: if: always() run: | .github\scripts\kill_active_ssh_sessions.ps1 - wheel-py3_8-cuda11_7-upload: # Uploading + wheel-py3_9-cuda11_6-upload: # Uploading if: ${{ github.repository_owner == 'pytorch' }} - needs: wheel-py3_8-cuda11_7-test + needs: 
wheel-py3_9-cuda11_6-test with: PYTORCH_ROOT: ${{ github.workspace }}/pytorch BUILDER_ROOT: ${{ github.workspace }}/builder PACKAGE_TYPE: wheel # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu117 - GPU_ARCH_VERSION: 11.7 + DESIRED_CUDA: cu116 + GPU_ARCH_VERSION: 11.6 GPU_ARCH_TYPE: cuda - DESIRED_PYTHON: "3.8" - build_name: wheel-py3_8-cuda11_7 + DESIRED_PYTHON: "3.9" + build_name: wheel-py3_9-cuda11_6 secrets: github-token: ${{ secrets.GITHUB_TOKEN }} aws-access-key-id: ${{ secrets.AWS_PYTORCH_UPLOADER_ACCESS_KEY_ID }} aws-pytorch-uploader-secret-access-key: ${{ secrets.AWS_PYTORCH_UPLOADER_SECRET_ACCESS_KEY }} conda-pytorchbot-token: ${{ secrets.CONDA_PYTORCHBOT_TOKEN }} uses: ./.github/workflows/_binary-upload.yml - wheel-py3_9-cpu-build: + wheel-py3_9-cuda11_7-build: if: ${{ github.repository_owner == 'pytorch' }} runs-on: windows.4xlarge timeout-minutes: 240 @@ -1852,8 +1849,9 @@ jobs: PACKAGE_TYPE: wheel # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cpu - GPU_ARCH_TYPE: cpu + DESIRED_CUDA: cu117 + GPU_ARCH_VERSION: 11.7 + GPU_ARCH_TYPE: cuda SKIP_ALL_TESTS: 1 DESIRED_PYTHON: "3.9" steps: @@ -1928,7 +1926,7 @@ jobs: - uses: actions/upload-artifact@v3 if: always() with: - name: wheel-py3_9-cpu + name: wheel-py3_9-cuda11_7 retention-days: 14 if-no-files-found: error path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}" @@ -1945,10 +1943,10 @@ jobs: if: always() run: | .github\scripts\kill_active_ssh_sessions.ps1 - wheel-py3_9-cpu-test: # Testing + wheel-py3_9-cuda11_7-test: # Testing if: ${{ github.repository_owner == 'pytorch' }} - needs: wheel-py3_9-cpu-build - runs-on: windows.4xlarge + needs: wheel-py3_9-cuda11_7-build + runs-on: windows.8xlarge.nvidia.gpu timeout-minutes: 240 env: PYTORCH_ROOT: ${{ github.workspace }}/pytorch @@ -1956,8 +1954,9 @@ jobs: PACKAGE_TYPE: wheel # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cpu - GPU_ARCH_TYPE: cpu + DESIRED_CUDA: cu117 + GPU_ARCH_VERSION: 11.7 + GPU_ARCH_TYPE: cuda SKIP_ALL_TESTS: 1 DESIRED_PYTHON: "3.9" steps: @@ -2001,7 +2000,7 @@ jobs: - uses: actions/download-artifact@v3 name: Download Build Artifacts with: - name: wheel-py3_9-cpu + name: wheel-py3_9-cuda11_7 path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}" - name: Checkout PyTorch uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 @@ -2047,26 +2046,27 @@ jobs: if: always() run: | .github\scripts\kill_active_ssh_sessions.ps1 - wheel-py3_9-cpu-upload: # Uploading + wheel-py3_9-cuda11_7-upload: # Uploading if: ${{ github.repository_owner == 'pytorch' }} - needs: wheel-py3_9-cpu-test + needs: wheel-py3_9-cuda11_7-test with: PYTORCH_ROOT: ${{ github.workspace }}/pytorch BUILDER_ROOT: ${{ github.workspace }}/builder PACKAGE_TYPE: wheel # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cpu - GPU_ARCH_TYPE: cpu + DESIRED_CUDA: cu117 + GPU_ARCH_VERSION: 11.7 + GPU_ARCH_TYPE: cuda DESIRED_PYTHON: "3.9" - build_name: wheel-py3_9-cpu + build_name: wheel-py3_9-cuda11_7 secrets: github-token: ${{ secrets.GITHUB_TOKEN }} aws-access-key-id: ${{ secrets.AWS_PYTORCH_UPLOADER_ACCESS_KEY_ID }} aws-pytorch-uploader-secret-access-key: ${{ secrets.AWS_PYTORCH_UPLOADER_SECRET_ACCESS_KEY }} conda-pytorchbot-token: ${{ secrets.CONDA_PYTORCHBOT_TOKEN }} uses: ./.github/workflows/_binary-upload.yml - 
wheel-py3_9-cuda11_3-build: + wheel-py3_10-cpu-build: if: ${{ github.repository_owner == 'pytorch' }} runs-on: windows.4xlarge timeout-minutes: 240 @@ -2076,11 +2076,10 @@ jobs: PACKAGE_TYPE: wheel # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu113 - GPU_ARCH_VERSION: 11.3 - GPU_ARCH_TYPE: cuda + DESIRED_CUDA: cpu + GPU_ARCH_TYPE: cpu SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: "3.9" + DESIRED_PYTHON: "3.10" steps: - name: Display EC2 information shell: bash @@ -2153,7 +2152,7 @@ jobs: - uses: actions/upload-artifact@v3 if: always() with: - name: wheel-py3_9-cuda11_3 + name: wheel-py3_10-cpu retention-days: 14 if-no-files-found: error path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}" @@ -2170,10 +2169,10 @@ jobs: if: always() run: | .github\scripts\kill_active_ssh_sessions.ps1 - wheel-py3_9-cuda11_3-test: # Testing + wheel-py3_10-cpu-test: # Testing if: ${{ github.repository_owner == 'pytorch' }} - needs: wheel-py3_9-cuda11_3-build - runs-on: windows.8xlarge.nvidia.gpu + needs: wheel-py3_10-cpu-build + runs-on: windows.4xlarge timeout-minutes: 240 env: PYTORCH_ROOT: ${{ github.workspace }}/pytorch @@ -2181,11 +2180,10 @@ jobs: PACKAGE_TYPE: wheel # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu113 - GPU_ARCH_VERSION: 11.3 - GPU_ARCH_TYPE: cuda + DESIRED_CUDA: cpu + GPU_ARCH_TYPE: cpu SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: "3.9" + DESIRED_PYTHON: "3.10" steps: - name: Display EC2 information shell: bash @@ -2227,7 +2225,7 @@ jobs: - uses: actions/download-artifact@v3 name: Download Build Artifacts with: - name: wheel-py3_9-cuda11_3 + name: wheel-py3_10-cpu path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}" - name: Checkout PyTorch uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 @@ -2273,688 +2271,9 @@ jobs: if: always() run: | .github\scripts\kill_active_ssh_sessions.ps1 - wheel-py3_9-cuda11_3-upload: # Uploading + wheel-py3_10-cpu-upload: # Uploading if: ${{ github.repository_owner == 'pytorch' }} - needs: wheel-py3_9-cuda11_3-test - with: - PYTORCH_ROOT: ${{ github.workspace }}/pytorch - BUILDER_ROOT: ${{ github.workspace }}/builder - PACKAGE_TYPE: wheel - # TODO: This is a legacy variable that we eventually want to get rid of in - # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu113 - GPU_ARCH_VERSION: 11.3 - GPU_ARCH_TYPE: cuda - DESIRED_PYTHON: "3.9" - build_name: wheel-py3_9-cuda11_3 - secrets: - github-token: ${{ secrets.GITHUB_TOKEN }} - aws-access-key-id: ${{ secrets.AWS_PYTORCH_UPLOADER_ACCESS_KEY_ID }} - aws-pytorch-uploader-secret-access-key: ${{ secrets.AWS_PYTORCH_UPLOADER_SECRET_ACCESS_KEY }} - conda-pytorchbot-token: ${{ secrets.CONDA_PYTORCHBOT_TOKEN }} - uses: ./.github/workflows/_binary-upload.yml - wheel-py3_9-cuda11_6-build: - if: ${{ github.repository_owner == 'pytorch' }} - runs-on: windows.4xlarge - timeout-minutes: 240 - env: - PYTORCH_ROOT: ${{ github.workspace }}/pytorch - BUILDER_ROOT: ${{ github.workspace }}/builder - PACKAGE_TYPE: wheel - # TODO: This is a legacy variable that we eventually want to get rid of in - # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu116 - GPU_ARCH_VERSION: 11.6 - GPU_ARCH_TYPE: cuda - SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: "3.9" - steps: - - name: Display EC2 information - shell: bash - run: | - set -euo pipefail - function get_ec2_metadata() { - # Pulled from instance metadata endpoint for EC2 - # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html - 
category=$1 - curl -fsSL "http://169.254.169.254/latest/meta-data/${category}" - } - echo "ami-id: $(get_ec2_metadata ami-id)" - echo "instance-id: $(get_ec2_metadata instance-id)" - echo "instance-type: $(get_ec2_metadata instance-type)" - echo "system info $(uname -a)" - - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" - uses: seemethere/add-github-ssh-key@v1 - with: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - # Needed for binary builds, see: https://github.com/pytorch/pytorch/issues/73339#issuecomment-1058981560 - - name: Enable long paths on Windows - shell: powershell - run: | - Set-ItemProperty -Path "HKLM:\\SYSTEM\CurrentControlSet\Control\FileSystem" -Name "LongPathsEnabled" -Value 1 - # Since it's just a defensive command, the workflow should continue even the command fails - - name: Disables Windows Defender scheduled and real-time scanning for files in pytorch directory. - shell: powershell - run: | - Add-MpPreference -ExclusionPath $(Get-Location).tostring() -ErrorAction Ignore - # NOTE: These environment variables are put here so that they can be applied on every job equally - # They are also here because setting them at a workflow level doesn't give us access to the - # runner.temp variable, which we need. - - name: Populate binary env - shell: bash - run: | - echo "BINARY_ENV_FILE=${RUNNER_TEMP}/env" >> "${GITHUB_ENV}" - echo "PYTORCH_FINAL_PACKAGE_DIR=${RUNNER_TEMP}/artifacts" >> "${GITHUB_ENV}" - echo "WIN_PACKAGE_WORK_DIR=${RUNNER_TEMP}" - - name: Checkout PyTorch - uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 - with: - ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} - submodules: recursive - path: pytorch - - name: Clean PyTorch checkout - run: | - # Remove any artifacts from the previous checkouts - git clean -fxd - working-directory: pytorch - - name: Checkout pytorch/builder - uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 - with: - ref: main - submodules: recursive - repository: pytorch/builder - path: builder - - name: Clean pytorch/builder checkout - run: | - # Remove any artifacts from the previous checkouts - git clean -fxd - working-directory: builder - - name: Populate binary env - shell: bash - run: | - "${PYTORCH_ROOT}/.circleci/scripts/binary_populate_env.sh" - - name: Build PyTorch binary - shell: bash - run: | - "${PYTORCH_ROOT}/.circleci/scripts/binary_windows_build.sh" - - uses: actions/upload-artifact@v3 - if: always() - with: - name: wheel-py3_9-cuda11_6 - retention-days: 14 - if-no-files-found: error - path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}" - - name: Wait until all sessions have drained - shell: powershell - working-directory: pytorch - if: always() - timeout-minutes: 120 - run: | - .github\scripts\wait_for_ssh_to_drain.ps1 - - name: Kill active ssh sessions if still around (Useful if workflow was cancelled) - shell: powershell - working-directory: pytorch - if: always() - run: | - .github\scripts\kill_active_ssh_sessions.ps1 - wheel-py3_9-cuda11_6-test: # Testing - if: ${{ github.repository_owner == 'pytorch' }} - needs: wheel-py3_9-cuda11_6-build - runs-on: windows.8xlarge.nvidia.gpu - timeout-minutes: 240 - env: - PYTORCH_ROOT: ${{ github.workspace }}/pytorch - BUILDER_ROOT: ${{ github.workspace }}/builder - PACKAGE_TYPE: wheel - # TODO: This is a legacy variable that we eventually want to get rid of in - # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu116 - GPU_ARCH_VERSION: 11.6 - GPU_ARCH_TYPE: cuda - SKIP_ALL_TESTS: 1 - 
DESIRED_PYTHON: "3.9" - steps: - - name: Display EC2 information - shell: bash - run: | - set -euo pipefail - function get_ec2_metadata() { - # Pulled from instance metadata endpoint for EC2 - # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html - category=$1 - curl -fsSL "http://169.254.169.254/latest/meta-data/${category}" - } - echo "ami-id: $(get_ec2_metadata ami-id)" - echo "instance-id: $(get_ec2_metadata instance-id)" - echo "instance-type: $(get_ec2_metadata instance-type)" - echo "system info $(uname -a)" - - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" - uses: seemethere/add-github-ssh-key@v1 - with: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - # Needed for binary builds, see: https://github.com/pytorch/pytorch/issues/73339#issuecomment-1058981560 - - name: Enable long paths on Windows - shell: powershell - run: | - Set-ItemProperty -Path "HKLM:\\SYSTEM\CurrentControlSet\Control\FileSystem" -Name "LongPathsEnabled" -Value 1 - # Since it's just a defensive command, the workflow should continue even the command fails - - name: Disables Windows Defender scheduled and real-time scanning for files in pytorch directory. - shell: powershell - run: | - Add-MpPreference -ExclusionPath $(Get-Location).tostring() -ErrorAction Ignore - # NOTE: These environment variables are put here so that they can be applied on every job equally - # They are also here because setting them at a workflow level doesn't give us access to the - # runner.temp variable, which we need. - - name: Populate binary env - shell: bash - run: | - echo "BINARY_ENV_FILE=${RUNNER_TEMP}/env" >> "${GITHUB_ENV}" - echo "PYTORCH_FINAL_PACKAGE_DIR=${RUNNER_TEMP}/artifacts" >> "${GITHUB_ENV}" - echo "WIN_PACKAGE_WORK_DIR=${RUNNER_TEMP}" - - uses: actions/download-artifact@v3 - name: Download Build Artifacts - with: - name: wheel-py3_9-cuda11_6 - path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}" - - name: Checkout PyTorch - uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 - with: - ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} - submodules: recursive - path: pytorch - - name: Clean PyTorch checkout - run: | - # Remove any artifacts from the previous checkouts - git clean -fxd - working-directory: pytorch - - name: Checkout pytorch/builder - uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 - with: - ref: main - submodules: recursive - repository: pytorch/builder - path: builder - - name: Clean pytorch/builder checkout - run: | - # Remove any artifacts from the previous checkouts - git clean -fxd - working-directory: builder - - name: Populate binary env - shell: bash - run: | - "${PYTORCH_ROOT}/.circleci/scripts/binary_populate_env.sh" - - name: Test PyTorch binary - shell: bash - run: | - "${PYTORCH_ROOT}/.circleci/scripts/binary_windows_test.sh" - - name: Wait until all sessions have drained - shell: powershell - working-directory: pytorch - if: always() - timeout-minutes: 120 - run: | - .github\scripts\wait_for_ssh_to_drain.ps1 - - name: Kill active ssh sessions if still around (Useful if workflow was cancelled) - shell: powershell - working-directory: pytorch - if: always() - run: | - .github\scripts\kill_active_ssh_sessions.ps1 - wheel-py3_9-cuda11_6-upload: # Uploading - if: ${{ github.repository_owner == 'pytorch' }} - needs: wheel-py3_9-cuda11_6-test - with: - PYTORCH_ROOT: ${{ github.workspace }}/pytorch - BUILDER_ROOT: ${{ github.workspace }}/builder - PACKAGE_TYPE: wheel - # 
TODO: This is a legacy variable that we eventually want to get rid of in - # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu116 - GPU_ARCH_VERSION: 11.6 - GPU_ARCH_TYPE: cuda - DESIRED_PYTHON: "3.9" - build_name: wheel-py3_9-cuda11_6 - secrets: - github-token: ${{ secrets.GITHUB_TOKEN }} - aws-access-key-id: ${{ secrets.AWS_PYTORCH_UPLOADER_ACCESS_KEY_ID }} - aws-pytorch-uploader-secret-access-key: ${{ secrets.AWS_PYTORCH_UPLOADER_SECRET_ACCESS_KEY }} - conda-pytorchbot-token: ${{ secrets.CONDA_PYTORCHBOT_TOKEN }} - uses: ./.github/workflows/_binary-upload.yml - wheel-py3_9-cuda11_7-build: - if: ${{ github.repository_owner == 'pytorch' }} - runs-on: windows.4xlarge - timeout-minutes: 240 - env: - PYTORCH_ROOT: ${{ github.workspace }}/pytorch - BUILDER_ROOT: ${{ github.workspace }}/builder - PACKAGE_TYPE: wheel - # TODO: This is a legacy variable that we eventually want to get rid of in - # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu117 - GPU_ARCH_VERSION: 11.7 - GPU_ARCH_TYPE: cuda - SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: "3.9" - steps: - - name: Display EC2 information - shell: bash - run: | - set -euo pipefail - function get_ec2_metadata() { - # Pulled from instance metadata endpoint for EC2 - # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html - category=$1 - curl -fsSL "http://169.254.169.254/latest/meta-data/${category}" - } - echo "ami-id: $(get_ec2_metadata ami-id)" - echo "instance-id: $(get_ec2_metadata instance-id)" - echo "instance-type: $(get_ec2_metadata instance-type)" - echo "system info $(uname -a)" - - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" - uses: seemethere/add-github-ssh-key@v1 - with: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - # Needed for binary builds, see: https://github.com/pytorch/pytorch/issues/73339#issuecomment-1058981560 - - name: Enable long paths on Windows - shell: powershell - run: | - Set-ItemProperty -Path "HKLM:\\SYSTEM\CurrentControlSet\Control\FileSystem" -Name "LongPathsEnabled" -Value 1 - # Since it's just a defensive command, the workflow should continue even the command fails - - name: Disables Windows Defender scheduled and real-time scanning for files in pytorch directory. - shell: powershell - run: | - Add-MpPreference -ExclusionPath $(Get-Location).tostring() -ErrorAction Ignore - # NOTE: These environment variables are put here so that they can be applied on every job equally - # They are also here because setting them at a workflow level doesn't give us access to the - # runner.temp variable, which we need. 
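The recurring "Populate binary env" step below relies on GitHub Actions' GITHUB_ENV mechanism: every KEY=VALUE line appended to the file named by "${GITHUB_ENV}" is exported as an environment variable to all later steps in the same job. A minimal Python sketch of that mechanism, assuming it runs inside an Actions job (the helper name populate_binary_env is invented for illustration; the real steps are inline bash):

```python
import os

def populate_binary_env(runner_temp: str) -> None:
    # GitHub Actions re-reads the $GITHUB_ENV file after each step and exports
    # every KEY=VALUE line to all subsequent steps in the job.
    with open(os.environ["GITHUB_ENV"], "a") as env_file:
        env_file.write(f"BINARY_ENV_FILE={runner_temp}/env\n")
        env_file.write(f"PYTORCH_FINAL_PACKAGE_DIR={runner_temp}/artifacts\n")

populate_binary_env(os.environ["RUNNER_TEMP"])
```

Note that, as reproduced in these jobs, the WIN_PACKAGE_WORK_DIR echo is not redirected into "${GITHUB_ENV}", so only the first two variables actually persist across steps.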
- - name: Populate binary env - shell: bash - run: | - echo "BINARY_ENV_FILE=${RUNNER_TEMP}/env" >> "${GITHUB_ENV}" - echo "PYTORCH_FINAL_PACKAGE_DIR=${RUNNER_TEMP}/artifacts" >> "${GITHUB_ENV}" - echo "WIN_PACKAGE_WORK_DIR=${RUNNER_TEMP}" - - name: Checkout PyTorch - uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 - with: - ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} - submodules: recursive - path: pytorch - - name: Clean PyTorch checkout - run: | - # Remove any artifacts from the previous checkouts - git clean -fxd - working-directory: pytorch - - name: Checkout pytorch/builder - uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 - with: - ref: main - submodules: recursive - repository: pytorch/builder - path: builder - - name: Clean pytorch/builder checkout - run: | - # Remove any artifacts from the previous checkouts - git clean -fxd - working-directory: builder - - name: Populate binary env - shell: bash - run: | - "${PYTORCH_ROOT}/.circleci/scripts/binary_populate_env.sh" - - name: Build PyTorch binary - shell: bash - run: | - "${PYTORCH_ROOT}/.circleci/scripts/binary_windows_build.sh" - - uses: actions/upload-artifact@v3 - if: always() - with: - name: wheel-py3_9-cuda11_7 - retention-days: 14 - if-no-files-found: error - path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}" - - name: Wait until all sessions have drained - shell: powershell - working-directory: pytorch - if: always() - timeout-minutes: 120 - run: | - .github\scripts\wait_for_ssh_to_drain.ps1 - - name: Kill active ssh sessions if still around (Useful if workflow was cancelled) - shell: powershell - working-directory: pytorch - if: always() - run: | - .github\scripts\kill_active_ssh_sessions.ps1 - wheel-py3_9-cuda11_7-test: # Testing - if: ${{ github.repository_owner == 'pytorch' }} - needs: wheel-py3_9-cuda11_7-build - runs-on: windows.8xlarge.nvidia.gpu - timeout-minutes: 240 - env: - PYTORCH_ROOT: ${{ github.workspace }}/pytorch - BUILDER_ROOT: ${{ github.workspace }}/builder - PACKAGE_TYPE: wheel - # TODO: This is a legacy variable that we eventually want to get rid of in - # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu117 - GPU_ARCH_VERSION: 11.7 - GPU_ARCH_TYPE: cuda - SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: "3.9" - steps: - - name: Display EC2 information - shell: bash - run: | - set -euo pipefail - function get_ec2_metadata() { - # Pulled from instance metadata endpoint for EC2 - # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html - category=$1 - curl -fsSL "http://169.254.169.254/latest/meta-data/${category}" - } - echo "ami-id: $(get_ec2_metadata ami-id)" - echo "instance-id: $(get_ec2_metadata instance-id)" - echo "instance-type: $(get_ec2_metadata instance-type)" - echo "system info $(uname -a)" - - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" - uses: seemethere/add-github-ssh-key@v1 - with: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - # Needed for binary builds, see: https://github.com/pytorch/pytorch/issues/73339#issuecomment-1058981560 - - name: Enable long paths on Windows - shell: powershell - run: | - Set-ItemProperty -Path "HKLM:\\SYSTEM\CurrentControlSet\Control\FileSystem" -Name "LongPathsEnabled" -Value 1 - # Since it's just a defensive command, the workflow should continue even the command fails - - name: Disables Windows Defender scheduled and real-time scanning for files in pytorch directory. 
- shell: powershell - run: | - Add-MpPreference -ExclusionPath $(Get-Location).tostring() -ErrorAction Ignore - # NOTE: These environment variables are put here so that they can be applied on every job equally - # They are also here because setting them at a workflow level doesn't give us access to the - # runner.temp variable, which we need. - - name: Populate binary env - shell: bash - run: | - echo "BINARY_ENV_FILE=${RUNNER_TEMP}/env" >> "${GITHUB_ENV}" - echo "PYTORCH_FINAL_PACKAGE_DIR=${RUNNER_TEMP}/artifacts" >> "${GITHUB_ENV}" - echo "WIN_PACKAGE_WORK_DIR=${RUNNER_TEMP}" - - uses: actions/download-artifact@v3 - name: Download Build Artifacts - with: - name: wheel-py3_9-cuda11_7 - path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}" - - name: Checkout PyTorch - uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 - with: - ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} - submodules: recursive - path: pytorch - - name: Clean PyTorch checkout - run: | - # Remove any artifacts from the previous checkouts - git clean -fxd - working-directory: pytorch - - name: Checkout pytorch/builder - uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 - with: - ref: main - submodules: recursive - repository: pytorch/builder - path: builder - - name: Clean pytorch/builder checkout - run: | - # Remove any artifacts from the previous checkouts - git clean -fxd - working-directory: builder - - name: Populate binary env - shell: bash - run: | - "${PYTORCH_ROOT}/.circleci/scripts/binary_populate_env.sh" - - name: Test PyTorch binary - shell: bash - run: | - "${PYTORCH_ROOT}/.circleci/scripts/binary_windows_test.sh" - - name: Wait until all sessions have drained - shell: powershell - working-directory: pytorch - if: always() - timeout-minutes: 120 - run: | - .github\scripts\wait_for_ssh_to_drain.ps1 - - name: Kill active ssh sessions if still around (Useful if workflow was cancelled) - shell: powershell - working-directory: pytorch - if: always() - run: | - .github\scripts\kill_active_ssh_sessions.ps1 - wheel-py3_9-cuda11_7-upload: # Uploading - if: ${{ github.repository_owner == 'pytorch' }} - needs: wheel-py3_9-cuda11_7-test - with: - PYTORCH_ROOT: ${{ github.workspace }}/pytorch - BUILDER_ROOT: ${{ github.workspace }}/builder - PACKAGE_TYPE: wheel - # TODO: This is a legacy variable that we eventually want to get rid of in - # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu117 - GPU_ARCH_VERSION: 11.7 - GPU_ARCH_TYPE: cuda - DESIRED_PYTHON: "3.9" - build_name: wheel-py3_9-cuda11_7 - secrets: - github-token: ${{ secrets.GITHUB_TOKEN }} - aws-access-key-id: ${{ secrets.AWS_PYTORCH_UPLOADER_ACCESS_KEY_ID }} - aws-pytorch-uploader-secret-access-key: ${{ secrets.AWS_PYTORCH_UPLOADER_SECRET_ACCESS_KEY }} - conda-pytorchbot-token: ${{ secrets.CONDA_PYTORCHBOT_TOKEN }} - uses: ./.github/workflows/_binary-upload.yml - wheel-py3_10-cpu-build: - if: ${{ github.repository_owner == 'pytorch' }} - runs-on: windows.4xlarge - timeout-minutes: 240 - env: - PYTORCH_ROOT: ${{ github.workspace }}/pytorch - BUILDER_ROOT: ${{ github.workspace }}/builder - PACKAGE_TYPE: wheel - # TODO: This is a legacy variable that we eventually want to get rid of in - # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cpu - GPU_ARCH_TYPE: cpu - SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: "3.10" - steps: - - name: Display EC2 information - shell: bash - run: | - set -euo pipefail - function get_ec2_metadata() { - # Pulled from instance metadata endpoint for EC2 - # see 
https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html - category=$1 - curl -fsSL "http://169.254.169.254/latest/meta-data/${category}" - } - echo "ami-id: $(get_ec2_metadata ami-id)" - echo "instance-id: $(get_ec2_metadata instance-id)" - echo "instance-type: $(get_ec2_metadata instance-type)" - echo "system info $(uname -a)" - - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" - uses: seemethere/add-github-ssh-key@v1 - with: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - # Needed for binary builds, see: https://github.com/pytorch/pytorch/issues/73339#issuecomment-1058981560 - - name: Enable long paths on Windows - shell: powershell - run: | - Set-ItemProperty -Path "HKLM:\\SYSTEM\CurrentControlSet\Control\FileSystem" -Name "LongPathsEnabled" -Value 1 - # Since it's just a defensive command, the workflow should continue even the command fails - - name: Disables Windows Defender scheduled and real-time scanning for files in pytorch directory. - shell: powershell - run: | - Add-MpPreference -ExclusionPath $(Get-Location).tostring() -ErrorAction Ignore - # NOTE: These environment variables are put here so that they can be applied on every job equally - # They are also here because setting them at a workflow level doesn't give us access to the - # runner.temp variable, which we need. - - name: Populate binary env - shell: bash - run: | - echo "BINARY_ENV_FILE=${RUNNER_TEMP}/env" >> "${GITHUB_ENV}" - echo "PYTORCH_FINAL_PACKAGE_DIR=${RUNNER_TEMP}/artifacts" >> "${GITHUB_ENV}" - echo "WIN_PACKAGE_WORK_DIR=${RUNNER_TEMP}" - - name: Checkout PyTorch - uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 - with: - ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} - submodules: recursive - path: pytorch - - name: Clean PyTorch checkout - run: | - # Remove any artifacts from the previous checkouts - git clean -fxd - working-directory: pytorch - - name: Checkout pytorch/builder - uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 - with: - ref: main - submodules: recursive - repository: pytorch/builder - path: builder - - name: Clean pytorch/builder checkout - run: | - # Remove any artifacts from the previous checkouts - git clean -fxd - working-directory: builder - - name: Populate binary env - shell: bash - run: | - "${PYTORCH_ROOT}/.circleci/scripts/binary_populate_env.sh" - - name: Build PyTorch binary - shell: bash - run: | - "${PYTORCH_ROOT}/.circleci/scripts/binary_windows_build.sh" - - uses: actions/upload-artifact@v3 - if: always() - with: - name: wheel-py3_10-cpu - retention-days: 14 - if-no-files-found: error - path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}" - - name: Wait until all sessions have drained - shell: powershell - working-directory: pytorch - if: always() - timeout-minutes: 120 - run: | - .github\scripts\wait_for_ssh_to_drain.ps1 - - name: Kill active ssh sessions if still around (Useful if workflow was cancelled) - shell: powershell - working-directory: pytorch - if: always() - run: | - .github\scripts\kill_active_ssh_sessions.ps1 - wheel-py3_10-cpu-test: # Testing - if: ${{ github.repository_owner == 'pytorch' }} - needs: wheel-py3_10-cpu-build - runs-on: windows.4xlarge - timeout-minutes: 240 - env: - PYTORCH_ROOT: ${{ github.workspace }}/pytorch - BUILDER_ROOT: ${{ github.workspace }}/builder - PACKAGE_TYPE: wheel - # TODO: This is a legacy variable that we eventually want to get rid of in - # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cpu - 
GPU_ARCH_TYPE: cpu - SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: "3.10" - steps: - - name: Display EC2 information - shell: bash - run: | - set -euo pipefail - function get_ec2_metadata() { - # Pulled from instance metadata endpoint for EC2 - # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html - category=$1 - curl -fsSL "http://169.254.169.254/latest/meta-data/${category}" - } - echo "ami-id: $(get_ec2_metadata ami-id)" - echo "instance-id: $(get_ec2_metadata instance-id)" - echo "instance-type: $(get_ec2_metadata instance-type)" - echo "system info $(uname -a)" - - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" - uses: seemethere/add-github-ssh-key@v1 - with: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - # Needed for binary builds, see: https://github.com/pytorch/pytorch/issues/73339#issuecomment-1058981560 - - name: Enable long paths on Windows - shell: powershell - run: | - Set-ItemProperty -Path "HKLM:\\SYSTEM\CurrentControlSet\Control\FileSystem" -Name "LongPathsEnabled" -Value 1 - # Since it's just a defensive command, the workflow should continue even the command fails - - name: Disables Windows Defender scheduled and real-time scanning for files in pytorch directory. - shell: powershell - run: | - Add-MpPreference -ExclusionPath $(Get-Location).tostring() -ErrorAction Ignore - # NOTE: These environment variables are put here so that they can be applied on every job equally - # They are also here because setting them at a workflow level doesn't give us access to the - # runner.temp variable, which we need. - - name: Populate binary env - shell: bash - run: | - echo "BINARY_ENV_FILE=${RUNNER_TEMP}/env" >> "${GITHUB_ENV}" - echo "PYTORCH_FINAL_PACKAGE_DIR=${RUNNER_TEMP}/artifacts" >> "${GITHUB_ENV}" - echo "WIN_PACKAGE_WORK_DIR=${RUNNER_TEMP}" - - uses: actions/download-artifact@v3 - name: Download Build Artifacts - with: - name: wheel-py3_10-cpu - path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}" - - name: Checkout PyTorch - uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 - with: - ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} - submodules: recursive - path: pytorch - - name: Clean PyTorch checkout - run: | - # Remove any artifacts from the previous checkouts - git clean -fxd - working-directory: pytorch - - name: Checkout pytorch/builder - uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 - with: - ref: main - submodules: recursive - repository: pytorch/builder - path: builder - - name: Clean pytorch/builder checkout - run: | - # Remove any artifacts from the previous checkouts - git clean -fxd - working-directory: builder - - name: Populate binary env - shell: bash - run: | - "${PYTORCH_ROOT}/.circleci/scripts/binary_populate_env.sh" - - name: Test PyTorch binary - shell: bash - run: | - "${PYTORCH_ROOT}/.circleci/scripts/binary_windows_test.sh" - - name: Wait until all sessions have drained - shell: powershell - working-directory: pytorch - if: always() - timeout-minutes: 120 - run: | - .github\scripts\wait_for_ssh_to_drain.ps1 - - name: Kill active ssh sessions if still around (Useful if workflow was cancelled) - shell: powershell - working-directory: pytorch - if: always() - run: | - .github\scripts\kill_active_ssh_sessions.ps1 - wheel-py3_10-cpu-upload: # Uploading - if: ${{ github.repository_owner == 'pytorch' }} - needs: wheel-py3_10-cpu-test + needs: wheel-py3_10-cpu-test with: PYTORCH_ROOT: ${{ github.workspace }}/pytorch BUILDER_ROOT: ${{ 
github.workspace }}/builder @@ -2971,233 +2290,6 @@ jobs: aws-pytorch-uploader-secret-access-key: ${{ secrets.AWS_PYTORCH_UPLOADER_SECRET_ACCESS_KEY }} conda-pytorchbot-token: ${{ secrets.CONDA_PYTORCHBOT_TOKEN }} uses: ./.github/workflows/_binary-upload.yml - wheel-py3_10-cuda11_3-build: - if: ${{ github.repository_owner == 'pytorch' }} - runs-on: windows.4xlarge - timeout-minutes: 240 - env: - PYTORCH_ROOT: ${{ github.workspace }}/pytorch - BUILDER_ROOT: ${{ github.workspace }}/builder - PACKAGE_TYPE: wheel - # TODO: This is a legacy variable that we eventually want to get rid of in - # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu113 - GPU_ARCH_VERSION: 11.3 - GPU_ARCH_TYPE: cuda - SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: "3.10" - steps: - - name: Display EC2 information - shell: bash - run: | - set -euo pipefail - function get_ec2_metadata() { - # Pulled from instance metadata endpoint for EC2 - # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html - category=$1 - curl -fsSL "http://169.254.169.254/latest/meta-data/${category}" - } - echo "ami-id: $(get_ec2_metadata ami-id)" - echo "instance-id: $(get_ec2_metadata instance-id)" - echo "instance-type: $(get_ec2_metadata instance-type)" - echo "system info $(uname -a)" - - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" - uses: seemethere/add-github-ssh-key@v1 - with: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - # Needed for binary builds, see: https://github.com/pytorch/pytorch/issues/73339#issuecomment-1058981560 - - name: Enable long paths on Windows - shell: powershell - run: | - Set-ItemProperty -Path "HKLM:\\SYSTEM\CurrentControlSet\Control\FileSystem" -Name "LongPathsEnabled" -Value 1 - # Since it's just a defensive command, the workflow should continue even the command fails - - name: Disables Windows Defender scheduled and real-time scanning for files in pytorch directory. - shell: powershell - run: | - Add-MpPreference -ExclusionPath $(Get-Location).tostring() -ErrorAction Ignore - # NOTE: These environment variables are put here so that they can be applied on every job equally - # They are also here because setting them at a workflow level doesn't give us access to the - # runner.temp variable, which we need. 
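Each of these Windows binary jobs opens with the same get_ec2_metadata shell helper shown above, which simply curls categories from the EC2 instance-metadata endpoint at 169.254.169.254. A rough Python equivalent of that helper, runnable only from inside an EC2 instance (a sketch, not part of the workflow):

```python
from urllib.request import urlopen

def get_ec2_metadata(category: str) -> str:
    # Same instance-metadata endpoint the workflow step queries with curl -fsSL.
    url = f"http://169.254.169.254/latest/meta-data/{category}"
    with urlopen(url, timeout=5) as response:
        return response.read().decode().strip()

for category in ("ami-id", "instance-id", "instance-type"):
    print(f"{category}: {get_ec2_metadata(category)}")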
- - name: Populate binary env - shell: bash - run: | - echo "BINARY_ENV_FILE=${RUNNER_TEMP}/env" >> "${GITHUB_ENV}" - echo "PYTORCH_FINAL_PACKAGE_DIR=${RUNNER_TEMP}/artifacts" >> "${GITHUB_ENV}" - echo "WIN_PACKAGE_WORK_DIR=${RUNNER_TEMP}" - - name: Checkout PyTorch - uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 - with: - ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} - submodules: recursive - path: pytorch - - name: Clean PyTorch checkout - run: | - # Remove any artifacts from the previous checkouts - git clean -fxd - working-directory: pytorch - - name: Checkout pytorch/builder - uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 - with: - ref: main - submodules: recursive - repository: pytorch/builder - path: builder - - name: Clean pytorch/builder checkout - run: | - # Remove any artifacts from the previous checkouts - git clean -fxd - working-directory: builder - - name: Populate binary env - shell: bash - run: | - "${PYTORCH_ROOT}/.circleci/scripts/binary_populate_env.sh" - - name: Build PyTorch binary - shell: bash - run: | - "${PYTORCH_ROOT}/.circleci/scripts/binary_windows_build.sh" - - uses: actions/upload-artifact@v3 - if: always() - with: - name: wheel-py3_10-cuda11_3 - retention-days: 14 - if-no-files-found: error - path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}" - - name: Wait until all sessions have drained - shell: powershell - working-directory: pytorch - if: always() - timeout-minutes: 120 - run: | - .github\scripts\wait_for_ssh_to_drain.ps1 - - name: Kill active ssh sessions if still around (Useful if workflow was cancelled) - shell: powershell - working-directory: pytorch - if: always() - run: | - .github\scripts\kill_active_ssh_sessions.ps1 - wheel-py3_10-cuda11_3-test: # Testing - if: ${{ github.repository_owner == 'pytorch' }} - needs: wheel-py3_10-cuda11_3-build - runs-on: windows.8xlarge.nvidia.gpu - timeout-minutes: 240 - env: - PYTORCH_ROOT: ${{ github.workspace }}/pytorch - BUILDER_ROOT: ${{ github.workspace }}/builder - PACKAGE_TYPE: wheel - # TODO: This is a legacy variable that we eventually want to get rid of in - # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu113 - GPU_ARCH_VERSION: 11.3 - GPU_ARCH_TYPE: cuda - SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: "3.10" - steps: - - name: Display EC2 information - shell: bash - run: | - set -euo pipefail - function get_ec2_metadata() { - # Pulled from instance metadata endpoint for EC2 - # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html - category=$1 - curl -fsSL "http://169.254.169.254/latest/meta-data/${category}" - } - echo "ami-id: $(get_ec2_metadata ami-id)" - echo "instance-id: $(get_ec2_metadata instance-id)" - echo "instance-type: $(get_ec2_metadata instance-type)" - echo "system info $(uname -a)" - - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" - uses: seemethere/add-github-ssh-key@v1 - with: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - # Needed for binary builds, see: https://github.com/pytorch/pytorch/issues/73339#issuecomment-1058981560 - - name: Enable long paths on Windows - shell: powershell - run: | - Set-ItemProperty -Path "HKLM:\\SYSTEM\CurrentControlSet\Control\FileSystem" -Name "LongPathsEnabled" -Value 1 - # Since it's just a defensive command, the workflow should continue even the command fails - - name: Disables Windows Defender scheduled and real-time scanning for files in pytorch directory. 
- shell: powershell - run: | - Add-MpPreference -ExclusionPath $(Get-Location).tostring() -ErrorAction Ignore - # NOTE: These environment variables are put here so that they can be applied on every job equally - # They are also here because setting them at a workflow level doesn't give us access to the - # runner.temp variable, which we need. - - name: Populate binary env - shell: bash - run: | - echo "BINARY_ENV_FILE=${RUNNER_TEMP}/env" >> "${GITHUB_ENV}" - echo "PYTORCH_FINAL_PACKAGE_DIR=${RUNNER_TEMP}/artifacts" >> "${GITHUB_ENV}" - echo "WIN_PACKAGE_WORK_DIR=${RUNNER_TEMP}" - - uses: actions/download-artifact@v3 - name: Download Build Artifacts - with: - name: wheel-py3_10-cuda11_3 - path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}" - - name: Checkout PyTorch - uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 - with: - ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} - submodules: recursive - path: pytorch - - name: Clean PyTorch checkout - run: | - # Remove any artifacts from the previous checkouts - git clean -fxd - working-directory: pytorch - - name: Checkout pytorch/builder - uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 - with: - ref: main - submodules: recursive - repository: pytorch/builder - path: builder - - name: Clean pytorch/builder checkout - run: | - # Remove any artifacts from the previous checkouts - git clean -fxd - working-directory: builder - - name: Populate binary env - shell: bash - run: | - "${PYTORCH_ROOT}/.circleci/scripts/binary_populate_env.sh" - - name: Test PyTorch binary - shell: bash - run: | - "${PYTORCH_ROOT}/.circleci/scripts/binary_windows_test.sh" - - name: Wait until all sessions have drained - shell: powershell - working-directory: pytorch - if: always() - timeout-minutes: 120 - run: | - .github\scripts\wait_for_ssh_to_drain.ps1 - - name: Kill active ssh sessions if still around (Useful if workflow was cancelled) - shell: powershell - working-directory: pytorch - if: always() - run: | - .github\scripts\kill_active_ssh_sessions.ps1 - wheel-py3_10-cuda11_3-upload: # Uploading - if: ${{ github.repository_owner == 'pytorch' }} - needs: wheel-py3_10-cuda11_3-test - with: - PYTORCH_ROOT: ${{ github.workspace }}/pytorch - BUILDER_ROOT: ${{ github.workspace }}/builder - PACKAGE_TYPE: wheel - # TODO: This is a legacy variable that we eventually want to get rid of in - # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu113 - GPU_ARCH_VERSION: 11.3 - GPU_ARCH_TYPE: cuda - DESIRED_PYTHON: "3.10" - build_name: wheel-py3_10-cuda11_3 - secrets: - github-token: ${{ secrets.GITHUB_TOKEN }} - aws-access-key-id: ${{ secrets.AWS_PYTORCH_UPLOADER_ACCESS_KEY_ID }} - aws-pytorch-uploader-secret-access-key: ${{ secrets.AWS_PYTORCH_UPLOADER_SECRET_ACCESS_KEY }} - conda-pytorchbot-token: ${{ secrets.CONDA_PYTORCHBOT_TOKEN }} - uses: ./.github/workflows/_binary-upload.yml wheel-py3_10-cuda11_6-build: if: ${{ github.repository_owner == 'pytorch' }} runs-on: windows.4xlarge diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index eb39618619e7..aa386b783264 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -30,7 +30,11 @@ jobs: **/.github/requirements-gha-cache.txt - name: Install lintrunner - run: pip install lintrunner==0.9.2 + uses: nick-fields/retry@7d4a37704547a311dbb66ebdf5b23ec19374a767 + with: + timeout_minutes: 5 + max_attempts: 3 + command: pip install lintrunner==0.9.2 - name: Initialize lint dependencies run: lintrunner init diff --git 
a/.github/workflows/periodic.yml b/.github/workflows/periodic.yml index 2a2448104421..3305dc0d0987 100644 --- a/.github/workflows/periodic.yml +++ b/.github/workflows/periodic.yml @@ -162,6 +162,12 @@ jobs: with: build-environment: win-vs2019-cuda11.7-py3 cuda-version: "11.7" + test-matrix: | + { include: [ + { config: "default", shard: 1, num_shards: 2, runner: "windows.8xlarge.nvidia.gpu" }, + { config: "default", shard: 2, num_shards: 2, runner: "windows.8xlarge.nvidia.gpu" }, + { config: "force_on_cpu", shard: 1, num_shards: 1, runner: "windows.4xlarge" }, + ]} win-vs2019-cuda11_7-py3-test: name: win-vs2019-cuda11.7-py3 @@ -170,12 +176,7 @@ jobs: with: build-environment: win-vs2019-cuda11.7-py3 cuda-version: "11.7" - test-matrix: | - { include: [ - { config: "default", shard: 1, num_shards: 2, runner: "windows.8xlarge.nvidia.gpu" }, - { config: "default", shard: 2, num_shards: 2, runner: "windows.8xlarge.nvidia.gpu" }, - { config: "force_on_cpu", shard: 1, num_shards: 1, runner: "windows.4xlarge" }, - ]} + test-matrix: ${{ needs.win-vs2019-cuda11_7-py3-build.outputs.test-matrix }} ios-12-5-1-x86-64-coreml: name: ios-12-5-1-x86-64-coreml diff --git a/.github/workflows/pull.yml b/.github/workflows/pull.yml index 58a4706897be..6b9e184afc26 100644 --- a/.github/workflows/pull.yml +++ b/.github/workflows/pull.yml @@ -25,7 +25,8 @@ jobs: { include: [ { config: "default", shard: 1, num_shards: 2, runner: "linux.2xlarge" }, { config: "default", shard: 2, num_shards: 2, runner: "linux.2xlarge" }, - { config: "distributed", shard: 1, num_shards: 1, runner: "linux.2xlarge" }, + { config: "distributed", shard: 1, num_shards: 2, runner: "linux.2xlarge" }, + { config: "distributed", shard: 2, num_shards: 2, runner: "linux.2xlarge" }, { config: "functorch", shard: 1, num_shards: 1, runner: "linux.2xlarge" }, { config: "docs_test", shard: 1, num_shards: 1, runner: "linux.2xlarge" }, { config: "jit_legacy", shard: 1, num_shards: 1, runner: "linux.2xlarge" }, @@ -237,6 +238,12 @@ jobs: with: build-environment: win-vs2019-cpu-py3 cuda-version: cpu + test-matrix: | + { include: [ + { config: "default", shard: 1, num_shards: 2, runner: "windows.4xlarge" }, + { config: "default", shard: 2, num_shards: 2, runner: "windows.4xlarge" }, + { config: "functorch", shard: 1, num_shards: 1, runner: "windows.4xlarge" }, + ]} win-vs2019-cpu-py3-test: name: win-vs2019-cpu-py3 @@ -245,12 +252,7 @@ jobs: with: build-environment: win-vs2019-cpu-py3 cuda-version: cpu - test-matrix: | - { include: [ - { config: "default", shard: 1, num_shards: 2, runner: "windows.4xlarge" }, - { config: "default", shard: 2, num_shards: 2, runner: "windows.4xlarge" }, - { config: "functorch", shard: 1, num_shards: 1, runner: "windows.4xlarge" }, - ]} + test-matrix: ${{ needs.win-vs2019-cpu-py3-build.outputs.test-matrix }} win-vs2019-cuda11_6-py3-build: if: github.event_name == 'pull_request' @@ -260,6 +262,16 @@ jobs: build-environment: win-vs2019-cuda11.6-py3 cuda-version: "11.6" sync-tag: win-cuda-build + test-matrix: | + { include: [ + { config: "default", shard: 1, num_shards: 5, runner: "windows.8xlarge.nvidia.gpu" }, + { config: "default", shard: 2, num_shards: 5, runner: "windows.8xlarge.nvidia.gpu" }, + { config: "default", shard: 3, num_shards: 5, runner: "windows.8xlarge.nvidia.gpu" }, + { config: "default", shard: 4, num_shards: 5, runner: "windows.8xlarge.nvidia.gpu" }, + { config: "default", shard: 5, num_shards: 5, runner: "windows.8xlarge.nvidia.gpu" }, + { config: "functorch", shard: 1, num_shards: 1, runner: 
"windows.8xlarge.nvidia.gpu" }, + { config: "force_on_cpu", shard: 1, num_shards: 1, runner: "windows.4xlarge" }, + ]} linux-bionic-cuda11_6-py3_10-gcc7-bazel-test: name: linux-bionic-cuda11.6-py3.10-gcc7-bazel-test diff --git a/.github/workflows/push_nightly_docker_ghcr.yml b/.github/workflows/push_nightly_docker_ghcr.yml index bdcc6e05dc59..3eb204db3fa3 100644 --- a/.github/workflows/push_nightly_docker_ghcr.yml +++ b/.github/workflows/push_nightly_docker_ghcr.yml @@ -28,7 +28,7 @@ jobs: - uses: nick-fields/retry@7d4a37704547a311dbb66ebdf5b23ec19374a767 name: Build and upload nightly docker with: - timeout_minutes: 10 + timeout_minutes: 30 max_attempts: 3 command: | set -ex diff --git a/.github/workflows/run_torchbench.yml b/.github/workflows/run_torchbench.yml index f9c3039fc4f8..9a46a23af5bf 100644 --- a/.github/workflows/run_torchbench.yml +++ b/.github/workflows/run_torchbench.yml @@ -10,6 +10,8 @@ env: PR_BODY: ${{ github.event.pull_request.body }} PR_BASE_SHA: ${{ github.event.pull_request.base.sha }} PR_HEAD_SHA: ${{ github.event.pull_request.head.sha }} + AWS_ACCESS_KEY_ID: ${{ secrets.AWS_OSSCI_METRICS_V2_ACCESS_KEY_ID }} + AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_OSSCI_METRICS_V2_SECRET_ACCESS_KEY }} jobs: run-torchbench: @@ -39,7 +41,7 @@ jobs: # pin cmake version to 3.22 since 3.23 breaks pytorch build # see details at: https://github.com/pytorch/pytorch/issues/74985 conda install -y numpy="${NUMPY_VERSION}" requests ninja pyyaml mkl mkl-include \ - setuptools cmake=3.22 cffi typing_extensions \ + setuptools cmake=3.22 cffi typing_extensions boto3 \ future six dataclasses pillow pytest tabulate gitpython git-lfs tqdm psutil - name: Setup TorchBench branch run: | @@ -78,6 +80,13 @@ jobs: --pr-num "$PR_NUM" \ --pr-base-sha "$PR_MERGE_BASE" \ --pr-head-sha "$PR_HEAD_SHA" + - name: Upload result to S3 + run: | + . 
"${HOME}"/anaconda3/etc/profile.d/conda.sh + conda activate pr-ci + python3 pytorch/.github/scripts/run_torchbench.py \ + upload-s3 \ + --result-dir "${HOME}/.torchbench/bisection/pr${{ github.event.number }}" - name: Remove conda environment and cleanup run: | conda env remove --name pr-ci diff --git a/.github/workflows/trunk.yml b/.github/workflows/trunk.yml index 4e29526d438a..e03f7f5b9051 100644 --- a/.github/workflows/trunk.yml +++ b/.github/workflows/trunk.yml @@ -141,6 +141,12 @@ jobs: xcode-version: "13.3.1" runner-type: macos-12-xl build-generates-artifacts: true + test-matrix: | + { include: [ + { config: "default", shard: 1, num_shards: 2, runner: "macos-12" }, + { config: "default", shard: 2, num_shards: 2, runner: "macos-12" }, + { config: "functorch", shard: 1, num_shards: 1, runner: "macos-12" }, + ]} secrets: MACOS_SCCACHE_S3_ACCESS_KEY_ID: ${{ secrets.MACOS_SCCACHE_S3_ACCESS_KEY_ID }} MACOS_SCCACHE_S3_SECRET_ACCESS_KEY: ${{ secrets.MACOS_SCCACHE_S3_SECRET_ACCESS_KEY }} @@ -151,12 +157,7 @@ jobs: needs: macos-12-py3-x86-64-build with: build-environment: macos-12-py3-x86-64 - test-matrix: | - { include: [ - { config: "default", shard: 1, num_shards: 2, runner: "macos-12" }, - { config: "default", shard: 2, num_shards: 2, runner: "macos-12" }, - { config: "functorch", shard: 1, num_shards: 1, runner: "macos-12" }, - ]} + test-matrix: ${{ needs.macos-12-py3-x86-64-build.outputs.test-matrix }} arch: x86_64 secrets: AWS_OSSCI_METRICS_V2_ACCESS_KEY_ID: ${{ secrets.AWS_OSSCI_METRICS_V2_ACCESS_KEY_ID }} @@ -185,6 +186,11 @@ jobs: build-generates-artifacts: true # To match the one pre-installed in the m1 runners python_version: 3.9.12 + test-matrix: | + { include: [ + { config: "default", shard: 1, num_shards: 2, runner: "macos-m1-12" }, + { config: "default", shard: 2, num_shards: 2, runner: "macos-m1-12" }, + ]} secrets: MACOS_SCCACHE_S3_ACCESS_KEY_ID: ${{ secrets.MACOS_SCCACHE_S3_ACCESS_KEY_ID }} MACOS_SCCACHE_S3_SECRET_ACCESS_KEY: ${{ secrets.MACOS_SCCACHE_S3_SECRET_ACCESS_KEY }} @@ -193,6 +199,7 @@ jobs: name: macos-12-py3-arm64-mps uses: ./.github/workflows/_mac-test-mps.yml needs: macos-12-py3-arm64-build + if: needs.macos-12-py3-arm64-build.outputs.build-outcome == 'success' with: sync-tag: macos-12-py3-arm64-mps-test build-environment: macos-12-py3-arm64 @@ -203,11 +210,7 @@ jobs: needs: macos-12-py3-arm64-build with: build-environment: macos-12-py3-arm64 - test-matrix: | - { include: [ - { config: "default", shard: 1, num_shards: 2, runner: "macos-m1-12" }, - { config: "default", shard: 2, num_shards: 2, runner: "macos-m1-12" }, - ]} + test-matrix: ${{ needs.macos-12-py3-arm64-build.outputs.test-matrix }} arch: arm64 secrets: AWS_OSSCI_METRICS_V2_ACCESS_KEY_ID: ${{ secrets.AWS_OSSCI_METRICS_V2_ACCESS_KEY_ID }} @@ -220,14 +223,6 @@ jobs: build-environment: win-vs2019-cuda11.6-py3 cuda-version: "11.6" sync-tag: win-cuda-build - - win-vs2019-cuda11_6-py3-test: - name: win-vs2019-cuda11.6-py3 - uses: ./.github/workflows/_win-test.yml - needs: win-vs2019-cuda11_6-py3-build - with: - build-environment: win-vs2019-cuda11.6-py3 - cuda-version: "11.6" test-matrix: | { include: [ { config: "default", shard: 1, num_shards: 5, runner: "windows.8xlarge.nvidia.gpu" }, @@ -239,6 +234,15 @@ jobs: { config: "force_on_cpu", shard: 1, num_shards: 1, runner: "windows.4xlarge" }, ]} + win-vs2019-cuda11_6-py3-test: + name: win-vs2019-cuda11.6-py3 + uses: ./.github/workflows/_win-test.yml + needs: win-vs2019-cuda11_6-py3-build + with: + build-environment: win-vs2019-cuda11.6-py3 + cuda-version: 
"11.6" + test-matrix: ${{ needs.win-vs2019-cuda11_6-py3-build.outputs.test-matrix }} + linux-focal-rocm5_2-py3_7-build: name: linux-focal-rocm5.2-py3.7 uses: ./.github/workflows/_linux-build.yml diff --git a/.github/workflows/trymerge.yml b/.github/workflows/trymerge.yml index 9ba29af66002..7b2c4336e73c 100644 --- a/.github/workflows/trymerge.yml +++ b/.github/workflows/trymerge.yml @@ -38,8 +38,13 @@ jobs: ON_GREEN: ${{ github.event.client_payload.on_green}} LAND_CHECKS: ${{ github.event.client_payload.land_checks }} COMMENT_ID: ${{ github.event.client_payload.comment_id }} + REBASE: ${{ github.event.client_payload.rebase }} run: | set -ex + if [ -n "${REBASE}" ]; then + python3 .github/scripts/tryrebase.py "${PR_NUM}" --branch "${REBASE}" + git checkout master + fi if [ -n "${FORCE}" ]; then if [ -n "${COMMENT_ID}" ]; then python3 .github/scripts/trymerge.py --force --comment-id "${COMMENT_ID}" "${PR_NUM}" diff --git a/.gitmodules b/.gitmodules index 32c0c205948a..282746ed0b53 100644 --- a/.gitmodules +++ b/.gitmodules @@ -151,3 +151,6 @@ [submodule "third_party/VulkanMemoryAllocator"] path = third_party/VulkanMemoryAllocator url = https://github.com/GPUOpen-LibrariesAndSDKs/VulkanMemoryAllocator.git +[submodule "third_party/cutlass"] + path = third_party/cutlass + url = https://github.com/NVIDIA/cutlass.git diff --git a/.jenkins/pytorch/build.sh b/.jenkins/pytorch/build.sh index a215459fcc7e..e808d83cbcf9 100755 --- a/.jenkins/pytorch/build.sh +++ b/.jenkins/pytorch/build.sh @@ -68,8 +68,13 @@ fi pip_install -r requirements.txt || true # Enable LLVM dependency for TensorExpr testing -export USE_LLVM=/opt/llvm -export LLVM_DIR=/opt/llvm/lib/cmake/llvm +if [[ "$BUILD_ENVIRONMENT" == *rocm* ]]; then + export USE_LLVM=/opt/rocm/llvm + export LLVM_DIR=/opt/rocm/llvm/lib/cmake/llvm +else + export USE_LLVM=/opt/llvm + export LLVM_DIR=/opt/llvm/lib/cmake/llvm +fi # TODO: Don't install this here if ! which conda; then @@ -146,9 +151,9 @@ if [[ "$BUILD_ENVIRONMENT" == *rocm* ]]; then fi if [[ -n "$CI" && -z "$PYTORCH_ROCM_ARCH" ]]; then - # Set ROCM_ARCH to gfx900 and gfx906 for CI builds, if user doesn't override. - echo "Limiting PYTORCH_ROCM_ARCH to gfx90[06] for CI builds" - export PYTORCH_ROCM_ARCH="gfx900;gfx906" + # Set ROCM_ARCH to gfx906 for CI builds, if user doesn't override. 
+ echo "Limiting PYTORCH_ROCM_ARCH to gfx906 for CI builds" + export PYTORCH_ROCM_ARCH="gfx906" fi # hipify sources diff --git a/.jenkins/pytorch/common_utils.sh b/.jenkins/pytorch/common_utils.sh index 7b592d57c280..61a7cb36178b 100644 --- a/.jenkins/pytorch/common_utils.sh +++ b/.jenkins/pytorch/common_utils.sh @@ -141,12 +141,6 @@ function checkout_install_torchdynamo() { popd } -function install_functorch() { - pushd functorch - time python setup.py develop - popd -} - function test_functorch() { python test/run_test.py --functorch --verbose } diff --git a/.jenkins/pytorch/macos-common.sh b/.jenkins/pytorch/macos-common.sh index 4df378d505ec..319e88e40aa8 100755 --- a/.jenkins/pytorch/macos-common.sh +++ b/.jenkins/pytorch/macos-common.sh @@ -28,7 +28,7 @@ else numpy=1.18.5 \ pyyaml=5.3 \ setuptools=46.0.0 \ - cmake=3.19 \ + cmake=3.22.1 \ cffi \ ninja \ typing_extensions \ diff --git a/.jenkins/pytorch/macos-test.sh b/.jenkins/pytorch/macos-test.sh index a30e16ba942e..244c9dda7fc1 100755 --- a/.jenkins/pytorch/macos-test.sh +++ b/.jenkins/pytorch/macos-test.sh @@ -177,7 +177,6 @@ test_dynamo() { } if [[ "${TEST_CONFIG}" == *functorch* ]]; then - install_functorch test_functorch elif [[ $NUM_TEST_SHARDS -gt 1 ]]; then test_python_shard "${SHARD_NUMBER}" diff --git a/.jenkins/pytorch/test.sh b/.jenkins/pytorch/test.sh index 43e6119d4401..231a47bcc9f5 100755 --- a/.jenkins/pytorch/test.sh +++ b/.jenkins/pytorch/test.sh @@ -180,9 +180,6 @@ test_dynamo_shard() { echo "NUM_TEST_SHARDS must be defined to run a Python test shard" exit 1 fi - pushd functorch - python setup.py develop - popd # Temporarily disable test_fx for dynamo pending the investigation on TTS # regression in https://github.com/pytorch/torchdynamo/issues/784 time python test/run_test.py \ @@ -197,8 +194,8 @@ test_dynamo_shard() { test_reductions \ test_namedtensor \ test_namedtuple_return_api \ - test_profiler \ - test_profiler_tree \ + profiler/test_profiler \ + profiler/test_profiler_tree \ test_overrides \ test_python_dispatch \ test_fx \ @@ -332,6 +329,14 @@ test_libtorch() { test_aot_compilation() { echo "Testing Ahead of Time compilation" + ln -sf "$TORCH_LIB_DIR"/libc10* "$TORCH_BIN_DIR" + ln -sf "$TORCH_LIB_DIR"/libtorch* "$TORCH_BIN_DIR" + + # Make test_reports directory + # NB: the ending test_libtorch must match the current function name for the current + # test reporting process (in print_test_stats.py) to function as expected. 
+ TEST_REPORTS_DIR=test/test-reports/cpp-unittest/test_aot_compilation + mkdir -p $TEST_REPORTS_DIR if [ -f "$TORCH_BIN_DIR"/test_mobile_nnc ]; then "$TORCH_BIN_DIR"/test_mobile_nnc --gtest_output=xml:$TEST_REPORTS_DIR/test_mobile_nnc.xml; fi # shellcheck source=test/mobile/nnc/test_aot_compile.sh if [ -f "$TORCH_BIN_DIR"/aot_model_compiler_test ]; then source test/mobile/nnc/test_aot_compile.sh; fi @@ -686,7 +691,6 @@ elif [[ "${BUILD_ENVIRONMENT}" == *-mobile-lightweight-dispatch* ]]; then elif [[ "${TEST_CONFIG}" = docs_test ]]; then test_docs_test elif [[ "${TEST_CONFIG}" == *functorch* ]]; then - install_functorch test_functorch else install_torchvision diff --git a/.jenkins/pytorch/win-test-helpers/build_pytorch.bat b/.jenkins/pytorch/win-test-helpers/build_pytorch.bat index 7edeca96ed8d..65ba7ef3235c 100644 --- a/.jenkins/pytorch/win-test-helpers/build_pytorch.bat +++ b/.jenkins/pytorch/win-test-helpers/build_pytorch.bat @@ -144,7 +144,7 @@ python setup.py install --cmake && sccache --show-stats && ( if "%BUILD_ENVIRONMENT%"=="" ( echo NOTE: To run `import torch`, please make sure to activate the conda environment by running `call %CONDA_PARENT_DIR%\Miniconda3\Scripts\activate.bat %CONDA_PARENT_DIR%\Miniconda3` in Command Prompt before running Git Bash. ) else ( - 7z a %TMP_DIR_WIN%\%IMAGE_COMMIT_TAG%.7z %CONDA_PARENT_DIR%\Miniconda3\Lib\site-packages\torch %CONDA_PARENT_DIR%\Miniconda3\Lib\site-packages\torchgen %CONDA_PARENT_DIR%\Miniconda3\Lib\site-packages\caffe2 && copy /Y "%TMP_DIR_WIN%\%IMAGE_COMMIT_TAG%.7z" "%PYTORCH_FINAL_PACKAGE_DIR%\" + 7z a %TMP_DIR_WIN%\%IMAGE_COMMIT_TAG%.7z %CONDA_PARENT_DIR%\Miniconda3\Lib\site-packages\torch %CONDA_PARENT_DIR%\Miniconda3\Lib\site-packages\torchgen %CONDA_PARENT_DIR%\Miniconda3\Lib\site-packages\caffe2 %CONDA_PARENT_DIR%\Miniconda3\Lib\site-packages\functorch && copy /Y "%TMP_DIR_WIN%\%IMAGE_COMMIT_TAG%.7z" "%PYTORCH_FINAL_PACKAGE_DIR%\" if errorlevel 1 exit /b if not errorlevel 0 exit /b diff --git a/.jenkins/pytorch/win-test-helpers/install_test_functorch.bat b/.jenkins/pytorch/win-test-helpers/install_test_functorch.bat index 7679bffbc70e..d06d46f3dd22 100644 --- a/.jenkins/pytorch/win-test-helpers/install_test_functorch.bat +++ b/.jenkins/pytorch/win-test-helpers/install_test_functorch.bat @@ -6,15 +6,6 @@ if not errorlevel 0 ( exit /b ) -pushd functorch -echo "Install functorch" -:: --no-deps because for some reason, on windows, `torch` isn't found in -:: `pip list` despite being installed. With just `python setup.py develop`, -:: setuptools explicitly checks for the existence of torch and can't find it. 
-python setup.py develop --no-deps -popd -if ERRORLEVEL 1 goto fail - echo "Installing test dependencies" pip install networkx if errorlevel 1 exit /b diff --git a/.lintrunner.toml b/.lintrunner.toml index b2fa676f8e13..33980c3cbc9f 100644 --- a/.lintrunner.toml +++ b/.lintrunner.toml @@ -318,6 +318,7 @@ exclude_patterns = [ 'aten/src/ATen/native/vulkan/api/vk_mem_alloc.h', 'test/cpp/jit/upgrader_models/*.ptl', 'test/cpp/jit/upgrader_models/*.ptl.ff', + 'cmake/External/nccl.patch', ] command = [ 'python3', @@ -347,6 +348,7 @@ exclude_patterns = [ 'test/cpp/jit/upgrader_models/*.ptl', 'test/cpp/jit/upgrader_models/*.ptl.ff', '.lintrunner.toml', + 'cmake/External/nccl.patch', ] command = [ 'python3', diff --git a/BUILD.bazel b/BUILD.bazel index dd417c413a6b..2c00e0d1dc56 100644 --- a/BUILD.bazel +++ b/BUILD.bazel @@ -133,6 +133,7 @@ filegroup( name = "aten_base_cpp", srcs = glob([ "aten/src/ATen/*.cpp", + "aten/src/ATen/functorch/*.cpp", "aten/src/ATen/detail/*.cpp", "aten/src/ATen/cpu/*.cpp", ]), diff --git a/CMakeLists.txt b/CMakeLists.txt index 379fa2fd7c7e..3800fe238cd6 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -355,6 +355,8 @@ option(USE_PER_OPERATOR_HEADERS "Whether ATen should generate separate headers f cmake_dependent_option( BUILD_LAZY_TS_BACKEND "Build the lazy Torchscript backend, not compatible with mobile builds" ON "NOT INTERN_BUILD_MOBILE" OFF) +cmake_dependent_option( + BUILD_FUNCTORCH "Build Functorch" ON "BUILD_PYTHON" OFF) if(USE_CCACHE) @@ -572,6 +574,22 @@ if(ANDROID OR IOS OR DEFINED ENV{BUILD_PYTORCH_MOBILE_WITH_HOST_TOOLCHAIN}) message(WARNING "INTERN_BUILD_MOBILE is on, disabling BUILD_LAZY_TS_BACKEND") set(BUILD_LAZY_TS_BACKEND OFF) + # Set -ffunction-sections and -fdata-sections so that each method has its own + # text section. This allows the linker to remove unused sections when the flag + # -Wl,-gc-sections is provided at link time. + string(APPEND CMAKE_CXX_FLAGS " -ffunction-sections") + string(APPEND CMAKE_C_FLAGS " -ffunction-sections") + string(APPEND CMAKE_CXX_FLAGS " -fdata-sections") + string(APPEND CMAKE_C_FLAGS " -fdata-sections") + + # Please note that the use of the following flags is required when linking + # against libtorch_cpu.a for mobile builds. + # -Wl,--whole-archive -ltorch_cpu -Wl,--no-whole-archive + # + # This allows global constructors to be included and run. Global + # constructors are used for operator/kernel registration with the + # PyTorch Dispatcher. + if(DEFINED ENV{BUILD_PYTORCH_MOBILE_WITH_HOST_TOOLCHAIN}) # C10_MOBILE is derived from Android/iOS toolchain macros in # c10/macros/Macros.h, so it needs to be explicitly set here. @@ -590,6 +608,10 @@ endif() # INTERN_BUILD_ATEN_OPS is used to control whether to build ATen/TH operators. set(INTERN_BUILD_ATEN_OPS ON) +if(NOT DEFINED USE_BLAS) + set(USE_BLAS ON) +endif() + # Build libtorch mobile library, which contains ATen/TH ops and native support for # TorchScript model, but doesn't contain not-yet-unified caffe2 ops; if(INTERN_BUILD_MOBILE) @@ -602,13 +624,18 @@ if(INTERN_BUILD_MOBILE) set(INTERN_DISABLE_AUTOGRAD ON) endif() set(BUILD_PYTHON OFF) + set(BUILD_FUNCTORCH OFF) set(BUILD_CAFFE2_OPS OFF) set(USE_DISTRIBUTED OFF) set(NO_API ON) set(USE_FBGEMM OFF) set(USE_QNNPACK OFF) set(INTERN_DISABLE_ONNX ON) - set(INTERN_USE_EIGEN_BLAS ON) + if(USE_BLAS) + set(INTERN_USE_EIGEN_BLAS ON) + else() + set(INTERN_USE_EIGEN_BLAS OFF) + endif() # Disable developing mobile interpreter for actual mobile build. # Enable it elsewhere to capture build error.
set(INTERN_DISABLE_MOBILE_INTERP ON) @@ -697,6 +724,13 @@ set(BUILD_ONEDNN_GRAPH OFF) include(cmake/Dependencies.cmake) +# Moved this cmake set option down here because CMAKE_CUDA_COMPILER_VERSION is not available until now +option(USE_FLASH_ATTENTION "Whether to build the flash_attention kernel for scaled dot product attention" OFF) +if(USE_FLASH_ATTENTION) + ADD_DEFINITIONS(-DUSE_FLASH_ATTENTION) +ENDIF() + + if(USE_CUDA AND (CMAKE_CUDA_COMPILER_VERSION VERSION_LESS 10.2) AND (CMAKE_HOST_SYSTEM_NAME MATCHES "Windows")) # CUDA < 10.2 doesn't support compiling and extracting header dependencies in # one call, so instead CMake calls nvcc twice with && in between. @@ -1144,3 +1178,7 @@ caffe2_print_configuration_summary() if(USE_DEPLOY) add_subdirectory(torch/csrc/deploy) endif() + +if(BUILD_FUNCTORCH) + add_subdirectory(functorch) +endif() diff --git a/MANIFEST.in b/MANIFEST.in index acf4c7291f43..403b90b702df 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -26,5 +26,6 @@ recursive-include benchmarks *.* recursive-include scripts *.* recursive-include mypy_plugins *.* recursive-include modules *.* +recursive-include functorch *.* prune */__pycache__ global-exclude *.o *.so *.dylib *.a .git *.pyc *.swp diff --git a/aten/src/ATen/BatchedTensorImpl.cpp b/aten/src/ATen/BatchedTensorImpl.cpp index d5ab588de53d..fdedfa7c6316 100644 --- a/aten/src/ATen/BatchedTensorImpl.cpp +++ b/aten/src/ATen/BatchedTensorImpl.cpp @@ -17,7 +17,7 @@ BatchedTensorImpl::BatchedTensorImpl(Tensor value, BatchDims bdims) { TORCH_INTERNAL_ASSERT(value_.defined()); set_storage_access_should_throw(); - set_sizes_strides_policy(SizesStridesPolicy::CustomStrides); + set_custom_sizes_strides(SizesStridesPolicy::CustomStrides); checkInvariants(); const auto public_dims = value_.dim() - bdims_.size(); diff --git a/aten/src/ATen/CMakeLists.txt b/aten/src/ATen/CMakeLists.txt index 286d59f3e97d..3055e290094d 100644 --- a/aten/src/ATen/CMakeLists.txt +++ b/aten/src/ATen/CMakeLists.txt @@ -56,8 +56,8 @@ if(NOT BUILD_CAFFE2 AND NOT BUILD_LITE_INTERPRETER) EXCLUDE(ATen_CORE_TEST_SRCS "${ATen_CORE_TEST_SRCS}" ${ATen_CORE_EXCLUDED_TEST_SRCS}) endif() -file(GLOB base_h "*.h" "detail/*.h" "cpu/*.h" "cpu/vec/vec512/*.h" "cpu/vec/vec256/*.h" "cpu/vec/*.h" "quantized/*.h") -file(GLOB base_cpp "*.cpp" "detail/*.cpp" "cpu/*.cpp") +file(GLOB base_h "*.h" "detail/*.h" "cpu/*.h" "cpu/vec/vec512/*.h" "cpu/vec/vec256/*.h" "cpu/vec/*.h" "quantized/*.h" "functorch/*.h") +file(GLOB base_cpp "*.cpp" "detail/*.cpp" "cpu/*.cpp" "functorch/*.cpp") file(GLOB cuda_h "cuda/*.h" "cuda/detail/*.h" "cuda/*.cuh" "cuda/detail/*.cuh") file(GLOB cuda_cpp "cuda/*.cpp" "cuda/detail/*.cpp") file(GLOB cuda_nvrtc_stub_h "cuda/nvrtc_stub/*.h") @@ -130,15 +130,13 @@ file(GLOB native_cuda_h "native/cuda/*.h" "native/cuda/*.cuh") file(GLOB native_cuda_linalg_cpp "native/cuda/linalg/*.cpp") file(GLOB native_hip_h "native/hip/*.h" "native/hip/*.cuh") file(GLOB native_cudnn_cpp "native/cudnn/*.cpp") -file(GLOB native_nested_cuda_cu "native/nested/cuda/*.cu") -file(GLOB native_nested_cuda_cpp "native/nested/cuda/*.cpp") file(GLOB native_sparse_cuda_cu "native/sparse/cuda/*.cu") file(GLOB native_sparse_cuda_cpp "native/sparse/cuda/*.cpp") file(GLOB native_quantized_cuda_cu "native/quantized/cuda/*.cu") file(GLOB native_quantized_cuda_cpp "native/quantized/cuda/*.cpp") file(GLOB native_quantized_cudnn_cpp "native/quantized/cudnn/*.cpp") -file(GLOB native_transformers_cuda_cu "native/transformers/cuda/*.cu") -file(GLOB native_transformers_cuda_cpp "native/transformers/cuda/*.cpp")
+file(GLOB native_nested_cuda_cu "native/nested/cuda/*.cu") +file(GLOB native_nested_cuda_cpp "native/nested/cuda/*.cpp") file(GLOB native_hip_hip "native/hip/*.hip") file(GLOB native_hip_cpp "native/hip/*.cpp") @@ -151,11 +149,22 @@ file(GLOB native_sparse_hip_hip "native/sparse/hip/*.hip") file(GLOB native_sparse_hip_cpp "native/sparse/hip/*.cpp") file(GLOB native_quantized_hip_hip "native/quantized/hip/*.hip") file(GLOB native_quantized_hip_cpp "native/quantized/hip/*.cpp") +file(GLOB native_transformers_cuda_cu "native/transformers/cuda/*.cu") +file(GLOB native_transformers_cuda_cpp "native/transformers/cuda/*.cpp") file(GLOB native_transformers_hip_hip "native/transformers/hip/*.hip") file(GLOB native_transformers_hip_cpp "native/transformers/hip/*.cpp") file(GLOB native_quantized_cudnn_hip_cpp "native/quantized/cudnn/hip/*.cpp") file(GLOB native_utils_cpp "native/utils/*.cpp") +# flash_attention sources +file(GLOB flash_attention_cuda_cu "native/transformers/cuda/flash_attn/*.cu") +file(GLOB flash_attention_cuda_cpp "native/transformers/cuda/flash_attn/*.cpp") + +if(USE_FLASH_ATTENTION) + list(APPEND native_transformers_cuda_cu ${flash_attention_cuda_cu}) + list(APPEND native_transformers_cuda_cpp ${flash_attention_cuda_cpp}) +endif() + # XNNPACK file(GLOB native_xnnpack "native/xnnpack/*.cpp") @@ -415,6 +424,9 @@ if(NOT MSVC AND NOT EMSCRIPTEN AND NOT INTERN_BUILD_MOBILE) endif() if(USE_CUDA AND NOT USE_ROCM) + if(USE_FLASH_ATTENTION) + list(APPEND ATen_CUDA_INCLUDE ${CMAKE_CURRENT_SOURCE_DIR}/../../../third_party/cutlass/include) + endif() if($ENV{ATEN_STATIC_CUDA}) list(APPEND ATen_CUDA_DEPENDENCY_LIBS ${CUDA_LIBRARIES} diff --git a/aten/src/ATen/EmptyTensor.cpp b/aten/src/ATen/EmptyTensor.cpp index ff91aa0bd14d..daf0b6842365 100644 --- a/aten/src/ATen/EmptyTensor.cpp +++ b/aten/src/ATen/EmptyTensor.cpp @@ -106,6 +106,35 @@ size_t computeStorageNbytes( #endif } +// not including mobile-only macros in this function, +// since mobile shouldn't be using symints. 
+SymInt computeStorageNbytes( + SymIntArrayRef sizes, + SymIntArrayRef strides, + SymInt itemsize_bytes, + SymInt storage_offset + ) { + TORCH_CHECK( + sizes.size() == strides.size(), + "dimensionality of sizes (", + sizes.size(), + ") must match dimensionality of strides (", + strides.size(), + ")"); + + // size of the underlying storage is 1 bigger than the offset + // of the last element according to stride + SymInt size = 1; + for (const auto i : c10::irange(sizes.size())) { + if (sizes[i] == 0) { + return 0; + } + + size += strides[i] * (sizes[i] - 1); + } + return itemsize_bytes * (storage_offset + size); +} + TensorBase empty_generic( IntArrayRef size, c10::Allocator* allocator, @@ -140,20 +169,20 @@ return tensor; } -TensorBase empty_strided_generic( - IntArrayRef size, - IntArrayRef stride, +template <typename T> +TensorBase _empty_strided_generic( + T size, + T stride, c10::Allocator* allocator, c10::DispatchKeySet ks, ScalarType scalar_type) { at::detail::check_size_nonnegative(size); at::detail::raise_warning_for_complex_half(scalar_type); caffe2::TypeMeta dtype = scalarTypeToTypeMeta(scalar_type); - size_t size_bytes = computeStorageNbytes(size, stride, dtype.itemsize()); + auto size_bytes = computeStorageNbytes(size, stride, dtype.itemsize()); auto storage_impl = c10::make_intrusive<StorageImpl>( c10::StorageImpl::use_byte_size_t(), size_bytes, - allocator->allocate(size_bytes), allocator, /*resizeable=*/true); @@ -163,6 +192,24 @@ TensorBase empty_strided_generic( return tensor; } +TensorBase empty_strided_generic( + IntArrayRef size, + IntArrayRef stride, + c10::Allocator* allocator, + c10::DispatchKeySet ks, + ScalarType scalar_type) { + return _empty_strided_generic(size, stride, allocator, ks, scalar_type); +} + +TensorBase empty_strided_symint_generic( + SymIntArrayRef size, + SymIntArrayRef stride, + c10::Allocator* allocator, + c10::DispatchKeySet ks, + ScalarType scalar_type) { + return _empty_strided_generic(size, stride, allocator, ks, scalar_type); +} + TensorBase empty_cpu(IntArrayRef size, ScalarType dtype, bool pin_memory, c10::optional<c10::MemoryFormat> memory_format_opt) { auto allocator = GetCPUAllocatorMaybePinned(pin_memory); @@ -303,9 +350,7 @@ TensorBase empty_symint_meta( auto scalar_type = dtype_or_default(dtype_opt); auto *allocator = GetAllocator(kMeta); constexpr c10::DispatchKeySet meta_dks(c10::DispatchKey::Meta); - // TODO: do this. Note that naive implementation will choke on truly - // unknown sizes without on the fly reasoning - // at::detail::check_size_nonnegative(size); + at::detail::check_size_nonnegative(size); at::detail::raise_warning_for_complex_half(scalar_type); caffe2::TypeMeta dtype = scalarTypeToTypeMeta(scalar_type); SymInt size_bytes = dtype.itemsize(); @@ -343,7 +388,7 @@ TensorBase empty_symint_meta( TORCH_CHECK(0, "other memory format not implemented yet"); } - tensor.unsafeGetTensorImpl()->set_sym_sizes_and_strides(size, strides); + tensor.unsafeGetTensorImpl()->set_sizes_and_strides(size, strides); return tensor; } @@ -395,4 +440,40 @@ TensorBase empty_strided_meta( options.pinned_memory_opt()); } +TensorBase empty_strided_symint_meta(SymIntArrayRef size, SymIntArrayRef stride, + ScalarType dtype) { + auto *allocator = GetAllocator(kMeta); + constexpr c10::DispatchKeySet meta_dks(c10::DispatchKey::Meta); + return at::detail::empty_strided_symint_generic( + size, stride, allocator, meta_dks, dtype); +} + +TensorBase empty_strided_symint_meta( + SymIntArrayRef size, + SymIntArrayRef stride, + c10::optional<ScalarType> dtype_opt, + c10::optional<Layout> layout_opt, + c10::optional<Device> device_opt, + c10::optional<bool> pin_memory_opt) { + auto device = device_or_default(device_opt); + TORCH_INTERNAL_ASSERT_DEBUG_ONLY(device.type() == DeviceType::Meta); + TORCH_INTERNAL_ASSERT_DEBUG_ONLY(layout_or_default(layout_opt) == Layout::Strided); + + auto dtype = dtype_or_default(dtype_opt); + return at::detail::empty_strided_symint_meta(size, stride, dtype); +} + +TensorBase empty_strided_symint_meta( + SymIntArrayRef size, + SymIntArrayRef stride, + const TensorOptions &options) { + return at::detail::empty_strided_symint_meta( + size, + stride, + optTypeMetaToScalarType(options.dtype_opt()), + options.layout_opt(), + options.device_opt(), + options.pinned_memory_opt()); +} + }} // namespace at::detail diff --git a/aten/src/ATen/EmptyTensor.h b/aten/src/ATen/EmptyTensor.h index 06a33601a154..969eeb6dc5ee 100644 --- a/aten/src/ATen/EmptyTensor.h +++ b/aten/src/ATen/EmptyTensor.h @@ -4,7 +4,8 @@ namespace at { namespace detail { -inline void check_size_nonnegative(IntArrayRef size) { +template <typename ArrayRefType> +inline void check_size_nonnegative(ArrayRefType size) { for (auto x : size) { TORCH_CHECK( x >= 0, @@ -24,6 +25,11 @@ TORCH_API size_t computeStorageNbytes( IntArrayRef strides, size_t itemsize, size_t storage_offset = 0); +TORCH_API SymInt computeStorageNbytes( + SymIntArrayRef sizes, + SymIntArrayRef strides, + SymInt itemsize, + SymInt storage_offset = 0); TORCH_API TensorBase empty_generic( IntArrayRef size, @@ -39,6 +45,13 @@ TORCH_API TensorBase empty_strided_generic( c10::DispatchKeySet ks, ScalarType scalar_type); +TORCH_API TensorBase empty_strided_symint_generic( + SymIntArrayRef size, + SymIntArrayRef stride, + c10::Allocator* allocator, + c10::DispatchKeySet ks, + ScalarType scalar_type); + TORCH_API TensorBase empty_cpu( IntArrayRef size, ScalarType dtype, @@ -113,5 +126,23 @@ TORCH_API TensorBase empty_strided_meta( IntArrayRef stride, const TensorOptions& options); +TORCH_API TensorBase empty_strided_symint_meta( + SymIntArrayRef size, + SymIntArrayRef stride, + ScalarType dtype); + +TORCH_API TensorBase empty_strided_symint_meta( + SymIntArrayRef size, + SymIntArrayRef stride, + c10::optional<ScalarType> dtype_opt, + c10::optional<Layout> layout_opt, + c10::optional<Device> device_opt, + c10::optional<bool> pin_memory_opt); + +TORCH_API TensorBase empty_strided_symint_meta( + SymIntArrayRef size, + SymIntArrayRef stride, + const TensorOptions& options); + } //
namespace detail } // namespace at diff --git a/aten/src/ATen/FunctionalInverses.cpp b/aten/src/ATen/FunctionalInverses.cpp index 41c4e22a33de..6ae5f3b927c7 100644 --- a/aten/src/ATen/FunctionalInverses.cpp +++ b/aten/src/ATen/FunctionalInverses.cpp @@ -228,6 +228,11 @@ Tensor FunctionalInverses::transpose_copy_int_inverse(const Tensor& base, const } } +Tensor FunctionalInverses::_nested_view_from_buffer_copy_inverse(const Tensor& base, const Tensor& mutated_view, bool reapply_views, const Tensor& nested_size_tensor, const Tensor& nested_stride_tensor, IntArrayRef offsets) { + TORCH_INTERNAL_ASSERT(false, "Attempted to call _nested_view_from_buffer() during the functionalization pass. For now, nested tensors aren't supported during functionalization"); + return Tensor(); +} + Tensor FunctionalInverses::unsqueeze_copy_inverse(const Tensor& base, const Tensor& mutated_view, bool reapply_views, int64_t dim) { if (reapply_views) { return at::squeeze(mutated_view, dim); diff --git a/aten/src/ATen/FunctionalStorageImpl.cpp b/aten/src/ATen/FunctionalStorageImpl.cpp index 7f136759ef6a..e50ffbdcf511 100644 --- a/aten/src/ATen/FunctionalStorageImpl.cpp +++ b/aten/src/ATen/FunctionalStorageImpl.cpp @@ -1,5 +1,6 @@ #include +#include #include #include #include @@ -90,10 +91,24 @@ bool Alias::apply_updates() { return any_updates; } +c10::SymInt get_nbytes(const Tensor& value) { + if (value.unsafeGetTensorImpl()->has_symbolic_sizes_strides()) { + // Today, the two implementations of SymInt are in Python (proxy tensor), + // and lazy tensor (LTC/XLA). + // LTC hasn't implemented SymInt support yet though (torch::lazy::SymIntNodeImpl). + // Once it does, we should remove this check. + if (value.key_set().has(c10::DispatchKey::Python)) { + return value.storage().sym_nbytes(); + } + } + // XLA storage objects also do not properly track nbytes. + return at::detail::computeStorageNbytes(value.sizes(), value.strides(), value.dtype().itemsize(), value.storage_offset()); +} + FunctionalStorageImpl::FunctionalStorageImpl(const Tensor& value) : c10::StorageImpl( c10::StorageImpl::use_byte_size_t(), - value.numel() * value.dtype().itemsize(), + get_nbytes(value), DataPtr{nullptr, value.device()}, GetAllocator(kMeta), /*resizeable=*/true diff --git a/aten/src/ATen/FunctionalTensorWrapper.cpp b/aten/src/ATen/FunctionalTensorWrapper.cpp index 0692982ec467..2c60d3e77ba4 100644 --- a/aten/src/ATen/FunctionalTensorWrapper.cpp +++ b/aten/src/ATen/FunctionalTensorWrapper.cpp @@ -49,6 +49,9 @@ void FunctionalTensorWrapper::set_constructor_metadata() { // Instead, it's sufficient to remove the `Dense` dispatch key, // which prevents us from accidentally trying to directly run a CPU/CUDA kernel. 
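As a sanity check on the storage-size rule above (the SymInt computeStorageNbytes, which get_nbytes also falls back to), here is the same arithmetic in plain int64_t form; an illustrative sketch, not part of the patch:

#include <cassert>
#include <cstdint>
#include <vector>

// Storage must cover storage_offset plus one element past the largest
// reachable index, i.e. 1 + sum_i strides[i] * (sizes[i] - 1) elements.
int64_t storage_nbytes(const std::vector<int64_t>& sizes,
                       const std::vector<int64_t>& strides,
                       int64_t itemsize, int64_t storage_offset) {
  int64_t size = 1;
  for (size_t i = 0; i < sizes.size(); ++i) {
    if (sizes[i] == 0) return 0;  // any zero-sized dim means no storage at all
    size += strides[i] * (sizes[i] - 1);
  }
  return itemsize * (storage_offset + size);
}

int main() {
  // float32 tensor with sizes {2, 3}, strides {4, 1}, storage_offset 2:
  // the last element lives at index 2 + 1*4 + 2*1 = 8, so 9 elements * 4 bytes.
  assert(storage_nbytes({2, 3}, {4, 1}, 4, 2) == 36);
}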
key_set_ = key_set_.remove(c10::DispatchKey::Dense); + // We override a bunch of _custom(), so make sure they get called + // TODO: metadata copying may not actually be necessary then + set_custom_sizes_strides(SizesStridesPolicy::CustomSizes); } FunctionalTensorWrapper::FunctionalTensorWrapper(const Tensor& value) @@ -343,12 +346,12 @@ int64_t FunctionalTensorWrapper::numel_custom() const { bool FunctionalTensorWrapper::is_contiguous_custom(at::MemoryFormat memory_format) const { return value_.unsafeGetTensorImpl()->is_contiguous(); } -c10::SymIntArrayRef FunctionalTensorWrapper::sym_sizes() const { - return value_.unsafeGetTensorImpl()->sym_sizes(); -} c10::SymIntArrayRef FunctionalTensorWrapper::sym_sizes_custom() const { return value_.unsafeGetTensorImpl()->sym_sizes(); } +c10::SymIntArrayRef FunctionalTensorWrapper::sym_strides_custom() const { + return value_.unsafeGetTensorImpl()->sym_strides(); +} namespace functionalization { namespace impl { diff --git a/aten/src/ATen/FunctionalTensorWrapper.h b/aten/src/ATen/FunctionalTensorWrapper.h index c5c0339fc1bf..cf389715795a 100644 --- a/aten/src/ATen/FunctionalTensorWrapper.h +++ b/aten/src/ATen/FunctionalTensorWrapper.h @@ -134,15 +134,15 @@ struct TORCH_API FunctionalTensorWrapper : public c10::TensorImpl { ~FunctionalTensorWrapper() override = default; // FunctionalTensorWrapper overrides all custom size/stride functions, - // so that if the inner tensor has a custo implementation + // so that if the inner tensor has a custom implementation // we make sure to call that implementation. at::IntArrayRef sizes_custom() const override; at::IntArrayRef strides_custom() const override; int64_t dim_custom() const override; int64_t numel_custom() const override; bool is_contiguous_custom(at::MemoryFormat memory_format) const override; - c10::SymIntArrayRef sym_sizes() const override; c10::SymIntArrayRef sym_sizes_custom() const override; + c10::SymIntArrayRef sym_strides_custom() const override; private: const char* tensorimpl_type_name() const override; diff --git a/aten/src/ATen/InferSize.h b/aten/src/ATen/InferSize.h index e0bedb751bf2..594b87373a20 100644 --- a/aten/src/ATen/InferSize.h +++ b/aten/src/ATen/InferSize.h @@ -2,6 +2,8 @@ #include #include +#include +#include #include #include #include @@ -14,9 +16,13 @@ namespace at { // templated to handle std::vector<int64_t> and DimVector use cases, see // below // -template <typename ResultVec> -inline void infer_size_impl(IntArrayRef shape, int64_t numel, ResultVec& res) { - int64_t newsize = 1; +template <typename InputArrayRef, typename NumelType, typename ResultVec> +inline void infer_size_impl( + InputArrayRef shape, + NumelType numel, + ResultVec& res) { + NumelType newsize = 1; + // N.B. this is an index, not a sym dim!
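An aside before the loop resumes below: the rule infer_size_impl encodes, restated as a standalone sketch in plain int64_t (illustrative only):

#include <cstdint>
#include <optional>
#include <stdexcept>
#include <vector>

// Replace at most one -1 entry so the shape's product matches numel.
std::vector<int64_t> infer_size_sketch(std::vector<int64_t> shape, int64_t numel) {
  int64_t newsize = 1;
  std::optional<size_t> infer_dim;
  for (size_t d = 0; d < shape.size(); ++d) {
    if (shape[d] == -1) {
      if (infer_dim) throw std::runtime_error("only one dimension can be inferred");
      infer_dim = d;
    } else {
      newsize *= shape[d];
    }
  }
  if (infer_dim && newsize > 0 && numel % newsize == 0) {
    shape[*infer_dim] = numel / newsize;  // e.g. {2, -1} with numel 6 -> {2, 3}
    return shape;
  }
  if (!infer_dim && newsize == numel) return shape;
  throw std::runtime_error("shape is invalid for the given number of elements");
}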
auto infer_dim = c10::optional<int64_t>(); for (int64_t dim = 0, ndim = shape.size(); dim != ndim; dim++) { if (shape[dim] == -1) { @@ -69,4 +75,13 @@ inline at::DimVector infer_size_dv(IntArrayRef shape, int64_t numel) { return res; } +inline at::SymDimVector infer_size_dv( + c10::SymIntArrayRef shape, + c10::SymInt numel) { + auto res = at::SymDimVector(shape); + infer_size_impl( + shape, numel, res); + return res; +} + } // namespace at diff --git a/aten/src/ATen/NestedTensorImpl.cpp b/aten/src/ATen/NestedTensorImpl.cpp index 1d3efc6f06bf..fb89f46d86b7 100644 --- a/aten/src/ATen/NestedTensorImpl.cpp +++ b/aten/src/ATen/NestedTensorImpl.cpp @@ -4,6 +4,7 @@ #include #include #include +#include #include #include @@ -25,6 +26,46 @@ inline void validate_nested_tensor_metadata( (size_dim == 0 && (int64_t)offsets.empty()) || (size_dim == 2 && nested_sizes.size(0) == (int64_t)offsets.size())); } + +/** + * Generates a nested key_set from a non-nested tensor. + * + * When creating a nested tensor from a non-nested tensor, + * we want to maintain the same keyset as the buffer but + * swap non-nested keys for nested ones + * + * @return Appropriate key set for nested tensor + */ +inline c10::DispatchKeySet generate_nested_key_set_from_buffer( + const at::Tensor& buffer) { + auto nested_key_set = buffer.key_set(); + const bool has_autograd = nested_key_set.has_any(c10::autograd_dispatch_keyset); + // Remove non_nested tensor specific keys + nested_key_set = nested_key_set - + c10::DispatchKeySet{c10::DispatchKey::Dense, c10::DispatchKey::Autograd}; + + // Add nested tensor specific keys + nested_key_set = + nested_key_set | c10::DispatchKeySet{c10::DispatchKey::NestedTensor}; + nested_key_set = + has_autograd ? nested_key_set | c10::autograd_nested : nested_key_set; + return nested_key_set; +} + +/** + * Generates the correct view keyset. + * + * When creating a nested tensor view of base, + * the appropriate keyset will be dependent on the nested + * status of the base + * + * @return Appropriate key set for nested tensor + */ +c10::DispatchKeySet get_view_key_set(const at::Tensor& base) { + return base.is_nested() ? base.key_set() + : generate_nested_key_set_from_buffer(base); +} + } // namespace namespace at { namespace native { @@ -119,19 +160,6 @@ inline std::vector<int64_t> construct_offsets(const at::Tensor& sizes) { return offsets; } -// [Note: Nested Tensor Autograd] The Nested Tensor key is a functionality -// key and therefore getAutogradRelatedKeySetFromBackend will return the
For this specific impl we make sure to register the -// correct Autograd key which is AutogradNestedTensor -c10::DispatchKeySet generate_nested_key_set(at::Tensor buffer) { - c10::DispatchKeySet key_set = - c10::DispatchKeySet(DispatchKey::NestedTensor) | c10::DispatchKeySet{buffer.key_set().highestBackendKey()}; - - // Add AutogradNestedTensor specific keys - key_set = key_set | inplace_or_view_ks | autograd_nested; - return key_set; -} - NestedTensorImpl::NestedTensorImpl( Storage storage, c10::DispatchKeySet key_set, @@ -154,7 +182,7 @@ NestedTensorImpl::NestedTensorImpl( storage_device); validate_nested_tensor_metadata(nested_size_tensor_, nested_stride_tensor_, offsets_); refresh_dim(); - set_sizes_strides_policy(c10::TensorImpl::SizesStridesPolicy::CustomSizes); + set_custom_sizes_strides(c10::TensorImpl::SizesStridesPolicy::CustomSizes); } NestedTensorImpl::NestedTensorImpl( @@ -164,7 +192,7 @@ NestedTensorImpl::NestedTensorImpl( std::vector&& offsets) : NestedTensorImpl( buffer.storage(), - generate_nested_key_set(buffer), + generate_nested_key_set_from_buffer(buffer), buffer.dtype(), nested_size_tensor, nested_stride_tensor, @@ -195,15 +223,14 @@ NestedTensorImpl::NestedTensorImpl( at::Tensor nested_size_tensor, at::Tensor nested_stride_tensor, std::vector&& offsets) - : TensorImpl(impl_type, Storage(base_tensor.storage()), base_tensor.key_set(), base_tensor.dtype()), + : TensorImpl(impl_type, Storage(base_tensor.storage()), get_view_key_set(base_tensor), base_tensor.dtype()), nested_size_tensor_(std::move(nested_size_tensor)), nested_stride_tensor_(std::move(nested_stride_tensor)), offsets_(std::move(offsets)), opt_sizes_(construct_opt_sizes(nested_size_tensor_)) { - TORCH_INTERNAL_ASSERT(base_tensor.is_nested()); validate_nested_tensor_metadata(nested_size_tensor_, nested_stride_tensor_, offsets_); refresh_dim(); - set_sizes_strides_policy(c10::TensorImpl::SizesStridesPolicy::CustomSizes); + set_custom_sizes_strides(c10::TensorImpl::SizesStridesPolicy::CustomSizes); } void NestedTensorImpl::refresh_dim() { @@ -256,9 +283,6 @@ c10::SymIntArrayRef NestedTensorImpl::sym_sizes_custom() const { TORCH_CHECK(false, "Internal error: NestedTensorImpl doesn't support sizes. Please file an issue on https://github.com/pytorch/nestedtensor"); } -c10::SymIntArrayRef NestedTensorImpl::sym_sizes() const { - return sym_sizes_custom(); -} c10::SymIntArrayRef NestedTensorImpl::sym_strides_custom() const { TORCH_CHECK(false, "Internal error: NestedTensorImpl doesn't support strides. 
Please file an issue on https://github.com/pytorch/nestedtensor"); } diff --git a/aten/src/ATen/NestedTensorImpl.h b/aten/src/ATen/NestedTensorImpl.h index f1fb8273c290..278df3c0d203 100644 --- a/aten/src/ATen/NestedTensorImpl.h +++ b/aten/src/ATen/NestedTensorImpl.h @@ -109,7 +109,6 @@ struct TORCH_API NestedTensorImpl : public c10::TensorImpl { } IntArrayRef sizes_custom() const override; c10::SymIntArrayRef sym_sizes_custom() const override; - c10::SymIntArrayRef sym_sizes() const override; IntArrayRef strides_custom() const override; c10::SymIntArrayRef sym_strides_custom() const override; @@ -168,7 +167,7 @@ struct TORCH_API NestedTensorImpl : public c10::TensorImpl { * is generated and redispatched to a non-nested kernel this function * generates the key set used by that buffer tensor * - * @return A newly constructed view tensor + * @return Appropriate key set for non-nested tensor */ inline c10::DispatchKeySet generate_buffer_key_set() const { auto buffer_key_set = this->key_set(); @@ -185,6 +184,7 @@ struct TORCH_API NestedTensorImpl : public c10::TensorImpl { buffer_key_set = Autograd ? c10::DispatchKeySet{c10::DispatchKey::Autograd} | buffer_key_set : buffer_key_set; + return buffer_key_set; } }; diff --git a/aten/src/ATen/OpaqueTensorImpl.h b/aten/src/ATen/OpaqueTensorImpl.h index 1888c65725ee..e6c6413815bb 100644 --- a/aten/src/ATen/OpaqueTensorImpl.h +++ b/aten/src/ATen/OpaqueTensorImpl.h @@ -30,7 +30,7 @@ struct TORCH_API OpaqueTensorImpl : public TensorImpl { : TensorImpl(key_set, data_type, device), opaque_handle_(std::move(opaque_handle)) { set_storage_access_should_throw(); - set_sizes_strides_policy(SizesStridesPolicy::CustomStrides); + set_custom_sizes_strides(SizesStridesPolicy::CustomStrides); sizes_and_strides_.set_sizes(sizes); refresh_numel(); is_non_overlapping_and_dense_ = is_non_overlapping_and_dense; diff --git a/aten/src/ATen/SparseCsrTensorImpl.cpp b/aten/src/ATen/SparseCsrTensorImpl.cpp index 69fc013211f9..4adc602b14ce 100644 --- a/aten/src/ATen/SparseCsrTensorImpl.cpp +++ b/aten/src/ATen/SparseCsrTensorImpl.cpp @@ -68,7 +68,7 @@ SparseCsrTensorImpl::SparseCsrTensorImpl( "to https://github.com/pytorch/pytorch/issues."); set_storage_access_should_throw(); is_non_overlapping_and_dense_ = false; - set_sizes_strides_policy(SizesStridesPolicy::CustomStrides); + set_custom_sizes_strides(SizesStridesPolicy::CustomStrides); // TODO: If this check ever shows up as a bottleneck, which is unlikely given that // comparing devices only involves comparing the type and index (two integers), we // can move this to a DEBUG only assert. 
Until then this confirms and maintains a @@ -104,10 +104,51 @@ void SparseCsrTensorImpl::resize_(int64_t nnz, IntArrayRef size) { sizes_and_strides_.set_sizes(size); } +void SparseCsrTensorImpl::resize_and_clear_(int64_t sparse_dim, IntArrayRef size) { + TORCH_CHECK( + !has_symbolic_sizes_strides_, + "resize_as_sparse_csr_tensor_ called on tensor with symbolic shape"); + TORCH_CHECK(sparse_dim >= 2, "resize_and_clear_ sparse dimensionality must be at least 2, got ", sparse_dim); + TORCH_CHECK(static_cast(size.size()) >= sparse_dim, "resize_and_clear_ size length must be at least sparse dimensionality (=", + sparse_dim, "), got ", size.size()); + auto batch_dim = sparse_dim - 2; + auto batchsize = size.slice(0, batch_dim); + auto densesize = size.slice(batch_dim + 2, size.size() - batch_dim - 2); + + auto values_size = DimVector(batchsize); + values_size.push_back(0); // nse + values_size.append(densesize.begin(), densesize.end()); + + auto col_indices_size = DimVector(batchsize); + col_indices_size.push_back(0); // nse + + auto n_compressed_indices = AT_DISPATCH_ROW_SPARSE_COMPRESSED_LAYOUTS(layout_, "resize_and_clear_", + [&] () -> int64_t { return size[batch_dim]; }, + [&] () -> int64_t { return size[batch_dim + 1]; } + ); + AT_DISPATCH_PLAIN_SPARSE_COMPRESSED_LAYOUTS(layout_, + "resize_and_clear_", + [] () {}, + [&] () { + auto blocksize = this->values_.sizes().slice(this->batch_dim() + 1, 2); + values_size.append(blocksize.begin(), blocksize.end()); + n_compressed_indices /= blocksize[(the_layout == kSparseBsr ? 0 : 1)]; + }); + auto crow_indices_size = DimVector(batchsize); + crow_indices_size.push_back(n_compressed_indices + 1); + + crow_indices_.resize_(crow_indices_size); + crow_indices_.zero_(); + col_indices_.resize_(col_indices_size); + values_.resize_(values_size); + sizes_and_strides_.set_sizes(size); + refresh_numel(); +} + void SparseCsrTensorImpl::resize_as_sparse_csr_tensor_(const Tensor& src) { TORCH_CHECK( !has_symbolic_sizes_strides_, - "resize_as_sparse_csr_tensor_ called on tensor with symbolic shape") + "resize_as_sparse_csr_tensor_ called on tensor with symbolic shape"); set_layout(src.layout()); crow_indices_ = at::empty_like( src.crow_indices(), @@ -132,7 +173,7 @@ void SparseCsrTensorImpl::set_member_tensors( IntArrayRef size) { TORCH_CHECK( !has_symbolic_sizes_strides_, - "set_member_tensors called on tensor with symbolic shape") + "set_member_tensors called on tensor with symbolic shape"); // CSR Type Invariants TORCH_CHECK( @@ -172,5 +213,8 @@ void SparseCsrTensorImpl::set_stride(int64_t dim, int64_t new_stride) { void SparseCsrTensorImpl::set_storage_offset(int64_t storage_offset) { TORCH_CHECK(false, "Sparse ", at::sparse_csr::layoutToString(layout_, /*upper=*/true), " tensors do not have set_storage_offset."); } +bool SparseCsrTensorImpl::is_contiguous_custom(MemoryFormat) const { + TORCH_CHECK(false, "Sparse ", at::sparse_csr::layoutToString(layout_, /*upper=*/true), " tensors do not have is_contiguous"); +} } // namespace at diff --git a/aten/src/ATen/SparseCsrTensorImpl.h b/aten/src/ATen/SparseCsrTensorImpl.h index 1f84fb422fde..9d361be15674 100644 --- a/aten/src/ATen/SparseCsrTensorImpl.h +++ b/aten/src/ATen/SparseCsrTensorImpl.h @@ -37,6 +37,7 @@ struct TORCH_API SparseCsrTensorImpl : public TensorImpl { const caffe2::TypeMeta); void resize_(int64_t nnz, IntArrayRef size); + void resize_and_clear_(int64_t sparse_dim, IntArrayRef size); void resize_as_sparse_csr_tensor_(const Tensor& src); void set_member_tensors( const Tensor& crow_indices, @@ -77,6 
+78,7 @@ struct TORCH_API SparseCsrTensorImpl : public TensorImpl { protected: IntArrayRef strides_custom() const override; SymIntArrayRef sym_strides_custom() const override; + bool is_contiguous_custom(MemoryFormat) const override; public: void set_size(int64_t dim, int64_t new_size) override; diff --git a/aten/src/ATen/SparseTensorImpl.cpp b/aten/src/ATen/SparseTensorImpl.cpp index 99dcec4d6162..197ae2143896 100644 --- a/aten/src/ATen/SparseTensorImpl.cpp +++ b/aten/src/ATen/SparseTensorImpl.cpp @@ -46,7 +46,7 @@ SparseTensorImpl::SparseTensorImpl(at::DispatchKeySet key_set, const caffe2::Typ is_non_overlapping_and_dense_ = false; set_storage_access_should_throw(); - set_sizes_strides_policy(SizesStridesPolicy::CustomStrides); + set_custom_sizes_strides(SizesStridesPolicy::CustomStrides); } // Destructor doesn't call release_resources because it's diff --git a/aten/src/ATen/TensorUtils.cpp b/aten/src/ATen/TensorUtils.cpp index 7fbddd7a3482..e014b650f989 100644 --- a/aten/src/ATen/TensorUtils.cpp +++ b/aten/src/ATen/TensorUtils.cpp @@ -310,12 +310,12 @@ std::vector<int64_t> defaultStrides(IntArrayRef sizes) { // templatized for DimVector and IntArrayRef use cases, // see overloads of computeStride() below. // -template <typename ResultVec, typename NewShapeVec> +template <typename ResultVec, typename NewShapeVec, typename Numel> inline c10::optional<ResultVec> computeStride_impl( - IntArrayRef oldshape, - IntArrayRef oldstride, + const NewShapeVec& oldshape, + const NewShapeVec& oldstride, const NewShapeVec& newshape, - ResultVec toResult(const IntArrayRef&) + ResultVec toResult(const NewShapeVec&) ) { if (oldshape.empty()) { return ResultVec(newshape.size(), 1); } @@ -326,7 +326,7 @@ inline c10::optional<ResultVec> computeStride_impl( // we use the stride as if it were computed via resize. // This could perhaps be combined with the below code, but the complexity // didn't seem worth it. - const int64_t numel = c10::multiply_integers(oldshape); + const Numel numel = c10::multiply_integers(oldshape); if (numel == 0 && oldshape.equals(newshape)) { return toResult(oldstride); } @@ -338,7 +338,7 @@ inline c10::optional<ResultVec> computeStride_impl( newstride[view_d] = 1; } else { newstride[view_d] = - std::max<int64_t>(newshape[view_d+1], 1) * newstride[view_d+1]; + std::max(newshape[view_d+1], Numel(1)) * newstride[view_d+1]; } } return newstride; @@ -346,10 +346,10 @@ inline c10::optional<ResultVec> computeStride_impl( int64_t view_d = (int64_t)newshape.size() - 1; // stride for each subspace in the chunk - int64_t chunk_base_stride = oldstride.back(); + Numel chunk_base_stride = oldstride.back(); // numel in current chunk - int64_t tensor_numel = 1; - int64_t view_numel = 1; + Numel tensor_numel = 1; + Numel view_numel = 1; for (int64_t tensor_d = oldshape.size() - 1; tensor_d >= 0; tensor_d--) { tensor_numel *= oldshape[tensor_d]; // if end of tensor size chunk, check view @@ -383,7 +383,15 @@ c10::optional<std::vector<int64_t>> computeStride( IntArrayRef oldstride, IntArrayRef newshape) { auto toResult = [](const IntArrayRef& a) { return a.vec(); }; - return computeStride_impl<std::vector<int64_t>, IntArrayRef>(oldshape, oldstride, newshape, toResult); + return computeStride_impl<std::vector<int64_t>, IntArrayRef, int64_t>(oldshape, oldstride, newshape, toResult); +} + +c10::optional<SymDimVector> computeStride( + c10::SymIntArrayRef oldshape, + c10::SymIntArrayRef oldstride, + c10::SymIntArrayRef newshape) { + auto toResult = [](const SymIntArrayRef& a) { return SymDimVector(a); }; + return computeStride_impl<SymDimVector, c10::SymIntArrayRef, c10::SymInt>(oldshape, oldstride, newshape, toResult); } c10::optional<DimVector> computeStride( @@ -391,7 +399,7 @@ IntArrayRef oldstride, const DimVector& newshape) { auto toResult = [](const IntArrayRef& a) { return DimVector(a); }; - return computeStride_impl<DimVector, IntArrayRef>(oldshape, oldstride, newshape, toResult); + return computeStride_impl<DimVector, IntArrayRef, int64_t>(oldshape, oldstride, newshape, toResult); } } // namespace detail diff --git a/aten/src/ATen/TensorUtils.h b/aten/src/ATen/TensorUtils.h index 4bfe87c9de44..2a70e64da066 100644 --- a/aten/src/ATen/TensorUtils.h +++ b/aten/src/ATen/TensorUtils.h @@ -157,6 +157,11 @@ TORCH_API c10::optional<std::vector<int64_t>> computeStride( IntArrayRef oldstride, IntArrayRef newshape); +TORCH_API c10::optional<SymDimVector> computeStride( + c10::SymIntArrayRef oldshape, + c10::SymIntArrayRef oldstride, + c10::SymIntArrayRef newshape); + TORCH_API c10::optional<DimVector> computeStride( IntArrayRef oldshape, IntArrayRef oldstride, diff --git a/aten/src/ATen/ThreadLocalState.cpp b/aten/src/ATen/ThreadLocalState.cpp index fb589beaba89..422c1dcc6f0f 100644 --- a/aten/src/ATen/ThreadLocalState.cpp +++ b/aten/src/ATen/ThreadLocalState.cpp @@ -14,6 +14,7 @@ ThreadLocalState::ThreadLocalState() debug_info_(c10::ThreadLocalDebugInfo::current()), functorch_tls_(functorch::getCopyOfFuncTorchTLS()), autograd_tls_(c10::AutogradState::get_tls_state()), + python_dispatcher_state_(c10::impl::PythonDispatcherTLS::get_state()), python_torch_function_state_(at::impl::PythonTorchFunctionTLS::get_state()) { rf_tls_ = at::get_record_function_tls_(); @@ -41,6 +42,8 @@ void ThreadLocalState::setThreadLocalState( at::SavedTensorDefaultHooks::set_stack(state.saved_tensors_default_hooks_); + c10::impl::PythonDispatcherTLS::set_state(state.python_dispatcher_state_); + c10::ThreadLocalDebugInfo::_forceCurrentDebugInfo(state.debug_info_); c10::impl::_force_tls_local_dispatch_key_set(state.dispatch_key_); diff --git a/aten/src/ATen/ThreadLocalState.h b/aten/src/ATen/ThreadLocalState.h index a0067fb8aaeb..529c98b99723 100644 ---
a/aten/src/ATen/ThreadLocalState.h +++ b/aten/src/ATen/ThreadLocalState.h @@ -10,6 +10,7 @@ #include #include #include +#include #include namespace at { @@ -57,6 +58,9 @@ class TORCH_API ThreadLocalState { // TLS for enable_torch_dispatch_mode std::shared_ptr torch_dispatch_mode_state_; + // TLS for enable_python_dispatcher + c10::impl::PyInterpreter* python_dispatcher_state_; + // TLS for __torch_function__ (mode and disable_torch_function) at::impl::PythonTorchFunctionTLS python_torch_function_state_; diff --git a/aten/src/ATen/autocast_mode.cpp b/aten/src/ATen/autocast_mode.cpp index 396b9746754c..95f9029c8dfb 100644 --- a/aten/src/ATen/autocast_mode.cpp +++ b/aten/src/ATen/autocast_mode.cpp @@ -595,12 +595,6 @@ TORCH_LIBRARY_IMPL(aten, AutocastCPU, m) { KERNEL_CPU(ADD_NS(linalg_tensorsolve), "linalg_tensorsolve", Tensor(const Tensor &, const Tensor &, at::OptionalIntArrayRef), fp32) KERNEL_CPU(ADD_NS(fake_quantize_per_tensor_affine), "fake_quantize_per_tensor_affine", Tensor (const Tensor &, double, int64_t, int64_t, int64_t), fp32) - m.impl(TORCH_SELECTIVE_NAME("aten::eig"), - TORCH_FN((&WrapFunction (const Tensor &, bool), - std::tuple (const Tensor &, bool), - &ADD_NS(eig)>::type::call))); - m.impl(TORCH_SELECTIVE_NAME("aten::geqrf"), TORCH_FN((&WrapFunction (const Tensor &), diff --git a/aten/src/ATen/core/PythonFallbackKernel.cpp b/aten/src/ATen/core/PythonFallbackKernel.cpp index 06588a0a989d..381c8a45aca6 100644 --- a/aten/src/ATen/core/PythonFallbackKernel.cpp +++ b/aten/src/ATen/core/PythonFallbackKernel.cpp @@ -1,4 +1,5 @@ #include +#include #include #include @@ -87,6 +88,12 @@ void pythonFallback(const c10::OperatorHandle& op, torch::jit::Stack* stack) { TORCH_INTERNAL_ASSERT(0, "Hit Python dispatch key but no arguments had PyInterpreter (no tensor args?)"); } +void pythonDispatcherFallback(const c10::OperatorHandle& op, c10::DispatchKeySet dispatch_keys, torch::jit::Stack* stack) { + auto* state = c10::impl::PythonDispatcherTLS::get_state(); + TORCH_INTERNAL_ASSERT(state, "Hit PythonDispatcher dispatch key but PythonDispatcherTLS was not set"); + (*state)->python_dispatcher(op, dispatch_keys.remove(c10::DispatchKey::PythonDispatcher), stack); +} + void pythonTLSSnapshotFallback(const c10::OperatorHandle &op, c10::DispatchKeySet dispatch_keys, torch::jit::Stack* stack) { // It is ok for the tls to be already set here. // It means that there are multiple calls into the dispatcher not originating from python code. @@ -134,6 +141,10 @@ TORCH_LIBRARY_IMPL(_, Python, m) { m.fallback(torch::CppFunction::makeFromBoxedFunction<&pythonFallback>()); } +TORCH_LIBRARY_IMPL(_, PythonDispatcher, m) { + m.fallback(torch::CppFunction::makeFromBoxedFunction<&pythonDispatcherFallback>()); +} + TORCH_LIBRARY_IMPL(_, PythonTLSSnapshot, m) { m.fallback(torch::CppFunction::makeFromBoxedFunction<&pythonTLSSnapshotFallback>()); } diff --git a/aten/src/ATen/core/TensorBase.h b/aten/src/ATen/core/TensorBase.h index e6dd73658efc..3f35d3a71de4 100644 --- a/aten/src/ATen/core/TensorBase.h +++ b/aten/src/ATen/core/TensorBase.h @@ -302,6 +302,10 @@ class TORCH_API TensorBase { return impl_->sym_numel(); } + c10::SymInt sym_storage_offset() const { + return impl_->sym_storage_offset(); + } + // Length of one array element in bytes. This is the traditional // Numpy naming. 
size_t itemsize() const { diff --git a/aten/src/ATen/core/dispatch/Dispatcher.h b/aten/src/ATen/core/dispatch/Dispatcher.h index bc40bc5b62e0..1ea677b54ef5 100644 --- a/aten/src/ATen/core/dispatch/Dispatcher.h +++ b/aten/src/ATen/core/dispatch/Dispatcher.h @@ -168,6 +168,12 @@ class TORCH_API Dispatcher final { // See Note [Plumbing Keys Through The Dispatcher] void redispatchBoxed(const OperatorHandle& op, DispatchKeySet dispatchKeySet, Stack* stack) const; + bool hasBackendFallbackForDispatchKey(DispatchKey dk) { + auto dispatch_ix = getDispatchTableIndexForDispatchKey(dk); + if (dispatch_ix < 0) return false; + return backendFallbackKernels_[dispatch_ix].kernel.isValid(); + } + // ------------------------------------------------------------------------ // @@ -333,6 +339,10 @@ class TORCH_API OperatorHandle { return operatorDef_->op.hasKernelForDispatchKey(k); } + bool hasKernelForAnyDispatchKey(DispatchKeySet k) const { + return operatorDef_->op.hasKernelForAnyDispatchKey(k); + } + bool hasComputedKernelForDispatchKey(DispatchKey k) const { return operatorDef_->op.hasComputedKernelForDispatchKey(k); } @@ -388,6 +398,11 @@ class TORCH_API OperatorHandle { c10::Dispatcher::singleton().redispatchBoxed(*this, ks, stack); } + template + PyObject* getPythonOp(c10::impl::PyInterpreter* self_interpreter, F slow_accessor) const { + return operatorDef_->op.getPythonOp(self_interpreter, slow_accessor); + } + private: explicit OperatorHandle(std::list::iterator operatorIterator) : operatorDef_(&*operatorIterator), operatorIterator_(operatorIterator) {} @@ -635,11 +650,18 @@ inline void Dispatcher::callBoxedForDispatchKey(const OperatorHandle& op, Dispat // We still compute this as we're obligated to pass it on to the internal // kernel, if it is a boxed fallback auto dispatchKeySet = entry.dispatchKeyExtractor().getDispatchKeySetBoxed(stack); - const auto& kernel = entry.kernelForDispatchKey(dk); + const auto& kernel = ([&]() { + if (op.hasKernelForDispatchKey(dk)) { + return entry.kernelForDispatchKey(dk); + } else { + auto idx = getDispatchTableIndexForDispatchKey(dk); + TORCH_INTERNAL_ASSERT(idx >= 0); + return backendFallbackKernels_[idx].kernel; + } + })(); kernel.callBoxed(op, dispatchKeySet, stack); } - inline void Dispatcher::redispatchBoxed(const OperatorHandle& op, DispatchKeySet dispatchKeySet, Stack* stack) const { // note: this doesn't need the mutex because write operations on the list keep iterators intact. 
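With the python_dispatcher_state_ member added to ThreadLocalState above, the Python dispatcher now follows work across threads through the existing snapshot/restore pattern. A sketch of the assumed usage (not a call site from this patch):

#include <ATen/ThreadLocalState.h>
#include <functional>
#include <thread>

void run_on_worker(std::function<void()> task) {
  // Snapshot TLS on the submitting thread; the snapshot now carries
  // PythonDispatcherTLS alongside the autograd and torch_function state.
  at::ThreadLocalState tls;
  std::thread worker([tls, task]() {
    at::ThreadLocalStateGuard guard(tls);  // restored for the task's duration
    task();
  });
  worker.join();
}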
const auto& entry = op.operatorDef_->op; diff --git a/aten/src/ATen/core/dispatch/OperatorEntry.cpp b/aten/src/ATen/core/dispatch/OperatorEntry.cpp index 01d30c888db2..5d53500e7dfe 100644 --- a/aten/src/ATen/core/dispatch/OperatorEntry.cpp +++ b/aten/src/ATen/core/dispatch/OperatorEntry.cpp @@ -329,10 +329,8 @@ std::pair OperatorEntry::computeDispatchTab // to let the original CompositeImplicitAutograd handle Undefined if (dispatch_key != DispatchKey::Undefined && isIncludedInAlias(dispatch_key, DispatchKey::CompositeImplicitAutogradNestedTensor)) { if (auto nested_registration = getKernelForDispatchKey(DispatchKey::CompositeImplicitAutogradNestedTensor)) { - if (!has_backend_kernel) { - return {*nested_registration, "nested kernel"}; + return {*nested_registration, "nested kernel"}; } - } } if (dispatch_key == DispatchKey::Undefined || isIncludedInAlias(dispatch_key, DispatchKey::CompositeImplicitAutograd)) { diff --git a/aten/src/ATen/core/dispatch/OperatorEntry.h b/aten/src/ATen/core/dispatch/OperatorEntry.h index a964423d6aa8..c3bd91197f5e 100644 --- a/aten/src/ATen/core/dispatch/OperatorEntry.h +++ b/aten/src/ATen/core/dispatch/OperatorEntry.h @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include @@ -211,6 +212,11 @@ class TORCH_API OperatorEntry final { // Returns all the operator tags added at the time of registration const std::vector& getTags() const; + template + PyObject* getPythonOp(PyInterpreter* self_interpreter, F slow_accessor) const { + return py_cache_.ptr_or(self_interpreter, slow_accessor); + } + private: OperatorName name_; @@ -220,6 +226,8 @@ class TORCH_API OperatorEntry final { #endif std::array dispatchTable_; DispatchKeyExtractor dispatchKeyExtractor_; + // Pointer to the torch.ops.ns.op.overload object for speed + c10::PyHandleCache py_cache_; // kernels_ stores all registered kernels for the corresponding dispatch key // and catchAllKernels_ stores the catch-all kernels. diff --git a/aten/src/ATen/core/function_schema.h b/aten/src/ATen/core/function_schema.h index 14f134939d76..315ceaec19ac 100644 --- a/aten/src/ATen/core/function_schema.h +++ b/aten/src/ATen/core/function_schema.h @@ -550,7 +550,10 @@ inline std::ostream& operator<<(std::ostream& out, const Argument& arg) { // in schema, we have Tensor?(a!) input, and t(a!)?. // however, t?(a!) doesn't work with schema parser. // so we always use Type(alias)? format - auto type = arg.type(); + // real_type versus fake_type: in order to be compatible with FunctionSchema + // parser, printing an argument with either MemoryFormat or Layout type should + // give us the original schema string, hence printing out real_type. + auto type = arg.real_type(); bool is_opt = type->kind() == OptionalType::Kind; auto unopt_type = is_opt ? 
type->castRaw()->getElementType() : type; diff --git a/aten/src/ATen/core/ivalue.h b/aten/src/ATen/core/ivalue.h index 8d0199b3c954..9793730d1977 100644 --- a/aten/src/ATen/core/ivalue.h +++ b/aten/src/ATen/core/ivalue.h @@ -565,8 +565,6 @@ struct TORCH_API IValue final { } } - IValue(c10::SymIntArrayRef v); - bool isSymInt() const { return Tag::SymInt == tag; } diff --git a/aten/src/ATen/core/ivalue_inl.h b/aten/src/ATen/core/ivalue_inl.h index 00361c80a01c..270732c5e163 100644 --- a/aten/src/ATen/core/ivalue_inl.h +++ b/aten/src/ATen/core/ivalue_inl.h @@ -1999,7 +1999,6 @@ inline IValue::IValue(at::ArrayRef v) : IValue(c10::List()) { list.push_back(e); } } -inline IValue::IValue(c10::SymIntArrayRef v) : IValue(at::ArrayRef(v.data(), v.size())) {} template > inline IValue::IValue(const std::vector& v) : IValue(c10::List()) { auto list = to>(); diff --git a/aten/src/ATen/core/jit_type.h b/aten/src/ATen/core/jit_type.h index ce698761dad7..86f784b0c8f4 100644 --- a/aten/src/ATen/core/jit_type.h +++ b/aten/src/ATen/core/jit_type.h @@ -2114,7 +2114,7 @@ struct MemoryFormatType; using MemoryFormatTypePtr = SingletonTypePtr; struct TORCH_API MemoryFormatType : public EnumerationType { std::string str() const override { -return "MemoryFormatType"; +return "MemoryFormat"; } static const TypeKind Kind = TypeKind::MemoryFormatType; // global singleton @@ -2128,7 +2128,7 @@ struct LayoutType; using LayoutTypePtr = SingletonTypePtr; struct TORCH_API LayoutType : public EnumerationType { std::string str() const override { -return "LayoutType"; +return "Layout"; } static const TypeKind Kind = TypeKind::LayoutType; // global singleton diff --git a/aten/src/ATen/cuda/Atomic.cuh b/aten/src/ATen/cuda/Atomic.cuh index 03cabf8de73f..42975411e841 100644 --- a/aten/src/ATen/cuda/Atomic.cuh +++ b/aten/src/ATen/cuda/Atomic.cuh @@ -164,6 +164,7 @@ Atomic##NAME##IntegerImpl()(address, } \ ATOMIC_INTEGER_IMPL(Add) +GPU_ATOMIC_INTEGER(Add, a || b, bool) // Don't instantiate gpuAtomicAdd with the macro as it seems non-standard (see int32, int64) static inline __device__ void gpuAtomicAdd(uint8_t *address, uint8_t val) { @@ -206,10 +207,6 @@ static inline __device__ void gpuAtomicAdd(int64_t *address, int64_t val) { #endif } -static inline __device__ void gpuAtomicAdd(bool *address, bool val) { - *address = address && val; -} - static inline __device__ at::Half gpuAtomicAdd(at::Half *address, at::Half val) { #if defined(USE_ROCM) || ((defined(CUDA_VERSION) && CUDA_VERSION < 10000) || (defined(__CUDA_ARCH__) && (__CUDA_ARCH__ < 700))) return AtomicFPOp()(address, val, diff --git a/aten/src/ATen/cuda/CUDABlas.cpp b/aten/src/ATen/cuda/CUDABlas.cpp index e1a01ceb6829..866f53ee7f87 100644 --- a/aten/src/ATen/cuda/CUDABlas.cpp +++ b/aten/src/ATen/cuda/CUDABlas.cpp @@ -709,11 +709,9 @@ void gemm_and_bias( CuBlasLtMatrixLayout Cdesc(abcType, m, n, result_ld); CuBlasLtMatmulPreference preference; - // See https://github.com/pytorch/pytorch/issues/73328. - // Check https://docs.nvidia.com/cuda/cublas/index.html#cublassetworkspace . - // Recommended size of user-provided workspace is at least 4MiB (to match - // cuBLAS' default workspace pool). - size_t workspaceSize = 4 * 1024 * 1024; + // See https://github.com/pytorch/pytorch/issues/73328 for reasoning behind + // setting this to 1M. 
+ size_t workspaceSize = 1024 * 1024; TORCH_CUDABLAS_CHECK(cublasLtMatmulPreferenceSetAttribute( preference.descriptor(), CUBLASLT_MATMUL_PREF_MAX_WORKSPACE_BYTES, diff --git a/aten/src/ATen/cuda/CUDAEvent.h b/aten/src/ATen/cuda/CUDAEvent.h index 8e83d82b5255..1c3c67949e58 100644 --- a/aten/src/ATen/cuda/CUDAEvent.h +++ b/aten/src/ATen/cuda/CUDAEvent.h @@ -157,6 +157,10 @@ struct TORCH_CUDA_CPP_API CUDAEvent { // Note: cudaEventSynchronize can be safely called from any device void synchronize() const { if (is_created_) { + const c10::impl::PyInterpreter* interp = c10::impl::GPUTrace::get_trace(); + if (C10_UNLIKELY(interp)) { + (*interp)->trace_gpu_event_synchronization(reinterpret_cast(event_)); + } AT_CUDA_CHECK(cudaEventSynchronize(event_)); } } diff --git a/aten/src/ATen/cuda/CUDAGeneratorImpl.cpp b/aten/src/ATen/cuda/CUDAGeneratorImpl.cpp index 0cac5d6da2d5..a678354dca49 100644 --- a/aten/src/ATen/cuda/CUDAGeneratorImpl.cpp +++ b/aten/src/ATen/cuda/CUDAGeneratorImpl.cpp @@ -231,7 +231,8 @@ uint64_t CUDAGeneratorImpl::philox_offset_per_thread() const { * offset_extragraph is the initial offset at the start of the graphed region. * offset_intragraph tracks the offset in the graphed region. */ -void CUDAGeneratorImpl::capture_prologue(int64_t* offset_extragraph) { +void CUDAGeneratorImpl::capture_prologue(int64_t* seed_extragraph, int64_t* offset_extragraph) { + seed_extragraph_ = seed_extragraph; offset_extragraph_ = offset_extragraph; offset_intragraph_ = 0; graph_expects_this_gen_ = true; @@ -279,7 +280,7 @@ PhiloxCudaState CUDAGeneratorImpl::philox_cuda_state(uint64_t increment) { TORCH_INTERNAL_ASSERT(this->offset_intragraph_ <= std::numeric_limits::max() - increment); this->offset_intragraph_ += increment; - return PhiloxCudaState(this->seed_, + return PhiloxCudaState(this->seed_extragraph_, this->offset_extragraph_, offset); } else { diff --git a/aten/src/ATen/cuda/CUDAGeneratorImpl.h b/aten/src/ATen/cuda/CUDAGeneratorImpl.h index 768f0b7549c2..60130b884719 100644 --- a/aten/src/ATen/cuda/CUDAGeneratorImpl.h +++ b/aten/src/ATen/cuda/CUDAGeneratorImpl.h @@ -100,7 +100,7 @@ struct TORCH_CUDA_CPP_API CUDAGeneratorImpl : public c10::GeneratorImpl { c10::intrusive_ptr get_state() const override; void set_philox_offset_per_thread(uint64_t offset); uint64_t philox_offset_per_thread() const; - void capture_prologue(int64_t* offset_extragraph); + void capture_prologue(int64_t* seed_extragraph, int64_t* offset_extragraph); uint64_t capture_epilogue(); PhiloxCudaState philox_cuda_state(uint64_t increment); @@ -114,6 +114,7 @@ struct TORCH_CUDA_CPP_API CUDAGeneratorImpl : public c10::GeneratorImpl { CUDAGeneratorImpl* clone_impl() const override; uint64_t seed_ = default_rng_seed_val; uint64_t philox_offset_per_thread_ = 0; + int64_t* seed_extragraph_{}; int64_t* offset_extragraph_{}; uint32_t offset_intragraph_ = 0; bool graph_expects_this_gen_ = false; diff --git a/aten/src/ATen/cuda/CUDAGraph.cpp b/aten/src/ATen/cuda/CUDAGraph.cpp index c7734334f4e2..583918e9fc08 100644 --- a/aten/src/ATen/cuda/CUDAGraph.cpp +++ b/aten/src/ATen/cuda/CUDAGraph.cpp @@ -65,9 +65,11 @@ void CUDAGraph::capture_begin(MempoolId_t pool/*=0*/) { c10::nullopt, cuda::detail::getDefaultCUDAGenerator()); auto options = TensorOptions().device(at::kCUDA).dtype(at::kLong); + seed_extragraph_ = at::empty({1}, options); offset_extragraph_ = at::empty({1}, options); - gen->capture_prologue(offset_extragraph_.data_ptr()); + seed_extragraph_.fill_(int64_t(gen->current_seed())); + 
gen->capture_prologue(seed_extragraph_.data_ptr(), offset_extragraph_.data_ptr()); auto stream = at::cuda::getCurrentCUDAStream(); @@ -175,6 +177,7 @@ void CUDAGraph::replay() { std::lock_guard lock(gen->mutex_); rng_engine_inputs = gen->philox_cuda_state(wholegraph_increment_); } + seed_extragraph_.fill_(int64_t(gen->current_seed())); offset_extragraph_.fill_(int64_t(rng_engine_inputs.offset_.val)); // graph_exec_ may be replayed in any stream. diff --git a/aten/src/ATen/cuda/CUDAGraph.h b/aten/src/ATen/cuda/CUDAGraph.h index 09b0b7b5d800..bacad79102a3 100644 --- a/aten/src/ATen/cuda/CUDAGraph.h +++ b/aten/src/ATen/cuda/CUDAGraph.h @@ -69,6 +69,7 @@ struct TORCH_CUDA_CPP_API CUDAGraph { int capture_dev_; // RNG state trackers + at::Tensor seed_extragraph_; at::Tensor offset_extragraph_; uint64_t wholegraph_increment_; }; diff --git a/aten/src/ATen/cuda/detail/PhiloxCudaStateRaw.cuh b/aten/src/ATen/cuda/detail/PhiloxCudaStateRaw.cuh index e14680f88793..a9b67b41ac45 100644 --- a/aten/src/ATen/cuda/detail/PhiloxCudaStateRaw.cuh +++ b/aten/src/ATen/cuda/detail/PhiloxCudaStateRaw.cuh @@ -13,14 +13,14 @@ struct PhiloxCudaState { // Called if graph capture is not underway PhiloxCudaState(uint64_t seed, uint64_t offset) { - seed_ = seed; + seed_.val = seed; offset_.val = offset; } // Called if graph capture is underway - PhiloxCudaState(uint64_t seed, + PhiloxCudaState(int64_t* seed, int64_t* offset_extragraph, uint32_t offset_intragraph) { - seed_ = seed; + seed_.ptr = seed; offset_.ptr = offset_extragraph; offset_intragraph_ = offset_intragraph; captured_ = true; @@ -34,7 +34,7 @@ struct PhiloxCudaState { int64_t* ptr; }; - uint64_t seed_ = 0; + Payload seed_; Payload offset_; uint32_t offset_intragraph_ = 0; bool captured_ = false; diff --git a/aten/src/ATen/cuda/detail/UnpackRaw.cuh b/aten/src/ATen/cuda/detail/UnpackRaw.cuh index e6746fbe4fd0..f8fa4ebbf160 100644 --- a/aten/src/ATen/cuda/detail/UnpackRaw.cuh +++ b/aten/src/ATen/cuda/detail/UnpackRaw.cuh @@ -21,9 +21,9 @@ unpack(at::PhiloxCudaState arg) { // static_cast avoids "warning: invalid narrowing conversion from "long" to "unsigned long". // *(arg.offset_.ptr) is a broadcast load of a single int64_t to the entire kernel. // For most threads' reads it will hit in cache, so it shouldn't hurt performance. 
- return std::make_tuple(arg.seed_, static_cast(*(arg.offset_.ptr) + arg.offset_intragraph_)); + return std::make_tuple(static_cast(*arg.seed_.ptr), static_cast(*(arg.offset_.ptr) + arg.offset_intragraph_)); } else { - return std::make_tuple(arg.seed_, arg.offset_.val); + return std::make_tuple(arg.seed_.val, arg.offset_.val); } } diff --git a/aten/src/ATen/cuda/llvm_complex.cpp b/aten/src/ATen/cuda/llvm_complex.cpp index d88bdc4ce657..0bb2c2ba9a09 100644 --- a/aten/src/ATen/cuda/llvm_complex.cpp +++ b/aten/src/ATen/cuda/llvm_complex.cpp @@ -48,6 +48,10 @@ class complex void real(value_type __re) {__re_ = __re;} void imag(value_type __im) {__im_ = __im;} + constexpr operator bool() const { + return real() || imag(); + } + complex& operator= (const value_type& __re) {__re_ = __re; __im_ = value_type(); return *this;} complex& operator+=(const value_type& __re) {__re_ += __re; return *this;} @@ -106,6 +110,10 @@ class complex void real(value_type __re) {__re_ = __re;} void imag(value_type __im) {__im_ = __im;} + constexpr operator bool() const { + return real() || imag(); + } + complex& operator= (float __re) {__re_ = __re; __im_ = value_type(); return *this;} complex& operator+=(float __re) {__re_ += __re; return *this;} @@ -162,6 +170,10 @@ class complex void real(value_type __re) {__re_ = __re;} void imag(value_type __im) {__im_ = __im;} + constexpr operator bool() const { + return real() || imag(); + } + complex& operator= (double __re) {__re_ = __re; __im_ = value_type(); return *this;} complex& operator+=(double __re) {__re_ += __re; return *this;} @@ -482,7 +494,15 @@ inline constexpr bool operator&&(const complex<_Tp>& __x, const complex<_Tp>& __y) { - return (__x.real() || __x.imag()) && (__y.real() || __y.imag()); + return bool(__x) && bool(__y); +} + +template +inline constexpr +bool +operator||(const complex<_Tp>& __x, const complex<_Tp>& __y) +{ + return bool(__x) || bool(__y); } // 26.3.7 values: diff --git a/functorch/functorch/csrc/BatchedTensorImpl.cpp b/aten/src/ATen/functorch/BatchedTensorImpl.cpp similarity index 59% rename from functorch/functorch/csrc/BatchedTensorImpl.cpp rename to aten/src/ATen/functorch/BatchedTensorImpl.cpp index 58d8bfdde6af..c5d6eb34030d 100644 --- a/functorch/functorch/csrc/BatchedTensorImpl.cpp +++ b/aten/src/ATen/functorch/BatchedTensorImpl.cpp @@ -3,51 +3,19 @@ // // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. -#include +#include #include #include -#include #include namespace at { namespace functorch { -BatchedTensorImpl::BatchedTensorImpl(Tensor value, int64_t bdim, int64_t level) - : TensorImpl( - c10::DispatchKeySet(kBatchedKey), - value.dtype(), - value.device() - ) - , value_(std::move(value)) - , level_(level) - , bdim_(bdim) -{ - // TODO: I don't think this ctor gets used. 
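The seed now shares the offset's pointer-or-value payload, so a graph replay can rewrite both before each launch; the selection in unpack above reduces to this plain-C++ sketch (mirroring the union in PhiloxCudaStateRaw.cuh, illustrative only):

#include <cstdint>
#include <tuple>

union Payload { uint64_t val; int64_t* ptr; };

// captured == true while a CUDA graph is in flight: read seed and offset
// through pointers that replay() refills; otherwise use the inline values.
std::tuple<uint64_t, uint64_t> unpack_sketch(Payload seed, Payload offset,
                                             uint32_t offset_intragraph,
                                             bool captured) {
  if (captured) {
    return std::make_tuple(static_cast<uint64_t>(*seed.ptr),
                           static_cast<uint64_t>(*offset.ptr + offset_intragraph));
  }
  return std::make_tuple(seed.val, offset.val);
}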
- TORCH_INTERNAL_ASSERT(false); - TORCH_INTERNAL_ASSERT(value_.defined()); - set_storage_access_should_throw(); - set_sizes_strides_policy(SizesStridesPolicy::CustomStrides); - checkInvariants(); - - const auto public_dims = value_.dim() - 1; - const auto value_sizes = value_.sizes(); - const auto value_strides = value_.strides(); - sizes_and_strides_.resize(public_dims); - for (const auto dim : c10::irange(0, public_dims)) { - auto actual_dim = actualDim(dim, /*wrap_dim=*/false); - sizes_and_strides_.size_at_unchecked(dim) = value_sizes.at(actual_dim); - sizes_and_strides_.stride_at_unchecked(dim) = value_strides.at(actual_dim); - } - storage_offset_= value_.storage_offset(); - refresh_numel(); - refresh_contiguous(); -} - BatchedTensorImpl::BatchedTensorImpl(DispatchKeySet key_set, Tensor value, int64_t bdim, int64_t level) : TensorImpl( - key_set.add(kBatchedKey), + key_set.add(DispatchKey::FuncTorchBatched), value.dtype(), value.device() ) @@ -57,7 +25,7 @@ BatchedTensorImpl::BatchedTensorImpl(DispatchKeySet key_set, Tensor value, int64 { TORCH_INTERNAL_ASSERT(value_.defined()); set_storage_access_should_throw(); - set_sizes_strides_policy(SizesStridesPolicy::CustomStrides); + set_custom_sizes_strides(SizesStridesPolicy::CustomStrides); checkInvariants(); refreshTensorMetadata(); } @@ -82,36 +50,11 @@ int64_t BatchedTensorImpl::actualDim(int64_t dim, bool wrap_dim) const { const auto ndim = sizes_and_strides_.size(); dim = maybe_wrap_dim(dim, ndim); } - auto is_bdim = createBatchDimBitset(bdim_); - - // TODO(vfdev): As BatchedTensorImpl is refactored and has only one dim. - // Below code may be simplified. - - // Example: assume dim = 3, and is_bdim = 10010011000... - // The 1's are batch dims and 0's are normal dims of the underlying value_ Tensor. - // actualDim gives us the index of `dim` in the `value_` Tensor, which is equivalent - // to asking "where does the 3rd (0-indexed) zero occur in the bitset?". - // The answer to that is index 5. - // - // TODO(rzou): the PDEP instruction does exactly this - // (https://stackoverflow.com/questions/7669057/find-nth-set-bit-in-an-int) - // but it might require newer (>= ~2015) CPUs. We should clean this up - // if/when we have dropped support for older CPUs. - int64_t non_bdim_count = 0; - for (int64_t actual_dim = 0; actual_dim < kVmapMaxTensorDims; actual_dim++) { - if (is_bdim[actual_dim]) { - continue; - } - if (non_bdim_count == dim) { - return actual_dim; - } - non_bdim_count++; + if (bdim_ <= dim) { + return dim + 1; + } else { + return dim; } - // If we hit this assert, then that means - // `non_bdim_count` + #num_bdims > kVmapMaxTensorDims. We restrict the number - // of dims a BatchedTensorImpl can have to kVmapMaxTensorDims so this should - // never be hit. - TORCH_INTERNAL_ASSERT(false); } void BatchedTensorImpl::checkInvariants() const { diff --git a/functorch/functorch/csrc/BatchedTensorImpl.h b/aten/src/ATen/functorch/BatchedTensorImpl.h similarity index 83% rename from functorch/functorch/csrc/BatchedTensorImpl.h rename to aten/src/ATen/functorch/BatchedTensorImpl.h index 3d422d68491e..320989604570 100644 --- a/functorch/functorch/csrc/BatchedTensorImpl.h +++ b/aten/src/ATen/functorch/BatchedTensorImpl.h @@ -12,9 +12,6 @@ #include #include -#include -#include - namespace at { namespace functorch { @@ -43,8 +40,7 @@ constexpr int64_t kBatchDimsStackSize = 5; // // bt.sizes() returns (5, 7); bt.sum(0) performs a reduction over the (public) // dim 0, which is equivalent to dim 3 in the underlying ones(2, 3, 5, 7) tensor. 
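Since a BatchedTensorImpl now carries exactly one batch dim, the bitset walk above collapses to a single comparison; restated standalone with a worked case (illustrative):

#include <cassert>
#include <cstdint>

// Map a public dim to the underlying dim by skipping the batch dim at bdim.
int64_t actual_dim(int64_t dim, int64_t bdim) {
  return bdim <= dim ? dim + 1 : dim;
}

int main() {
  // An underlying value_ of sizes (2, 3, 5) with bdim = 1 presents public
  // sizes (2, 5) to vmap:
  assert(actual_dim(0, /*bdim=*/1) == 0);  // public dim 0 -> underlying dim 0
  assert(actual_dim(1, /*bdim=*/1) == 2);  // public dim 1 skips the batch dim
}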
-struct BatchedTensorImpl : public c10::TensorImpl { - explicit BatchedTensorImpl(Tensor value, int64_t dim, int64_t level); +struct TORCH_API BatchedTensorImpl : public c10::TensorImpl { explicit BatchedTensorImpl(at::DispatchKeySet key_set, Tensor value, int64_t dim, int64_t level); // Returns batch dimension of this tensor @@ -79,9 +75,16 @@ struct BatchedTensorImpl : public c10::TensorImpl { #endif void refreshTensorMetadata(); + + // Used in torchdim. torchdim uses non-lexical BatchedTensor; the way it + // accomplishes this is a hack where it is able to modify the levels of + // BatchedTensor to match the level of the current vmap transform. void _unsafe_set_level(int64_t level) { level_ = level; } + + // Used in batching rule for in-place view operations that can change + // the index of the bdim (think squeeze_, unsqueeze_) void unsafe_set_bdim(int64_t bdim) { // NB: you MUST call refreshTensorMetadata after doing this. bdim_ = bdim; @@ -100,7 +103,7 @@ struct BatchedTensorImpl : public c10::TensorImpl { // NB: We use the term "BatchedTensor" to mean a Tensor that is backed with a // BatchedTensorImpl. inline bool isBatchedTensor(const Tensor& tensor) { - return tensor.unsafeGetTensorImpl()->key_set().has(kBatchedKey); + return tensor.unsafeGetTensorImpl()->key_set().has(DispatchKey::FuncTorchBatched); } // It is unsafe to call this on a Tensor that is not backed by a @@ -131,11 +134,15 @@ inline std::bitset createVmapLevelsBitset(int64_t level) { } // Use this to construct a BatchedTensor from a regular Tensor -FUNCTORCH_API Tensor makeBatched(const Tensor& tensor, int64_t dim, int64_t level); +TORCH_API Tensor makeBatched(const Tensor& tensor, int64_t dim, int64_t level); // Adds a batch dim to `tensor`, returning a BatchedTensor -FUNCTORCH_API Tensor addBatchDim(const Tensor& tensor, int64_t dim, int64_t level); +TORCH_API Tensor addBatchDim(const Tensor& tensor, int64_t dim, int64_t level); +// Certain dispatch keys must be propagated to the BatchedTensor (or, in general, +// any wrapper Tensor subclasses). This is because there are methods on Tensor +// that skip dispatch and check for the presence of a dispatch key (e.g. is_cpu()). +// TODO: should probably contain more (or all?) backend keys constexpr DispatchKeySet kKeysToPropagateToWrapper({ DispatchKey::Negative, DispatchKey::Conjugate, diff --git a/aten/src/ATen/mps/MPSFallback.mm b/aten/src/ATen/mps/MPSFallback.mm index 4f9e635dce05..75092867fa01 100644 --- a/aten/src/ATen/mps/MPSFallback.mm +++ b/aten/src/ATen/mps/MPSFallback.mm @@ -14,7 +14,7 @@ void mps_fallback(const c10::OperatorHandle& op, torch::jit::Stack* stack) void mps_error_fallback(const c10::OperatorHandle& op, torch::jit::Stack* stack) { - TORCH_CHECK_NOT_IMPLEMENTED(false, "The operator '", op.schema().operator_name(), "' is not current implemented ", + TORCH_CHECK_NOT_IMPLEMENTED(false, "The operator '", op.schema().operator_name(), "' is not currently implemented ", "for the MPS device. If you want this op to be added in priority during the prototype ", "phase of this feature, please comment on https://github.com/pytorch/pytorch/issues/77764. 
", "As a temporary fix, you can set the environment variable `PYTORCH_ENABLE_MPS_FALLBACK=1` ", diff --git a/aten/src/ATen/native/Activation.cpp b/aten/src/ATen/native/Activation.cpp index 97f504b85dd1..e7aec5e08a40 100644 --- a/aten/src/ATen/native/Activation.cpp +++ b/aten/src/ATen/native/Activation.cpp @@ -314,7 +314,7 @@ bool use_mkldnn(const Tensor& input) { if (!at::globalContext().userEnabledMkldnn()) { return false; } - if (!input.is_contiguous() || input.numel() == 1) { + if (!input.is_contiguous() || input.numel() <= 1) { return false; } return (input.is_mkldnn()) || // input is mkldnn Tensor diff --git a/aten/src/ATen/native/AdaptiveAveragePooling.cpp b/aten/src/ATen/native/AdaptiveAveragePooling.cpp index 855d54eadba8..0057f58b07d9 100644 --- a/aten/src/ATen/native/AdaptiveAveragePooling.cpp +++ b/aten/src/ATen/native/AdaptiveAveragePooling.cpp @@ -106,7 +106,7 @@ namespace { return at::mkldnn_adaptive_avg_pool2d(input, output_size); } - if (!input.is_quantized() && output_size[0] == 1 && output_size[1] == 1) { + if (!input.is_quantized() && output_size[0] == 1 && output_size[1] == 1 && !input.is_xpu()) { // in this case, adaptive pooling is just computing mean over hw // dimensions, which can be done more efficiently #if defined(C10_MOBILE) && defined(USE_XNNPACK) diff --git a/aten/src/ATen/native/AdaptiveAveragePooling3d.cpp b/aten/src/ATen/native/AdaptiveAveragePooling3d.cpp index 71f45daeebac..06257b42cd96 100644 --- a/aten/src/ATen/native/AdaptiveAveragePooling3d.cpp +++ b/aten/src/ATen/native/AdaptiveAveragePooling3d.cpp @@ -306,7 +306,7 @@ Tensor adaptive_avg_pool3d(Tensor const& input, IntArrayRef output_size) { "adaptive_avg_pool2d: elements of output_size must be greater than or equal to 0 ", "but received {", output_size[0], ", ", output_size[1], ",", output_size[2], "}"); - if (output_size[0] == 1 && output_size[1] == 1 && output_size[2] == 1) { + if (output_size[0] == 1 && output_size[1] == 1 && output_size[2] == 1 && !input.is_xpu()) { // in this case, adaptive pooling is just computing mean over hw // dimensions, which can be done more efficiently Tensor out = input.mean({-1, -2, -3}, /* keepdim = */ true); diff --git a/aten/src/ATen/native/BatchLinearAlgebra.cpp b/aten/src/ATen/native/BatchLinearAlgebra.cpp index 7464e12fd7d3..09bffa1a7438 100644 --- a/aten/src/ATen/native/BatchLinearAlgebra.cpp +++ b/aten/src/ATen/native/BatchLinearAlgebra.cpp @@ -3168,66 +3168,6 @@ Tensor linalg_eigvals(const Tensor& input) { return values; } -// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ eig ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -DEFINE_DISPATCH(eig_stub); - -std::tuple eig_out(const Tensor& self, bool eigenvectors, Tensor& e, Tensor& v) { - TORCH_WARN_ONCE( - "torch.eig is deprecated in favor of torch.linalg.eig and will be removed in a future ", - "PyTorch release.\n", - "torch.linalg.eig returns complex tensors of dtype cfloat or cdouble rather than real tensors ", - "mimicking complex tensors.\n", - "L, _ = torch.eig(A)\n", - "should be replaced with\n", - "L_complex = torch.linalg.eigvals(A)\n", - "and\n", - "L, V = torch.eig(A, eigenvectors=True)\n", - "should be replaced with\n", - "L_complex, V_complex = torch.linalg.eig(A)" - ); - TORCH_CHECK(self.dim() == 2, "input should be 2 dimensional"); - TORCH_CHECK(self.size(0) == self.size(1), "input should be square"); - TORCH_CHECK(self.isfinite().all().item(), "input should not contain infs or NaNs"); - checkSameDevice("torch.eig", e, self, "eigenvalues"); - checkLinalgCompatibleDtype("torch.eig", e, self, "eigenvalues"); - 
if (eigenvectors) { - checkSameDevice("torch.eig", v, self, "eigenvectors"); - checkLinalgCompatibleDtype("torch.eig", v, self, "eigenvectors"); - } - int64_t n = self.size(-1); - - if (isComplexType(at::typeMetaToScalarType(self.dtype()))) { - at::native::resize_output(e, {n}); - } else { - at::native::resize_output(e, {n, 2}); - } - if (eigenvectors) { - at::native::resize_output(v, self.sizes()); - } - - // optimization: if self is empty, we can immediately return the empty - // tensors, instead of getting empty tensors from eig_helper - if (self.numel() == 0) { - return std::tuple(e, v); - } - - Tensor vals_, vecs_; - std::tie(vals_, vecs_) = eig_stub(self.device().type(), self, eigenvectors); - e.copy_(vals_); - if (eigenvectors) { - v.copy_(vecs_); - } - return std::tuple(e, v); -} - -std::tuple eig(const Tensor& self, bool eigenvectors) { - Tensor e = at::empty({0}, self.options()); - Tensor v = at::empty({0}, self.options()); - at::eig_out(e, v, self, eigenvectors); - return std::tuple(e, v); -} - // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ linalg_svd ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* torch.svd, implemented in terms of torch.linalg.svd. There are two main diff --git a/aten/src/ATen/native/BatchLinearAlgebra.h b/aten/src/ATen/native/BatchLinearAlgebra.h index 531595f3544e..a86be95f40bd 100644 --- a/aten/src/ATen/native/BatchLinearAlgebra.h +++ b/aten/src/ATen/native/BatchLinearAlgebra.h @@ -231,10 +231,6 @@ using cholesky_inverse_fn = Tensor& (*)(Tensor& /*result*/, Tensor& /*infos*/, b DECLARE_DISPATCH(cholesky_inverse_fn, cholesky_inverse_stub); -using eig_fn = std::tuple (*)(const Tensor&, bool&); - -DECLARE_DISPATCH(eig_fn, eig_stub); - using linalg_eig_fn = void (*)(Tensor& /*eigenvalues*/, Tensor& /*eigenvectors*/, Tensor& /*infos*/, const Tensor& /*input*/, bool /*compute_eigenvectors*/); DECLARE_DISPATCH(linalg_eig_fn, linalg_eig_stub); diff --git a/aten/src/ATen/native/BatchLinearAlgebraKernel.cpp b/aten/src/ATen/native/BatchLinearAlgebraKernel.cpp index 5b18dbe2d5fa..3fe9fc137697 100644 --- a/aten/src/ATen/native/BatchLinearAlgebraKernel.cpp +++ b/aten/src/ATen/native/BatchLinearAlgebraKernel.cpp @@ -127,87 +127,6 @@ Tensor& cholesky_inverse_kernel_impl(Tensor& result, Tensor& infos, bool upper) return result; } -template -void apply_eig(const Tensor& self, bool eigenvectors, Tensor& vals_, Tensor& vecs_, int* info_ptr) { -#if !AT_BUILD_WITH_LAPACK() - TORCH_CHECK(false, "Calling torch.eig on a CPU tensor requires compiling ", - "PyTorch with LAPACK. Please use PyTorch built with LAPACK support."); -#else - using value_t = typename c10::scalar_value_type::type; - - char jobvr = eigenvectors ? 'V' : 'N'; - int64_t n = self.size(-1); - auto self_data = self.data_ptr(); - - auto vals_data = vals_.data_ptr(); - scalar_t* wr = vals_data; - - scalar_t* vecs_data = eigenvectors ? vecs_.data_ptr() : nullptr; - // NOLINTNEXTLINE(cppcoreguidelines-narrowing-conversions,bugprone-narrowing-conversions) - int ldvr = eigenvectors ? 
n : 1; - - Tensor rwork; - value_t* rwork_data = nullptr; - if (self.is_complex()) { - ScalarType real_dtype = toRealValueType(typeMetaToScalarType(self.dtype())); - rwork = at::empty({n*2}, self.options().dtype(real_dtype)); - rwork_data = rwork.data_ptr(); - } - - if (n > 0) { - // call lapackEig once to get the optimal size for work data - scalar_t wkopt; - // NOLINTNEXTLINE(cppcoreguidelines-init-variables) - lapackEig('N', jobvr, n, self_data, n, wr, - nullptr, 1, vecs_data, ldvr, &wkopt, -1, rwork_data, info_ptr); - int lwork = std::max(1, real_impl(wkopt)); - - // call again to do the actual work - Tensor work = at::empty({lwork}, self.dtype()); - lapackEig('N', jobvr, n, self_data, n, wr, - nullptr, 1, vecs_data, ldvr, work.data_ptr(), lwork, rwork_data, info_ptr); - } -#endif -} - -std::tuple eig_kernel_impl(const Tensor& self, bool& eigenvectors) { - int64_t n = self.size(-1); - // lapackEig function expects the input to be column major, or stride {1, n}, - // so we must set the stride manually since the default stride for tensors is - // row major, {n, 1} - Tensor self_ = at::empty_strided( - {n, n}, - {1, n}, - at::TensorOptions(self.dtype())); - self_.copy_(self); - - auto options = self.options().memory_format(LEGACY_CONTIGUOUS_MEMORY_FORMAT); - - // the API is slightly different for the complex vs real case: if the input - // is complex, eigenvals will be a vector of complex. If the input is real, - // eigenvals will be a (n, 2) matrix containing the real and imaginary parts - // in each column - Tensor vals_; - if (self.is_complex()) { - vals_ = at::empty({n}, options); - } else { - vals_ = at::empty_strided({n, 2}, {1, n}, options); - } - Tensor vecs_ = eigenvectors - ? at::empty_strided({n, n}, {1, n}, options) - : Tensor(); - - // NOLINTNEXTLINE(cppcoreguidelines-init-variables) - auto infos = at::zeros({}, self.options().dtype(kInt)); - AT_DISPATCH_FLOATING_AND_COMPLEX_TYPES(self.scalar_type(), "eig_cpu", [&]{ - apply_eig(self_, eigenvectors, vals_, vecs_, infos.data_ptr()); - }); - // NOLINTNEXTLINE(clang-analyzer-core.CallAndMessage) - at::_linalg_check_errors(infos, "eig", /*is_matrix*/true); - - return std::tuple(vals_, vecs_); -} - /* Computes the eigenvalues and eigenvectors of n-by-n matrix 'input'. This is an in-place routine, content of 'input', 'values', 'vectors' is overwritten. 
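The deleted eig_kernel_impl above documents a layout detail worth keeping in mind: LAPACK drivers expect column-major (Fortran-order) storage, and ATen can request it directly with explicit strides. A small standalone sketch of just that trick, independent of the removed code:

```cpp
#include <ATen/ATen.h>

int main() {
  // The default ATen layout for an n-by-n tensor is row-major, strides
  // {n, 1}. Passing strides {1, n} to empty_strided yields a column-major
  // buffer, which is what the LAPACK path copied `self` into before the call.
  const int64_t n = 4;
  auto a = at::randn({n, n});
  auto a_fortran = at::empty_strided({n, n}, {1, n}, a.options());
  a_fortran.copy_(a);  // same logical values, Fortran-order storage
  TORCH_CHECK(a_fortran.stride(0) == 1 && a_fortran.stride(1) == n);
  return 0;
}
```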
@@ -1200,12 +1119,6 @@ REGISTER_AVX2_DISPATCH(cholesky_inverse_stub, &cholesky_inverse_kernel_impl); REGISTER_VSX_DISPATCH(cholesky_inverse_stub, &cholesky_inverse_kernel_impl); REGISTER_ZVECTOR_DISPATCH(cholesky_inverse_stub, &cholesky_inverse_kernel_impl); -REGISTER_ARCH_DISPATCH(eig_stub, DEFAULT, &eig_kernel_impl); -REGISTER_AVX512_DISPATCH(eig_stub, &eig_kernel_impl); -REGISTER_AVX2_DISPATCH(eig_stub, &eig_kernel_impl); -REGISTER_VSX_DISPATCH(eig_stub, &eig_kernel_impl); -REGISTER_ZVECTOR_DISPATCH(eig_stub, &eig_kernel_impl); - REGISTER_ARCH_DISPATCH(linalg_eig_stub, DEFAULT, &linalg_eig_kernel); REGISTER_AVX512_DISPATCH(linalg_eig_stub, &linalg_eig_kernel); REGISTER_AVX2_DISPATCH(linalg_eig_stub, &linalg_eig_kernel); diff --git a/aten/src/ATen/native/Linear.cpp b/aten/src/ATen/native/Linear.cpp index 255f86ca1a30..f6d61076dec5 100644 --- a/aten/src/ATen/native/Linear.cpp +++ b/aten/src/ATen/native/Linear.cpp @@ -179,24 +179,6 @@ static Tensor sumproduct_pair(const Tensor& left_, const Tensor& right_, IntArra return result; } -namespace { - -bool einsum_check_label(unsigned char label) { - return std::isalpha(label); -} - -uint8_t einsum_label_to_index(unsigned char label) { - constexpr uint8_t NUM_OF_LETTERS = 'z' - 'a' + 1; - return std::isupper(label) ? label - 'A' : NUM_OF_LETTERS + (label - 'a'); -} - -unsigned char einsum_index_to_label(uint8_t index) { - constexpr uint8_t NUM_OF_LETTERS = 'z' - 'a' + 1; - return index < NUM_OF_LETTERS ? index + 'A' : index - NUM_OF_LETTERS + 'a'; -} - -} // namespace - // There are roughly three parts to compute einsum: // 1. Parse equation to extract the labels for each input operand and output // 2. Unsqueeze missing dimensions from input operands and permute to align them @@ -205,8 +187,22 @@ unsigned char einsum_index_to_label(uint8_t index) { Tensor einsum(c10::string_view equation, TensorList operands) { TORCH_CHECK(!operands.empty(), "einsum(): must provide at least one operand"); + // Labels must be in range [A-Za-z] + constexpr uint8_t NUM_OF_LETTERS = 'z' - 'a' + 1; + constexpr uint8_t TOTAL_LABELS = NUM_OF_LETTERS * 2; + // Code used to identify ELLIPSIS ("...") - constexpr uint8_t ELLIPSIS = 52; + constexpr uint8_t ELLIPSIS = TOTAL_LABELS; + + // Convert label in [A-Za-z] to subscript in [0, TOTAL_LABELS) + auto label_to_subscript = [=](unsigned char label) -> uint8_t { + return std::isupper(label) ? label - 'A' : label - 'a' + NUM_OF_LETTERS; + }; + + // Convert subscript in [0, TOTAL_LABELS) to label in [A-Za-z] + auto subscript_to_label = [=](uint8_t s) -> unsigned char { + return s < NUM_OF_LETTERS ? s + 'A' : s + 'a' - NUM_OF_LETTERS; + }; // Find arrow (->) to split equation into lhs and rhs const auto arrow_pos = equation.find("->"); @@ -255,11 +251,11 @@ Tensor einsum(c10::string_view equation, TensorList operands) { default: // Parse label TORCH_CHECK( - einsum_check_label(label), + std::isalpha(label), "einsum(): invalid subscript given at index ", i, " in the equation string, subscripts must be in [a-zA-Z]"); - op_labels[curr_op].push_back(einsum_label_to_index(label)); + op_labels[curr_op].push_back(label_to_subscript(label)); } } @@ -267,8 +263,6 @@ Tensor einsum(c10::string_view equation, TensorList operands) { curr_op == num_ops - 1, "einsum(): more operands were provided than specified in the equation"); - // Labels must be within [a-zA-Z]. 
- constexpr uint8_t TOTAL_LABELS = 52; std::vector label_count(TOTAL_LABELS, 0); // The maximum number of dimensions covered by any ellipsis, needed when @@ -354,11 +348,11 @@ Tensor einsum(c10::string_view equation, TensorList operands) { default: TORCH_CHECK( - einsum_check_label(label), + std::isalpha(label), "einsum(): invalid subscript given at index ", - lhs.size() + 2 + i, + lhs.size() + 2 + i, " in the equation string, subscripts must be in [a-zA-Z]"); - const auto index = einsum_label_to_index(label); + const auto index = label_to_subscript(label); TORCH_CHECK( // Ensure label appeared at least once for some input operand and at // most once for the output @@ -420,7 +414,7 @@ Tensor einsum(c10::string_view equation, TensorList operands) { TORCH_CHECK( operand.size(j) == operand.size(dim), "einsum(): subscript ", - einsum_index_to_label(label), + subscript_to_label(label), " is repeated for operand ", i, " but the sizes don't match, ", diff --git a/aten/src/ATen/native/MaxUnpooling.cpp b/aten/src/ATen/native/MaxUnpooling.cpp index 27d4e1a93c81..33cc4dc7a61c 100644 --- a/aten/src/ATen/native/MaxUnpooling.cpp +++ b/aten/src/ATen/native/MaxUnpooling.cpp @@ -11,6 +11,10 @@ Tensor& max_unpooling2d_forward_out_cpu( const Tensor& indices_, IntArrayRef output_size, Tensor& output) { + // See Note [Writing Nondeterministic Operations] + // Nondeterministic with duplicate indices + at::globalContext().alertNotDeterministic("max_unpooling2d_forward_out"); + auto oheight = output_size[0]; auto owidth = output_size[1]; TORCH_CHECK( @@ -149,6 +153,10 @@ Tensor& max_unpooling3d_forward_out_cpu(const Tensor& self_, IntArrayRef stride, IntArrayRef padding, Tensor& output) { + // See Note [Writing Nondeterministic Operations] + // Nondeterministic with duplicate indices + at::globalContext().alertNotDeterministic("max_unpooling3d_forward_out"); + TORCH_CHECK(output.is_contiguous(), "output must be contiguous"); int64_t oT = output_size[0]; int64_t oH = output_size[1]; diff --git a/aten/src/ATen/native/MetaTensor.cpp b/aten/src/ATen/native/MetaTensor.cpp index a58b18c786e8..e29317c25870 100644 --- a/aten/src/ATen/native/MetaTensor.cpp +++ b/aten/src/ATen/native/MetaTensor.cpp @@ -12,7 +12,7 @@ namespace at { namespace native { -Tensor empty_meta( +Tensor empty_meta_symint( SymIntArrayRef size, c10::optional dtype_opt, c10::optional layout_opt, @@ -29,6 +29,7 @@ Tensor empty_meta( size, dtype_opt, layout_opt, device_opt, pin_memory_opt, memory_format_opt); } +// Kept only for BC with XLA Tensor empty_strided_meta( IntArrayRef size, IntArrayRef stride, @@ -37,7 +38,18 @@ Tensor empty_strided_meta( c10::optional device_opt, c10::optional pin_memory_opt ) { - return at::detail::empty_strided_meta( + return empty_strided_meta_symint(c10::fromIntArrayRef(size), c10::fromIntArrayRef(stride), dtype_opt, layout_opt, device_opt, pin_memory_opt); +} + +Tensor empty_strided_meta_symint( + SymIntArrayRef size, + SymIntArrayRef stride, + c10::optional dtype_opt, + c10::optional layout_opt, + c10::optional device_opt, + c10::optional pin_memory_opt +) { + return at::detail::empty_strided_symint_meta( size, stride, dtype_opt, layout_opt, device_opt, pin_memory_opt); } diff --git a/aten/src/ATen/native/Normalization.cpp b/aten/src/ATen/native/Normalization.cpp index 34d906b7adc4..a67377f047d7 100644 --- a/aten/src/ATen/native/Normalization.cpp +++ b/aten/src/ATen/native/Normalization.cpp @@ -14,6 +14,7 @@ #include #include +#include static const int MIOPEN_DIM_MAX = 5; @@ -41,7 +42,7 @@ 
DEFINE_DISPATCH(batch_norm_cpu_backward_stub); DEFINE_DISPATCH(renorm_scale_factor_stub); namespace { - void check_dims_match_num_input_features(const char* arg_name, int64_t expected, int64_t actual){ + void check_dims_match_num_input_features(const char* arg_name, SymInt expected, SymInt actual){ TORCH_CHECK(actual == expected, arg_name, " should contain ", expected, " elements not ", actual); } @@ -443,14 +444,14 @@ std::tuple _batch_norm_impl_index( const Tensor& running_mean = c10::value_or_else(running_mean_opt, [] {return Tensor();}); const Tensor& running_var = c10::value_or_else(running_var_opt, [] {return Tensor();}); - auto num_features = input.sizes()[1]; + auto num_features = input.sym_sizes()[1]; - if (input.numel() == 0) { + if (input.sym_numel() == 0) { Tensor reserve = at::empty({0}, input.options().dtype(kByte)); auto options = input.options().dtype( at::toAccumulateType(input.scalar_type(), /*is_cuda=*/input.is_cuda())); - auto save_mean = at::empty({num_features}, options); - auto save_invstd = at::empty({num_features}, options); + auto save_mean = at::empty_symint(c10::SymIntArrayRef({num_features}), options); + auto save_invstd = at::empty_symint(c10::SymIntArrayRef({num_features}), options); // don't return view of input, don't return empty tensor because it will break gradient chain auto out = input.clone(); @@ -461,20 +462,20 @@ std::tuple _batch_norm_impl_index( } if (running_mean.defined()) { - check_dims_match_num_input_features("running_mean", num_features, running_mean.numel()); + check_dims_match_num_input_features("running_mean", num_features, running_mean.sym_numel()); } else if (!training) { AT_ERROR("running_mean must be defined in evaluation mode"); } if (running_var.defined()) { - check_dims_match_num_input_features("running_var", num_features, running_var.numel()); + check_dims_match_num_input_features("running_var", num_features, running_var.sym_numel()); } else if (!training) { AT_ERROR("running_var must be defined in evaluation mode"); } if (weight.defined()) { - check_dims_match_num_input_features("weight", num_features, weight.numel()); + check_dims_match_num_input_features("weight", num_features, weight.sym_numel()); } if (bias.defined()) { - check_dims_match_num_input_features("bias", num_features, bias.numel()); + check_dims_match_num_input_features("bias", num_features, bias.sym_numel()); } const bool use_cudnn = ( diff --git a/aten/src/ATen/native/README.md b/aten/src/ATen/native/README.md index cfce94a36c0e..01a25e3a978c 100644 --- a/aten/src/ATen/native/README.md +++ b/aten/src/ATen/native/README.md @@ -628,7 +628,8 @@ the torch._C._nn (marked with `python_module: nn`), torch._C._fft (marked with `python_module: fft`), torch._C._linalg (marked with `python_module: linalg`) objects, torch._C._sparse (marked with `python_module: sparse`) objects, -or torch._C._special (marked with `python_module: special`) objects. +torch._C._special (marked with `python_module: special`) objects, +or torch._C._nested (marked with `python_module: nested`) objects. 
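The Normalization.cpp hunk above swaps concrete int64_t size queries for their SymInt counterparts; for ordinary eager tensors the two behave identically, which is why the existing checks keep passing. A rough sketch of that equivalence, assuming a libtorch build from this branch with the sym_sizes()/sym_numel() APIs:

```cpp
#include <ATen/ATen.h>

int main() {
  auto input = at::randn({8, 16, 4, 4});
  auto running_mean = at::zeros({16});

  // c10::SymInt compares (and prints) like a plain integer when sizes are
  // concrete, and additionally stays valid when sizes are symbolic under
  // tracing, which is the point of the change.
  c10::SymInt num_features = input.sym_sizes()[1];
  TORCH_CHECK(running_mean.sym_numel() == num_features,
              "running_mean should contain ", num_features, " elements");
  return 0;
}
```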
### Undefined tensor conventions diff --git a/aten/src/ATen/native/RNN.cpp b/aten/src/ATen/native/RNN.cpp index e40caef80e3c..b195422ff862 100644 --- a/aten/src/ATen/native/RNN.cpp +++ b/aten/src/ATen/native/RNN.cpp @@ -717,7 +717,7 @@ struct GRUCell : Cell { const hidden_type& hidden, const cell_params& params, bool pre_compute_input = false) const override { - if (input.is_cuda()) { + if (input.is_cuda() || input.is_xpu()) { TORCH_CHECK(!pre_compute_input); auto igates = params.matmul_ih(input); auto hgates = params.matmul_hh(hidden); diff --git a/aten/src/ATen/native/ReflectionPad.cpp b/aten/src/ATen/native/ReflectionPad.cpp index db744cc95eb0..7824de63805f 100644 --- a/aten/src/ATen/native/ReflectionPad.cpp +++ b/aten/src/ATen/native/ReflectionPad.cpp @@ -965,8 +965,8 @@ TORCH_IMPL_FUNC(reflection_pad3d_out_cpu) auto input = input_.contiguous(); if (batch_mode) { - AT_DISPATCH_FLOATING_AND_COMPLEX_TYPES_AND1( - kHalf, input.scalar_type(), "reflection_pad3d_cpu", [&] { + AT_DISPATCH_FLOATING_AND_COMPLEX_TYPES_AND2( + kHalf, kBFloat16, input.scalar_type(), "reflection_pad3d_cpu", [&] { auto input_data = input.data_ptr(); auto output_data = output.data_ptr(); auto nbatch = input.size(0); @@ -986,8 +986,8 @@ TORCH_IMPL_FUNC(reflection_pad3d_out_cpu) pad_front); }); } else { - AT_DISPATCH_FLOATING_AND_COMPLEX_TYPES_AND1( - kHalf, input.scalar_type(), "reflection_pad3d_cpu", [&] { + AT_DISPATCH_FLOATING_AND_COMPLEX_TYPES_AND2( + kHalf, kBFloat16, input.scalar_type(), "reflection_pad3d_cpu", [&] { auto input_data = input.data_ptr(); auto output_data = output.data_ptr(); reflection_pad3d_out_frame( @@ -1043,8 +1043,8 @@ TORCH_IMPL_FUNC(reflection_pad3d_backward_out_cpu)(const Tensor& grad_output, grad_input.zero_(); if (batch_mode) { - AT_DISPATCH_FLOATING_AND_COMPLEX_TYPES_AND1( - kHalf, input.scalar_type(), "reflection_pad3d_backward_cpu", [&] { + AT_DISPATCH_FLOATING_AND_COMPLEX_TYPES_AND2( + kHalf, kBFloat16, input.scalar_type(), "reflection_pad3d_backward_cpu", [&] { reflection_pad3d_backward_out_loop( grad_input.data_ptr(), grad_output_.data_ptr(), @@ -1061,8 +1061,8 @@ TORCH_IMPL_FUNC(reflection_pad3d_backward_out_cpu)(const Tensor& grad_output, pad_front); }); } else { - AT_DISPATCH_FLOATING_AND_COMPLEX_TYPES_AND1( - kHalf, input.scalar_type(), "reflection_pad3d_backward_cpu", [&] { + AT_DISPATCH_FLOATING_AND_COMPLEX_TYPES_AND2( + kHalf, kBFloat16, input.scalar_type(), "reflection_pad3d_backward_cpu", [&] { reflection_pad3d_backward_out_frame( grad_input.data_ptr(), grad_output_.data_ptr(), diff --git a/aten/src/ATen/native/Resize.h b/aten/src/ATen/native/Resize.h index c6fe2b3d2146..84fd6c6e6196 100644 --- a/aten/src/ATen/native/Resize.h +++ b/aten/src/ATen/native/Resize.h @@ -83,20 +83,30 @@ inline TensorImpl* resize_impl_cpu_( return self; } +template +T maybe_convert_symint(c10::SymInt) = delete; + +template <> +inline c10::SymInt maybe_convert_symint(c10::SymInt x) { return x; } + +template <> +inline int64_t maybe_convert_symint(c10::SymInt x) { return x.expect_int(); } + +template static inline void checkInBoundsForStorage( - IntArrayRef size, - IntArrayRef stride, - int64_t storage_offset, + ArrayRef size, + ArrayRef stride, + T storage_offset, const caffe2::TypeMeta data_type, const Storage& new_storage) { - int64_t storage_size_bytes = + T storage_size_bytes = at::detail::computeStorageNbytes(size, stride, data_type.itemsize()); - int64_t storage_offset_bytes = storage_offset * data_type.itemsize(); + T storage_offset_bytes = storage_offset * data_type.itemsize(); if 
(storage_size_bytes == 0) { // NB: (a tensor with arbitrary 0 dims)'s storage can have any numel. return; } - int64_t new_storage_size_bytes = new_storage.nbytes(); + T new_storage_size_bytes = maybe_convert_symint(new_storage.sym_nbytes()); TORCH_CHECK( storage_size_bytes + storage_offset_bytes <= new_storage_size_bytes, "setStorage: sizes ", @@ -151,11 +161,12 @@ static inline void checkSetStorage(Tensor& result, Storage storage, int64_t stor * Set self's sizes, strides, and storage_offset. * (size, stride, storage_offset) must be in bounds for self's storage. */ +template inline void setStrided( const Tensor& self, - IntArrayRef size, - IntArrayRef stride, - int64_t storage_offset) { + ArrayRef size, + ArrayRef stride, + T storage_offset) { TORCH_CHECK(size.size() == stride.size(), "mismatch in length of strides and shape"); for (auto val : stride) { TORCH_CHECK(val >= 0, @@ -169,13 +180,7 @@ inline void setStrided( /* storage offset */ TORCH_CHECK(storage_offset >= 0, "Tensor: invalid storage offset ", storage_offset); - self_->set_storage_offset(storage_offset); - - /* size and stride */ - if (self_->sizes() == size && self_->strides() == stride) { - return; - } - self_->set_sizes_and_strides(size, stride); + self_->set_sizes_and_strides(size, stride, c10::make_optional(storage_offset)); } }} diff --git a/aten/src/ATen/native/TensorAdvancedIndexing.cpp b/aten/src/ATen/native/TensorAdvancedIndexing.cpp index f263c2ce2389..101803c71d75 100644 --- a/aten/src/ATen/native/TensorAdvancedIndexing.cpp +++ b/aten/src/ATen/native/TensorAdvancedIndexing.cpp @@ -1095,8 +1095,6 @@ Tensor & index_select_out_cpu_(const Tensor & self, int64_t dim, const Tensor & TORCH_CHECK(index.scalar_type() == ScalarType::Long || index.scalar_type() == ScalarType::Int, "index_select(): Expected dtype int32 or int64 for index"); TORCH_CHECK(self.scalar_type() == result.scalar_type(), "index_select(): self and result must have the same scalar type"); - TORCH_CHECK(dim == 0 || dim < self.dim(), - "index_select(): Indexing dim ", dim, " is out of bounds of tensor"); at::assert_no_internal_overlap(result); at::assert_no_overlap(result, self); at::assert_no_overlap(result, index); diff --git a/aten/src/ATen/native/TensorConversions.cpp b/aten/src/ATen/native/TensorConversions.cpp index c28fe272f861..ea8474960264 100644 --- a/aten/src/ATen/native/TensorConversions.cpp +++ b/aten/src/ATen/native/TensorConversions.cpp @@ -1470,7 +1470,7 @@ Tensor sparse_compressed_to_sparse(const Tensor& self) { // Sparse layout conversions End Tensor to_meta(const Tensor& tensor) { - auto out = at::native::empty_strided_meta(tensor.sizes(), tensor.strides(), \ + auto out = at::native::empty_strided_meta_symint(tensor.sym_sizes(), tensor.sym_strides(), \ /*dtype=*/c10::make_optional(tensor.scalar_type()), /*layout=*/c10::make_optional(tensor.layout()), \ /*device=*/c10::make_optional(c10::Device(c10::kMeta)), /*pin_memory=*/c10::nullopt); // needs to handle wrapped numbers, so dtype promotion works properly. 
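to_meta above now goes through empty_strided_meta_symint so symbolic sizes survive the conversion. The user-visible contract of a meta tensor is unchanged: it carries sizes, strides, and dtype but no data, so ops on it run shape inference only. A usage-level sketch built from stock ATen factories:

```cpp
#include <ATen/ATen.h>

int main() {
  auto src = at::randn({2, 3});

  // A meta tensor mirrors the metadata of `src` without allocating storage.
  auto meta = at::empty_strided(src.sizes(), src.strides(),
                                src.options().device(at::kMeta));

  // Ops on meta tensors compute output metadata only; no data is touched.
  auto out = at::add(meta, meta);
  TORCH_CHECK(out.is_meta());
  TORCH_CHECK(out.sizes() == src.sizes());
  return 0;
}
```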
diff --git a/aten/src/ATen/native/TensorFactories.cpp b/aten/src/ATen/native/TensorFactories.cpp index 6ccbbbac03a7..2e01f7e8699a 100644 --- a/aten/src/ATen/native/TensorFactories.cpp +++ b/aten/src/ATen/native/TensorFactories.cpp @@ -214,12 +214,9 @@ Tensor empty_strided_cpu(IntArrayRef size, IntArrayRef stride, c10::optional optional_memory_format, Tensor& result) { - // TODO: support empty_out properly (I was forced to change this immediately - // with empty so that empty/empty.out had the same type signature) - auto size = c10::asIntArrayRefSlow(sym_size); // Preferably, this argument would not be accepted by _out, but the code // generator requires the out and non-out overloads to match exactly TORCH_CHECK( @@ -386,7 +383,7 @@ Tensor empty_like_quantized( } } -Tensor new_empty( +Tensor new_empty_symint( const Tensor& self, SymIntArrayRef size, c10::optional dtype_opt, @@ -401,10 +398,10 @@ Tensor new_empty( return at::empty_symint(size, dtype, layout, device, pin_memory, c10::nullopt); } -Tensor new_empty_strided( +Tensor new_empty_strided_symint( const Tensor& self, - IntArrayRef size, - IntArrayRef stride, + c10::SymIntArrayRef size, + c10::SymIntArrayRef stride, c10::optional dtype, c10::optional layout, c10::optional device, @@ -413,7 +410,7 @@ Tensor new_empty_strided( // See [Note: hacky wrapper removal for TensorOptions] TensorOptions options = TensorOptions().dtype(dtype).layout(layout).device(device).pinned_memory(pin_memory); - return at::empty_strided(size, stride, self.options().merge_in(options)); + return at::empty_strided_symint(size, stride, self.options().merge_in(options)); } // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ eye ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -1077,7 +1074,7 @@ Tensor triu_indices_cpu( // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ zeros ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Tensor zeros(SymIntArrayRef size, +Tensor zeros_symint(SymIntArrayRef size, c10::optional dtype, c10::optional layout, c10::optional device, @@ -1107,8 +1104,7 @@ Tensor& zeros_sparse_out(IntArrayRef size, Tensor& result) { return result; } -Tensor& zeros_out(SymIntArrayRef sym_size, Tensor& result) { - auto size = c10::asIntArrayRefSlow(sym_size); +Tensor& zeros_out(IntArrayRef size, Tensor& result) { if (result.is_sparse()) { // TODO: I think this branch should be dead, but we don't have an easy // way to cover all sparse kernels with zeros_sparse_out, so retain this @@ -1483,7 +1479,7 @@ Tensor clone(const Tensor& src, c10::optional optional_memory if (memory_format == MemoryFormat::Preserve) { if (src.is_non_overlapping_and_dense()) { // Copy all strides, this is marginally faster than calling empty_like - self = at::empty_strided(src.sizes(), src.strides(), src.options()); + self = at::empty_strided_symint(src.sym_sizes(), src.sym_strides(), src.options()); } else { self = at::empty_like(src); } diff --git a/aten/src/ATen/native/TensorShape.cpp b/aten/src/ATen/native/TensorShape.cpp index 3f0b40ba29b7..d72cc0b65293 100644 --- a/aten/src/ATen/native/TensorShape.cpp +++ b/aten/src/ATen/native/TensorShape.cpp @@ -429,6 +429,23 @@ Tensor concat(TensorList tensors, int64_t dim) { return at::cat(tensors, dim); } +// torch.concatenate, alias for torch.cat +Tensor& concatenate_out(TensorList tensors, Dimname dim, Tensor& result) { + return at::cat_out(result, tensors, dimname_to_position(tensors[0], dim)); +} + +Tensor concatenate(TensorList tensors, Dimname dim) { + return at::cat(tensors, dimname_to_position(tensors[0], dim)); +} + +Tensor& concatenate_out(TensorList tensors, int64_t dim, Tensor & 
result) { + return at::cat_out(result, tensors, dim); +} + +Tensor concatenate(TensorList tensors, int64_t dim) { + return at::cat(tensors, dim); +} + static bool sizes_match_except(IntArrayRef s1, IntArrayRef s2, int64_t dim_except /* should already be wrapped */) { if (s1.size() != s2.size()) { return false; } @@ -844,9 +861,7 @@ Tensor diag_embed(const Tensor& self, int64_t offset, int64_t dim1_, int64_t dim return result; } -Tensor expand(const Tensor& self, c10::SymIntArrayRef sym_size, bool /*unused*/) { - // TODO: properly support SymInt expand - auto size = asIntArrayRefSlow(sym_size); +Tensor expand(const Tensor& self, c10::IntArrayRef size, bool /*unused*/) { TORCH_CHECK(size.size() >= (size_t)self.dim(), "expand(", self.toString(), "{", self.sizes(), "}, size=", size, "): the number of sizes provided (", size.size(), ") ", @@ -882,6 +897,15 @@ Tensor make_qtensor(const Tensor& self, IntArrayRef size, IntArrayRef stride, Qu } Tensor as_strided_tensorimpl(const Tensor& self, IntArrayRef size, IntArrayRef stride, optional storage_offset_) { + TORCH_INTERNAL_ASSERT(!self.is_mps(), "as_strided_tensorimpl does not work with MPS; call self.as_strided(...) instead"); + auto storage_offset = storage_offset_.value_or(self.storage_offset()); + auto result = at::detail::make_tensor( + c10::TensorImpl::VIEW, Storage(self.storage()), self.key_set(), self.dtype()); + setStrided(result, size, stride, storage_offset); + return result; +} + +Tensor as_strided_tensorimpl_meta(const Tensor& self, IntArrayRef size, IntArrayRef stride, optional storage_offset_) { auto storage_offset = storage_offset_.value_or(self.storage_offset()); auto result = at::detail::make_tensor( c10::TensorImpl::VIEW, Storage(self.storage()), self.key_set(), self.dtype()); @@ -925,9 +949,8 @@ const Tensor &as_strided_(const Tensor& self, IntArrayRef size, IntArrayRef stri return self; } -Tensor narrow_copy_dense(const Tensor& self, int64_t dim, SymInt start, SymInt length) { - // TODO: properly support SymInt narrow_copy - return self.narrow(dim, start.expect_int(), length.expect_int()).clone(at::MemoryFormat::Contiguous); +Tensor narrow_copy_dense(const Tensor& self, int64_t dim, int64_t start, int64_t length) { + return self.narrow(dim, start, length).clone(at::MemoryFormat::Contiguous); } Tensor narrow_copy_dense_cpu(const Tensor& self, int64_t dim, int64_t start, int64_t length){ @@ -2105,6 +2128,10 @@ Tensor slice( auto quantizer = create_subtensor_quantizer(self, false, start_val, end_val, dim, step); result = as_strided_qtensorimpl(self, sizes, strides, storage_offset, quantizer); } else { + // NB: it is extremely important to perform a redispatch here for + // the MPS backend; if you call as_strided_tensorimpl directly, + // the necessary metadata for MPS will not get set up and you will + // get silently wrong results result = self.as_strided(sizes, strides, storage_offset); } namedinference::propagate_names(result, self); @@ -2644,15 +2671,15 @@ Tensor transpose(const Tensor & self, int64_t dim0, int64_t dim1) { return sparse_compressed_transpose(self, dim0, dim1); } - // Transpose of a tensor is a view operation. - if (dim0 == dim1) { - return self; - } - if (self.is_mkldnn()) { return at::_mkldnn_transpose(self, dim0, dim1); } + // Transpose of a tensor is a view operation. 
+ if (dim0 == dim1) { + return self.alias(); + } + DimVector sizes(self.sizes().begin(), self.sizes().end()); std::swap(sizes[dim0], sizes[dim1]); DimVector strides(self.strides().begin(), self.strides().end()); @@ -3204,19 +3231,13 @@ Tensor adjoint(const Tensor &self) { return _adjoint(self, /*transpose=*/false, "adjoint()"); } -Tensor view_meta(const Tensor& self, - at::SymIntArrayRef size) { - // TODO: Properly support SymInt view - return view_impl(self, c10::asIntArrayRefSlow(size)); -} - Tensor view(const Tensor& self, at::IntArrayRef size) { return view_impl(self, size); } Tensor alias(const Tensor& self) { - return alias_with_sizes_and_strides(self, self.sizes(), self.strides()); + return alias_with_sizes_and_strides(self, self.sizes(), self.strides()); } Tensor detach(const Tensor& self) { @@ -3592,7 +3613,7 @@ at::Tensor& expand_copy_SymInt_out(const at::Tensor & self, c10::SymIntArrayRef } -at::Tensor& expand_copy_out(const at::Tensor & self, at::SymIntArrayRef size, bool implicit, at::Tensor & out) { +at::Tensor& expand_copy_out_symint(const at::Tensor & self, at::SymIntArrayRef size, bool implicit, at::Tensor & out) { auto tmp = self.expand_symint(size, implicit); out.copy_(tmp); return out; @@ -3748,7 +3769,7 @@ void unbind_copy_int_out(const at::Tensor & self, int64_t dim, at::TensorList o } -at::Tensor& view_copy_out(const at::Tensor & self, at::SymIntArrayRef size, at::Tensor & out) { +at::Tensor& view_copy_out_symint(const at::Tensor & self, at::SymIntArrayRef size, at::Tensor & out) { auto tmp = self.view_symint(size); out.copy_(tmp); return out; diff --git a/aten/src/ATen/native/cpu/DistributionKernels.cpp b/aten/src/ATen/native/cpu/DistributionKernels.cpp index 617809e14292..4363cc9d62e3 100644 --- a/aten/src/ATen/native/cpu/DistributionKernels.cpp +++ b/aten/src/ATen/native/cpu/DistributionKernels.cpp @@ -103,88 +103,11 @@ void bernoulli_scalar_kernel(const TensorBase &self, double p, c10::optional gen) { +static void exponential_kernel(TensorIteratorBase& iter, double lambda, c10::optional gen) { CPUGeneratorImpl* generator = get_generator_or_default(gen, detail::getDefaultCPUGenerator()); templates::cpu::exponential_kernel(iter, lambda, generator); } -#if !AT_MKL_ENABLED() -void exponential_kernel(TensorIteratorBase& iter, double lambda, c10::optional gen) { - exponential_kernel_default(iter, lambda, gen); -} -#else -void exponential_kernel(TensorIteratorBase &iter, double lambda, c10::optional gen) { - Tensor self = iter.tensor(0); - if (lambda > 0 && !std::isinf(lambda) && !std::isnan(lambda) && cpuinfo_initialize() && - cpuinfo_vendor_intel == cpuinfo_get_processor(0)->core->vendor) { - CPUGeneratorImpl* generator = get_generator_or_default(gen, detail::getDefaultCPUGenerator()); - int64_t seed; - { - // See Note [Acquire lock when using random generators] - std::lock_guard lock(generator->mutex_); - if (self.scalar_type() == at::kDouble) - seed = generator->random64(); - else - seed = generator->random(); - } - int64_t n = self.numel(); - bool contig = self.is_contiguous(); - - AT_DISPATCH_ALL_TYPES_AND2(at::ScalarType::Half, at::ScalarType::BFloat16, self.scalar_type(), "exponential_cpu", [&] { - at::Tensor tmp_tensor; - constexpr bool is_df = std::is_same::value || std::is_same::value; - if (is_df && contig) { - tmp_tensor = self; - } else if (std::is_same::value) { - tmp_tensor = at::empty(self.sizes(), self.options().dtype(at::kDouble)); - } else { - tmp_tensor = at::empty(self.sizes(), self.options().dtype(at::kFloat)); - } - - scalar_t *self_ptr = 
self.data_ptr(); - using tmp_scalar_t = typename std::conditional_t::value, double, float>; - tmp_scalar_t *sample_ptr = tmp_tensor.data_ptr(); - - auto sample = [&](int64_t begin, int64_t end) { - int64_t len = end - begin; - if (len > 0) { - VSLStreamStatePtr stream; - if (std::is_same::value) { - vslNewStream(&stream, VSL_BRNG_MCG31, seed); - vslSkipAheadStream(stream, begin); - vdRngExponential(VSL_RNG_METHOD_EXPONENTIAL_ICDF, stream, len, - (double *)(sample_ptr + begin), 0, 1./lambda); - vslDeleteStream(&stream); - } else { - vslNewStream(&stream, VSL_BRNG_MCG31, seed); - vslSkipAheadStream(stream, begin); - vsRngExponential(VSL_RNG_METHOD_EXPONENTIAL_ICDF, stream, len, - (float *) (sample_ptr + begin), 0, 1./lambda); - vslDeleteStream(&stream); - } - // vectorized copy if using buffer and contiguous - if (!is_df && contig) { - scalar_t *self_seg = self_ptr + begin; - tmp_scalar_t *tmp_seg = sample_ptr + begin; - at::vec::convert(tmp_seg, self_seg, len); - } - } - }; - - parallel_for(0, n, /* grain_size= */ 800, sample); - - // copy_ if using buffer and non contiguous - if (!contig) { - self.copy_(tmp_tensor); - } - }); - } else { - // The situation of AMD, move to using the default version - exponential_kernel_default(iter, lambda, gen); - } -} -#endif - static void geometric_kernel(TensorIteratorBase& iter, double p, c10::optional gen) { CPUGeneratorImpl* generator = get_generator_or_default(gen, detail::getDefaultCPUGenerator()); templates::cpu::geometric_kernel(iter, p, generator); diff --git a/aten/src/ATen/native/cuda/AveragePool2d.cu b/aten/src/ATen/native/cuda/AveragePool2d.cu index 55632014a0de..46e96e902981 100644 --- a/aten/src/ATen/native/cuda/AveragePool2d.cu +++ b/aten/src/ATen/native/cuda/AveragePool2d.cu @@ -32,8 +32,8 @@ __device__ inline int max(int a, int b) { template __global__ void avg_pool2d_out_cuda_frame(const int nthreads, - const scalar_t* const bottom_data, const int channels, - const int height, const int width, const int pooled_height, + const scalar_t* const bottom_data, const int64_t channels, + const int64_t height, const int64_t width, const int64_t pooled_height, const int pooled_width, const int kernel_h, const int kernel_w, const int stride_h, const int stride_w, const int pad_h, const int pad_w, scalar_t* const top_data, const int divisor_override, @@ -81,8 +81,8 @@ __global__ void avg_pool2d_out_cuda_frame(const int nthreads, template __global__ void avg_pool2d_out_cuda_frame_nhwc(const int nthreads, - const scalar_t* const bottom_data, const int channels, - const int height, const int width, const int pooled_height, + const scalar_t* const bottom_data, const int64_t channels, + const int64_t height, const int64_t width, const int pooled_height, const int pooled_width, const int kernel_h, const int kernel_w, const int stride_h, const int stride_w, const int pad_h, const int pad_w, scalar_t* const top_data, const int divisor_override, @@ -130,8 +130,8 @@ __global__ void avg_pool2d_out_cuda_frame_nhwc(const int nthreads, template __global__ void avg_pool2d_backward_out_cuda_frame(const int nthreads, const scalar_t* const top_diff, - const int channels, const int height, - const int width, const int pooled_height, const int pooled_width, + const int64_t channels, const int64_t height, + const int64_t width, const int64_t pooled_height, const int64_t pooled_width, const int kernel_h, const int kernel_w, const int stride_h, const int stride_w, const int pad_h, const int pad_w, scalar_t* const bottom_diff, const int divisor_override, @@ -187,8 +187,8 
@@ __global__ void avg_pool2d_backward_out_cuda_frame(const int nthreads, const sca template __global__ void avg_pool2d_backward_out_cuda_frame_nhwc(const int nthreads, const scalar_t* const top_diff, - const int channels, const int height, - const int width, const int pooled_height, const int pooled_width, + const int64_t channels, const int64_t height, + const int64_t width, const int pooled_height, const int pooled_width, const int kernel_h, const int kernel_w, const int stride_h, const int stride_w, const int pad_h, const int pad_w, scalar_t* const bottom_diff, const int divisor_override, diff --git a/aten/src/ATen/native/cuda/BinaryLogicalOpsKernels.cu b/aten/src/ATen/native/cuda/BinaryLogicalOpsKernels.cu index e69674412c79..cc6046c003e4 100644 --- a/aten/src/ATen/native/cuda/BinaryLogicalOpsKernels.cu +++ b/aten/src/ATen/native/cuda/BinaryLogicalOpsKernels.cu @@ -18,7 +18,7 @@ void logical_and_kernel_cuda(TensorIterator& iter) { #if AT_USE_JITERATOR() static const auto logical_and_string = jiterator_stringify( template - T logical_and_kernel(T a, T b) { + bool logical_and_kernel(T a, T b) { return a && b; } ); // logical_and_string @@ -48,24 +48,76 @@ void logical_and_kernel_cuda(TensorIterator& iter) { } } +const char logical_or_name[] = "logical_or_kernel"; void logical_or_kernel_cuda(TensorIterator& iter) { - AT_DISPATCH_ALL_TYPES_AND_COMPLEX_AND3(kHalf, kBool, ScalarType::BFloat16, - iter.common_dtype(), "logical_or_cuda", [&]() { + auto dtype = iter.common_dtype(); + if (at::isComplexType(dtype)) { +#if AT_USE_JITERATOR() + static const auto logical_or_string = jiterator_stringify( + template + bool logical_or_kernel(T a, T b) { + return a || b; + } + ); // logical_or_string + AT_DISPATCH_COMPLEX_TYPES(dtype, "logical_or_cuda", [&]() { + jitted_gpu_kernel< + /*name=*/ logical_or_name, + /*return_dtype=*/ scalar_t, + /*common_dtype=*/ scalar_t, + /*arity=*/ 2>(iter, logical_or_string); + }); +#else + AT_DISPATCH_COMPLEX_TYPES(dtype, "logical_or_cuda", [&]() { + gpu_kernel_with_scalars(iter, []GPU_LAMBDA(scalar_t a, scalar_t b) -> bool { + return a || b; + }); + }); +#endif + } else { + AT_DISPATCH_ALL_TYPES_AND3(kHalf, kBool, ScalarType::BFloat16, + dtype, "logical_or_cuda", [&]() { opmath_symmetric_gpu_kernel_with_scalars( iter, []GPU_LAMBDA(scalar_t a, scalar_t b) -> bool { return a || b; }); }); + } } +const char logical_xor_name[] = "logical_xor_kernel"; void logical_xor_kernel_cuda(TensorIterator& iter) { - AT_DISPATCH_ALL_TYPES_AND_COMPLEX_AND3(kHalf, kBool, ScalarType::BFloat16, - iter.common_dtype(), "logical_xor_cuda", [&]() { + auto dtype = iter.common_dtype(); + if (at::isComplexType(dtype)) { +#if AT_USE_JITERATOR() + static const auto logical_xor_string = jiterator_stringify( + template + bool logical_xor_kernel(T a, T b) { + return bool(a) != bool(b); + } + ); + AT_DISPATCH_COMPLEX_TYPES(dtype, "logical_xor_cuda", [&]() { + jitted_gpu_kernel< + /*name=*/ logical_xor_name, + /*return_dtype=*/ scalar_t, + /*common_dtype=*/ scalar_t, + /*arity=*/ 2>(iter, logical_xor_string); + }); // logical_xor_string +#else + AT_DISPATCH_COMPLEX_TYPES(dtype, "logical_xor_cuda", [&]() { + gpu_kernel_with_scalars(iter, []GPU_LAMBDA(scalar_t a, scalar_t b) -> bool { + return bool(a) != bool(b); + }); + }); +#endif + } else { + AT_DISPATCH_ALL_TYPES_AND3(kHalf, kBool, ScalarType::BFloat16, + dtype, "logical_xor_cuda", [&]() { opmath_symmetric_gpu_kernel_with_scalars( iter, []GPU_LAMBDA(scalar_t a, scalar_t b) -> bool { return bool(a) != bool(b); }); }); + } } 
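With the two hunks above, logical_or and logical_xor accept complex inputs on CUDA, compiled through the jiterator when AT_USE_JITERATOR is enabled and through gpu_kernel_with_scalars otherwise. A usage sketch, assuming a CUDA-enabled build of this branch:

```cpp
#include <ATen/ATen.h>

int main() {
  // A complex element counts as true iff it is nonzero; the result is bool.
  auto a = at::randn({4}, at::dtype(at::kComplexFloat).device(at::kCUDA));
  auto b = at::randn({4}, at::dtype(at::kComplexFloat).device(at::kCUDA));

  auto y = at::logical_xor(a, b);  // per element: bool(a_i) != bool(b_i)
  TORCH_CHECK(y.scalar_type() == at::kBool);
  return 0;
}
```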
REGISTER_DISPATCH(logical_and_stub, &logical_and_kernel_cuda); diff --git a/aten/src/ATen/native/cuda/Col2Im.cu b/aten/src/ATen/native/cuda/Col2Im.cu index fd57ec1f21d4..7b829b801a68 100644 --- a/aten/src/ATen/native/cuda/Col2Im.cu +++ b/aten/src/ATen/native/cuda/Col2Im.cu @@ -105,7 +105,7 @@ void col2im_out_cuda_template( output.zero_(); int64_t output_batch_stride = output.stride(0); - AT_DISPATCH_FLOATING_AND_COMPLEX_TYPES_AND1(kHalf, + AT_DISPATCH_FLOATING_AND_COMPLEX_TYPES_AND2(kHalf, kBFloat16, input.scalar_type(), "col2im_out_cuda", [&] { int64_t height_col = (output_height + 2 * pad_height - (dilation_height * (kernel_height - 1) + 1)) / diff --git a/aten/src/ATen/native/cuda/DilatedMaxPool2d.cu b/aten/src/ATen/native/cuda/DilatedMaxPool2d.cu index 05a201147241..dc1f771c9ab8 100644 --- a/aten/src/ATen/native/cuda/DilatedMaxPool2d.cu +++ b/aten/src/ATen/native/cuda/DilatedMaxPool2d.cu @@ -44,8 +44,8 @@ static __device__ inline int p_end(int size, int pad, int pooled_size, int strid // kernels borrowed from Caffe template __global__ void max_pool_forward_nchw(const int nthreads, const scalar_t* bottom_data, - const int channels, const int height, - const int width, const int pooled_height, const int pooled_width, + const int64_t channels, const int64_t height, + const int64_t width, const int pooled_height, const int pooled_width, const int kernel_h, const int kernel_w, const int stride_h, const int stride_w, const int pad_h, const int pad_w, const int dilation_h, const int dilation_w, scalar_t* top_data, @@ -83,8 +83,8 @@ __global__ void max_pool_forward_nchw(const int nthreads, const scalar_t* bottom template C10_LAUNCH_BOUNDS_1(CUDA_MAX_THREADS) __global__ void max_pool_forward_nhwc(const scalar_t* bottom_data, const int nbatch, - const int channels, const int height, - const int width, const int pooled_height, const int pooled_width, + const int64_t channels, const int64_t height, + const int64_t width, const int pooled_height, const int pooled_width, const int kernel_h, const int kernel_w, const int stride_h, const int stride_w, const int pad_h, const int pad_w, const int dilation_h, const int dilation_w, @@ -176,8 +176,8 @@ C10_LAUNCH_BOUNDS_2(BLOCK_THREADS, 4) C10_LAUNCH_BOUNDS_2(BLOCK_THREADS, 8) #endif __global__ void max_pool_backward_nchw(const scalar_t* top_diff, - const int64_t* top_mask, const int num, const int channels, - const int height, const int width, const int pooled_height, + const int64_t* top_mask, const int num, const int64_t channels, + const int64_t height, const int64_t width, const int pooled_height, const int pooled_width, const int kernel_h, const int kernel_w, const int stride_h, const int stride_w, const int pad_h, const int pad_w, const int dilation_h, const int dilation_w, @@ -209,8 +209,8 @@ __global__ void max_pool_backward_nchw(const scalar_t* top_diff, template C10_LAUNCH_BOUNDS_1(CUDA_MAX_THREADS) __global__ void max_pool_backward_nhwc(const scalar_t* top_diff, - const int64_t* top_mask, const int nbatch, const int channels, - const int height, const int width, const int pooled_height, + const int64_t* top_mask, const int nbatch, const int64_t channels, + const int64_t height, const int64_t width, const int pooled_height, const int pooled_width, const int kernel_h, const int kernel_w, const int stride_h, const int stride_w, const int pad_h, const int pad_w, const int dilation_h, const int dilation_w, @@ -242,9 +242,9 @@ __global__ void max_pool_backward_nhwc(const scalar_t* top_diff, int iH = (height + gridDim.z-1) / gridDim.z; int iW = (width 
+ gridDim.y-1) / gridDim.y; int istartH = threadIdx.z + blockIdx.z*iH; - int iendH = ::min(istartH+iH, height); + int iendH = ::min(static_cast(istartH)+iH, height); int istartW = threadIdx.y + blockIdx.y*iW; - int iendW = ::min(istartW+iW, width); + int iendW = ::min(static_cast(istartW)+iW, width); for (int ih = istartH; ih < iendH; ih+=blockDim.z) { int phstart = p_start(ih, pad_h, kernel_h, dilation_h, stride_h); diff --git a/aten/src/ATen/native/cuda/Im2Col.cu b/aten/src/ATen/native/cuda/Im2Col.cu index 89b2a1879b4b..5ca540964d52 100644 --- a/aten/src/ATen/native/cuda/Im2Col.cu +++ b/aten/src/ATen/native/cuda/Im2Col.cu @@ -106,7 +106,7 @@ static void im2col_out_cuda_template( output.zero_(); // Launch kernel - AT_DISPATCH_FLOATING_AND_COMPLEX_TYPES_AND1(kHalf, + AT_DISPATCH_FLOATING_AND_COMPLEX_TYPES_AND2(kHalf, kBFloat16, input.scalar_type(), "im2col_out_cuda", [&] { Tensor input_n; Tensor output_n; diff --git a/aten/src/ATen/native/cuda/LinearAlgebraStubs.cpp b/aten/src/ATen/native/cuda/LinearAlgebraStubs.cpp index cb6cacb3630f..f5816c8c6747 100644 --- a/aten/src/ATen/native/cuda/LinearAlgebraStubs.cpp +++ b/aten/src/ATen/native/cuda/LinearAlgebraStubs.cpp @@ -93,11 +93,6 @@ void lazy_linalg_eigh_kernel(const Tensor& eigenvalues, const Tensor& eigenvecto linalg_eigh_stub(DeviceType::CUDA, eigenvalues, eigenvectors, infos, upper, compute_eigenvectors); } -std::tuple lazy_eig_kernel(const Tensor& self, bool& eigenvectors) { - loadLazyTorchLinalgLibrary(); - return eig_stub(DeviceType::CUDA, self, eigenvectors); -} - void lazy_linalg_eig_kernel(Tensor& eigenvalues, Tensor& eigenvectors, Tensor& infos, const Tensor& input, bool compute_eigenvectors) { getTorchLinalgLibrary(); linalg_eig_stub(DeviceType::CUDA, eigenvalues, eigenvectors, infos, input, compute_eigenvectors); @@ -155,7 +150,6 @@ REGISTER_CUDA_DISPATCH(orgqr_stub, &lazy_orgqr_kernel); REGISTER_CUDA_DISPATCH(ormqr_stub, &lazy_ormqr_kernel); REGISTER_CUDA_DISPATCH(geqrf_stub, &lazy_geqrf_kernel); REGISTER_CUDA_DISPATCH(linalg_eigh_stub, &lazy_linalg_eigh_kernel); -REGISTER_CUDA_DISPATCH(eig_stub, &lazy_eig_kernel); REGISTER_CUDA_DISPATCH(linalg_eig_stub, &lazy_linalg_eig_kernel); REGISTER_CUDA_DISPATCH(svd_stub, &lazy_svd_kernel) REGISTER_CUDA_DISPATCH(lu_solve_stub, &lazy_lu_solve); diff --git a/aten/src/ATen/native/cuda/MaxUnpooling.cu b/aten/src/ATen/native/cuda/MaxUnpooling.cu index 9c24c4ea8edc..ba1a7eb1f5cb 100644 --- a/aten/src/ATen/native/cuda/MaxUnpooling.cu +++ b/aten/src/ATen/native/cuda/MaxUnpooling.cu @@ -118,6 +118,10 @@ Tensor& max_unpooling2d_forward_out_cuda(const Tensor& self_, const Tensor& indices_, IntArrayRef output_size, Tensor& output) { + // See Note [Writing Nondeterministic Operations] + // Nondeterministic with duplicate indices + at::globalContext().alertNotDeterministic("max_unpooling2d_forward_out"); + TORCH_CHECK(output.is_contiguous(), "output must be contiguous"); TORCH_CHECK( indices_.scalar_type() == at::ScalarType::Long, @@ -291,6 +295,10 @@ Tensor& max_unpooling3d_forward_out_cuda(const Tensor& self_, IntArrayRef stride, IntArrayRef padding, Tensor& output) { + // See Note [Writing Nondeterministic Operations] + // Nondeterministic with duplicate indices + at::globalContext().alertNotDeterministic("max_unpooling3d_forward_out"); + TORCH_CHECK(output.is_contiguous(), "output must be contiguous"); max_unpooling3d_shape_check( self_, Tensor(), indices_, output_size, stride, padding, "max_unpooling3d_forward_out_cuda()"); diff --git a/aten/src/ATen/native/cuda/ReflectionPad.cu 
b/aten/src/ATen/native/cuda/ReflectionPad.cu index 33f71368ca10..5380b0fef5f2 100644 --- a/aten/src/ATen/native/cuda/ReflectionPad.cu +++ b/aten/src/ATen/native/cuda/ReflectionPad.cu @@ -335,7 +335,7 @@ void reflection_pad2d_out_template( int64_t size_y = nplane; int64_t size_z = nbatch; - AT_DISPATCH_FLOATING_AND_COMPLEX_TYPES_AND1(kHalf, + AT_DISPATCH_FLOATING_AND_COMPLEX_TYPES_AND2(kHalf, kBFloat16, input.scalar_type(), "reflection_pad2d_out_template", [&] { for (int64_t block_y = 0; block_y < size_y; block_y += 65535) { @@ -407,7 +407,7 @@ void reflection_pad2d_backward_out_template( int64_t size_y = nplane; int64_t size_z = nbatch; - AT_DISPATCH_FLOATING_AND_COMPLEX_TYPES_AND1(kHalf, + AT_DISPATCH_FLOATING_AND_COMPLEX_TYPES_AND2(kHalf, kBFloat16, input.scalar_type(), "reflection_pad2d_backward_out_template", [&] { for (int64_t block_y = 0; block_y < size_y; block_y += 65535) { @@ -463,8 +463,8 @@ TORCH_IMPL_FUNC(reflection_pad1d_out_cuda) Tensor input = input_.contiguous(); - AT_DISPATCH_FLOATING_AND_COMPLEX_TYPES_AND1( - kHalf, input.scalar_type(), "reflection_pad1d_out_template", [&] { + AT_DISPATCH_FLOATING_AND_COMPLEX_TYPES_AND2( + kHalf, kBFloat16, input.scalar_type(), "reflection_pad1d_out_template", [&] { reflection_pad1d_out_kernel<<< grid_size, block_size, @@ -520,7 +520,7 @@ TORCH_IMPL_FUNC(reflection_pad1d_backward_out_cuda)(const Tensor& grad_output_, dim3 block_size(output_w > 256 ? 256 : output_w); dim3 grid_size((int) ::ceil(output_w / 256.0), nplane, nbatch); - AT_DISPATCH_FLOATING_AND_COMPLEX_TYPES_AND1(kHalf, + AT_DISPATCH_FLOATING_AND_COMPLEX_TYPES_AND2(kHalf, kBFloat16, grad_input.scalar_type(), "reflection_pad1d_backward_out_cuda", [&] { reflection_pad1d_backward_out_kernel<<< grid_size, block_size, 0, at::cuda::getCurrentCUDAStream()>>>( @@ -589,7 +589,7 @@ TORCH_IMPL_FUNC(reflection_pad3d_out_cuda) ( auto input = input_.contiguous(); bool batch_mode = (input.dim() == 5); - AT_DISPATCH_FLOATING_AND_COMPLEX_TYPES_AND1(kHalf, + AT_DISPATCH_FLOATING_AND_COMPLEX_TYPES_AND2(kHalf, kBFloat16, input.scalar_type(), "reflection_pad3d_out_cuda", [&] { auto input_inner = input; auto output_inner = output; @@ -641,7 +641,7 @@ TORCH_IMPL_FUNC(reflection_pad3d_backward_out_cuda) ( int64_t pad_top = padding[2]; int64_t pad_front = padding[4]; - AT_DISPATCH_FLOATING_AND_COMPLEX_TYPES_AND1(kHalf, + AT_DISPATCH_FLOATING_AND_COMPLEX_TYPES_AND2(kHalf, kBFloat16, input.scalar_type(), "reflection_pad3d_backward_out_cuda", [&] { auto grad_input_ = grad_input; auto grad_output_ = grad_output; diff --git a/aten/src/ATen/native/cuda/linalg/BatchLinearAlgebra.cpp b/aten/src/ATen/native/cuda/linalg/BatchLinearAlgebra.cpp index 061e7e86de8b..a7d379ec4620 100644 --- a/aten/src/ATen/native/cuda/linalg/BatchLinearAlgebra.cpp +++ b/aten/src/ATen/native/cuda/linalg/BatchLinearAlgebra.cpp @@ -2036,96 +2036,6 @@ void linalg_eigh_kernel(const Tensor& eigenvalues, const Tensor& eigenvectors, c REGISTER_CUDA_DISPATCH(linalg_eigh_stub, &linalg_eigh_kernel); -// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ eig ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -// magmaEig uses a hybrid CPU-GPU algorithm, which takes and return CPU -// memory. So, we accept a GPU tensor, copy it to CPU memory, and later copy -// the returned values from CPU to GPU. See also magmaSymeig, which uses a -// similar approach. 
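The ReflectionPad.cu hunks above (and their CPU counterparts earlier in this diff) only widen the dispatch macro from ..._AND1(kHalf, ...) to ..._AND2(kHalf, kBFloat16, ...); that alone instantiates the kernels for BFloat16. A quick check of the newly covered dtype, assuming a build that includes these changes:

```cpp
#include <ATen/ATen.h>

int main() {
  // Before this change, the AND1 macro covered only Half on top of the
  // floating and complex types, so a bf16 input hit a dispatch error.
  auto x = at::randn({1, 1, 2, 2, 2}, at::dtype(at::kBFloat16));
  auto y = at::reflection_pad3d(x, /*padding=*/{1, 1, 1, 1, 1, 1});
  TORCH_CHECK(y.scalar_type() == at::kBFloat16);
  return 0;
}
```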
- -template -static void apply_eig(const Tensor& self, bool eigenvectors, Tensor& out_eigvals, Tensor& out_eigvecs, - int* info_ptr) { -#if !AT_MAGMA_ENABLED() -TORCH_CHECK(false, "Calling torch.eig on a CUDA tensor requires compiling PyTorch with MAGMA. " - "Either transfer the tensor to the CPU before calling torch.eig or recompile with MAGMA."); -#else - TORCH_INTERNAL_ASSERT(self.device() == at::kCPU, "Internal error: apply_eig needs a CPU tensor"); - using value_t = typename c10::scalar_value_type::type; - magma_vec_t jobvr = eigenvectors ? MagmaVec : MagmaNoVec; - magma_int_t n = magma_int_cast(self.size(-1), "n"); - auto self_data = self.data_ptr(); - - auto out_eigvals_data = out_eigvals.data_ptr(); - scalar_t *wr = out_eigvals_data; - - scalar_t *vr_data = NULL; - magma_int_t ldvr = 1; - if (jobvr == MagmaVec) - { - vr_data = out_eigvecs.data_ptr(); - ldvr = n; - } - - value_t *rwork_data = nullptr; - if (isComplexType(at::typeMetaToScalarType(self.dtype()))) { - ALLOCATE_ARRAY(rwork_data, value_t, n*2); - } - - if (n > 0) { - // call magmaEig once to get the optimal size of work_data - scalar_t wkopt; - magma_int_t info; - magmaEig(MagmaNoVec, jobvr, n, self_data, n, wr, NULL, 1, vr_data, ldvr, &wkopt, -1, rwork_data, &info); - magma_int_t lwork = static_cast(real_impl(wkopt)); - - // call it a 2nd time to to the actual work - scalar_t *work_data = nullptr; - ALLOCATE_ARRAY(work_data, scalar_t, lwork); - magmaEig(MagmaNoVec, jobvr, n, self_data, n, wr, NULL, 1, vr_data, ldvr, work_data, lwork, rwork_data, &info); - *info_ptr = info; - } -#endif -} - -/* - * Internal helper; like eig_cuda but: - * 1. assume that self is a square matrix of side "n" - * 2. return CPU tensors (because this is what magmaEig returns), which will be copied to GPU memory - * by the caller - */ -std::tuple eig_kernel_impl(const Tensor& self, bool& eigenvectors) { - int64_t n = self.size(-1); - // copy self to pinned CPU memory - auto self_working_copy = at::empty_strided( - {n, n}, // square matrix - {1, n}, // column-ordered, as magmaEig expects - at::TensorOptions(at::kCPU).dtype(self.dtype()).pinned_memory(true)); - self_working_copy.copy_(self); - - // tensors holding the results. We use empty_strided to make them column-ordered - auto options = self.options().device(at::kCPU).memory_format(LEGACY_CONTIGUOUS_MEMORY_FORMAT); - Tensor out_eigvals; - if (isComplexType(at::typeMetaToScalarType(self.dtype()))) { - out_eigvals = at::empty({n}, options); - } else { - out_eigvals = at::empty_strided({n, 2}, {1, n}, options); - } - auto out_eigvecs = eigenvectors - ? 
at::empty_strided({n, n}, {1, n}, options) - : Tensor(); - - auto infos = at::zeros({}, self_working_copy.options().dtype(kInt)); - AT_DISPATCH_FLOATING_AND_COMPLEX_TYPES(self.scalar_type(), "eig_cuda", [&]{ - apply_eig(self_working_copy, eigenvectors, out_eigvals, out_eigvecs, infos.data_ptr()); - }); - at::_linalg_check_errors(infos, "eig", /*is_matrix*/true); - - return std::tuple(out_eigvals, out_eigvecs); -} - -REGISTER_CUDA_DISPATCH(eig_stub, &eig_kernel_impl); - // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ linalg_eig ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* diff --git a/aten/src/ATen/native/metal/ops/MetalReshape.mm b/aten/src/ATen/native/metal/ops/MetalReshape.mm index 1001b6690ad8..eca282a25bae 100644 --- a/aten/src/ATen/native/metal/ops/MetalReshape.mm +++ b/aten/src/ATen/native/metal/ops/MetalReshape.mm @@ -64,7 +64,7 @@ Tensor view(const Tensor& input, c10::SymIntArrayRef sym_size) { Tensor reshape(const Tensor& input, IntArrayRef shape) { TORCH_CHECK(input.is_metal()); - return view(input, c10::SymIntArrayRef::fromIntArrayRef(shape)); + return view(input, c10::fromIntArrayRef(shape)); } Tensor flatten_using_ints( diff --git a/aten/src/ATen/native/mps/operations/BinaryOps.mm b/aten/src/ATen/native/mps/operations/BinaryOps.mm index b619307ef8aa..4a92048e4c4a 100644 --- a/aten/src/ATen/native/mps/operations/BinaryOps.mm +++ b/aten/src/ATen/native/mps/operations/BinaryOps.mm @@ -72,16 +72,37 @@ void binaryOpTensor(const Tensor& self, const Tensor& other, const Scalar& alpha // this type inference is only required at the time of graph creation const ScalarType common_dtype = c10::promoteTypes(self.scalar_type(), other.scalar_type()); - if (self.scalar_type() != common_dtype) { - primaryCastTensor = castMPSTensor(mpsGraph, newCachedGraph->primaryTensor, common_dtype); + + // Condition - + // 1. Division operation + // 2. 
diff --git a/aten/src/ATen/native/mps/operations/BinaryOps.mm b/aten/src/ATen/native/mps/operations/BinaryOps.mm
index b619307ef8aa..4a92048e4c4a 100644
--- a/aten/src/ATen/native/mps/operations/BinaryOps.mm
+++ b/aten/src/ATen/native/mps/operations/BinaryOps.mm
@@ -72,16 +72,37 @@ void binaryOpTensor(const Tensor& self, const Tensor& other, const Scalar& alpha
         // this type inference is only required at the time of graph creation
         const ScalarType common_dtype = c10::promoteTypes(self.scalar_type(), other.scalar_type());
-        if (self.scalar_type() != common_dtype) {
-          primaryCastTensor = castMPSTensor(mpsGraph, newCachedGraph->primaryTensor, common_dtype);
+
+        // Condition -
+        // 1. Division operation
+        // 2. Inputs are not float
+        bool div_condition = op_name.rfind("div", 0) == 0
+                    && (!(common_dtype == ScalarType::Float || common_dtype == ScalarType::Half));
+
+        auto compute_type = ScalarType::Float;
+
+        if(div_condition) {
+
+          if(output_.scalar_type() == ScalarType::Float || output_.scalar_type() == ScalarType::Half)
+            compute_type = output_.scalar_type();
+
+          primaryCastTensor = castMPSTensor(mpsGraph, newCachedGraph->primaryTensor, compute_type);
+          secondaryCastTensor = castMPSTensor(mpsGraph, newCachedGraph->secondaryTensor, compute_type);
         }
-        if (other.scalar_type() != common_dtype) {
-          secondaryCastTensor = castMPSTensor(mpsGraph, newCachedGraph->secondaryTensor, common_dtype);
+        else {
+          if (self.scalar_type() != common_dtype) {
+            primaryCastTensor = castMPSTensor(mpsGraph, newCachedGraph->primaryTensor, common_dtype);
+          }
+          if (other.scalar_type() != common_dtype) {
+            secondaryCastTensor = castMPSTensor(mpsGraph, newCachedGraph->secondaryTensor, common_dtype);
+          }
         }
         newCachedGraph->outputTensor = binaryBlock(newCachedGraph, primaryCastTensor, secondaryCastTensor);
         // Cast output tensor to an expected type if needed, which addresses discrepancy when int64 scalar is added to int32 tensor
         // Output tensor should have been promoted but it remains an int32 tensor
-        if (output_.scalar_type() != common_dtype) {
+
+        if ((div_condition && compute_type != output_.scalar_type()) ||
+            output_.scalar_type() != common_dtype) {
           newCachedGraph->outputTensor = castMPSTensor(mpsGraph, newCachedGraph->outputTensor, output_.scalar_type());
         }
       }
@@ -138,7 +159,11 @@ void div_mode_template(const Tensor& self, const Tensor& other,
   MPSGraphTensor* divTensor = [mpsGraph divisionWithPrimaryTensor:primaryCastTensor
                                                   secondaryTensor:secondaryCastTensor
                                                              name:nil];
-  if (!rounding_mode.has_value()) {
+  // Rounding is a no-op for integral types, and also a reasonable workaround
+  // For MPSGraph bug on Apple Silicon, that throws `Function floorOp_i64 was not found in the library`
+  // See https://github.com/pytorch/pytorch/issues/84995
+  bool isFloatOutput = ([divTensor dataType] & MPSDataTypeFloatBit) != 0;
+  if (!rounding_mode.has_value() || !isFloatOutput) {
     return divTensor;
   } else if (*rounding_mode == "trunc") {
     return trunc_tensor(mpsGraph, divTensor);
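To make the casting rule in binaryOpTensor above concrete: for a div-family op whose promoted input dtype is integral, the computation is forced into a floating-point type, preferring the output's dtype when that is already Float or Half. A standalone sketch of just that decision (the enum is simplified; only the logic visible in the hunk is modeled):

    #include <iostream>
    #include <string>

    enum class ScalarType { Float, Half, Int, Long };

    ScalarType divComputeType(const std::string& op_name,
                              ScalarType common_dtype,
                              ScalarType output_dtype) {
      // Same test as the hunk: op name starts with "div" and the promoted
      // input dtype is not floating point.
      const bool div_condition = op_name.rfind("div", 0) == 0 &&
          !(common_dtype == ScalarType::Float || common_dtype == ScalarType::Half);
      if (!div_condition) {
        return common_dtype;
      }
      const bool out_is_float = output_dtype == ScalarType::Float ||
          output_dtype == ScalarType::Half;
      return out_is_float ? output_dtype : ScalarType::Float;
    }

    int main() {
      // int / int with a float output computes in Float, not Int.
      std::cout << (divComputeType("div_out", ScalarType::Int, ScalarType::Float) ==
                    ScalarType::Float)
                << std::endl;
    }

The trailing cast back to output_.scalar_type() then restores the expected output dtype whenever the compute type diverged from it.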
diff --git a/aten/src/ATen/native/mps/operations/Copy.mm b/aten/src/ATen/native/mps/operations/Copy.mm
index 3c2ab0d6c2f8..b99e87661e30 100644
--- a/aten/src/ATen/native/mps/operations/Copy.mm
+++ b/aten/src/ATen/native/mps/operations/Copy.mm
@@ -36,7 +36,7 @@
 // Copy sourceBuffer into destBuffer, casting sourceBuffer to src.scalar_type().
 // The shapes and dtypes are taken from dst and src, but their storage pointers are not used.
 void copy_cast_mps(at::Tensor& dst, const at::Tensor& src,
-                   id<MTLBuffer> destBuffer, id<MTLBuffer> sourceBuffer) {
+                   id<MTLBuffer> destBuffer, id<MTLBuffer> sourceBuffer, bool non_blocking = true) {
   using namespace mps;
 
   struct CachedGraph : public MPSCachedGraph
@@ -84,6 +84,8 @@ void copy_cast_mps(at::Tensor& dst, const at::Tensor& src,
     NSDictionary* feeds = @{cachedGraph->inputTensor_: srcData};
     NSDictionary* results = @{cachedGraph->outputTensor_: dstData};
     runMPSGraph(stream, cachedGraph->graph(), feeds, results);
+    if (!non_blocking)
+      stream->synchronize(SyncType::COMMIT_AND_WAIT);
   }
 }
 
@@ -113,38 +115,52 @@ void copy_cast_mps(at::Tensor& dst, const at::Tensor& src,
     src = src_;
   }
   id<MTLBuffer> sourceBuffer = getMTLBufferStorage(src);
-  size_t src_total_size = src_.is_view() ? at::detail::computeStorageNbytesContiguous(src.sizes(), src.element_size(), src.storage_offset()) :
-                          src.nbytes();
-  size_t size_to_copy = src.nbytes();
-
-  // In case of dtype change, first convert src inplace
-  if (src_.dtype() != dst_.dtype()) {
-    copy_cast_mps(dst, src, sourceBuffer, sourceBuffer);
-    // Use the element size of dst to calculate the total size after casting
-    size_to_copy = (size_to_copy / src.element_size()) * dst.element_size();
-  }
-
-  // If there's anything wrong with source, we shouldn't return dst_ silently and must error out.
-  TORCH_INTERNAL_ASSERT(sourceBuffer && size_to_copy > 0);
-  TORCH_INTERNAL_ASSERT(src_total_size >= storage_byte_offset);
-  TORCH_INTERNAL_ASSERT(dst.nbytes() >= (dst.storage_offset() * dst.element_size()));
+  size_t dst_tensor_nbytes = dst.nbytes();
 
   @autoreleasepool {
     MTLResourceOptions options = MTLResourceOptionCPUCacheModeDefault | MTLResourceStorageModeShared;
     NSUInteger alignedLength = 0;
 
     void* host_dst = dst.storage().data();
-    void* alignedPtr = pageAlignedBlockPtr(host_dst, (NSUInteger)src_total_size, &alignedLength);
+    void* alignedPtr = pageAlignedBlockPtr(host_dst, (NSUInteger)dst_tensor_nbytes, &alignedLength);
+    NSUInteger destOffset = (uintptr_t(host_dst) - uintptr_t(alignedPtr));
+    // 4 bytes alignment required on macos for blits.
+    TORCH_INTERNAL_ASSERT(destOffset % 4 == 0, "Unaligned blit request");
+
     id<MTLBuffer> destBuffer = [device newBufferWithBytesNoCopy:alignedPtr
                                                          length:alignedLength
                                                         options:options
                                                     deallocator:nil];
-    NSUInteger destOffset = uintptr_t(host_dst) - uintptr_t(alignedPtr);
-    // 4 bytes alignment required on macos for blits.
-    TORCH_INTERNAL_ASSERT(destOffset % 4 == 0, "Unaligned blit request");
+    id<MTLBuffer> tmpBuffer = sourceBuffer;
+    Tensor tmp;
+    bool needsBlit = true;
+    if (src_.dtype() != dst.dtype()) {
+      if (destOffset == 0 && storage_byte_offset == 0) {
+        // Return the casted tensor directly if there's no destination offset
+        needsBlit = false;
+        tmpBuffer = destBuffer;
+      } else if (src.element_size() < dst.element_size()) {
+        tmp = at::native::empty_mps(dst.sizes(), dst.scalar_type(), c10::nullopt, kMPS);
+        tmpBuffer = getMTLBufferStorage(tmp);
+      }
+    }
+
+    size_t size_to_copy = src.nbytes();
+    // In case of dtype change, first convert src inplace
+    if (src_.dtype() != dst.dtype()) {
+      copy_cast_mps(dst, src, tmpBuffer, sourceBuffer, non_blocking);
+    }
+
+    if (needsBlit) {
+      size_to_copy = (size_to_copy / src.element_size()) * dst.element_size();
 
-    stream->copy_and_sync(sourceBuffer, destBuffer, size_to_copy, storage_byte_offset, destOffset, non_blocking);
-    [destBuffer release];
+      // If there's anything wrong with source, we shouldn't return dst_ silently and must error out.
+      TORCH_INTERNAL_ASSERT(sourceBuffer && dst_tensor_nbytes > 0);
+      TORCH_INTERNAL_ASSERT(dst_tensor_nbytes >= (dst.storage_offset() * dst.element_size()));
+
+      stream->copy_and_sync(tmpBuffer, destBuffer, size_to_copy, storage_byte_offset, destOffset, non_blocking);
+      [destBuffer release];
+    }
   }
   if (!dst.is_same(dst_)) {
     dst_.copy_(dst, non_blocking);
@@ -235,17 +251,29 @@ void copy_blit_mps(void* dst, const void* src, size_t size) {
   } else {
     src = src_;
   }
+  id<MTLBuffer> destBuffer = getMTLBufferStorage(dst_);
+  id<MTLBuffer> sourceBuffer = getMTLBufferStorage(src);
+
   // Scatter to `dst` if the memory is not contiguous
   // If the memory is not contiguous, it means that the tensor has strides and we would not be
   // able to do the copy using a single blit
   if (!dst_.is_contiguous()) {
-    return scatterViewTensor(src, dst_);
+    Tensor tmp;
+    if (src.dtype() != dst_.dtype()) {
+      id<MTLBuffer> tmpBuffer = sourceBuffer;
+      if (src.element_size() < dst_.element_size()) {
+        tmp = at::native::empty_mps(dst_.sizes(), dst_.scalar_type(), c10::nullopt, kMPS);
+        tmpBuffer = getMTLBufferStorage(tmp);
+      }
+
+      copy_cast_mps(dst_, src, tmpBuffer, sourceBuffer);
+    }
+
+    return scatterViewTensor((src.dtype() != dst_.dtype() && tmp.has_storage()) ? tmp : src, dst_);
   }
   src._set_conj(src_.is_conj());
   src._set_neg(src_.is_neg());
 
-  id<MTLBuffer> destBuffer = getMTLBufferStorage(dst_);
-  id<MTLBuffer> sourceBuffer = getMTLBufferStorage(src);
   const size_t src_size = src.nbytes();
   if (src.dtype() == dst_.dtype()) {
     MPSStream* stream = getCurrentMPSStream();
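pageAlignedBlockPtr itself is not shown in this diff; under the assumption that it behaves like the usual page-alignment helper (round the host pointer down to a page boundary, round the length up, report both), the destOffset computation and the 4-byte assertion reduce to something like this sketch:

    #include <cassert>
    #include <cstdint>
    #include <cstdlib>
    #include <unistd.h>

    // Sketch of a pageAlignedBlockPtr-style helper: Metal's
    // newBufferWithBytesNoCopy requires page-aligned memory, so the real data
    // lives at some offset inside the aligned block.
    static void* pageAlignedBase(void* ptr, size_t nbytes, size_t* alignedLength) {
      const uintptr_t pageSize = static_cast<uintptr_t>(sysconf(_SC_PAGESIZE));
      const uintptr_t addr = reinterpret_cast<uintptr_t>(ptr);
      const uintptr_t base = addr & ~(pageSize - 1);                           // round down
      const uintptr_t end = (addr + nbytes + pageSize - 1) & ~(pageSize - 1);  // round up
      *alignedLength = end - base;
      return reinterpret_cast<void*>(base);
    }

    int main() {
      void* host_dst = std::malloc(4096);
      size_t alignedLength = 0;
      void* alignedPtr = pageAlignedBase(host_dst, 100, &alignedLength);
      const size_t destOffset =
          reinterpret_cast<uintptr_t>(host_dst) - reinterpret_cast<uintptr_t>(alignedPtr);
      assert(destOffset % 4 == 0);  // the same invariant the diff asserts for blits
      std::free(host_dst);
      return 0;
    }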
diff --git a/aten/src/ATen/native/mps/operations/Normalization.mm b/aten/src/ATen/native/mps/operations/Normalization.mm
index 2e026b9acb46..5384ee666fea 100644
--- a/aten/src/ATen/native/mps/operations/Normalization.mm
+++ b/aten/src/ATen/native/mps/operations/Normalization.mm
@@ -823,7 +823,7 @@ string get_mem_string(c10::MemoryFormat memory_format) {
   const int normalized_ndim = normalized_shape.size();
   // NOLINTNEXTLINE(bugprone-narrowing-conversions,cppcoreguidelines-narrowing-conversions)
   const int axis = input_ndim - normalized_ndim;
-  at::Tensor input_reshaped = input.view({1, M, -1});
+  at::Tensor input_reshaped = input.reshape({1, M, -1});
   // Unlike Batch Normalization, which applies scalar scale and bias for each
   // entire channel/plane with the affine option, Layer Normalization applies
   // per-element scale and bias. E.g. For input {N, C, H, W}, weight for
diff --git a/aten/src/ATen/native/mps/operations/UnaryOps.mm b/aten/src/ATen/native/mps/operations/UnaryOps.mm
index 2231a66fb3ac..97f3d18626ef 100644
--- a/aten/src/ATen/native/mps/operations/UnaryOps.mm
+++ b/aten/src/ATen/native/mps/operations/UnaryOps.mm
@@ -61,6 +61,14 @@ void unary_op(const Tensor& self, const Tensor& output, std::string op_name, Una
 
 MPSGraphTensor* trunc_tensor(MPSGraph* mpsGraph, MPSGraphTensor* inputTensor)
 {
+  // Rounding is a no-op for integral types, and also a reasonable workaround
+  // For MPSGraph bug on Apple Silicon, that throws `Function floorOp_i64 was not found in the library`
+  // See https://github.com/pytorch/pytorch/issues/84995
+  bool isFloatInput = ([inputTensor dataType] & MPSDataTypeFloatBit) != 0;
+  if (!isFloatInput) {
+    return inputTensor;
+  }
+
   MPSGraphTensor* zeroTensor = [mpsGraph constantWithScalar:0.0
                                                    dataType:inputTensor.dataType];
   MPSGraphTensor* predicateTensor = [mpsGraph lessThanWithPrimaryTensor:inputTensor
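The early return added to trunc_tensor relies on truncation being the identity on integers; for floating-point inputs the graph it builds selects between ceil and floor based on sign. An illustrative scalar equivalent:

    #include <cassert>
    #include <cmath>

    // trunc(x) for floats: ceil for negative values, floor otherwise --
    // the same select-on-predicate the MPSGraph code constructs.
    static double trunc_like(double x) {
      return x < 0.0 ? std::ceil(x) : std::floor(x);
    }

    // For integral inputs, rounding is a no-op, so return the value unchanged
    // (this is what the new early return does for integer tensors).
    static long trunc_like(long x) {
      return x;
    }

    int main() {
      assert(trunc_like(2.7) == 2.0);
      assert(trunc_like(-2.7) == -2.0);  // note: floor alone would give -3.0
      assert(trunc_like(5L) == 5L);
      return 0;
    }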
diff --git a/aten/src/ATen/native/native_functions.yaml b/aten/src/ATen/native/native_functions.yaml
index dfb3bddc5238..f706bbd995c7 100644
--- a/aten/src/ATen/native/native_functions.yaml
+++ b/aten/src/ATen/native/native_functions.yaml
@@ -1209,6 +1209,15 @@
 
 - func: concat.names_out(Tensor[] tensors, Dimname dim, *, Tensor(a!) out) -> Tensor(a!)
 
+# alias for torch.cat
+- func: concatenate(Tensor[] tensors, int dim=0) -> Tensor
+
+- func: concatenate.out(Tensor[] tensors, int dim=0, *, Tensor(a!) out) -> Tensor(a!)
+
+- func: concatenate.names(Tensor[] tensors, Dimname dim) -> Tensor
+
+- func: concatenate.names_out(Tensor[] tensors, Dimname dim, *, Tensor(a!) out) -> Tensor(a!)
+
 - func: block_diag(Tensor[] tensors) -> Tensor
   variants: function
   dispatch:
@@ -2054,7 +2063,7 @@
     CPU: empty_cpu
     CUDA: empty_cuda
     MPS: empty_mps
-    Meta: empty_meta
+    Meta: empty_meta_symint
     MkldnnCPU: empty_mkldnn
     SparseCPU, SparseCUDA, SparseMeta: empty_sparse
     SparseCsrCPU, SparseCsrCUDA: empty_sparse_compressed
@@ -2065,13 +2074,13 @@
 - func: new_empty(Tensor self, SymInt[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
   variants: method
   dispatch:
-    CompositeExplicitAutograd: new_empty
+    CompositeExplicitAutograd: new_empty_symint
   autogen: new_empty.out
 
-- func: new_empty_strided(Tensor self, int[] size, int[] stride, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
+- func: new_empty_strided(Tensor self, SymInt[] size, SymInt[] stride, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
   variants: method
   dispatch:
-    CompositeExplicitAutogradNonFunctional: new_empty_strided
+    CompositeExplicitAutogradNonFunctional: new_empty_strided_symint
   autogen: new_empty_strided.out
 
 - func: new_full(Tensor self, int[] size, Scalar fill_value, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
@@ -2159,12 +2168,12 @@
     SparseCsrCPU, SparseCsrCUDA: empty_like_sparse_csr
   autogen: empty_like.out
 
-- func: empty_strided(int[] size, int[] stride, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
+- func: empty_strided(SymInt[] size, SymInt[] stride, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
   dispatch:
     CPU: empty_strided_cpu
     CUDA: empty_strided_cuda
     MPS: empty_strided_mps
-    Meta: empty_strided_meta
+    Meta: empty_strided_meta_symint
    QuantizedCPU, QuantizedCUDA: empty_strided_unknown_quantized
   autogen: empty_strided.out
@@ -5287,12 +5296,24 @@
     CUDA: nested_from_padded_cuda
   autogen: _nested_from_padded.out
 
+# These private functions are temporary. They will be updated/deleted when nested tensors switch to using SymInts for their metadata representation
 - func: _nested_tensor_size(Tensor self) -> Tensor
   variants: method
   dispatch:
     NestedTensorCPU, NestedTensorCUDA: _nested_tensor_size
   autogen: _nested_tensor_size.out
 
+- func: _nested_tensor_strides(Tensor self) -> Tensor
+  variants: method
+  dispatch:
+    NestedTensorCPU, NestedTensorCUDA: _nested_tensor_strides
+  autogen: _nested_tensor_strides.out
+
+- func: _nested_tensor_offsets(Tensor self) -> int[]
+  variants: method
+  dispatch:
+    NestedTensorCPU, NestedTensorCUDA: _nested_tensor_offsets
+
 # _nested_from_padded is not usable from Python, so
 # _nested_from_padded_and_nested_example is available for testing.
 - func: _nested_from_padded_and_nested_example(Tensor padded, Tensor nt_example) -> Tensor
@@ -5300,6 +5321,22 @@
     NestedTensorCPU, NestedTensorCUDA: NestedTensor_from_padded_and_nested_example
   autogen: _nested_from_padded_and_nested_example.out
 
+# The input argument types to this function are temporary. When nested tensors switch to using SymInts for their metadata representation
+# this will need to be updated
+- func: _nested_view_from_buffer(Tensor(a) self, Tensor nested_size, Tensor nested_strides, int[] offsets) -> Tensor(a)
+  variants: function
+  device_check: NoCheck
+  dispatch:
+    CPU, CUDA: _nested_view_from_buffer
+
+- func: _nested_view_from_buffer_copy(Tensor self, Tensor nested_size, Tensor nested_strides, int[] offsets) -> Tensor
+  variants: function
+  device_check: NoCheck
+  tags: view_copy
+  dispatch:
+    CompositeExplicitAutogradNonFunctional: _nested_view_from_buffer_copy
+  autogen: _nested_view_from_buffer_copy.out
+
 - func: _trilinear(Tensor i1, Tensor i2, Tensor i3, int[] expand1, int[] expand2, int[] expand3, int[] sumdim, int unroll_dim=1) -> Tensor
   dispatch:
   # calls unsqueeze
@@ -5548,7 +5585,7 @@
 
 - func: zeros(SymInt[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
   dispatch:
-    CompositeExplicitAutograd: zeros
+    CompositeExplicitAutograd: zeros_symint
 
 - func: zeros.out(SymInt[] size, *, Tensor(a!) out) -> Tensor(a!)
   dispatch:
@@ -6271,6 +6308,7 @@
   dispatch:
     SparseCPU, SparseCUDA, SparseMeta: values_sparse
     SparseCsrCPU, SparseCsrCUDA: values_sparse_csr
+    NestedTensorCPU, NestedTensorCUDA: values_nested
   device_check: NoCheck
   device_guard: False
@@ -6319,11 +6357,12 @@
     SparseCPU, SparseCUDA: copy_sparse_
   autogen: copy_sparse_to_sparse, copy_sparse_to_sparse.out
 
+# By adding AutogradNestedTensor, this makes this function CompositeImplicit-like for nested tensors
 - func: unbind.int(Tensor(a -> *) self, int dim=0) -> Tensor(a)[]
   variants: function, method
   dispatch:
     CompositeExplicitAutograd: unbind
-    NestedTensorCPU, NestedTensorCUDA: NestedTensor_unbind
+    CompositeImplicitAutogradNestedTensor: NestedTensor_unbind
 
 - func: unbind.Dimname(Tensor(a -> *) self, Dimname dim) -> Tensor(a)[]
   variants: function, method
@@ -6889,8 +6928,7 @@
   device_check: NoCheck
   device_guard: False
   dispatch:
-    Meta: view_meta
-    ZeroTensor, CPU, CUDA, QuantizedCPU, QuantizedCUDA, MPS: view
+    ZeroTensor, Meta, CPU, CUDA, QuantizedCPU, QuantizedCUDA, MPS: view
     MkldnnCPU: mkldnn_view
     NestedTensorCPU, NestedTensorCUDA: view_nested
@@ -8114,15 +8152,6 @@
     CUDA: _symeig_helper_cuda
   autogen: _symeig_helper.out
 
-- func: eig.e(Tensor self, bool eigenvectors=False, *, Tensor(a!) e, Tensor(b!) v) -> (Tensor(a!) eigenvalues, Tensor(b!) eigenvectors)
-  dispatch:
-    CompositeExplicitAutograd: eig_out
-
-- func: eig(Tensor self, bool eigenvectors=False) -> (Tensor eigenvalues, Tensor eigenvectors)
-  variants: method, function
-  dispatch:
-    CompositeExplicitAutograd: eig
-
 - func: svd.U(Tensor self, bool some=True, bool compute_uv=True, *, Tensor(a!) U, Tensor(b!) S, Tensor(c!) V) -> (Tensor(a!) U, Tensor(b!) S, Tensor(c!) V)
 
 - func: svd(Tensor self, bool some=True, bool compute_uv=True) -> (Tensor U, Tensor S, Tensor V)
@@ -12568,6 +12597,21 @@
 
 - func: linalg_multi_dot.out(Tensor[] tensors, *, Tensor(a!) out) -> Tensor(a!)
   python_module: linalg
 
+## Functions related to the `torch.nested` namespace
+# Note [nested namespace binding]
+# Functions in the nested python module should have their names start with a
+# "nested_" prefix and be bound to the desired Python name in
+# torch/nested/__init__.py, and the desired C++ name in torch/csrc/api/include/torch/nested.h.
+# The "nested_" names should be hidden from the user and not documented.
+
+- func: nested_to_padded_tensor(Tensor self, float padding, int[]? output_size=None) -> Tensor
+  python_module: nested
+  variants: function
+  dispatch:
+    NestedTensorCPU: NestedTensor_to_padded_tensor_generic
+    NestedTensorCUDA: NestedTensor_to_padded_tensor_cuda
+  autogen: nested_to_padded_tensor.out
+
 ## Functions that are only for testing
 # It is undocumented and should not be used outside of tests.
 - func: _test_serialization_subcmul(Tensor self, Tensor other, Scalar alpha=1) -> Tensor
@@ -12938,7 +12982,7 @@
 - func: expand_copy.out(Tensor self, SymInt[] size, *, bool implicit=False, Tensor(a!) out) -> Tensor(a!)
   variants: function
   dispatch:
-    CompositeExplicitAutograd: expand_copy_out
+    CompositeExplicitAutograd: expand_copy_out_symint
 
 - func: permute_copy.out(Tensor self, int[] dims, *, Tensor(a!) out) -> Tensor(a!)
@@ -13058,7 +13102,7 @@
 - func: view_copy.out(Tensor self, SymInt[] size, *, Tensor(a!) out) -> Tensor(a!)
   variants: function
   dispatch:
-    CompositeExplicitAutograd: view_copy_out
+    CompositeExplicitAutograd: view_copy_out_symint
 
 - func: view_copy.dtype_out(Tensor self, ScalarType dtype, *, Tensor(a!) out) -> Tensor(a!)
@@ -13078,13 +13122,6 @@
   dispatch:
     CompositeExplicitAutograd: alias_copy_out
 
-- func: to_padded_tensor(Tensor self, float padding, int[]? output_size=None) -> Tensor
-  variants: method
-  dispatch:
-    NestedTensorCPU: NestedTensor_to_padded_tensor_generic
-    NestedTensorCUDA: NestedTensor_to_padded_tensor_cuda
-  autogen: to_padded_tensor.out
-
 - func: _nested_tensor_softmax_with_shape(Tensor self, Tensor query) -> Tensor
   dispatch:
     NestedTensorCPU: NestedTensor_softmax_dropout
@@ -13137,6 +13174,11 @@
   structured: True
   variants: function
 
+- func: _flash_scaled_dot_product_attention(Tensor query, Tensor key, Tensor value, Tensor cum_seq_q, Tensor cum_seq_k, int max_q, int max_k, float dropout_p, bool causal) -> Tensor
+  variants: function
+  dispatch:
+    CUDA: flash_scaled_dot_product_attention
+
 - func: _transformer_decoder_only_layer_fwd(Tensor src, int embed_dim, int num_heads, Tensor qkv_weight, Tensor qkv_bias, Tensor proj_weight, Tensor proj_bias, bool use_gelu, bool norm_first, float eps, Tensor norm_weight_1, Tensor norm_bias_1, Tensor norm_weight_2, Tensor norm_bias_2, Tensor ffn_weight_1, Tensor ffn_bias_1, Tensor ffn_weight_2, Tensor ffn_bias_2, Tensor? mask=None, Tensor? incr_key=None, Tensor? incr_value=None) -> (Tensor, Tensor, Tensor)
   variants: function
   dispatch:
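The new concatenate entries near the top of this native_functions.yaml hunk carry no dispatch: section, so they are registered as composite functions. Their C++ bodies are not part of this diff; a plausible implementation (the name concatenate_sketch is illustrative) is a thin forwarding alias over at::cat:

    #include <ATen/ATen.h>

    // Hypothetical composite body for the concatenate alias declared above;
    // the actual implementation is not shown in this hunk.
    at::Tensor concatenate_sketch(at::TensorList tensors, int64_t dim) {
      return at::cat(tensors, dim);
    }

    int main() {
      auto a = at::ones({2, 3});
      auto b = at::zeros({2, 3});
      auto c = concatenate_sketch({a, b}, /*dim=*/0);
      TORCH_CHECK(c.size(0) == 4 && c.size(1) == 3);  // cat along dim 0
      return 0;
    }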
instead."); return create_nested_view_tensor(self, sizemat_reshaped, stridemat_reshaped, std::vector(self_ptr->get_offsets())); } + /** + * Create a buffer tensor that is a view of self + * + * This serves as the boundary between nested and non nested tensor + * view conversions + * + * @return Returns a new non nested tensor that + * aliases the same storage as self + */ +Tensor values_nested(const Tensor& self) { + TORCH_INTERNAL_ASSERT(self.is_nested(), "Can only create a buffer from Nested Tensor"); + auto* nt_self = get_nested_tensor_impl(self); + return nt_self->get_buffer(); +} + +/** + * Create a nested tensor that is a view of a buffer + * + * This serves as the boundary between non nested tensor and nested + * view conversions + * + * @return Returns a nested tensor that + * aliases the same storage as buffer + */ +Tensor _nested_view_from_buffer( + const Tensor& buffer, + const Tensor& nested_size_tensor, + const Tensor& nested_stride_tensor, + IntArrayRef offsets) { + TORCH_INTERNAL_ASSERT( + !buffer.is_nested(), + "Can only a create Nested Tensor from a normal tensor buffer"); + TORCH_INTERNAL_ASSERT(buffer.dim() == 1, "The input buffer must be flat"); + TORCH_INTERNAL_ASSERT(nested_size_tensor.dim() == 2, "Expected the nested size tensor to be two dimensional."); + uint64_t num_elements_nested_size = at::prod(nested_size_tensor, 1).sum().item(); + uint64_t buffer_storage_size = buffer.storage().nbytes()/buffer.dtype().itemsize(); + TORCH_INTERNAL_ASSERT( + buffer_storage_size == num_elements_nested_size, + "The number of elements in the buffer must equal the nested tensor size but buffer size: ", + buffer_storage_size, + " and nested tensor size: ", + num_elements_nested_size, + "."); + + TORCH_INTERNAL_ASSERT(nested_stride_tensor.dim() == 2, "Expected the nested stride tensor to be two dimensional."); + TORCH_INTERNAL_ASSERT(nested_size_tensor.size(0) == nested_stride_tensor.size(0), "Expected the first dimension of nested size and nested stride tensor to be equal."); + TORCH_INTERNAL_ASSERT(nested_stride_tensor.size(0) == (int64_t)offsets.size(), "Expected the first dimension of nested stride tensor to equal the length of offsets."); + return at::detail::make_tensor( + c10::TensorImpl::VIEW, + buffer, + nested_size_tensor, + nested_stride_tensor, + std::vector(offsets.begin(), offsets.end())); +} // See Note [Special size rule for nested tensor] Tensor reshape_nested(const Tensor& self, IntArrayRef proposed_shape) { @@ -1151,7 +1206,7 @@ Tensor reshape_nested(const Tensor& self, IntArrayRef proposed_shape) { ntensors > 0, "empty nested tensor cannot be reshaped"); // basic information after reshaping - int64_t ntensors_reshaped; + int64_t ntensors_reshaped{0}; if (proposed_shape[0] >= 0) { ntensors_reshaped = proposed_shape[0]; } @@ -1169,7 +1224,7 @@ Tensor reshape_nested(const Tensor& self, IntArrayRef proposed_shape) { // reshaping underlying tensor dimensions does not change offset // determine reshaped size and stride const Tensor& sizemat = self_ptr->get_nested_size_tensor(); - bool viewable; + bool viewable{false}; Tensor sizemat_reshaped, stridemat_reshaped; std::tie(viewable, sizemat_reshaped, stridemat_reshaped) = NestedTensor_compute_size_stride( sizes, strides, proposed_shape, sizemat.options()); diff --git a/aten/src/ATen/native/nested/NestedTensorTransformerFunctions.cpp b/aten/src/ATen/native/nested/NestedTensorTransformerFunctions.cpp index 231eca94f072..35a1c83e2360 100644 --- a/aten/src/ATen/native/nested/NestedTensorTransformerFunctions.cpp +++ 
diff --git a/aten/src/ATen/native/nested/NestedTensorTransformerFunctions.cpp b/aten/src/ATen/native/nested/NestedTensorTransformerFunctions.cpp
index 231eca94f072..35a1c83e2360 100644
--- a/aten/src/ATen/native/nested/NestedTensorTransformerFunctions.cpp
+++ b/aten/src/ATen/native/nested/NestedTensorTransformerFunctions.cpp
@@ -7,6 +7,7 @@
 #include <...>
 #include <...>
+#include <...>
 
 namespace at {
 namespace native {
@@ -243,5 +244,204 @@ Tensor NestedTensor_to_mask(const Tensor& nt, c10::optional<int64_t> mask_dim, c
   }
   return result;
 }
+std::tuple<Tensor, int64_t> cumulative_and_max_seq_len(Tensor qkv) {
+  TORCH_CHECK(
+      qkv.is_nested(),
+      "QKV must be nested for flash cumulative_seq_len calculation.")
+  auto* nt_impl = get_nested_tensor_impl(qkv);
+  const auto& sizes = nt_impl->get_nested_size_tensor();
+  auto size_tensor_stride = sizes.stride(0);
+
+  const int64_t batch_size = qkv.size(0);
+  auto cumulative_seqlen = at::zeros(
+      {batch_size + 1}, TensorOptions().device(at::kCPU).dtype(at::kInt));
+
+  auto* sizes_ptr = sizes.data_ptr<int64_t>();
+  auto* cumulative_seqlen_ptr = cumulative_seqlen.data_ptr<int32_t>();
+
+  int32_t sum = 0;
+  int64_t max_seqlen = -1;
+  cumulative_seqlen_ptr[0] = sum;
+  for (const auto i : c10::irange(batch_size)) {
+    // Calculate the cumulative sum of the sequence lengths
+    auto current_seq_len = sizes_ptr[i * size_tensor_stride];
+    sum += current_seq_len;
+    cumulative_seqlen_ptr[i + 1] = sum;
+
+    // Find the max element while we traverse
+    max_seqlen = std::max(max_seqlen, current_seq_len);
+  }
+  // Send to GPU, this is a pretty lightweight calc for normal batch sizes
+  // but maybe this needs to be on gpu
+  cumulative_seqlen = cumulative_seqlen.to(TensorOptions().device(at::kCUDA));
+  return std::tuple<Tensor, int64_t>{cumulative_seqlen, max_seqlen};
+}
+
+Tensor flash_attention_helper(
+    const Tensor& query,
+    const Tensor& key,
+    const Tensor& value,
+    double dropout_p,
+    bool causal) {
+  // Query is of size (batch_size x ragged_seq_len x (3 or 1) x n_heads x
+  // head_dim)
+  int64_t head_dim{query.size(-1)};
+  int64_t num_heads{query.size(-2)};
+
+  auto cumulative_and_max_q = cumulative_and_max_seq_len(query);
+  Tensor cumulative_sequence_length_q = std::get<0>(cumulative_and_max_q);
+  int64_t max_seqlen_batch_q = std::get<1>(cumulative_and_max_q);
+
+  if (key.is_same(value) || query.is_same(key) || query.is_same(value)) {
+    int64_t Nnz_q{cumulative_sequence_length_q[-1].item<int64_t>()};
+
+    // For the packed case we need to set the output size for dim 2 to 1
+    auto atten_size = get_nested_size_tensor(query).clone();
+    atten_size.index({at::indexing::Slice(), 1}) = 1;
+
+    auto qkv_buffer_reshaped =
+        get_buffer(query).view({Nnz_q, 3, num_heads, head_dim}).transpose(0, 1).contiguous();
+
+    auto i0 = qkv_buffer_reshaped[0];
+    auto i1 = qkv_buffer_reshaped[1];
+    auto i2 = qkv_buffer_reshaped[2];
+
+    TORCH_CHECK(i0.is_contiguous());
+    TORCH_CHECK(i1.is_contiguous());
+    TORCH_CHECK(i2.is_contiguous());
+
+    // If we are passing in query, key, value all the same tensors then we have
+    // packed them into one tensor and need to slice for flash attention
+    Tensor atten_buffer = at::_flash_scaled_dot_product_attention(
+        i0,
+        i1,
+        i2,
+        cumulative_sequence_length_q,
+        cumulative_sequence_length_q,
+        max_seqlen_batch_q,
+        max_seqlen_batch_q,
+        dropout_p,
+        causal);
+    // Output of flash_attention is a regular tensor; let's wrap it back up to
+    // form a nested tensor
+    return wrap_buffer(atten_buffer.view(-1), atten_size);
+  }
+
+  // Query, Key, and Value are not all the same tensor and therefore need to
+  // calculate K meta data
+
+  // The nested tensors will be of shape {Batch_size x ragged_seq_len x
+  // num_heads * head_dim }
+  auto cumulative_and_max_k = cumulative_and_max_seq_len(key);
+  Tensor cumulative_sequence_length_k = std::get<0>(cumulative_and_max_k);
+  int64_t max_seqlen_batch_k = std::get<1>(cumulative_and_max_k);
+
+  // K and V have to have the same Nnz, should probably torch_check before; for
+  // now assume, in order to not iterate over v
+  int64_t Nnz_q{cumulative_sequence_length_q[-1].item<int64_t>()};
+  int64_t Nnz_kv{cumulative_sequence_length_k[-1].item<int64_t>()};
+
+  auto query_buffer_reshaped =
+      get_buffer(query).view({Nnz_q, num_heads, head_dim});
+  auto key_buffer_reshaped =
+      get_buffer(key).view({Nnz_kv, num_heads, head_dim});
+  auto value_buffer_reshaped =
+      get_buffer(value).view({Nnz_kv, num_heads, head_dim});
+
+  Tensor atten_buffer = at::_flash_scaled_dot_product_attention(
+      query_buffer_reshaped,
+      key_buffer_reshaped,
+      value_buffer_reshaped,
+      cumulative_sequence_length_q,
+      cumulative_sequence_length_k,
+      max_seqlen_batch_q,
+      max_seqlen_batch_k,
+      dropout_p,
+      causal);
+  // Output of flash_attention is a regular tensor; let's wrap it back up to
+  // form a nested tensor, the size of which should match the query tensor
+  return wrap_buffer(atten_buffer.view(-1), get_nested_size_tensor(query));
+}
+
+Tensor flash_attention_helper_dense(
+    const Tensor& query,
+    const Tensor& key,
+    const Tensor& value,
+    double dropout_p,
+    bool causal) {
+  TORCH_INTERNAL_ASSERT(
+      !query.is_nested() && !key.is_nested() && !value.is_nested());
+  // Query is of size (batch_size x dense_seq_len x 3 x n_heads x
+  // head_dim)
+  const auto batch_size = query.size(0);
+  auto max_seqlen_batch_q = query.size(1);
+  int64_t head_dim{query.size(-1)};
+  int64_t num_heads{query.size(-2)};
+
+  auto cumulative_sequence_length_q = at::arange(
+      0,
+      (batch_size + 1) * max_seqlen_batch_q,
+      max_seqlen_batch_q,
+      TensorOptions().device(at::kCUDA).dtype(at::kInt));
+  int64_t Nnz_q{batch_size * max_seqlen_batch_q};
+
+  if (key.is_same(value) || query.is_same(key) || query.is_same(value)) {
+    // In the dense case flash attention expects an input that is
+    // (b*s) x num_heads x head_dim
+    auto query_reshaped = query.reshape({Nnz_q, 3, num_heads, head_dim});
+    // If we are passing in query, key, value all the same tensors then we have
+    // packed them into one tensor and need to slice for flash attention
+
+    Tensor atten_buffer = at::_flash_scaled_dot_product_attention(
+        query_reshaped.index({at::indexing::Slice(), 0}),
+        query_reshaped.index({at::indexing::Slice(), 1}),
+        query_reshaped.index({at::indexing::Slice(), 2}),
+        cumulative_sequence_length_q,
+        cumulative_sequence_length_q,
+        max_seqlen_batch_q,
+        max_seqlen_batch_q,
+        dropout_p,
+        causal);
+    // Reshape output to convert nnz to batch_size and seq_len
+    return atten_buffer.reshape(
+        {batch_size, max_seqlen_batch_q, num_heads, head_dim});
+  }
+
+  // Query, Key, and Value are not all the same tensor and therefore need to
+  // calculate K meta data
+  auto max_seqlen_batch_k = key.size(1);
+  auto cumulative_sequence_length_k = at::arange(
+      0,
+      (batch_size + 1) * max_seqlen_batch_k,
+      max_seqlen_batch_k,
+      TensorOptions().device(at::kCUDA).dtype(at::kInt));
+
+  // K and V have to have the same Nnz, should probably torch_check before;
+  // assume for now in order to not iterate over v
+  int64_t Nnz_kv{batch_size * max_seqlen_batch_k};
+
+  // Calculate head dim
+  TORCH_INTERNAL_ASSERT(query.size(-1) == key.size(-1));
+  TORCH_INTERNAL_ASSERT(query.size(-1) == value.size(-1));
+
+  auto query_reshaped = query.reshape({Nnz_q, num_heads, head_dim});
+  auto key_reshaped = key.reshape({Nnz_kv, num_heads, head_dim});
+  auto value_reshaped = value.reshape({Nnz_kv, num_heads, head_dim});
+
+  Tensor atten_buffer = at::_flash_scaled_dot_product_attention(
+      query_reshaped,
+      key_reshaped,
+      value_reshaped,
+      cumulative_sequence_length_q,
+      cumulative_sequence_length_k,
+      max_seqlen_batch_q,
+      max_seqlen_batch_k,
+      dropout_p,
+      causal);
+  // Reshape output to convert nnz to batch_size and seq_len
+  return atten_buffer.reshape(
+      {batch_size, max_seqlen_batch_q, num_heads, head_dim});
+}
+
 } // namespace native
 } // namespace at
diff --git a/aten/src/ATen/native/nested/NestedTensorTransformerFunctions.h b/aten/src/ATen/native/nested/NestedTensorTransformerFunctions.h
index 77eb0145d684..09b35d9c39e9 100644
--- a/aten/src/ATen/native/nested/NestedTensorTransformerFunctions.h
+++ b/aten/src/ATen/native/nested/NestedTensorTransformerFunctions.h
@@ -83,5 +83,19 @@ void add_padding_kernelLauncher(
     const std::vector<int64_t>& output_sizes,
     const int batch_size,
     const int output_batch_size);
+
+Tensor flash_attention_helper_dense(
+    const Tensor& query,
+    const Tensor& key,
+    const Tensor& value,
+    double dropout_p,
+    bool causal);
+
+Tensor flash_attention_helper(
+    const Tensor& query,
+    const Tensor& key,
+    const Tensor& value,
+    double dropout_p,
+    bool causal);
 } // namespace native
 } // namespace at
diff --git a/aten/src/ATen/native/nested/NestedTensorUtils.cpp b/aten/src/ATen/native/nested/NestedTensorUtils.cpp
index 74fd9773c366..0d560849af06 100644
--- a/aten/src/ATen/native/nested/NestedTensorUtils.cpp
+++ b/aten/src/ATen/native/nested/NestedTensorUtils.cpp
@@ -14,6 +14,13 @@ at::Tensor _nested_tensor_size(const at::Tensor& self) {
   return get_nested_size_tensor(self);
 }
 
+at::Tensor _nested_tensor_strides(const at::Tensor& self){
+  return get_nested_tensor_impl(self) -> get_nested_stride_tensor();
+}
+std::vector<int64_t> _nested_tensor_offsets(const at::Tensor& self){
+  return get_nested_tensor_impl(self) -> get_offsets();
+}
+
 // Helper functions for getting information about a nested tensor's shape.
 std::vector<int64_t> NestedTensor_get_max_size_from_size_tensor(
     const Tensor& sizes) {
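cumulative_and_max_seq_len in the NestedTensorTransformerFunctions.cpp hunk above produces, in one sweep, the exclusive prefix sums (cu_seqlens) that the flash-attention kernel expects plus the maximum sequence length. Detached from tensors, the pass is (names are illustrative):

    #include <algorithm>
    #include <cassert>
    #include <cstdint>
    #include <utility>
    #include <vector>

    // One pass over per-sample sequence lengths: cumulative[i] is the start
    // offset of sample i in the packed (Nnz) layout; cumulative.back() == Nnz.
    static std::pair<std::vector<int32_t>, int64_t> cumulativeAndMax(
        const std::vector<int64_t>& seq_lens) {
      std::vector<int32_t> cumulative(seq_lens.size() + 1, 0);
      int64_t max_seqlen = -1;
      int32_t sum = 0;
      for (size_t i = 0; i < seq_lens.size(); ++i) {
        sum += static_cast<int32_t>(seq_lens[i]);
        cumulative[i + 1] = sum;
        max_seqlen = std::max(max_seqlen, seq_lens[i]);
      }
      return {cumulative, max_seqlen};
    }

    int main() {
      const auto result = cumulativeAndMax({3, 5, 2});
      assert((result.first == std::vector<int32_t>{0, 3, 8, 10}));
      assert(result.second == 5);
      return 0;
    }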
diff --git a/aten/src/ATen/native/nested/cuda/NestedTensorTransformerFunctions.cpp b/aten/src/ATen/native/nested/cuda/NestedTensorTransformerFunctions.cpp
index 54d2b7ffd0c5..1cfeac9a5e12 100644
--- a/aten/src/ATen/native/nested/cuda/NestedTensorTransformerFunctions.cpp
+++ b/aten/src/ATen/native/nested/cuda/NestedTensorTransformerFunctions.cpp
@@ -1,4 +1,5 @@
 #include <...>
+#include <...>
 
 #include <...>
 #include <...>
@@ -9,10 +10,19 @@
 #include <...>
 #endif
 
+// TODO Consider moving all flash_attention code, nested tensor included, to
+// Transformer library
+
+#ifdef USE_FLASH_ATTENTION
+#include <...>
+#endif
+
 #include <...>
 #include <...>
 #include <...>
+#include <...>
+
 namespace at {
 namespace native {
 namespace {
@@ -207,5 +217,37 @@ Tensor NestedTensor_to_padded_tensor_cuda(
   return NestedTensor_to_padded_tensor_generic(t, padding, output_size);
 }
 
+Tensor flash_scaled_dot_product_attention(
+    const Tensor& query,
+    const Tensor& key,
+    const Tensor& value,
+    const Tensor& cumulative_sequence_length_q,
+    const Tensor& cumulative_sequence_length_k,
+    const int64_t max_seqlen_batch_q,
+    const int64_t max_seqlen_batch_k,
+    double dropout_p,
+    bool causal) {
+#if defined(USE_FLASH_ATTENTION)
+  auto softmax_scale = std::pow(query.size(-1), -0.5);
+  std::vector<Tensor> output = fmha::mha_fwd(
+      query,
+      key,
+      value,
+      cumulative_sequence_length_q,
+      cumulative_sequence_length_k,
+      max_seqlen_batch_q,
+      max_seqlen_batch_k,
+      dropout_p,
+      softmax_scale,
+      false,
+      causal,
+      false,
+      c10::nullopt);
+  return output[0];
+#endif
+  TORCH_CHECK(false, "USE_FLASH_ATTENTION was not enabled for build.")
+  return Tensor{};
+}
+
 } // namespace native
 } // namespace at
diff --git a/aten/src/ATen/native/quantized/AffineQuantizerBase.cpp b/aten/src/ATen/native/quantized/AffineQuantizerBase.cpp
index e40f8ef1fdb0..5d02d9e04ed7 100644
--- a/aten/src/ATen/native/quantized/AffineQuantizerBase.cpp
+++ b/aten/src/ATen/native/quantized/AffineQuantizerBase.cpp
@@ -71,6 +71,33 @@ void quantize_vec(
       (float)scale, (int32_t)zero_point, precision});
 }
 
+#if defined(__ARM_NEON__) || defined(__aarch64__)
+// For use when compiling FBGEMM on aarch64 but still supporting x86
+// intrinsics via simde
+template <typename T>
+T quantize_val_arm(
+    const float scale,
+    const int32_t zero_point,
+    const float value) {
+  constexpr int32_t qmin = std::numeric_limits<T>::min();
+  constexpr int32_t qmax = std::numeric_limits<T>::max();
+  float inv_scale = 1.0f / scale;
+  auto r = zero_point + static_cast<int32_t>(std::nearbyint(value * inv_scale));
+  r = std::max(r, qmin);
+  r = std::min(r, qmax);
+  return static_cast<T>(r);
+}
+
+template uint8_t quantize_val_arm<uint8_t>(
+    const float scale,
+    const int32_t zero_point,
+    const float value);
+template int8_t quantize_val_arm<int8_t>(
+    const float scale,
+    const int32_t zero_point,
+    const float value);
+#endif
+
 template <typename T>
 inline float dequantize_val(double scale, int64_t zero_point, T value) {
   // NOLINTNEXTLINE(cppcoreguidelines-pro-type-member-init)
diff --git a/aten/src/ATen/native/quantized/cpu/OnednnUtils.h b/aten/src/ATen/native/quantized/cpu/OnednnUtils.h
index 6ad70356b3e0..4ae88871e9f5 100644
--- a/aten/src/ATen/native/quantized/cpu/OnednnUtils.h
+++ b/aten/src/ATen/native/quantized/cpu/OnednnUtils.h
@@ -4,8 +4,7 @@
 #if AT_MKLDNN_ENABLED()
 #include <...>
 #include <...>
-#include <...>
-#include <...>
+#include <...>
 
 struct PackedLinearWeightsOnednn : public LinearPackedParamsBase {
   PackedLinearWeightsOnednn(
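quantize_val_arm implements the usual affine quantization rule, q = clamp(zero_point + round(value / scale), qmin, qmax), with round-half-to-even via nearbyint. A worked standalone example for uint8_t (written with a direct division where the original precomputes 1/scale; otherwise the same math):

    #include <algorithm>
    #include <cassert>
    #include <cmath>
    #include <cstdint>
    #include <limits>

    template <typename T>
    T quantize_val_sketch(float scale, int32_t zero_point, float value) {
      constexpr int32_t qmin = std::numeric_limits<T>::min();
      constexpr int32_t qmax = std::numeric_limits<T>::max();
      int32_t r = zero_point + static_cast<int32_t>(std::nearbyint(value / scale));
      r = std::max(r, qmin);  // clamp into the representable range
      r = std::min(r, qmax);
      return static_cast<T>(r);
    }

    int main() {
      // scale 0.1, zero_point 128: 1.0f -> 128 + round(10.0) = 138.
      assert(quantize_val_sketch<uint8_t>(0.1f, 128, 1.0f) == 138);
      // Far out-of-range values clamp to qmax = 255.
      assert(quantize_val_sketch<uint8_t>(0.1f, 128, 1000.0f) == 255);
      return 0;
    }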
diff --git a/aten/src/ATen/native/quantized/cpu/qnnpack/cmake/DownloadGoogleTest.cmake b/aten/src/ATen/native/quantized/cpu/qnnpack/cmake/DownloadGoogleTest.cmake
index 4a86d641e412..66b2232b5925 100644
--- a/aten/src/ATen/native/quantized/cpu/qnnpack/cmake/DownloadGoogleTest.cmake
+++ b/aten/src/ATen/native/quantized/cpu/qnnpack/cmake/DownloadGoogleTest.cmake
@@ -11,7 +11,7 @@ project(googletest-download NONE)
 include(ExternalProject)
 ExternalProject_Add(googletest
   URL https://github.com/google/googletest/archive/release-1.10.0.zip
-  URL_HASH SHA256=f3ed3b58511efd272eb074a3a6d6fb79d7c2e6a0e374323d1e6bcbcc1ef141bf
+  URL_HASH SHA256=94c634d499558a76fa649edb13721dce6e98fb1e7018dfaeba3cd7a083945e91
   SOURCE_DIR "${CONFU_DEPENDENCIES_SOURCE_DIR}/googletest"
   BINARY_DIR "${CONFU_DEPENDENCIES_BINARY_DIR}/googletest"
   CONFIGURE_COMMAND ""
diff --git a/aten/src/ATen/native/quantized/cpu/qnnpack/deps/clog/CMakeLists.txt b/aten/src/ATen/native/quantized/cpu/qnnpack/deps/clog/CMakeLists.txt
index f19d6c61f33f..e763e4e3ba93 100644
--- a/aten/src/ATen/native/quantized/cpu/qnnpack/deps/clog/CMakeLists.txt
+++ b/aten/src/ATen/native/quantized/cpu/qnnpack/deps/clog/CMakeLists.txt
@@ -63,7 +63,7 @@ set_target_properties(clog PROPERTIES C_EXTENSIONS NO)
 CLOG_TARGET_RUNTIME_LIBRARY(clog)
 set_target_properties(clog PROPERTIES PUBLIC_HEADER include/clog.h)
-target_include_directories(clog BEFORE PUBLIC include)
+target_include_directories(clog PUBLIC $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include> $<INSTALL_INTERFACE:include>)
 if(CLOG_LOG_TO_STDIO)
   target_compile_definitions(clog PRIVATE CLOG_LOG_TO_STDIO=1)
 else()
@@ -73,7 +73,10 @@ if(ANDROID AND NOT CLOG_LOG_TO_STDIO)
   target_link_libraries(clog PRIVATE log)
 endif()
 
+add_library(cpuinfo::clog ALIAS clog)
+
 install(TARGETS clog
+  EXPORT cpuinfo-targets
   LIBRARY DESTINATION "${CMAKE_INSTALL_LIBDIR}"
   ARCHIVE DESTINATION "${CMAKE_INSTALL_LIBDIR}"
   PUBLIC_HEADER DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}")
diff --git a/aten/src/ATen/native/quantized/cpu/qnnpack/deps/clog/cmake/DownloadGoogleTest.cmake b/aten/src/ATen/native/quantized/cpu/qnnpack/deps/clog/cmake/DownloadGoogleTest.cmake
index 4a86d641e412..66b2232b5925 100644
--- a/aten/src/ATen/native/quantized/cpu/qnnpack/deps/clog/cmake/DownloadGoogleTest.cmake
+++ b/aten/src/ATen/native/quantized/cpu/qnnpack/deps/clog/cmake/DownloadGoogleTest.cmake
@@ -11,7 +11,7 @@ project(googletest-download NONE)
 include(ExternalProject)
 ExternalProject_Add(googletest
   URL https://github.com/google/googletest/archive/release-1.10.0.zip
-  URL_HASH SHA256=f3ed3b58511efd272eb074a3a6d6fb79d7c2e6a0e374323d1e6bcbcc1ef141bf
+  URL_HASH SHA256=94c634d499558a76fa649edb13721dce6e98fb1e7018dfaeba3cd7a083945e91
   SOURCE_DIR "${CONFU_DEPENDENCIES_SOURCE_DIR}/googletest"
   BINARY_DIR "${CONFU_DEPENDENCIES_BINARY_DIR}/googletest"
   CONFIGURE_COMMAND ""
diff --git a/aten/src/ATen/native/sparse/Macros.h b/aten/src/ATen/native/sparse/Macros.h
new file mode 100644
index 000000000000..10174e9ad655
--- /dev/null
+++ b/aten/src/ATen/native/sparse/Macros.h
@@ -0,0 +1,16 @@
+#pragma once
+
+#if defined(__CUDACC__) || defined(__HIPCC__)
+#define GPUCC
+#define FUNCAPI __host__ __device__
+#define INLINE __forceinline__
+#else
+#define FUNCAPI
+#define INLINE inline
+#endif
+
+#if defined(_WIN32) || defined(_WIN64)
+#define RESTRICT __restrict
+#else
+#define RESTRICT __restrict__
+#endif
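The point of the new Macros.h is that sparse intersection kernels can share one source file between host and device builds: FUNCAPI expands to __host__ __device__ only under nvcc/hipcc, and RESTRICT papers over the MSVC spelling of __restrict__. A small usage sketch, redefining the macros locally so it compiles standalone outside the ATen tree:

    #include <cstddef>
    #include <cstdint>

    // Local re-definitions mirroring the new Macros.h.
    #if defined(__CUDACC__) || defined(__HIPCC__)
    #define FUNCAPI __host__ __device__
    #define INLINE __forceinline__
    #else
    #define FUNCAPI
    #define INLINE inline
    #endif
    #if defined(_WIN32) || defined(_WIN64)
    #define RESTRICT __restrict
    #else
    #define RESTRICT __restrict__
    #endif

    // Compiles as a device-capable function under nvcc and as plain inline
    // C++ elsewhere; restrict lets the compiler assume the arrays don't alias.
    FUNCAPI INLINE int64_t dot(const int64_t* RESTRICT a,
                               const int64_t* RESTRICT b,
                               size_t n) {
      int64_t acc = 0;
      for (size_t i = 0; i < n; ++i) {
        acc += a[i] * b[i];
      }
      return acc;
    }

    int main() {
      const int64_t a[3] = {1, 2, 3};
      const int64_t b[3] = {4, 5, 6};
      return dot(a, b, 3) == 32 ? 0 : 1;  // 4 + 10 + 18
    }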
diff --git a/aten/src/ATen/native/sparse/SparseBinaryOpIntersectionCommon.h b/aten/src/ATen/native/sparse/SparseBinaryOpIntersectionCommon.h
new file mode 100644
index 000000000000..6e2a50a3c1f4
--- /dev/null
+++ b/aten/src/ATen/native/sparse/SparseBinaryOpIntersectionCommon.h
@@ -0,0 +1,532 @@
+#pragma once
+
+#include <...>
+#include <...>
+#include <...>
+#include <...>
+#include <...>
+#include <...>
+
+#ifndef AT_PER_OPERATOR_HEADERS
+#include <...>
+#include <...>
+#else
+#include <...>
+#include <...>
+#include <...>
+#include <...>
+#include <...>
+#endif
+
+#ifdef GPUCC
+#define NAME "sparse_binary_op_intersection_cuda"
+#else
+#define NAME "sparse_binary_op_intersection_cpu"
+#endif
+
+#define CALL(...) __VA_ARGS__();
+#define EXPAND(b, n, ...)         \
+  if (b) {                        \
+    using index_t ## n = int32_t; \
+    __VA_ARGS__                   \
+  }                               \
+  else {                          \
+    using index_t ## n = int64_t; \
+    __VA_ARGS__                   \
+  }
+#define BOOL_TO_INDEX_TYPE1(b0, ...) \
+  EXPAND(b0, 0, CALL(__VA_ARGS__))
+#define BOOL_TO_INDEX_TYPE2(b1, b0, ...) \
+  EXPAND(b1, 1, BOOL_TO_INDEX_TYPE1(b0, __VA_ARGS__))
+#define BOOL_TO_INDEX_TYPE3(b2, b1, b0, ...) \
+  EXPAND(b2, 2, BOOL_TO_INDEX_TYPE2(b1, b0, __VA_ARGS__))
+
+namespace at {
+namespace native {
+
+namespace {
+
+using at::sparse::get_sparse_impl;
+
+// ForwardIt: only legacy random access iterator is supported.
+template <class ForwardIt, class T, bool is_lower = true>
+static FUNCAPI INLINE
+ForwardIt find_bound(ForwardIt first, ForwardIt last, const T& value) {
+    ForwardIt RESTRICT it;
+    typename std::iterator_traits<ForwardIt>::difference_type count, step;
+    // NOTE: std::distance(first, last) compiles but produces wrong results on CUDA,
+    // so only legacy random access iterators are safe in this code.
+    count = last - first;
+
+    while (count > 0) {
+      it = first;
+      step = count / 2;
+      // avoiding std::advance(it, step),
+      // although it does work unlike std::distance on CUDA.
+      it += step;
+      // The decision which separates finding a lower bound vs an upper bound.
+      // Note that a lower bound is a value at *it with the smallest index
+      // such that *it >= value if such value exists, or last if does not.
+      // Similarly, an upper bound is a value at *it with the smallest index
+      // such that *it > value if such value exists, or last if does not.
+      // Let is_lower = true and *it < value, then we know that *it and values
+      // preceding *it cannot contain a lower bound, so we adjust initial iterator range
+      // from [first, first + count] to [first + step + 1, first + count - (step + 1)],
+      // where +1 skips the element at which we have just evaluated *it < value.
+      // Similar logic holds when is_lower = false.
+      if (is_lower ? *it < value : value >= *it) {
+        first = ++it;
+        count -= step + 1;
+      }
+      else {
+        count = step;
+      }
+    }
+    return first;
+}
+
+template
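The contract documented for find_bound is exactly that of std::lower_bound / std::upper_bound, selected at compile time, with the iterator arithmetic kept to raw += so it stays correct on CUDA. A host-side check of the same branching condition against the standard library (find_bound_sketch is a simplified stand-in):

    #include <algorithm>
    #include <cassert>
    #include <vector>

    // is_lower = true  -> first element >= value (std::lower_bound)
    // is_lower = false -> first element >  value (std::upper_bound)
    template <bool is_lower, class T>
    const T* find_bound_sketch(const T* first, const T* last, const T& value) {
      auto count = last - first;
      while (count > 0) {
        const auto step = count / 2;
        const T* it = first + step;
        if (is_lower ? *it < value : !(value < *it)) {
          first = it + 1;  // discard *it and everything before it
          count -= step + 1;
        } else {
          count = step;
        }
      }
      return first;
    }

    int main() {
      std::vector<int> v = {1, 2, 2, 2, 5, 7};
      const int* lo = find_bound_sketch<true>(v.data(), v.data() + v.size(), 2);
      const int* hi = find_bound_sketch<false>(v.data(), v.data() + v.size(), 2);
      assert(lo == &*std::lower_bound(v.begin(), v.end(), 2));
      assert(hi == &*std::upper_bound(v.begin(), v.end(), 2));
      return 0;
    }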