-
Notifications
You must be signed in to change notification settings - Fork 22.2k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Update on "[ONNX] Fix assign input shape for tuple inputs & primitive type inputs (#54112)" [ghstack-poisoned]
- Loading branch information
Showing
138 changed files
with
3,633 additions
and
2,660 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,164 @@ | ||
#!/usr/bin/env python

from pathlib import Path

import jinja2

# AWS ECR registry that hosts the CI docker images.
DOCKER_REGISTRY = "308535385114.dkr.ecr.us-east-1.amazonaws.com"

# Repository's .github directory (this script lives in .github/scripts/,
# so two levels up from the file is .github).
GITHUB_DIR = Path(__file__).parent.parent

# Runner labels for the generated "test" job: CPU-only builds run on the
# plain 2xlarge runner, CUDA builds need the GPU-equipped runner.
CPU_TEST_RUNNER = "linux.2xlarge"
CUDA_TEST_RUNNER = "linux.8xlarge.nvidia.gpu"
|
||
|
||
class PyTorchLinuxWorkflow:
    """A single Linux CI workflow that can render its own GitHub Actions YAML.

    The test runner type is derived from the build environment name: any
    environment containing "cuda" is scheduled on the GPU runner, everything
    else on the CPU runner.
    """

    def __init__(self, build_environment: str, docker_image_base: str):
        self.build_environment = build_environment
        self.docker_image_base = docker_image_base
        # CUDA builds must be tested on a machine with an NVIDIA GPU.
        if "cuda" in build_environment:
            self.test_runner_type = CUDA_TEST_RUNNER
        else:
            self.test_runner_type = CPU_TEST_RUNNER

    def generate_workflow_file(
        self, workflow_template: jinja2.Template, jinja_env: jinja2.Environment
    ) -> Path:
        """Render the template into .github/workflows/<build_environment>.yml.

        Returns the path of the file that was written.
        NOTE(review): ``jinja_env`` is not used here; kept so the call
        signature stays stable for existing callers.
        """
        destination = GITHUB_DIR.joinpath(
            f"workflows/{self.build_environment}.yml"
        )
        rendered = workflow_template.render(
            build_environment=self.build_environment,
            docker_image_base=self.docker_image_base,
            test_runner_type=self.test_runner_type,
        )
        with open(destination, "w") as handle:
            handle.write(rendered)
            # Templates are rendered without a trailing newline; add one.
            handle.write('\n')
        return destination
|
||
|
||
# Workflows that get a generated YAML file under .github/workflows/.
# Only the gcc5.4 build is enabled; the commented-out entries below are
# NOTE(review): presumably staged for later enablement as runner capacity
# allows (see the capacity TODO in the template) -- confirm before removing.
WORKFLOWS = [
    PyTorchLinuxWorkflow(
        build_environment="pytorch-linux-xenial-py3.6-gcc5.4",
        docker_image_base=f"{DOCKER_REGISTRY}/pytorch/pytorch-linux-xenial-py3.6-gcc5.4",
    ),
    # PyTorchLinuxWorkflow(
    #     build_environment="pytorch-paralleltbb-linux-xenial-py3.6-gcc5.4",
    #     docker_image_base=f"{DOCKER_REGISTRY}/pytorch/pytorch-linux-xenial-py3.6-gcc5.4",
    # ),
    # PyTorchLinuxWorkflow(
    #     build_environment="pytorch-parallelnative-linux-xenial-py3.6-gcc5.4",
    #     docker_image_base=f"{DOCKER_REGISTRY}/pytorch/pytorch-linux-xenial-py3.6-gcc5.4",
    # ),
    # PyTorchLinuxWorkflow(
    #     build_environment="pytorch-pure_torch-linux-xenial-py3.6-gcc5.4",
    #     docker_image_base=f"{DOCKER_REGISTRY}/pytorch/pytorch-linux-xenial-py3.6-gcc5.4",
    # ),
    # PyTorchLinuxWorkflow(
    #     build_environment="pytorch-linux-xenial-py3.6-gcc7",
    #     docker_image_base=f"{DOCKER_REGISTRY}/pytorch/pytorch-linux-xenial-py3.6-gcc7",
    # ),
    # PyTorchLinuxWorkflow(
    #     build_environment="pytorch-linux-xenial-py3-clang5-asan",
    #     docker_image_base=f"{DOCKER_REGISTRY}/pytorch/pytorch-linux-xenial-py3-clang5-asan",
    # ),
    # PyTorchLinuxWorkflow(
    #     build_environment="pytorch-linux-xenial-py3-clang7-onnx",
    #     docker_image_base=f"{DOCKER_REGISTRY}/pytorch/pytorch-linux-xenial-py3-clang7-onnx",
    # ),
    # PyTorchLinuxWorkflow(
    #     build_environment="pytorch-linux-xenial-cuda10.2-cudnn7-py3-gcc7",
    #     docker_image_base=f"{DOCKER_REGISTRY}/pytorch/pytorch-linux-xenial-cuda10.2-cudnn7-py3-gcc7",
    # ),
    # PyTorchLinuxWorkflow(
    #     build_environment="pytorch-linux-xenial-cuda11.1-cudnn8-py3-gcc7",
    #     docker_image_base=f"{DOCKER_REGISTRY}/pytorch/pytorch-linux-xenial-cuda11.1-cudnn8-py3-gcc7",
    # ),
    # PyTorchLinuxWorkflow(
    #     build_environment="pytorch-libtorch-linux-xenial-cuda11.1-cudnn8-py3-gcc7",
    #     docker_image_base=f"{DOCKER_REGISTRY}/pytorch/pytorch-linux-xenial-cuda11.1-cudnn8-py3-gcc7",
    # ),
    # PyTorchLinuxWorkflow(
    #     build_environment="pytorch-linux-bionic-py3.6-clang9-noarch",
    #     docker_image_base=f"{DOCKER_REGISTRY}/pytorch/pytorch-linux-bionic-py3.6-clang9",
    # ),
    # PyTorchLinuxWorkflow(
    #     build_environment="pytorch-xla-linux-bionic-py3.6-clang9",
    #     docker_image_base=f"{DOCKER_REGISTRY}/pytorch/pytorch-linux-bionic-py3.6-clang9",
    # ),
    # PyTorchLinuxWorkflow(
    #     build_environment="pytorch-vulkan-linux-bionic-py3.6-clang9",
    #     docker_image_base=f"{DOCKER_REGISTRY}/pytorch/pytorch-linux-bionic-py3.6-clang9",
    # ),
    # PyTorchLinuxWorkflow(
    #     build_environment="pytorch-linux-bionic-py3.8-gcc9-coverage",
    #     docker_image_base=f"{DOCKER_REGISTRY}/pytorch/pytorch-linux-bionic-py3.8-gcc9",
    # ),
    # PyTorchLinuxWorkflow(
    #     build_environment="pytorch-linux-bionic-rocm3.9-py3.6",
    #     docker_image_base=f"{DOCKER_REGISTRY}/pytorch/pytorch-linux-bionic-rocm3.9-py3.6",
    # ),
    # PyTorchLinuxWorkflow(
    #     build_environment="pytorch-linux-xenial-py3-clang5-android-ndk-r19c-x86_32",
    #     docker_image_base=f"{DOCKER_REGISTRY}/pytorch/pytorch-linux-xenial-py3-clang5-android-ndk-r19c",
    # ),
    # PyTorchLinuxWorkflow(
    #     build_environment="pytorch-linux-xenial-py3-clang5-android-ndk-r19c-x86_64",
    #     docker_image_base=f"{DOCKER_REGISTRY}/pytorch/pytorch-linux-xenial-py3-clang5-android-ndk-r19c",
    # ),
    # PyTorchLinuxWorkflow(
    #     build_environment="pytorch-linux-xenial-py3-clang5-android-ndk-r19c-arm-v7a",
    #     docker_image_base=f"{DOCKER_REGISTRY}/pytorch/pytorch-linux-xenial-py3-clang5-android-ndk-r19c",
    # ),
    # PyTorchLinuxWorkflow(
    #     build_environment="pytorch-linux-xenial-py3-clang5-android-ndk-r19c-arm-v8a",
    #     docker_image_base=f"{DOCKER_REGISTRY}/pytorch/pytorch-linux-xenial-py3-clang5-android-ndk-r19c",
    # ),
    # PyTorchLinuxWorkflow(
    #     build_environment="pytorch-linux-xenial-py3-clang5-mobile",
    #     docker_image_base=f"{DOCKER_REGISTRY}/pytorch/pytorch-linux-xenial-py3-clang5-asan",
    # ),
    # PyTorchLinuxWorkflow(
    #     build_environment="pytorch-linux-xenial-py3-clang5-mobile-custom-dynamic",
    #     docker_image_base=f"{DOCKER_REGISTRY}/pytorch/pytorch-linux-xenial-py3-clang5-android-ndk-r19c",
    # ),
    # PyTorchLinuxWorkflow(
    #     build_environment="pytorch-linux-xenial-py3-clang5-mobile-custom-static",
    #     docker_image_base=f"{DOCKER_REGISTRY}/pytorch/pytorch-linux-xenial-py3-clang5-android-ndk-r19c",
    # ),
    # PyTorchLinuxWorkflow(
    #     build_environment="pytorch-linux-xenial-py3-clang5-mobile-code-analysis",
    #     docker_image_base=f"{DOCKER_REGISTRY}/pytorch/pytorch-linux-xenial-py3-clang5-android-ndk-r19c",
    # ),
    # PyTorchLinuxWorkflow(
    #     build_environment="pytorch-linux-xenial-py3-clang5-android-ndk-r19c-x86_32",
    #     docker_image_base=f"{DOCKER_REGISTRY}/pytorch/pytorch-linux-xenial-py3-clang5-android-ndk-r19c",
    # ),
    # PyTorchLinuxWorkflow(
    #     build_environment="pytorch-linux-xenial-py3-clang5-android-ndk-r19c-x86_64",
    #     docker_image_base=f"{DOCKER_REGISTRY}/pytorch/pytorch-linux-xenial-py3-clang5-android-ndk-r19c",
    # ),
    # PyTorchLinuxWorkflow(
    #     build_environment="pytorch-linux-xenial-py3-clang5-android-ndk-r19c-arm-v7a",
    #     docker_image_base=f"{DOCKER_REGISTRY}/pytorch/pytorch-linux-xenial-py3-clang5-android-ndk-r19c",
    # ),
    # PyTorchLinuxWorkflow(
    #     build_environment="pytorch-linux-xenial-py3-clang5-android-ndk-r19c-arm-v8a",
    #     docker_image_base=f"{DOCKER_REGISTRY}/pytorch/pytorch-linux-xenial-py3-clang5-android-ndk-r19c",
    # ),
]
|
||
if __name__ == "__main__":
    # "!{{" avoids colliding with GitHub Actions' own "${{ ... }}" syntax
    # inside the templates.
    env = jinja2.Environment(
        variable_start_string="!{{",
        loader=jinja2.FileSystemLoader(str(GITHUB_DIR.joinpath("templates"))),
    )
    template = env.get_template("linux_ci_workflow.yml.in")
    # Render one workflow file per configured environment and report each path.
    for wf in WORKFLOWS:
        generated = wf.generate_workflow_file(
            workflow_template=template, jinja_env=env
        )
        print(generated)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,43 @@ | ||
#!/usr/bin/env bash
# Installs the NVIDIA driver and nvidia-docker2 runtime on a CI Linux runner.

set -eou pipefail

# ID + VERSION_ID from os-release (e.g. "amzn2"); selects the install path below.
# FIX: dropped the stray trailing backslash that fused this assignment with the
# DRIVER_FN one into a single command line -- it worked only by accident.
DISTRIBUTION=$(. /etc/os-release;echo $ID$VERSION_ID)
DRIVER_FN="NVIDIA-Linux-x86_64-460.39.run"
YUM_REPO_URL="https://nvidia.github.io/nvidia-docker/${DISTRIBUTION}/nvidia-docker.repo"
|
||
# Install the nvidia-docker2 runtime on Amazon Linux 2 from NVIDIA's yum repo,
# then restart docker so the new runtime is picked up.  Body runs in a
# subshell so `set -x` tracing does not leak into the rest of the script.
install_nvidia_docker2_amzn2() {
    (
        set -x
        # Needed for yum-config-manager
        sudo yum install -y yum-utils
        sudo yum-config-manager --add-repo "${YUM_REPO_URL}"
        sudo yum install -y nvidia-docker2
        sudo systemctl restart docker
    )
}
|
||
# Download and silently install the NVIDIA driver pinned by $DRIVER_FN from the
# ossci-linux S3 mirror.  On installer failure, dump the installer log and fail.
# Runs in a subshell so `set -x` tracing stays local.
install_nvidia_driver() {
    (
        set -x
        # Kernel module build needs compilers/headers.
        sudo yum groupinstall -y "Development Tools"
        curl -fsL -o nvidia_driver "https://s3.amazonaws.com/ossci-linux/nvidia_driver/$DRIVER_FN"
        sudo /bin/bash nvidia_driver -s --no-drm || (sudo cat /var/log/nvidia-installer.log && false)
        # Sanity check: fails if the driver did not come up.
        nvidia-smi
    )
}
|
||
# Install container toolkit based on distribution
echo "== Installing nvidia container toolkit for ${DISTRIBUTION} =="
case "${DISTRIBUTION}" in
    amzn*)
        install_nvidia_docker2_amzn2
        ;;
    *)
        # Only Amazon Linux runners are supported; anything else is a
        # misconfigured runner, so fail loudly.
        echo "ERROR: Unknown distribution ${DISTRIBUTION}"
        exit 1
        ;;
esac

echo "== Installing nvidia driver ${DRIVER_FN} =="
install_nvidia_driver
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
#!/usr/bin/env bash
# Fail when the working tree has uncommitted changes (e.g. generated files
# are out of date), printing the offending status and diff for debugging.
dirty="$(git status --porcelain)"
echo "$dirty"
git diff
# Script's exit status: 0 when the tree is clean, non-zero otherwise.
test -z "$dirty"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,174 @@ | ||
# @generated by .github/scripts/generate_linux_ci_workflows.py, Do not update manually
#
# Template is at: .github/templates/linux_ci_workflow.yml
# Generation script: .github/scripts/generate_linux_ci_workflows.py
# "!{{ ... }}" placeholders are filled by the generation script (jinja2 with a
# custom variable_start_string, so they don't clash with "${{ ... }}").
name: Linux CI (!{{ build_environment }})

on:
  # TODO: Enable pull_request builds when we can verify capacity can be met by auto-scalers
  # pull_request:
  push:
    branches:
      - master
      - release/*
  workflow_dispatch:

env:
  BUILD_ENVIRONMENT: !{{ build_environment }}
  DOCKER_IMAGE_BASE: !{{ docker_image_base }}
  SCCACHE_BUCKET: ossci-compiler-cache-circleci-v2
  # NOTE(review): single compute capability -- presumably matches the CI GPUs; confirm.
  TORCH_CUDA_ARCH_LIST: 5.2
  IN_CI: 1

jobs:
  # Resolve the docker image tag once, so build and test use the same image.
  calculate-docker-image:
    runs-on: ubuntu-18.04
    outputs:
      docker_image: ${{ steps.calculate-tag.outputs.docker_image }}
    steps:
      - name: Checkout PyTorch
        uses: actions/checkout@v2
      - name: Calculate docker image tag
        id: calculate-tag
        run: |
          # Tag is the git hash of the .circleci/docker tree, so image rebuilds
          # track changes to the docker definitions.
          DOCKER_TAG=$(git rev-parse HEAD:.circleci/docker)
          echo "::set-output name=docker_image::${DOCKER_IMAGE_BASE}:${DOCKER_TAG}"
  build:
    runs-on: linux.2xlarge
    needs: calculate-docker-image
    env:
      DOCKER_IMAGE: ${{ needs.calculate-docker-image.outputs.docker_image }}
    steps:
      - name: Chown workspace
        run: |
          # Ensure the working directory gets chowned back to the current user
          docker run --rm -v "$(pwd)":/v -w /v alpine chown -R "$(id -u):$(id -g)" .
      - name: Checkout PyTorch
        uses: actions/checkout@v2
        with:
          fetch-depth: 0 # deep clone, to allow sharding to use git rev-list
          submodules: recursive
      - name: Log in to ECR
        run: |
          aws ecr get-login --no-include-email --region us-east-1 > /tmp/ecr-login.sh
          bash /tmp/ecr-login.sh
          rm /tmp/ecr-login.sh
      - name: Pull docker image
        run: |
          docker pull "${DOCKER_IMAGE}"
      - name: Build PyTorch
        run: |
          # Cap parallelism at min(nproc - 1, 8) to avoid OOM.
          SCCACHE_MAX_JOBS=$(( $(nproc) - 1 ))
          MEMORY_LIMIT_MAX_JOBS=8 # our "linux.2xlarge" runner has 16 vCPUs, if we use all of them we'll OOM
          export MAX_JOBS=$(( SCCACHE_MAX_JOBS > MEMORY_LIMIT_MAX_JOBS ? MEMORY_LIMIT_MAX_JOBS : SCCACHE_MAX_JOBS ))
          docker run \
            -e BUILD_ENVIRONMENT \
            -e MAX_JOBS \
            -e SCCACHE_BUCKET \
            -e SKIP_SCCACHE_INITIALIZATION=1 \
            -e TORCH_CUDA_ARCH_LIST \
            --security-opt seccomp=unconfined \
            --cap-add=SYS_PTRACE \
            --tty \
            --user jenkins \
            -v "${GITHUB_WORKSPACE}:/var/lib/jenkins/workspace" \
            -w /var/lib/jenkins/workspace \
            "${DOCKER_IMAGE}" \
            sh -c 'sudo chown -R jenkins . && .jenkins/pytorch/build.sh'
      - name: Chown workspace
        run: |
          # Ensure the working directory gets chowned back to the current user
          docker run --rm -v "$(pwd)":/v -w /v alpine chown -R "$(id -u):$(id -g)" .
      - name: Archive artifacts into zip
        run: |
          zip -q -r artifacts.zip dist build
      - uses: actions/upload-artifact@v2
        name: Store PyTorch Build Artifacts
        with:
          name: ${{ env.BUILD_ENVIRONMENT }}
          retention-days: 30
          if-no-files-found: error
          path:
            artifacts.zip
      - name: Clean up docker images
        if: always()
        run: |
          # Prune all of the docker images
          docker system prune -af
  test:
    runs-on: !{{ test_runner_type }}
    needs:
      - calculate-docker-image
      - build
    env:
      DOCKER_IMAGE: ${{ needs.calculate-docker-image.outputs.docker_image }}
    steps:
      - name: Chown workspace
        run: |
          # Ensure the working directory gets chowned back to the current user
          docker run --rm -v "$(pwd)":/v -w /v alpine chown -R "$(id -u):$(id -g)" .
      - name: Checkout PyTorch
        uses: actions/checkout@v2
      - name: Log in to ECR
        run: |
          aws ecr get-login --no-include-email --region us-east-1 > /tmp/ecr-login.sh
          bash /tmp/ecr-login.sh
          rm /tmp/ecr-login.sh
      - name: Pull docker image
        run: |
          docker pull "${DOCKER_IMAGE}"
      - name: Install nvidia driver, nvidia-docker runtime, set GPU_FLAG
        if: ${{ contains(env.BUILD_ENVIRONMENT, 'cuda') }}
        run: |
          bash .github/scripts/install_nvidia_utils_linux.sh
          echo "GPU_FLAG=--gpus all" >> "${GITHUB_ENV}"
      - name: Determine shm-size
        run: |
          # cuda/rocm tests need a larger /dev/shm than the docker default.
          shm_size="1g"
          case "${BUILD_ENVIRONMENT}" in
            *cuda*)
              shm_size="2g"
              ;;
            *rocm*)
              shm_size="8g"
              ;;
          esac
          echo "SHM_SIZE=${shm_size}" >> "${GITHUB_ENV}"
      - uses: actions/download-artifact@v2
        name: Download PyTorch Build Artifacts
        with:
          name: ${{ env.BUILD_ENVIRONMENT }}
      - name: Unzip artifacts
        run: |
          unzip -q artifacts.zip
      - name: Output disk space left
        run: |
          sudo df -H
      - name: Test PyTorch
        run: |
          SCCACHE_MAX_JOBS=$(( $(nproc) - 1 ))
          MEMORY_LIMIT_MAX_JOBS=8 # our "linux.2xlarge" runner has 16 vCPUs, if we use all of them we'll OOM
          export MAX_JOBS=$(( SCCACHE_MAX_JOBS > MEMORY_LIMIT_MAX_JOBS ? MEMORY_LIMIT_MAX_JOBS : SCCACHE_MAX_JOBS ))
          # Used for GPU_FLAG since that doesn't play nice
          # shellcheck disable=SC2086
          docker run \
            ${GPU_FLAG:-} \
            -e BUILD_ENVIRONMENT \
            -e IN_CI \
            -e MAX_JOBS \
            --security-opt seccomp=unconfined \
            --cap-add=SYS_PTRACE \
            --shm-size="${SHM_SIZE}" \
            --tty \
            --user jenkins \
            -v "${GITHUB_WORKSPACE}:/var/lib/jenkins/workspace" \
            -w /var/lib/jenkins/workspace \
            "${DOCKER_IMAGE}" \
            sh -c 'sudo chown -R jenkins . && pip install dist/*.whl && .jenkins/pytorch/test.sh'
      - name: Clean up docker images
        if: always()
        run: |
          # Ensure the working directory gets chowned back to the current user
          docker run --rm -v "$(pwd)":/v -w /v alpine chown -R "$(id -u):$(id -g)" .
          # Prune all of the docker images
          docker system prune -af
Oops, something went wrong.