Skip to content

Commit

Permalink
Update on "[ONNX] Update repeat_interleave symbolic (#54312)"
Browse files Browse the repository at this point in the history
Add implementation for cases when
- interleaving happens along dim which consist of dynamic axes

[ghstack-poisoned]
  • Loading branch information
BowenBao committed Apr 16, 2021
2 parents 2bd6d08 + aef7de6 commit cd965e0
Show file tree
Hide file tree
Showing 138 changed files with 3,633 additions and 2,660 deletions.
164 changes: 164 additions & 0 deletions .github/scripts/generate_linux_ci_workflows.py
@@ -0,0 +1,164 @@
#!/usr/bin/env python

from pathlib import Path

import jinja2

DOCKER_REGISTRY = "308535385114.dkr.ecr.us-east-1.amazonaws.com"

GITHUB_DIR = Path(__file__).parent.parent

CPU_TEST_RUNNER = "linux.2xlarge"
CUDA_TEST_RUNNER = "linux.8xlarge.nvidia.gpu"


class PyTorchLinuxWorkflow:
def __init__(self, build_environment: str, docker_image_base: str):
self.build_environment = build_environment
self.docker_image_base = docker_image_base
self.test_runner_type = CPU_TEST_RUNNER
if "cuda" in build_environment:
self.test_runner_type = CUDA_TEST_RUNNER

def generate_workflow_file(
self, workflow_template: jinja2.Template, jinja_env: jinja2.Environment
) -> Path:
output_file_path = GITHUB_DIR.joinpath(
f"workflows/{self.build_environment}.yml"
)
with open(output_file_path, "w") as output_file:
output_file.write(
workflow_template.render(
build_environment=self.build_environment,
docker_image_base=self.docker_image_base,
test_runner_type=self.test_runner_type
)
)
output_file.write('\n')
return output_file_path


WORKFLOWS = [
PyTorchLinuxWorkflow(
build_environment="pytorch-linux-xenial-py3.6-gcc5.4",
docker_image_base=f"{DOCKER_REGISTRY}/pytorch/pytorch-linux-xenial-py3.6-gcc5.4",
),
# PyTorchLinuxWorkflow(
# build_environment="pytorch-paralleltbb-linux-xenial-py3.6-gcc5.4",
# docker_image_base=f"{DOCKER_REGISTRY}/pytorch/pytorch-linux-xenial-py3.6-gcc5.4",
# ),
# PyTorchLinuxWorkflow(
# build_environment="pytorch-parallelnative-linux-xenial-py3.6-gcc5.4",
# docker_image_base=f"{DOCKER_REGISTRY}/pytorch/pytorch-linux-xenial-py3.6-gcc5.4",
# ),
# PyTorchLinuxWorkflow(
# build_environment="pytorch-pure_torch-linux-xenial-py3.6-gcc5.4",
# docker_image_base=f"{DOCKER_REGISTRY}/pytorch/pytorch-linux-xenial-py3.6-gcc5.4",
# ),
# PyTorchLinuxWorkflow(
# build_environment="pytorch-linux-xenial-py3.6-gcc7",
# docker_image_base=f"{DOCKER_REGISTRY}/pytorch/pytorch-linux-xenial-py3.6-gcc7",
# ),
# PyTorchLinuxWorkflow(
# build_environment="pytorch-linux-xenial-py3-clang5-asan",
# docker_image_base=f"{DOCKER_REGISTRY}/pytorch/pytorch-linux-xenial-py3-clang5-asan",
# ),
# PyTorchLinuxWorkflow(
# build_environment="pytorch-linux-xenial-py3-clang7-onnx",
# docker_image_base=f"{DOCKER_REGISTRY}/pytorch/pytorch-linux-xenial-py3-clang7-onnx",
# ),
# PyTorchLinuxWorkflow(
# build_environment="pytorch-linux-xenial-cuda10.2-cudnn7-py3-gcc7",
# docker_image_base=f"{DOCKER_REGISTRY}/pytorch/pytorch-linux-xenial-cuda10.2-cudnn7-py3-gcc7",
# ),
# PyTorchLinuxWorkflow(
# build_environment="pytorch-linux-xenial-cuda11.1-cudnn8-py3-gcc7",
# docker_image_base=f"{DOCKER_REGISTRY}/pytorch/pytorch-linux-xenial-cuda11.1-cudnn8-py3-gcc7",
# ),
# PyTorchLinuxWorkflow(
# build_environment="pytorch-libtorch-linux-xenial-cuda11.1-cudnn8-py3-gcc7",
# docker_image_base=f"{DOCKER_REGISTRY}/pytorch/pytorch-linux-xenial-cuda11.1-cudnn8-py3-gcc7",
# ),
# PyTorchLinuxWorkflow(
# build_environment="pytorch-linux-bionic-py3.6-clang9-noarch",
# docker_image_base=f"{DOCKER_REGISTRY}/pytorch/pytorch-linux-bionic-py3.6-clang9",
# ),
# PyTorchLinuxWorkflow(
# build_environment="pytorch-xla-linux-bionic-py3.6-clang9",
# docker_image_base=f"{DOCKER_REGISTRY}/pytorch/pytorch-linux-bionic-py3.6-clang9",
# ),
# PyTorchLinuxWorkflow(
# build_environment="pytorch-vulkan-linux-bionic-py3.6-clang9",
# docker_image_base=f"{DOCKER_REGISTRY}/pytorch/pytorch-linux-bionic-py3.6-clang9",
# ),
# PyTorchLinuxWorkflow(
# build_environment="pytorch-linux-bionic-py3.8-gcc9-coverage",
# docker_image_base=f"{DOCKER_REGISTRY}/pytorch/pytorch-linux-bionic-py3.8-gcc9",
# ),
# PyTorchLinuxWorkflow(
# build_environment="pytorch-linux-bionic-rocm3.9-py3.6",
# docker_image_base=f"{DOCKER_REGISTRY}/pytorch/pytorch-linux-bionic-rocm3.9-py3.6",
# ),
# PyTorchLinuxWorkflow(
# build_environment="pytorch-linux-xenial-py3-clang5-android-ndk-r19c-x86_32",
# docker_image_base=f"{DOCKER_REGISTRY}/pytorch/pytorch-linux-xenial-py3-clang5-android-ndk-r19c",
# ),
# PyTorchLinuxWorkflow(
# build_environment="pytorch-linux-xenial-py3-clang5-android-ndk-r19c-x86_64",
# docker_image_base=f"{DOCKER_REGISTRY}/pytorch/pytorch-linux-xenial-py3-clang5-android-ndk-r19c",
# ),
# PyTorchLinuxWorkflow(
# build_environment="pytorch-linux-xenial-py3-clang5-android-ndk-r19c-arm-v7a",
# docker_image_base=f"{DOCKER_REGISTRY}/pytorch/pytorch-linux-xenial-py3-clang5-android-ndk-r19c",
# ),
# PyTorchLinuxWorkflow(
# build_environment="pytorch-linux-xenial-py3-clang5-android-ndk-r19c-arm-v8a",
# docker_image_base=f"{DOCKER_REGISTRY}/pytorch/pytorch-linux-xenial-py3-clang5-android-ndk-r19c",
# ),
# PyTorchLinuxWorkflow(
# build_environment="pytorch-linux-xenial-py3-clang5-mobile",
# docker_image_base=f"{DOCKER_REGISTRY}/pytorch/pytorch-linux-xenial-py3-clang5-asan",
# ),
# PyTorchLinuxWorkflow(
# build_environment="pytorch-linux-xenial-py3-clang5-mobile-custom-dynamic",
# docker_image_base=f"{DOCKER_REGISTRY}/pytorch/pytorch-linux-xenial-py3-clang5-android-ndk-r19c",
# ),
# PyTorchLinuxWorkflow(
# build_environment="pytorch-linux-xenial-py3-clang5-mobile-custom-static",
# docker_image_base=f"{DOCKER_REGISTRY}/pytorch/pytorch-linux-xenial-py3-clang5-android-ndk-r19c",
# ),
# PyTorchLinuxWorkflow(
# build_environment="pytorch-linux-xenial-py3-clang5-mobile-code-analysis",
# docker_image_base=f"{DOCKER_REGISTRY}/pytorch/pytorch-linux-xenial-py3-clang5-android-ndk-r19c",
# ),
# PyTorchLinuxWorkflow(
# build_environment="pytorch-linux-xenial-py3-clang5-android-ndk-r19c-x86_32",
# docker_image_base=f"{DOCKER_REGISTRY}/pytorch/pytorch-linux-xenial-py3-clang5-android-ndk-r19c",
# ),
# PyTorchLinuxWorkflow(
# build_environment="pytorch-linux-xenial-py3-clang5-android-ndk-r19c-x86_64",
# docker_image_base=f"{DOCKER_REGISTRY}/pytorch/pytorch-linux-xenial-py3-clang5-android-ndk-r19c",
# ),
# PyTorchLinuxWorkflow(
# build_environment="pytorch-linux-xenial-py3-clang5-android-ndk-r19c-arm-v7a",
# docker_image_base=f"{DOCKER_REGISTRY}/pytorch/pytorch-linux-xenial-py3-clang5-android-ndk-r19c",
# ),
# PyTorchLinuxWorkflow(
# build_environment="pytorch-linux-xenial-py3-clang5-android-ndk-r19c-arm-v8a",
# docker_image_base=f"{DOCKER_REGISTRY}/pytorch/pytorch-linux-xenial-py3-clang5-android-ndk-r19c",
# ),
]

if __name__ == "__main__":
jinja_env = jinja2.Environment(
variable_start_string="!{{",
loader=jinja2.FileSystemLoader(str(GITHUB_DIR.joinpath("templates"))),
)
workflow_template = jinja_env.get_template("linux_ci_workflow.yml.in")
for workflow in WORKFLOWS:
print(
workflow.generate_workflow_file(
workflow_template=workflow_template,
jinja_env=jinja_env
)
)
43 changes: 43 additions & 0 deletions .github/scripts/install_nvidia_utils_linux.sh
@@ -0,0 +1,43 @@
#!/usr/bin/env bash

set -eou pipefail

DISTRIBUTION=$(. /etc/os-release;echo $ID$VERSION_ID) \
DRIVER_FN="NVIDIA-Linux-x86_64-460.39.run"
YUM_REPO_URL="https://nvidia.github.io/nvidia-docker/${DISTRIBUTION}/nvidia-docker.repo"

install_nvidia_docker2_amzn2() {
(
set -x
# Needed for yum-config-manager
sudo yum install -y yum-utils
sudo yum-config-manager --add-repo "${YUM_REPO_URL}"
sudo yum install -y nvidia-docker2
sudo systemctl restart docker
)
}

install_nvidia_driver() {
(
set -x
sudo yum groupinstall -y "Development Tools"
curl -fsL -o nvidia_driver "https://s3.amazonaws.com/ossci-linux/nvidia_driver/$DRIVER_FN"
sudo /bin/bash nvidia_driver -s --no-drm || (sudo cat /var/log/nvidia-installer.log && false)
nvidia-smi
)
}

# Install container toolkit based on distribution
echo "== Installing nvidia container toolkit for ${DISTRIBUTION} =="
case "${DISTRIBUTION}" in
amzn*)
install_nvidia_docker2_amzn2
;;
*)
echo "ERROR: Unknown distribution ${DISTRIBUTION}"
exit 1
;;
esac

echo "== Installing nvidia driver ${DRIVER_FN} =="
install_nvidia_driver
5 changes: 5 additions & 0 deletions .github/scripts/report_git_status.sh
@@ -0,0 +1,5 @@
#!/usr/bin/env bash
CHANGES=$(git status --porcelain)
echo "$CHANGES"
git diff
[ -z "$CHANGES" ]
174 changes: 174 additions & 0 deletions .github/templates/linux_ci_workflow.yml.in
@@ -0,0 +1,174 @@
# @generated by .github/scripts/generate_linux_ci_workflows.py, Do not update manually
#
# Template is at: .github/templates/linux_ci_workflow.yml
# Generation script: .github/scripts/generate_linux_ci_workflows.py
name: Linux CI (!{{ build_environment }})

on:
# TODO: Enable pull_request builds when we can verify capacity can be met by auto-scalers
# pull_request:
push:
branches:
- master
- release/*
workflow_dispatch:

env:
BUILD_ENVIRONMENT: !{{ build_environment }}
DOCKER_IMAGE_BASE: !{{ docker_image_base }}
SCCACHE_BUCKET: ossci-compiler-cache-circleci-v2
TORCH_CUDA_ARCH_LIST: 5.2
IN_CI: 1

jobs:
calculate-docker-image:
runs-on: ubuntu-18.04
outputs:
docker_image: ${{ steps.calculate-tag.outputs.docker_image }}
steps:
- name: Checkout PyTorch
uses: actions/checkout@v2
- name: Calculate docker image tag
id: calculate-tag
run: |
DOCKER_TAG=$(git rev-parse HEAD:.circleci/docker)
echo "::set-output name=docker_image::${DOCKER_IMAGE_BASE}:${DOCKER_TAG}"
build:
runs-on: linux.2xlarge
needs: calculate-docker-image
env:
DOCKER_IMAGE: ${{ needs.calculate-docker-image.outputs.docker_image }}
steps:
- name: Chown workspace
run: |
# Ensure the working directory gets chowned back to the current user
docker run --rm -v "$(pwd)":/v -w /v alpine chown -R "$(id -u):$(id -g)" .
- name: Checkout PyTorch
uses: actions/checkout@v2
with:
fetch-depth: 0 # deep clone, to allow sharding to use git rev-list
submodules: recursive
- name: Log in to ECR
run: |
aws ecr get-login --no-include-email --region us-east-1 > /tmp/ecr-login.sh
bash /tmp/ecr-login.sh
rm /tmp/ecr-login.sh
- name: Pull docker image
run: |
docker pull "${DOCKER_IMAGE}"
- name: Build PyTorch
run: |
SCCACHE_MAX_JOBS=$(( $(nproc) - 1 ))
MEMORY_LIMIT_MAX_JOBS=8 # our "linux.2xlarge" runner has 16 vCPUs, if we use all of them we'll OOM
export MAX_JOBS=$(( SCCACHE_MAX_JOBS > MEMORY_LIMIT_MAX_JOBS ? MEMORY_LIMIT_MAX_JOBS : SCCACHE_MAX_JOBS ))
docker run \
-e BUILD_ENVIRONMENT \
-e MAX_JOBS \
-e SCCACHE_BUCKET \
-e SKIP_SCCACHE_INITIALIZATION=1 \
-e TORCH_CUDA_ARCH_LIST \
--security-opt seccomp=unconfined \
--cap-add=SYS_PTRACE \
--tty \
--user jenkins \
-v "${GITHUB_WORKSPACE}:/var/lib/jenkins/workspace" \
-w /var/lib/jenkins/workspace \
"${DOCKER_IMAGE}" \
sh -c 'sudo chown -R jenkins . && .jenkins/pytorch/build.sh'
- name: Chown workspace
run: |
# Ensure the working directory gets chowned back to the current user
docker run --rm -v "$(pwd)":/v -w /v alpine chown -R "$(id -u):$(id -g)" .
- name: Archive artifacts into zip
run: |
zip -q -r artifacts.zip dist build
- uses: actions/upload-artifact@v2
name: Store PyTorch Build Artifacts
with:
name: ${{ env.BUILD_ENVIRONMENT }}
retention-days: 30
if-no-files-found: error
path:
artifacts.zip
- name: Clean up docker images
if: always()
run: |
# Prune all of the docker images
docker system prune -af
test:
runs-on: !{{ test_runner_type }}
needs:
- calculate-docker-image
- build
env:
DOCKER_IMAGE: ${{ needs.calculate-docker-image.outputs.docker_image }}
steps:
- name: Chown workspace
run: |
# Ensure the working directory gets chowned back to the current user
docker run --rm -v "$(pwd)":/v -w /v alpine chown -R "$(id -u):$(id -g)" .
- name: Checkout PyTorch
uses: actions/checkout@v2
- name: Log in to ECR
run: |
aws ecr get-login --no-include-email --region us-east-1 > /tmp/ecr-login.sh
bash /tmp/ecr-login.sh
rm /tmp/ecr-login.sh
- name: Pull docker image
run: |
docker pull "${DOCKER_IMAGE}"
- name: Install nvidia driver, nvidia-docker runtime, set GPU_FLAG
if: ${{ contains(env.BUILD_ENVIRONMENT, 'cuda') }}
run: |
bash .github/scripts/install_nvidia_utils_linux.sh
echo "GPU_FLAG=--gpus all" >> "${GITHUB_ENV}"
- name: Determine shm-size
run: |
shm_size="1g"
case "${BUILD_ENVIRONMENT}" in
*cuda*)
shm_size="2g"
;;
*rocm*)
shm_size="8g"
;;
esac
echo "SHM_SIZE=${shm_size}" >> "${GITHUB_ENV}"
- uses: actions/download-artifact@v2
name: Download PyTorch Build Artifacts
with:
name: ${{ env.BUILD_ENVIRONMENT }}
- name: Unzip artifacts
run: |
unzip -q artifacts.zip
- name: Output disk space left
run: |
sudo df -H
- name: Test PyTorch
run: |
SCCACHE_MAX_JOBS=$(( $(nproc) - 1 ))
MEMORY_LIMIT_MAX_JOBS=8 # our "linux.2xlarge" runner has 16 vCPUs, if we use all of them we'll OOM
export MAX_JOBS=$(( SCCACHE_MAX_JOBS > MEMORY_LIMIT_MAX_JOBS ? MEMORY_LIMIT_MAX_JOBS : SCCACHE_MAX_JOBS ))
# Used for GPU_FLAG since that doesn't play nice
# shellcheck disable=SC2086
docker run \
${GPU_FLAG:-} \
-e BUILD_ENVIRONMENT \
-e IN_CI \
-e MAX_JOBS \
--security-opt seccomp=unconfined \
--cap-add=SYS_PTRACE \
--shm-size="${SHM_SIZE}" \
--tty \
--user jenkins \
-v "${GITHUB_WORKSPACE}:/var/lib/jenkins/workspace" \
-w /var/lib/jenkins/workspace \
"${DOCKER_IMAGE}" \
sh -c 'sudo chown -R jenkins . && pip install dist/*.whl && .jenkins/pytorch/test.sh'
- name: Clean up docker images
if: always()
run: |
# Ensure the working directory gets chowned back to the current user
docker run --rm -v "$(pwd)":/v -w /v alpine chown -R "$(id -u):$(id -g)" .
# Prune all of the docker images
docker system prune -af

0 comments on commit cd965e0

Please sign in to comment.