.circleci/verbatim-sources/job-specs/pytorch-job-specs.yml

jobs:
  # Builds PyTorch inside the CI Docker image and, unless BUILD_ONLY is set,
  # commits the build container and pushes it as an intermediate image for the
  # next phase. The `dist` folder, when present, is stored as an artifact.
  pytorch_linux_build:
    <<: *pytorch_params
    machine:
      image: ubuntu-1604:202007-01
    steps:
    # See Note [Workspace for CircleCI scripts] in job-specs-setup.yml
    - checkout
    - calculate_docker_image_tag
    - setup_linux_system_environment
    - optional_merge_target_branch
    - setup_ci_environment
    - run:
        name: Build
        no_output_timeout: "1h"
        command: |
          set -e
          # rocm3.9 images are pinned to a fixed image tag.
          if [[ "${DOCKER_IMAGE}" == *rocm3.9* ]]; then
            export DOCKER_TAG="f3d89a32912f62815e4feaeed47e564e887dffd6"
          fi
          # Lines appended to BASH_ENV are handed to the container through
          # `docker run --env-file` below.
          if [[ ${BUILD_ENVIRONMENT} == *"pure_torch"* ]]; then
            echo 'BUILD_CAFFE2=OFF' >> "${BASH_ENV}"
          fi
          if [[ ${BUILD_ENVIRONMENT} == *"paralleltbb"* ]]; then
            echo 'ATEN_THREADING=TBB' >> "${BASH_ENV}"
            echo 'USE_TBB=1' >> "${BASH_ENV}"
          elif [[ ${BUILD_ENVIRONMENT} == *"parallelnative"* ]]; then
            echo 'ATEN_THREADING=NATIVE' >> "${BASH_ENV}"
          fi
          # NOTE(review): PARALLEL_FLAGS is not assigned anywhere in this
          # script, so this presumably prints an empty value - confirm.
          echo "Parallel backend flags: "${PARALLEL_FLAGS}
          # Pull Docker image and run build
          echo "DOCKER_IMAGE: "${DOCKER_IMAGE}:${DOCKER_TAG}
          time docker pull ${DOCKER_IMAGE}:${DOCKER_TAG} >/dev/null
          # Assign first and export afterwards: `export id=$(...)` returns the
          # status of `export`, which would hide a failed `docker run` from
          # `set -e`. The export is still needed because command.sh (run in a
          # child bash below) expands "$id".
          id=$(docker run --env-file "${BASH_ENV}" --cap-add=SYS_PTRACE --security-opt seccomp=unconfined -t -d -w /var/lib/jenkins ${DOCKER_IMAGE}:${DOCKER_TAG})
          export id

          git submodule sync && git submodule update -q --init --recursive

          # Copy the checkout (including submodules) into the container.
          docker cp /home/circleci/project/. $id:/var/lib/jenkins/workspace

          # NOTE(review): in the `find` expression below `-or` binds looser
          # than the implicit `-and`, so `-delete` applies only to the `*.o`
          # branch; `*.a` files are matched but left in place. Behaviour is
          # kept as-is because consumers of the committed image may rely on
          # the static libraries - confirm before changing.
          export COMMAND='((echo "sudo chown -R jenkins workspace && cd workspace && .jenkins/pytorch/build.sh && find ${BUILD_ROOT} -type f -name "*.a" -or -name "*.o" -delete") | docker exec -u jenkins -i "$id" bash) 2>&1'

          # Run the build through a generated script so that its combined
          # output can be piped through `ts`.
          echo ${COMMAND} > ./command.sh && unbuffer bash ./command.sh | ts

          # Copy dist folder back
          docker cp $id:/var/lib/jenkins/workspace/dist /home/circleci/project/. || echo "Dist folder not found"

          # Push intermediate Docker image for next phase to use
          if [ -z "${BUILD_ONLY}" ]; then
            # Note [Special build images]
            # The xla build uses the same docker image as
            # pytorch_linux_bionic_py3_6_clang9_build. In the push step, we have to
            # distinguish between them so the test can pick up the correct image.
            output_image=${DOCKER_IMAGE}:${DOCKER_TAG}-${CIRCLE_SHA1}
            if [[ ${BUILD_ENVIRONMENT} == *"xla"* ]]; then
              export COMMIT_DOCKER_IMAGE=$output_image-xla
            elif [[ ${BUILD_ENVIRONMENT} == *"libtorch"* ]]; then
              export COMMIT_DOCKER_IMAGE=$output_image-libtorch
            elif [[ ${BUILD_ENVIRONMENT} == *"paralleltbb"* ]]; then
              export COMMIT_DOCKER_IMAGE=$output_image-paralleltbb
            elif [[ ${BUILD_ENVIRONMENT} == *"parallelnative"* ]]; then
              export COMMIT_DOCKER_IMAGE=$output_image-parallelnative
            elif [[ ${BUILD_ENVIRONMENT} == *"android-ndk-r19c-x86_64"* ]]; then
              export COMMIT_DOCKER_IMAGE=$output_image-android-x86_64
            elif [[ ${BUILD_ENVIRONMENT} == *"android-ndk-r19c-arm-v7a"* ]]; then
              export COMMIT_DOCKER_IMAGE=$output_image-android-arm-v7a
            elif [[ ${BUILD_ENVIRONMENT} == *"android-ndk-r19c-arm-v8a"* ]]; then
              export COMMIT_DOCKER_IMAGE=$output_image-android-arm-v8a
            elif [[ ${BUILD_ENVIRONMENT} == *"android-ndk-r19c-x86_32"* ]]; then
              export COMMIT_DOCKER_IMAGE=$output_image-android-x86_32
            elif [[ ${BUILD_ENVIRONMENT} == *"android-ndk-r19c-vulkan-x86_32"* ]]; then
              export COMMIT_DOCKER_IMAGE=$output_image-android-vulkan-x86_32
            elif [[ ${BUILD_ENVIRONMENT} == *"vulkan-linux"* ]]; then
              export COMMIT_DOCKER_IMAGE=$output_image-vulkan
            else
              export COMMIT_DOCKER_IMAGE=$output_image
            fi
            docker commit "$id" ${COMMIT_DOCKER_IMAGE}
            time docker push ${COMMIT_DOCKER_IMAGE}
          fi
    - store_artifacts:
        path: /home/circleci/project/dist

  # Pulls the image pushed by the matching build job, starts a container from
  # it, runs the test scripts inside that container and then collects test
  # reports (and coverage data for coverage/onnx environments).
  pytorch_linux_test:
    <<: *pytorch_params
    machine:
      image: ubuntu-1604:202007-01
    steps:
    # See Note [Workspace for CircleCI scripts] in job-specs-setup.yml
    - checkout
    - calculate_docker_image_tag
    - setup_linux_system_environment
    - setup_ci_environment
    - run:
        name: Download Docker image
        no_output_timeout: "90m"
        command: |
          set -e
          export PYTHONUNBUFFERED=1
          # rocm3.9 images are pinned to a fixed image tag.
          if [[ "${DOCKER_IMAGE}" == *rocm3.9* ]]; then
            export DOCKER_TAG="f3d89a32912f62815e4feaeed47e564e887dffd6"
          fi
          # See Note [Special build images]
          output_image=${DOCKER_IMAGE}:${DOCKER_TAG}-${CIRCLE_SHA1}
          if [[ ${BUILD_ENVIRONMENT} == *"xla"* ]]; then
            export COMMIT_DOCKER_IMAGE=$output_image-xla
          elif [[ ${BUILD_ENVIRONMENT} == *"libtorch"* ]]; then
            export COMMIT_DOCKER_IMAGE=$output_image-libtorch
          elif [[ ${BUILD_ENVIRONMENT} == *"paralleltbb"* ]]; then
            export COMMIT_DOCKER_IMAGE=$output_image-paralleltbb
          elif [[ ${BUILD_ENVIRONMENT} == *"parallelnative"* ]]; then
            export COMMIT_DOCKER_IMAGE=$output_image-parallelnative
          elif [[ ${BUILD_ENVIRONMENT} == *"vulkan-linux"* ]]; then
            export COMMIT_DOCKER_IMAGE=$output_image-vulkan
          else
            export COMMIT_DOCKER_IMAGE=$output_image
          fi
          echo "DOCKER_IMAGE: "${COMMIT_DOCKER_IMAGE}

          # Lines appended to BASH_ENV are handed to the container through
          # `docker run --env-file` below.
          if [[ ${BUILD_ENVIRONMENT} == *"paralleltbb"* ]]; then
            echo 'ATEN_THREADING=TBB' >> "${BASH_ENV}"
            echo 'USE_TBB=1' >> "${BASH_ENV}"
          elif [[ ${BUILD_ENVIRONMENT} == *"parallelnative"* ]]; then
            echo 'ATEN_THREADING=NATIVE' >> "${BASH_ENV}"
          fi
          # NOTE(review): PARALLEL_FLAGS is not assigned anywhere in this
          # script, so this presumably prints an empty value - confirm.
          echo "Parallel backend flags: "${PARALLEL_FLAGS}

          time docker pull ${COMMIT_DOCKER_IMAGE} >/dev/null

          # TODO: Make this less painful
          # Each branch uses a plain assignment and the export happens once
          # afterwards: `export id=$(...)` returns the status of `export`, which
          # would hide a failed `docker run` from `set -e` and let an empty id
          # be written to BASH_ENV.
          if [ -n "${USE_CUDA_DOCKER_RUNTIME}" ]; then
            id=$(docker run --env-file "${BASH_ENV}" --cap-add=SYS_PTRACE --security-opt seccomp=unconfined --gpus all --shm-size=2g -t -d -w /var/lib/jenkins ${COMMIT_DOCKER_IMAGE})
          elif [[ ${BUILD_ENVIRONMENT} == *"rocm"* ]]; then
            hostname
            id=$(docker run --env-file "${BASH_ENV}" --cap-add=SYS_PTRACE --security-opt seccomp=unconfined --shm-size=8g --ipc=host --device /dev/kfd --device /dev/dri --group-add video -t -d -w /var/lib/jenkins ${COMMIT_DOCKER_IMAGE})
          else
            id=$(docker run --env-file "${BASH_ENV}" --cap-add=SYS_PTRACE --security-opt seccomp=unconfined --shm-size=1g --ipc=host -t -d -w /var/lib/jenkins ${COMMIT_DOCKER_IMAGE})
          fi
          export id
          # Record the container id in BASH_ENV; the following steps
          # reference it as "$id".
          echo "id=${id}" >> "${BASH_ENV}"

    - run:
        name: Check for no AVX instruction by default
        no_output_timeout: "20m"
        command: |
          set -e
          # Succeeds only for the two exact BUILD_ENVIRONMENT names below.
          is_vanilla_build() {
            if [ "${BUILD_ENVIRONMENT}" == "pytorch-linux-bionic-py3.6-clang9-test" ]; then
              return 0
            fi
            if [ "${BUILD_ENVIRONMENT}" == "pytorch-linux-xenial-py3.6-gcc5.4-test" ]; then
              return 0
            fi
            return 1
          }

          if is_vanilla_build; then
            echo "apt-get update && apt-get install -y qemu-user gdb" | docker exec -u root -i "$id" bash
            # Run bin/basic under qemu-x86_64 with ATEN_CPU_CAPABILITY=default
            # and attach gdb to qemu's gdb stub on port 2345; gdb prints a
            # backtrace and quits with the value of $_isvoid($_exitcode).
            echo "cd workspace/build; qemu-x86_64 -g 2345 -cpu Broadwell -E ATEN_CPU_CAPABILITY=default ./bin/basic --gtest_filter=BasicTest.BasicTestCPU & gdb ./bin/basic -ex 'set pagination off' -ex 'target remote :2345' -ex 'continue' -ex 'bt' -ex='set confirm off' -ex 'quit \$_isvoid(\$_exitcode)'" | docker exec -u jenkins -i "$id" bash
          else
            echo "Skipping for ${BUILD_ENVIRONMENT}"
          fi
    - run:
        name: Run tests
        no_output_timeout: "90m"
        command: |
          set -e

          # The heredoc delimiter is unquoted, so the variables inside it are
          # expanded here on the host before the script is sent to the
          # container.
          cat >docker_commands.sh \<<EOL
          # =================== The following code will be executed inside Docker container ===================
          set -ex
          export SCRIBE_GRAPHQL_ACCESS_TOKEN="${SCRIBE_GRAPHQL_ACCESS_TOKEN}"
          ${PARALLEL_FLAGS}
          cd workspace
          EOL
          # Append the test entry point that matches the build environment.
          if [[ ${BUILD_ENVIRONMENT} == *"multigpu"* ]]; then
            echo ".jenkins/pytorch/multigpu-test.sh" >> docker_commands.sh
          elif [[ ${BUILD_ENVIRONMENT} == *onnx* ]]; then
            echo "pip install click mock tabulate networkx==2.0" >> docker_commands.sh
            echo "pip -q install --user \"file:///var/lib/jenkins/workspace/third_party/onnx#egg=onnx\"" >> docker_commands.sh
            echo ".jenkins/caffe2/test.sh" >> docker_commands.sh
          else
            echo ".jenkins/pytorch/test.sh" >> docker_commands.sh
          fi
          echo "(cat docker_commands.sh | docker exec -u jenkins -i "$id" bash) 2>&1" > command.sh
          unbuffer bash command.sh | ts
    - run:
        name: Report results
        no_output_timeout: "5m"
        command: |
          set -e
          docker stats --all --no-stream

          # As in the previous step, the variables in the heredoc are
          # expanded on the host; the CIRCLE_* values are re-exported inside
          # the container for print_test_stats.py.
          cat >docker_commands.sh \<<EOL
          # =================== The following code will be executed inside Docker container ===================
          set -ex
          export BUILD_ENVIRONMENT=${BUILD_ENVIRONMENT}
          export SCRIBE_GRAPHQL_ACCESS_TOKEN="${SCRIBE_GRAPHQL_ACCESS_TOKEN}"
          export CIRCLE_TAG="${CIRCLE_TAG:-}"
          export CIRCLE_SHA1="$CIRCLE_SHA1"
          export CIRCLE_PR_NUMBER="${CIRCLE_PR_NUMBER:-}"
          export CIRCLE_BRANCH="$CIRCLE_BRANCH"
          export CIRCLE_JOB="$CIRCLE_JOB"
          export CIRCLE_WORKFLOW_ID="$CIRCLE_WORKFLOW_ID"
          cd workspace
          python test/print_test_stats.py --upload-to-s3 test
          EOL
          echo "(cat docker_commands.sh | docker exec -u jenkins -i "$id" bash) 2>&1" > command.sh
          unbuffer bash command.sh | ts

          echo "Retrieving test reports"
          # Missing test reports are tolerated; missing coverage files below
          # are not and abort the step under `set -e`.
          docker cp $id:/var/lib/jenkins/workspace/test/test-reports ./ || echo 'No test reports found!'
          if [[ ${BUILD_ENVIRONMENT} == *"coverage"* ]]; then
              echo "Retrieving C++ coverage report"
              docker cp $id:/var/lib/jenkins/workspace/build/coverage.info ./test
          fi
          if [[ ${BUILD_ENVIRONMENT} == *"coverage"* || ${BUILD_ENVIRONMENT} == *"onnx"* ]]; then
              echo "Retrieving Python coverage report"
              docker cp $id:/var/lib/jenkins/workspace/test/.coverage ./test
              docker cp $id:/var/lib/jenkins/workspace/test/coverage.xml ./test
              python3 -mpip install codecov
              python3 -mcodecov
          fi
        # Run this step even when an earlier step failed.
        when: always
    - store_test_results:
        path: test-reports

  # Builds PyTorch on a Windows executor via .jenkins/pytorch/win-build.sh,
  # then persists C:/w/build-results to the workspace and stores it as an
  # artifact.
  pytorch_windows_build:
    <<: *pytorch_windows_params
    # Keys written explicitly here take precedence over same-named keys
    # merged in from the anchor above.
    parameters:
      executor:
        type: string
        default: "windows-xlarge-cpu-with-nvidia-cuda"
      build_environment:
        type: string
        default: ""
      test_name:
        type: string
        default: ""
      cuda_version:
        type: string
        default: "10.1"
      python_version:
        type: string
        default: "3.6"
      vc_version:
        type: string
        default: "14.16"
      vc_year:
        type: string
        default: "2019"
      vc_product:
        type: string
        default: "BuildTools"
      use_cuda:
        type: string
        default: ""
    executor: <<parameters.executor>>
    steps:
      - checkout
      # NOTE(review): USE_CUDA is presumably exported from the `use_cuda`
      # parameter by pytorch_windows_params - confirm against that anchor.
      - run:
          name: Install Cuda
          no_output_timeout: "30m"
          command: |
            if [[ "${USE_CUDA}" == "1" ]]; then
              .circleci/scripts/windows_cuda_install.sh
            fi
      - run:
          name: Install Cudnn
          command: |
            if [[ "${USE_CUDA}" == "1" ]]; then
              .circleci/scripts/windows_cudnn_install.sh
            fi
      - run:
          name: Build
          no_output_timeout: "90m"
          command: |
            set -e
            # Command tracing is switched off while the AWS credentials are
            # exported and switched back on for the build script.
            set +x
            export AWS_ACCESS_KEY_ID=${CIRCLECI_AWS_ACCESS_KEY_FOR_WIN_BUILD_V1}
            export AWS_SECRET_ACCESS_KEY=${CIRCLECI_AWS_SECRET_KEY_FOR_WIN_BUILD_V1}
            set -x
            .jenkins/pytorch/win-build.sh
      # NOTE(review): CircleCI documents `paths` as a list; a single string
      # is used here - confirm before converting.
      - persist_to_workspace:
          root: "C:/w"
          paths: build-results
      - store_artifacts:
          path: C:/w/build-results

  # Attaches the workspace at c:/users/circleci/workspace, installs CUDA and
  # cuDNN when required, and runs .jenkins/pytorch/win-test.sh on a Windows
  # executor.
  pytorch_windows_test:
    <<: *pytorch_windows_params
    # Keys written explicitly here take precedence over same-named keys
    # merged in from the anchor above.
    parameters:
      executor:
        type: string
        default: "windows-medium-cpu-with-nvidia-cuda"
      build_environment:
        type: string
        default: ""
      test_name:
        type: string
        default: ""
      cuda_version:
        type: string
        default: "10.1"
      python_version:
        type: string
        default: "3.6"
      vc_version:
        type: string
        default: "14.16"
      vc_year:
        type: string
        default: "2019"
      vc_product:
        type: string
        default: "BuildTools"
      use_cuda:
        type: string
        default: ""
    executor: <<parameters.executor>>
    steps:
      - checkout
      - attach_workspace:
          at: c:/users/circleci/workspace
      # NOTE(review): CUDA_VERSION and JOB_EXECUTOR are presumably exported
      # from the job parameters by pytorch_windows_params - confirm against
      # that anchor.
      - run:
          name: Install Cuda
          no_output_timeout: "30m"
          command: |
            # The CUDA installer is skipped for CPU jobs, and also when
            # CUDA_VERSION is "10" on the "windows-with-nvidia-gpu" executor.
            if [[ "${CUDA_VERSION}" != "cpu" ]]; then
              if [[ "${CUDA_VERSION}" != "10" || "${JOB_EXECUTOR}" != "windows-with-nvidia-gpu" ]]; then
                .circleci/scripts/windows_cuda_install.sh
              fi
            fi
      - run:
          name: Install Cudnn
          command: |
            if [[ "${CUDA_VERSION}" != "cpu" ]]; then
              .circleci/scripts/windows_cudnn_install.sh
            fi
      - run:
          name: Test
          no_output_timeout: "30m"
          command: |
            set -e
            export IN_CI=1
            # Command tracing is switched off while the AWS credentials are
            # exported and switched back on for the test script.
            set +x
            export AWS_ACCESS_KEY_ID=${CIRCLECI_AWS_ACCESS_KEY_FOR_WIN_BUILD_V1}
            export AWS_SECRET_ACCESS_KEY=${CIRCLECI_AWS_SECRET_KEY_FOR_WIN_BUILD_V1}
            set -x
            .jenkins/pytorch/win-test.sh
      - store_test_results:
          path: test/test-reports