Skip to content

Commit

Permalink
CUDA 12.4 ARM wheel integration to CD - nightly build (#126174)
Browse files Browse the repository at this point in the history
rebasing #124112.
too many conflict files, so starting a new PR.

Test pytorch/builder#1775 (merged) for ARM wheel addition
Test pytorch/builder#1828 (merged) for setting MAX_JOBS

Current issue to follow up:
#126980

Co-authored-by: Aidyn-A <aidyn.b.aitzhan@gmail.com>
Pull Request resolved: #126174
Approved by: https://github.com/nWEIdia, https://github.com/atalman
  • Loading branch information
tinglvv authored and pytorchmergebot committed May 27, 2024
1 parent 7763c83 commit 1c2e221
Show file tree
Hide file tree
Showing 5 changed files with 289 additions and 29 deletions.
50 changes: 35 additions & 15 deletions .github/scripts/generate_binary_build_matrix.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,9 @@
CPU_S390X_ARCH = ["cpu-s390x"]


CUDA_AARCH64_ARCH = ["cuda-aarch64"]


PYTORCH_EXTRA_INSTALL_REQUIREMENTS = {
"11.8": (
"nvidia-cuda-nvrtc-cu11==11.8.89; platform_system == 'Linux' and platform_machine == 'x86_64' | " # noqa: B950
Expand Down Expand Up @@ -135,6 +138,8 @@ def arch_type(arch_version: str) -> str:
return "cpu-aarch64"
elif arch_version in CPU_S390X_ARCH:
return "cpu-s390x"
elif arch_version in CUDA_AARCH64_ARCH:
return "cuda-aarch64"
else: # arch_version should always be "cpu" in this case
return "cpu"

Expand All @@ -155,6 +160,7 @@ def arch_type(arch_version: str) -> str:
"cpu-cxx11-abi": f"pytorch/manylinuxcxx11-abi-builder:cpu-cxx11-abi-{DEFAULT_TAG}",
"cpu-aarch64": f"pytorch/manylinuxaarch64-builder:cpu-aarch64-{DEFAULT_TAG}",
"cpu-s390x": f"pytorch/manylinuxs390x-builder:cpu-s390x-{DEFAULT_TAG}",
"cuda-aarch64": f"pytorch/manylinuxaarch64-builder:cuda12.4-{DEFAULT_TAG}",
}

CONDA_CONTAINER_IMAGES = {
Expand Down Expand Up @@ -213,6 +219,7 @@ def translate_desired_cuda(gpu_arch_type: str, gpu_arch_version: str) -> str:
"cpu-cxx11-abi": "cpu-cxx11-abi",
"cpu-s390x": "cpu",
"cuda": f"cu{gpu_arch_version.replace('.', '')}",
"cuda-aarch64": "cu124",
"rocm": f"rocm{gpu_arch_version}",
}.get(gpu_arch_type, gpu_arch_version)

Expand Down Expand Up @@ -293,11 +300,11 @@ def generate_libtorch_matrix(
"libtorch_variant": libtorch_variant,
"libtorch_config": abi_version if os == "windows" else "",
"devtoolset": abi_version if os != "windows" else "",
"container_image": LIBTORCH_CONTAINER_IMAGES[
(arch_version, abi_version)
]
if os != "windows"
else "",
"container_image": (
LIBTORCH_CONTAINER_IMAGES[(arch_version, abi_version)]
if os != "windows"
else ""
),
"package_type": "libtorch",
"build_name": f"libtorch-{gpu_arch_type}{gpu_arch_version}-{libtorch_variant}-{abi_version}".replace(
".", "_"
Expand Down Expand Up @@ -330,7 +337,7 @@ def generate_wheels_matrix(
elif os == "linux-aarch64":
# Only want the one arch as the CPU type is different and
# uses different build/test scripts
arches = ["cpu-aarch64"]
arches = ["cpu-aarch64", "cuda-aarch64"]
elif os == "linux-s390x":
# Only want the one arch as the CPU type is different and
# uses different build/test scripts
Expand All @@ -346,11 +353,16 @@ def generate_wheels_matrix(
or arch_version == "cpu-cxx11-abi"
or arch_version == "cpu-aarch64"
or arch_version == "cpu-s390x"
or arch_version == "cuda-aarch64"
else arch_version
)

# 12.1 linux wheels require PYTORCH_EXTRA_INSTALL_REQUIREMENTS to install
if arch_version in ["12.4", "12.1", "11.8"] and os == "linux":
if (
arch_version in ["12.4", "12.1", "11.8"]
and os == "linux"
or arch_version == "cuda-aarch64"
):
ret.append(
{
"python_version": python_version,
Expand All @@ -359,10 +371,16 @@ def generate_wheels_matrix(
"desired_cuda": translate_desired_cuda(
gpu_arch_type, gpu_arch_version
),
"devtoolset": "",
"devtoolset": (
"cxx11-abi" if arch_version == "cuda-aarch64" else ""
),
"container_image": WHEEL_CONTAINER_IMAGES[arch_version],
"package_type": package_type,
"pytorch_extra_install_requirements": PYTORCH_EXTRA_INSTALL_REQUIREMENTS[arch_version], # fmt: skip
"pytorch_extra_install_requirements": (
PYTORCH_EXTRA_INSTALL_REQUIREMENTS[arch_version] # fmt: skip
if os != "linux-aarch64"
else ""
),
"build_name": f"{package_type}-py{python_version}-{gpu_arch_type}{gpu_arch_version}".replace( # noqa: B950
".", "_"
),
Expand All @@ -377,17 +395,19 @@ def generate_wheels_matrix(
"desired_cuda": translate_desired_cuda(
gpu_arch_type, gpu_arch_version
),
"devtoolset": "cxx11-abi"
if arch_version == "cpu-cxx11-abi"
else "",
"devtoolset": (
"cxx11-abi" if arch_version == "cpu-cxx11-abi" else ""
),
"container_image": WHEEL_CONTAINER_IMAGES[arch_version],
"package_type": package_type,
"build_name": f"{package_type}-py{python_version}-{gpu_arch_type}{gpu_arch_version}".replace(
".", "_"
),
"pytorch_extra_install_requirements":
PYTORCH_EXTRA_INSTALL_REQUIREMENTS["12.1"] # fmt: skip
if os != "linux" else "",
"pytorch_extra_install_requirements": (
PYTORCH_EXTRA_INSTALL_REQUIREMENTS["12.1"] # fmt: skip
if os != "linux"
else ""
),
}
)
return ret
Expand Down
14 changes: 10 additions & 4 deletions .github/templates/linux_binary_build_workflow.yml.j2
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ jobs:
uses: ./.github/workflows/_binary-build-linux.yml
with:!{{ upload.binary_env_as_input(config) }}
{%- if "aarch64" in build_environment %}
runs_on: linux.arm64.2xlarge
runs_on: linux.arm64.m7g.4xlarge
ALPINE_IMAGE: "arm64v8/alpine"
{%- elif "s390x" in build_environment %}
runs_on: linux.s390x
Expand All @@ -71,12 +71,17 @@ jobs:
{%- if config.pytorch_extra_install_requirements is defined and config.pytorch_extra_install_requirements|d('')|length > 0 %}
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: !{{ config.pytorch_extra_install_requirements }}
{%- endif %}
{%- if config["gpu_arch_type"] == "cuda-aarch64" %}
timeout-minutes: 420
{%- endif %}
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}

{%- if config["gpu_arch_type"] != "cuda-aarch64" %}
!{{ config["build_name"] }}-test: # Testing
if: ${{ github.repository_owner == 'pytorch' }}
needs: !{{ config["build_name"] }}-build
{%- if config["gpu_arch_type"] != "rocm" %}
{%- if config["gpu_arch_type"] != "rocm" %}
uses: ./.github/workflows/_binary-test-linux.yml
with:!{{ upload.binary_env_as_input(config) }}
build_name: !{{ config["build_name"] }}
Expand All @@ -96,7 +101,7 @@ jobs:
{%- endif %}
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
{%- else %}
{%- else %}
runs-on: linux.rocm.gpu
timeout-minutes: !{{ common.timeout_minutes }}
!{{ upload.binary_env(config) }}
Expand All @@ -121,7 +126,8 @@ jobs:
uses: ./pytorch/.github/actions/test-pytorch-binary
- name: Teardown ROCm
uses: ./.github/actions/teardown-rocm
{%- endif %}
{%- endif %}
{%- endif %}

{%- if branches == "nightly" %}
!{{ upload.upload_binaries(config) }}
Expand Down
4 changes: 4 additions & 0 deletions .github/templates/upload.yml.j2
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,11 @@
id-token: write
contents: read
{%- if has_test %}
{%- if config["gpu_arch_type"] == "cuda-aarch64" %}
needs: !{{ config["build_name"] }}-build
{%- else %}
needs: !{{ config["build_name"] }}-test
{%- endif %}
{%- else %}
needs: !{{ config["build_name"] }}-build
{%- endif %}
Expand Down
15 changes: 10 additions & 5 deletions .github/workflows/_binary-build-linux.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,15 @@ on:
type: string
description: The build environment
runs_on:
required: false
default: linux.12xlarge
type: string
description: Hardware to run this "build"job on, linux.12xlarge or linux.arm64.2xlarge.
required: false
default: linux.12xlarge
type: string
description: Hardware to run this "build"job on, linux.12xlarge or linux.arm64.2xlarge.
timeout-minutes:
required: false
default: 210
type: number
description: timeout for the job
ALPINE_IMAGE:
required: false
type: string
Expand Down Expand Up @@ -78,7 +83,7 @@ on:
jobs:
build:
runs-on: ${{ inputs.runs_on }}
timeout-minutes: 210
timeout-minutes: ${{ inputs.timeout-minutes }}
env:
PYTORCH_ROOT: ${{ inputs.PYTORCH_ROOT }}
BUILDER_ROOT: ${{ inputs.BUILDER_ROOT }}
Expand Down
Loading

0 comments on commit 1c2e221

Please sign in to comment.