[T155306323] [WIP] Add ARM Support in OSS CI #1813

Closed · wants to merge 1 commit
7 changes: 5 additions & 2 deletions .github/scripts/fbgemm_gpu_build.bash
@@ -294,14 +294,17 @@ build_fbgemm_gpu_package () {
echo "################################################################################"
echo ""

# manylinux1_x86_64 is specified for PyPI upload
# manylinux2014 is specified because manylinux1 does not support aarch64
# See https://github.com/pypa/manylinux
local plat_name="manylinux2014_${MACHINE_NAME}"

# Distribute Python extensions as wheels on Linux
echo "[BUILD] Building FBGEMM-GPU wheel (VARIANT=${fbgemm_variant}) ..."
print_exec conda run -n "${env_name}" \
python setup.py bdist_wheel \
--package_name="${package_name}" \
--python-tag="${python_tag}" \
--plat-name="manylinux1_${MACHINE_NAME}" \
--plat-name="${plat_name}" \
"${build_args[@]}"

# Run checks on the built libraries
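For context: MACHINE_NAME is assumed here to carry the output of uname -m (its assignment is outside this diff), so the single plat_name variable now yields a valid platform tag on both architectures. A minimal sketch of the resulting behavior, not part of this PR:

    # Sketch only; assumes MACHINE_NAME is derived from `uname -m`
    - name: Resolve Wheel Platform Tag
      shell: bash
      run: |
        MACHINE_NAME="$(uname -m)"
        plat_name="manylinux2014_${MACHINE_NAME}"
        # manylinux2014_x86_64 on x86 hosts; manylinux2014_aarch64 on ARM hosts
        echo "${plat_name}"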
1 change: 1 addition & 0 deletions .github/scripts/utils_system.bash
@@ -140,6 +140,7 @@ __print_system_info_linux () {
echo "################################################################################"
echo "[INFO] Print Linux distribution info ..."
print_exec uname -a
print_exec uname -m
print_exec cat /proc/version
print_exec cat /etc/os-release
}
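The added uname -m call makes the host architecture explicit in CI logs, which matters once the same workflow fans out across x86 and ARM runners. A sketch of what the extra line would report on the runner types used in this PR (step name hypothetical):

    - name: Print Machine Architecture
      run: uname -m
      # linux.4xlarge / linux.12xlarge (x86): x86_64
      # linux.t4g.2xlarge (ARM Graviton):     aarch64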
5 changes: 4 additions & 1 deletion .github/workflows/fbgemm_ci.yml
@@ -20,7 +20,7 @@ concurrency:

jobs:
build-linux:
runs-on: linux.12xlarge
runs-on: ${{ matrix.host-machine }}
container:
image: ${{ matrix.container-image }}
options: --user root
@@ -34,6 +34,9 @@
strategy:
fail-fast: false
matrix:
host-machine: [
"linux.12xlarge", # x86 machine
]
container-image: [ "ubuntu:20.04" ]
library-type: [ static, shared ]

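fbgemm_ci.yml gains only the x86 entry for now, but moving runs-on behind a matrix reference means an ARM runner can later be added by appending one list element. A hypothetical extension, not part of this PR, following the same pattern the GPU workflows below use:

    host-machine: [
      "linux.12xlarge",     # x86 machine
      "linux.t4g.2xlarge",  # ARM machine (hypothetical addition)
    ]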
20 changes: 15 additions & 5 deletions .github/workflows/fbgemm_gpu_ci.yml
@@ -29,7 +29,7 @@ concurrency:

jobs:
build_and_test_amd:
runs-on: linux.12xlarge
runs-on: ${{ matrix.host-machine.instance }}
container:
image: ${{ matrix.container-image }}
options: --user root
@@ -42,6 +42,9 @@
strategy:
fail-fast: false
matrix:
host-machine: [
{ arch: x86, instance: "linux.12xlarge" },
]
container-image: [ "ubuntu:20.04" ]
python-version: [ "3.8", "3.9", "3.10" ]
rocm-version: [ "5.3", "5.4.2" ]
@@ -94,7 +97,7 @@


test_amd_gpu:
runs-on: rocm
runs-on: ${{ matrix.host-machine.instance }}
container:
image: "rocm/dev-ubuntu-20.04:${{ matrix.rocm-version }}-complete"
options: --user root --device=/dev/kfd --device=/dev/dri --ipc=host --shm-size 16G --group-add video --cap-add=SYS_PTRACE --security-opt seccomp=unconfined
@@ -108,6 +111,9 @@
strategy:
fail-fast: false
matrix:
host-machine: [
{ arch: x86, instance: "rocm" },
]
# ROCm machines are limited, so we only test against Python 3.10
python-version: [ "3.10" ]
rocm-version: [ "5.3", "5.4.2" ]
@@ -157,7 +163,7 @@


build_and_test_cpu:
runs-on: linux.12xlarge
runs-on: ${{ matrix.host-machine.instance }}
container:
image: ${{ matrix.container-image }}
options: --user root
@@ -170,8 +176,12 @@
strategy:
fail-fast: false
matrix:
host-machine: [
{ arch: x86, instance: "linux.4xlarge" },
{ arch: arm, instance: "linux.t4g.2xlarge" },
]
container-image: [ "ubuntu:20.04", "ubuntu:22.04" ]
python-version: [ "3.8", "3.9", "3.10" ]
python-version: [ "3.8", "3.9", "3.10", "3.11" ]

steps:
- name: Setup Build Container
@@ -210,5 +220,5 @@
run: . $PRELUDE; cd fbgemm_gpu; build_fbgemm_gpu_install $BUILD_ENV cpu

- name: Test FBGEMM_GPU-CPU Nightly Installation
timeout-minutes: 10
timeout-minutes: 15
run: . $PRELUDE; cd fbgemm_gpu/test; run_fbgemm_gpu_tests $BUILD_ENV cpu
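Because each host-machine entry is an object carrying both arch and instance, later steps can key off matrix.host-machine.arch instead of parsing runner labels, and standard matrix rules can branch on architecture. For example, if a Python version were ever unavailable on ARM, it could be dropped with an exclude rule (hypothetical sketch, not in this PR):

    strategy:
      matrix:
        host-machine: [
          { arch: x86, instance: "linux.4xlarge" },
          { arch: arm, instance: "linux.t4g.2xlarge" },
        ]
        python-version: [ "3.8", "3.9", "3.10", "3.11" ]
        # Hypothetical: skip Python 3.8 on the ARM runner only
        exclude:
          - host-machine: { arch: arm, instance: "linux.t4g.2xlarge" }
            python-version: "3.8"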
22 changes: 15 additions & 7 deletions .github/workflows/fbgemm_gpu_cpu_nightly.yml
@@ -39,7 +39,7 @@ concurrency:
jobs:
# Build on CPU hosts, run tests, and upload to GHA
build_artifact:
runs-on: linux.4xlarge
runs-on: ${{ matrix.host-machine.instance }}
container:
image: amazonlinux:2023
options: --user root
@@ -54,6 +54,10 @@
# Don't fast-fail all the other builds if one of them fails
fail-fast: false
matrix:
host-machine: [
{ arch: x86, instance: "linux.4xlarge" },
{ arch: arm, instance: "linux.t4g.2xlarge" },
]
python-version: [ "3.8", "3.9", "3.10", "3.11" ]

steps:
@@ -84,7 +88,7 @@
run: . $PRELUDE; install_build_tools $BUILD_ENV

- name: Install PyTorch-CPU Nightly
run: . $PRELUDE; install_pytorch_conda $BUILD_ENV nightly cpu
run: . $PRELUDE; install_pytorch_pip $BUILD_ENV nightly cpu

- name: Prepare FBGEMM_GPU Build
run: . $PRELUDE; cd fbgemm_gpu; prepare_fbgemm_gpu_build $BUILD_ENV
@@ -95,13 +99,13 @@
- name: Upload Built Wheel as GHA Artifact
uses: actions/upload-artifact@v3
with:
name: fbgemm_gpu_nightly_cpu_${{ matrix.python-version }}.whl
name: fbgemm_gpu_nightly_cpu_${{ matrix.host-machine.arch }}_${{ matrix.python-version }}.whl
path: fbgemm_gpu/dist/fbgemm_gpu_nightly_cpu-*.whl


# Download the built artifact from GHA, test on GPU, and push to PyPI
test_and_publish_artifact:
runs-on: linux.4xlarge
runs-on: ${{ matrix.host-machine.instance }}
container:
image: amazonlinux:2023
options: --user root
@@ -114,6 +118,10 @@
strategy:
fail-fast: false
matrix:
host-machine: [
{ arch: x86, instance: "linux.4xlarge" },
{ arch: arm, instance: "linux.t4g.2xlarge" },
]
python-version: [ "3.8", "3.9", "3.10", "3.11" ]
needs: build_artifact

@@ -129,7 +137,7 @@
- name: Download Wheel Artifact from GHA
uses: actions/download-artifact@v3
with:
name: fbgemm_gpu_nightly_cpu_${{ matrix.python-version }}.whl
name: fbgemm_gpu_nightly_cpu_${{ matrix.host-machine.arch }}_${{ matrix.python-version }}.whl

- name: Display System Info
run: . $PRELUDE; print_system_info; print_ec2_info
@@ -143,8 +151,8 @@
- name: Create Conda Environment
run: . $PRELUDE; create_conda_environment $BUILD_ENV ${{ matrix.python-version }}

- name: Install PyTorch Nightly
run: . $PRELUDE; install_pytorch_conda $BUILD_ENV nightly cpu
- name: Install PyTorch-CPU Nightly
run: . $PRELUDE; install_pytorch_pip $BUILD_ENV nightly cpu

- name: Prepare FBGEMM_GPU Build
run: . $PRELUDE; cd fbgemm_gpu; prepare_fbgemm_gpu_build $BUILD_ENV
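Note the artifact renaming: upload-artifact and download-artifact are paired by exact name, and both jobs run the same two-architecture matrix, so embedding matrix.host-machine.arch keeps the x86 and ARM legs from overwriting each other's wheels under a single artifact name. The pairing, condensed (a sketch of the pattern already shown above):

    # build_artifact job
    - uses: actions/upload-artifact@v3
      with:
        name: fbgemm_gpu_nightly_cpu_${{ matrix.host-machine.arch }}_${{ matrix.python-version }}.whl
        path: fbgemm_gpu/dist/fbgemm_gpu_nightly_cpu-*.whl

    # test_and_publish_artifact job: same matrix, so the name resolves identically
    - uses: actions/download-artifact@v3
      with:
        name: fbgemm_gpu_nightly_cpu_${{ matrix.host-machine.arch }}_${{ matrix.python-version }}.whl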
20 changes: 14 additions & 6 deletions .github/workflows/fbgemm_gpu_cpu_release.yml
@@ -30,7 +30,7 @@ concurrency:
jobs:
# Build on CPU hosts, run tests, and upload to GHA
build_artifact:
runs-on: linux.4xlarge
runs-on: ${{ matrix.host-machine.instance }}
container:
image: amazonlinux:2023
options: --user root
@@ -45,6 +45,10 @@
# Don't fast-fail all the other builds if one of them fails
fail-fast: false
matrix:
host-machine: [
{ arch: x86, instance: "linux.4xlarge" },
{ arch: arm, instance: "linux.t4g.2xlarge" },
]
python-version: [ "3.8", "3.9", "3.10", "3.11" ]

steps:
@@ -75,7 +79,7 @@
run: . $PRELUDE; install_build_tools $BUILD_ENV

- name: Install PyTorch-CPU Test
run: . $PRELUDE; install_pytorch_conda $BUILD_ENV test cpu
run: . $PRELUDE; install_pytorch_pip $BUILD_ENV test cpu

- name: Prepare FBGEMM_GPU Build
run: . $PRELUDE; cd fbgemm_gpu; prepare_fbgemm_gpu_build $BUILD_ENV
@@ -86,13 +90,13 @@
- name: Upload Built Wheel as GHA Artifact
uses: actions/upload-artifact@v3
with:
name: fbgemm_gpu_cpu_${{ matrix.python-version }}.whl
name: fbgemm_gpu_release_cpu_${{ matrix.host-machine.arch }}_${{ matrix.python-version }}.whl
path: fbgemm_gpu/dist/fbgemm_gpu_cpu-*.whl


# Download the built artifact from GHA, test on GPU, and push to PyPI
test_and_publish_artifact:
runs-on: linux.4xlarge
runs-on: ${{ matrix.host-machine.instance }}
container:
image: amazonlinux:2023
options: --user root
@@ -105,6 +109,10 @@
strategy:
fail-fast: false
matrix:
host-machine: [
{ arch: x86, instance: "linux.4xlarge" },
{ arch: arm, instance: "linux.t4g.2xlarge" },
]
python-version: [ "3.8", "3.9", "3.10", "3.11" ]
needs: build_artifact

@@ -120,7 +128,7 @@
- name: Download Wheel Artifact from GHA
uses: actions/download-artifact@v3
with:
name: fbgemm_gpu_cpu_${{ matrix.python-version }}.whl
name: fbgemm_gpu_release_cpu_${{ matrix.host-machine.arch }}_${{ matrix.python-version }}.whl

- name: Display System Info
run: . $PRELUDE; print_system_info; print_ec2_info
@@ -135,7 +143,7 @@
run: . $PRELUDE; create_conda_environment $BUILD_ENV ${{ matrix.python-version }}

- name: Install PyTorch Test
run: . $PRELUDE; install_pytorch_conda $BUILD_ENV test cpu
run: . $PRELUDE; install_pytorch_pip $BUILD_ENV test cpu

- name: Prepare FBGEMM_GPU Build
run: . $PRELUDE; cd fbgemm_gpu; prepare_fbgemm_gpu_build $BUILD_ENV
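Both CPU workflows also swap install_pytorch_conda for install_pytorch_pip; the helper's internals are outside this diff, and a plausible motivation is that PyTorch's conda packages have historically lagged its pip wheels for aarch64. A hypothetical stand-in for what the helper runs in the nightly/cpu case:

    - name: Install PyTorch-CPU Nightly
      run: |
        # Hypothetical equivalent of `install_pytorch_pip $BUILD_ENV nightly cpu`;
        # the release workflow's `test cpu` variant would point at .../whl/test/cpu
        conda run -n "$BUILD_ENV" pip install --pre torch \
          --index-url https://download.pytorch.org/whl/nightly/cpu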
14 changes: 10 additions & 4 deletions .github/workflows/fbgemm_gpu_cuda_nightly.yml
@@ -38,7 +38,7 @@ concurrency:
jobs:
# Build on CPU hosts and upload to GHA
build_artifact:
runs-on: linux.24xlarge
runs-on: ${{ matrix.host-machine.instance }}
container:
image: amazonlinux:2023
options: --user root
@@ -53,6 +53,9 @@
# Don't fast-fail all the other builds if one of them fails
fail-fast: false
matrix:
host-machine: [
{ arch: x86, instance: "linux.24xlarge" },
]
python-version: [ "3.8", "3.9", "3.10", "3.11" ]
cuda-version: [ "11.7.1", "11.8.0" ]

@@ -102,15 +105,15 @@
- name: Upload Built Wheel as GHA Artifact
uses: actions/upload-artifact@v3
with:
name: fbgemm_gpu_nightly_${{ matrix.python-version }}_cuda${{ matrix.cuda-version }}.whl
name: fbgemm_gpu_nightly_cuda_${{ matrix.host-machine.arch }}_${{ matrix.python-version }}_cu${{ matrix.cuda-version }}.whl
path: fbgemm_gpu/dist/fbgemm_gpu_nightly-*.whl


# Download the built artifact from GHA, test on GPU, and push to PyPI
test_and_publish_artifact:
# runs-on: linux.4xlarge.nvidia.gpu
# Use available instance types - https://github.com/pytorch/test-infra/blob/main/.github/scale-config.yml
runs-on: linux.g5.4xlarge.nvidia.gpu
runs-on: ${{ matrix.host-machine.instance }}
defaults:
run:
shell: bash
@@ -121,6 +124,9 @@
strategy:
fail-fast: false
matrix:
host-machine: [
{ arch: x86, instance: "linux.g5.4xlarge.nvidia.gpu" },
]
python-version: [ "3.8", "3.9", "3.10", "3.11" ]
cuda-version: [ "11.7.1", "11.8.0" ]
# Specify exactly ONE CUDA version for artifact publish
@@ -136,7 +142,7 @@
- name: Download Wheel Artifact from GHA
uses: actions/download-artifact@v3
with:
name: fbgemm_gpu_nightly_${{ matrix.python-version }}_cuda${{ matrix.cuda-version }}.whl
name: fbgemm_gpu_nightly_cuda_${{ matrix.host-machine.arch }}_${{ matrix.python-version }}_cu${{ matrix.cuda-version }}.whl

# Use PyTorch test infrastructure action - https://github.com/pytorch/test-infra/blob/main/.github/actions/setup-nvidia/action.yml
- name: Install NVIDIA Drivers and NVIDIA-Docker Runtime
14 changes: 10 additions & 4 deletions .github/workflows/fbgemm_gpu_cuda_release.yml
@@ -30,7 +30,7 @@ concurrency:
jobs:
# Build on CPU hosts and upload to GHA
build_artifact:
runs-on: linux.24xlarge
runs-on: ${{ matrix.host-machine.instance }}
container:
image: amazonlinux:2023
options: --user root
@@ -45,6 +45,9 @@
# Don't fast-fail all the other builds if one of them fails
fail-fast: false
matrix:
host-machine: [
{ arch: x86, instance: "linux.24xlarge" },
]
python-version: [ "3.8", "3.9", "3.10", "3.11" ]
cuda-version: [ "11.7.1", "11.8.0" ]

@@ -93,13 +96,13 @@
- name: Upload Built Wheel as GHA Artifact
uses: actions/upload-artifact@v3
with:
name: fbgemm_gpu_${{ matrix.python-version }}_cuda${{ matrix.cuda-version }}.whl
name: fbgemm_gpu_release_cuda_${{ matrix.host-machine.arch }}_${{ matrix.python-version }}_cu${{ matrix.cuda-version }}.whl
path: fbgemm_gpu/dist/fbgemm_gpu-*.whl


# Download the built artifact from GHA, test on GPU, and push to PyPI
test_and_publish_artifact:
runs-on: linux.g5.4xlarge.nvidia.gpu
runs-on: ${{ matrix.host-machine.instance }}
defaults:
run:
shell: bash
@@ -110,6 +113,9 @@
strategy:
fail-fast: false
matrix:
host-machine: [
{ arch: x86, instance: "linux.g5.4xlarge.nvidia.gpu" },
]
python-version: [ "3.8", "3.9", "3.10", "3.11" ]
cuda-version: [ "11.7.1", "11.8.0" ]
# Specify exactly ONE CUDA version for artifact publish
@@ -125,7 +131,7 @@
- name: Download Wheel Artifact from GHA
uses: actions/download-artifact@v3
with:
name: fbgemm_gpu_${{ matrix.python-version }}_cuda${{ matrix.cuda-version }}.whl
name: fbgemm_gpu_release_cuda_${{ matrix.host-machine.arch }}_${{ matrix.python-version }}_cu${{ matrix.cuda-version }}.whl

- name: Install NVIDIA Drivers and NVIDIA-Docker Runtime
uses: pytorch/test-infra/.github/actions/setup-nvidia@main
2 changes: 1 addition & 1 deletion CMakeLists.txt
@@ -108,7 +108,7 @@ set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
# Add address sanitizer
set(USE_SANITIZER "" CACHE STRING "options include address, leak, ...")

# Check if compiler supports avx512
# Check if compiler supports AVX512
include(CheckCXXCompilerFlag)
if(MSVC)
CHECK_CXX_COMPILER_FLAG(/arch:AVX512 COMPILER_SUPPORTS_AVX512)
3 changes: 2 additions & 1 deletion fbgemm_gpu/src/jagged_tensor_ops_autograd.cpp
@@ -8,12 +8,13 @@

#include <ATen/ATen.h>
#include <ATen/AccumulateType.h>
#include <ATen/TensorUtils.h>
#include <ATen/core/dispatch/Dispatcher.h>
#include <c10/core/SymIntArrayRef.h>
#include <torch/csrc/autograd/custom_function.h>
#include <torch/library.h>
#include <torch/torch.h>

#include "ATen/TensorUtils.h"
#include "fbgemm_gpu/sparse_ops.h"
#include "fbgemm_gpu/sparse_ops_utils.h"
