Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 8 additions & 12 deletions .github/scripts/filter-matrix.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
jetpack_cuda_versions: List[str] = ["cu126"]

jetpack_container_image: str = "nvcr.io/nvidia/l4t-jetpack:r36.4.0"
sbsa_container_image: str = "quay.io/pypa/manylinux_2_34_aarch64"
sbsa_container_image: str = "quay.io/pypa/manylinux_2_39_aarch64"


def validate_matrix(matrix_dict: Dict[str, Any]) -> None:
Expand All @@ -41,19 +41,15 @@ def filter_matrix_item(
# Skipping disabled CUDA version
return False
if is_jetpack:
if limit_pr_builds:
# pr build,matrix passed from test-infra is cu128, python 3.9, change to cu126, python 3.10
item["desired_cuda"] = "cu126"
item["python_version"] = "3.10"
# PR build: the matrix passed from test-infra is cu126, cu128 and cu130 with python 3.10; filter to cu126, python 3.10
# nightly/main build: the matrix passed from test-infra is cu126, cu128 and cu130 with all python versions; filter to cu126, python 3.10
if (
item["python_version"] in jetpack_python_versions
and item["desired_cuda"] in jetpack_cuda_versions
):
item["container_image"] = jetpack_container_image
return True
else:
# nightly/main build, matrix passed from test-infra is cu128, all python versions, change to cu126, python 3.10
if item["python_version"] in jetpack_python_versions:
item["desired_cuda"] = "cu126"
item["container_image"] = jetpack_container_image
return True
return False
return False
else:
if item["gpu_arch_type"] == "cuda-aarch64":
# pytorch image:pytorch/manylinuxaarch64-builder:cuda12.8 comes with glibc2.28
Expand Down
41 changes: 30 additions & 11 deletions .github/scripts/install-cuda-aarch64.sh
Original file line number Diff line number Diff line change
Expand Up @@ -8,23 +8,42 @@ install_cuda_aarch64() {
# CUDA_MAJOR_VERSION: cu128 --> 12
CUDA_MAJOR_VERSION=${CU_VERSION:2:2}
dnf config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel8/sbsa/cuda-rhel8.repo
# nccl version must match libtorch_cuda.so was built with https://github.com/pytorch/pytorch/blob/main/.ci/docker/ci_commit_pins/nccl-cu12.txt
dnf -y install cuda-compiler-${CU_VER}.aarch64 \

# nccl version must match the one libtorch_cuda.so was built with
if [[ ${CU_VERSION:0:4} == "cu12" ]]; then
# cu12: https://github.com/pytorch/pytorch/blob/main/.ci/docker/ci_commit_pins/nccl-cu12.txt
if [[ ${CU_VERSION} == "cu128" ]]; then
nccl_version="2.26.2-1"
elif [[ ${CU_VERSION} == "cu126" ]]; then
nccl_version="2.24.3-1"
else
# cu129 support was removed from pytorch upstream
echo "Unsupported CUDA version: ${CU_VERSION}"
exit 1
fi
elif [[ ${CU_VERSION:0:4} == "cu13" ]]; then
# cu13: https://github.com/pytorch/pytorch/blob/main/.ci/docker/ci_commit_pins/nccl-cu13.txt
nccl_version="2.27.7-1"
fi

dnf --nogpgcheck -y install cuda-compiler-${CU_VER}.aarch64 \
cuda-libraries-${CU_VER}.aarch64 \
cuda-libraries-devel-${CU_VER}.aarch64 \
libnccl-2.27.3-1+cuda${CU_DOT_VER} libnccl-devel-2.27.3-1+cuda${CU_DOT_VER} libnccl-static-2.27.3-1+cuda${CU_DOT_VER}
libnccl-${nccl_version}+cuda${CU_DOT_VER} libnccl-devel-${nccl_version}+cuda${CU_DOT_VER} libnccl-static-${nccl_version}+cuda${CU_DOT_VER}
dnf clean all

nvshmem_version=3.3.9
# nvshmem version is from https://github.com/pytorch/pytorch/blob/f9fa138a3910bd1de1e7acb95265fa040672a952/.ci/docker/common/install_cuda.sh#L67
nvshmem_version=3.3.24
nvshmem_path="https://developer.download.nvidia.com/compute/redist/nvshmem/${nvshmem_version}/builds/cuda${CUDA_MAJOR_VERSION}/txz/agnostic/aarch64"
nvshmem_filename="libnvshmem_cuda12-linux-sbsa-${nvshmem_version}.tar.gz"
curl -L ${nvshmem_path}/${nvshmem_filename} -o nvshmem.tar.gz
tar -xzf nvshmem.tar.gz
cp -a libnvshmem/lib/* /usr/local/cuda/lib64/
cp -a libnvshmem/include/* /usr/local/cuda/include/
rm -rf nvshmem.tar.gz nvshmem
nvshmem_prefix="libnvshmem-linux-sbsa-${nvshmem_version}_cuda${CUDA_MAJOR_VERSION}-archive"
nvshmem_tarname="${nvshmem_prefix}.tar.xz"
curl -L ${nvshmem_path}/${nvshmem_tarname} -o nvshmem.tar.xz
tar -xJf nvshmem.tar.xz
cp -a ${nvshmem_prefix}/lib/* /usr/local/cuda/lib64/
cp -a ${nvshmem_prefix}/include/* /usr/local/cuda/include/
rm -rf nvshmem.tar.xz ${nvshmem_prefix}
echo "nvshmem ${nvshmem_version} for cuda ${CUDA_MAJOR_VERSION} installed successfully"

export PATH=/usr/local/cuda/bin:$PATH
export LD_LIBRARY_PATH=/usr/local/cuda/lib64:/usr/local/cuda/include:/usr/lib64:$LD_LIBRARY_PATH
ls -lart /usr/local/
nvcc --version
Expand Down
12 changes: 12 additions & 0 deletions .github/scripts/install-cuda-dss.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# For now cuda_dss only needs to be installed for the JetPack (Jetson) build.
install_cuda_dss_aarch64() {
    echo "install cuda_dss for ${CU_VERSION}"
    # Only the cuda-12 sbsa redist archive is published/needed here.
    arch_path='sbsa'
    # version is from https://github.com/pytorch/pytorch/blob/22c5e8c17c7551c9dd2855589ae774c1e147343a/.ci/docker/common/install_cudss.sh
    CUDSS_NAME="libcudss-linux-${arch_path}-0.3.0.9_cuda12-archive"
    curl --retry 3 -OLs https://developer.download.nvidia.com/compute/cudss/redist/libcudss/linux-${arch_path}/${CUDSS_NAME}.tar.xz
    tar xf ${CUDSS_NAME}.tar.xz
    cp -a ${CUDSS_NAME}/include/* /usr/local/cuda/include/
    cp -a ${CUDSS_NAME}/lib/* /usr/local/cuda/lib64/
    # Clean up the downloaded archive and extracted tree so it does not leak
    # into the build context (same pattern as install-cuda-aarch64.sh).
    rm -rf ${CUDSS_NAME}.tar.xz ${CUDSS_NAME}
}
4 changes: 2 additions & 2 deletions .github/workflows/build_linux.yml
Original file line number Diff line number Diff line change
Expand Up @@ -348,8 +348,8 @@ jobs:
source "${BUILD_ENV_FILE}"
WHEEL_NAME=$(ls "${{ inputs.repository }}/dist/")
echo "$WHEEL_NAME"
if [[ ${{ inputs.is-jetpack }} == true ]]; then
echo "Skipping smoke test for jetpack, since it is not the actual jetpack environment"
if [[ ${{ inputs.architecture }} == "aarch64" ]]; then
echo "Skipping smoke test for aarch64, since it is not an actual gpu runner"
else
${CONDA_RUN} pip install "${{ inputs.repository }}/dist/$WHEEL_NAME"
# Checking that we have a pinned version of torch in our dependency tree
Expand Down
2 changes: 1 addition & 1 deletion MODULE.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ new_local_repository(
new_local_repository(
name = "cuda_win",
build_file = "@//third_party/cuda:BUILD",
path = "C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v12.9/",
path = "C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v13.0/",
)

http_archive = use_repo_rule("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive")
Expand Down
6 changes: 5 additions & 1 deletion packaging/pre_build_script.sh
Original file line number Diff line number Diff line change
Expand Up @@ -12,13 +12,15 @@ if [[ $(uname -m) == "aarch64" ]]; then
if [[ ${os_name} == "ubuntu" ]]; then
IS_JETPACK=true
apt-get update
apt-get install -y ninja-build gettext curl libopenblas-dev zip unzip
apt-get install -y ninja-build gettext curl libopenblas-dev zip unzip libfmt-dev
else
IS_SBSA=true
yum install -y ninja-build gettext zip unzip
yum install -y fmt-devel
fi
else
BAZEL_PLATFORM="amd64"
yum install -y fmt-devel
fi


Expand All @@ -43,6 +45,8 @@ pip uninstall -y torch torchvision

if [[ ${IS_JETPACK} == true ]]; then
# install torch 2.8 for jp6.2
source .github/scripts/install-cuda-dss.sh
install_cuda_dss_aarch64
pip install torch==2.8.0 --index-url=https://pypi.jetson-ai-lab.io/jp6/cu126/
else
TORCH=$(grep "^torch>" py/requirements.txt)
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ def load_dep_info():

dir_path = os.path.join(str(get_root_dir()), "py")

IS_AARCH64 = platform.uname().processor == "aarch64"
IS_AARCH64 = platform.machine() == "aarch64"
IS_JETPACK = False

PY_ONLY = False
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -242,17 +242,17 @@ def test_hierarchical_adjacency_partition_with_two_backends_with_torch_executed_
)
from torch_tensorrt.dynamo.lowering import (
get_decompositions,
post_lowering,
pre_export_lowering,
)

model = self.SimpleModel().cuda().eval()
example_input = torch.randn(1, 3, 224, 224).cuda()

exported_program = torch.export.export(model, (example_input,))
exported_program = pre_export_lowering(exported_program)
exported_program = exported_program.run_decompositions(get_decompositions())
gm = exported_program.module()

gm = post_lowering(gm)
partitioned_graph, _ = partitioning.hierarchical_adjacency_partition(
gm,
min_block_size=1,
Expand Down
2 changes: 1 addition & 1 deletion toolchains/ci_workspaces/MODULE.bazel.tmpl
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ new_local_repository(
new_local_repository(
name = "cuda_l4t",
build_file = "@//third_party/cuda:BUILD",
path = "/usr/local/cuda-12.9",
path = "/usr/local/cuda-12.6",
)

new_local_repository(
Expand Down
Loading