Skip to content

Commit

Permalink
Update
Browse files Browse the repository at this point in the history
[ghstack-poisoned]
  • Loading branch information
williamwen42 committed May 13, 2024
2 parents 440e4d4 + 69004e6 commit db38b81
Show file tree
Hide file tree
Showing 321 changed files with 10,622 additions and 4,414 deletions.
2 changes: 1 addition & 1 deletion .ci/docker/centos-rocm/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ RUN if [ -n "${DB}" ]; then bash ./install_db.sh; fi
RUN rm install_db.sh
ENV INSTALLED_DB ${DB}

# (optional) Install vision packages like OpenCV and ffmpeg
# (optional) Install vision packages like OpenCV
ARG VISION
COPY ./common/install_vision.sh ./common/cache_vision_models.sh ./common/common_utils.sh ./
RUN if [ -n "${VISION}" ]; then bash ./install_vision.sh; fi
Expand Down
112 changes: 32 additions & 80 deletions .ci/docker/common/install_rocm.sh
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,6 @@ ver() {
printf "%3d%03d%03d%03d" $(echo "$1" | tr '.' ' ');
}

# Map ROCm version to AMDGPU version
declare -A AMDGPU_VERSIONS=( ["5.0"]="21.50" ["5.1.1"]="22.10.1" ["5.2"]="22.20" )

install_ubuntu() {
apt-get update
if [[ $UBUNTU_VERSION == 18.04 ]]; then
Expand All @@ -26,31 +23,14 @@ install_ubuntu() {
apt-get install -y libc++1
apt-get install -y libc++abi1

if [[ $(ver $ROCM_VERSION) -ge $(ver 4.5) ]]; then
# Add amdgpu repository
UBUNTU_VERSION_NAME=`cat /etc/os-release | grep UBUNTU_CODENAME | awk -F= '{print $2}'`
local amdgpu_baseurl
if [[ $(ver $ROCM_VERSION) -ge $(ver 5.3) ]]; then
amdgpu_baseurl="https://repo.radeon.com/amdgpu/${ROCM_VERSION}/ubuntu"
else
amdgpu_baseurl="https://repo.radeon.com/amdgpu/${AMDGPU_VERSIONS[$ROCM_VERSION]}/ubuntu"
fi
echo "deb [arch=amd64] ${amdgpu_baseurl} ${UBUNTU_VERSION_NAME} main" > /etc/apt/sources.list.d/amdgpu.list
fi

ROCM_REPO="ubuntu"
if [[ $(ver $ROCM_VERSION) -lt $(ver 4.2) ]]; then
ROCM_REPO="xenial"
fi

if [[ $(ver $ROCM_VERSION) -ge $(ver 5.3) ]]; then
ROCM_REPO="${UBUNTU_VERSION_NAME}"
fi
# Add amdgpu repository
UBUNTU_VERSION_NAME=`cat /etc/os-release | grep UBUNTU_CODENAME | awk -F= '{print $2}'`
echo "deb [arch=amd64] https://repo.radeon.com/amdgpu/${ROCM_VERSION}/ubuntu ${UBUNTU_VERSION_NAME} main" > /etc/apt/sources.list.d/amdgpu.list

# Add rocm repository
wget -qO - http://repo.radeon.com/rocm/rocm.gpg.key | apt-key add -
local rocm_baseurl="http://repo.radeon.com/rocm/apt/${ROCM_VERSION}"
echo "deb [arch=amd64] ${rocm_baseurl} ${ROCM_REPO} main" > /etc/apt/sources.list.d/rocm.list
echo "deb [arch=amd64] ${rocm_baseurl} ${UBUNTU_VERSION_NAME} main" > /etc/apt/sources.list.d/rocm.list
apt-get update --allow-insecure-repositories

DEBIAN_FRONTEND=noninteractive apt-get install -y --allow-unauthenticated \
Expand All @@ -68,29 +48,18 @@ install_ubuntu() {
# precompiled miopen kernels added in ROCm 3.5, renamed in ROCm 5.5
# search for all unversioned packages
# a failed search would abort this script; append `|| true` so an empty result reaches the explicit check below
if [[ $(ver $ROCM_VERSION) -ge $(ver 5.5) ]]; then
MIOPENHIPGFX=$(apt-cache search --names-only miopen-hip-gfx | awk '{print $1}' | grep -F -v . || true)
if [[ "x${MIOPENHIPGFX}" = x ]]; then
echo "miopen-hip-gfx package not available" && exit 1
else
DEBIAN_FRONTEND=noninteractive apt-get install -y --allow-unauthenticated ${MIOPENHIPGFX}
fi
MIOPENHIPGFX=$(apt-cache search --names-only miopen-hip-gfx | awk '{print $1}' | grep -F -v . || true)
if [[ "x${MIOPENHIPGFX}" = x ]]; then
echo "miopen-hip-gfx package not available" && exit 1
else
MIOPENKERNELS=$(apt-cache search --names-only miopenkernels | awk '{print $1}' | grep -F -v . || true)
if [[ "x${MIOPENKERNELS}" = x ]]; then
echo "miopenkernels package not available" && exit 1
else
DEBIAN_FRONTEND=noninteractive apt-get install -y --allow-unauthenticated ${MIOPENKERNELS}
fi
DEBIAN_FRONTEND=noninteractive apt-get install -y --allow-unauthenticated ${MIOPENHIPGFX}
fi

# ROCm 6.0 had a regression where journal_mode was enabled on the kdb files resulting in permission errors at runtime
if [[ $(ver $ROCM_VERSION) -ge $(ver 6.0) ]]; then
for kdb in /opt/rocm/share/miopen/db/*.kdb
do
sqlite3 $kdb "PRAGMA journal_mode=off; PRAGMA VACUUM;"
done
fi
for kdb in /opt/rocm/share/miopen/db/*.kdb
do
sqlite3 $kdb "PRAGMA journal_mode=off; PRAGMA VACUUM;"
done

# Cleanup
apt-get autoclean && apt-get clean
Expand All @@ -107,25 +76,19 @@ install_centos() {
yum install -y epel-release
yum install -y dkms kernel-headers-`uname -r` kernel-devel-`uname -r`

if [[ $(ver $ROCM_VERSION) -ge $(ver 4.5) ]]; then
# Add amdgpu repository
local amdgpu_baseurl
if [[ $OS_VERSION == 9 ]]; then
amdgpu_baseurl="https://repo.radeon.com/amdgpu/${AMDGPU_VERSIONS[$ROCM_VERSION]}/rhel/9.0/main/x86_64"
else
if [[ $(ver $ROCM_VERSION) -ge $(ver 5.3) ]]; then
amdgpu_baseurl="https://repo.radeon.com/amdgpu/${ROCM_VERSION}/rhel/7.9/main/x86_64"
else
amdgpu_baseurl="https://repo.radeon.com/amdgpu/${AMDGPU_VERSIONS[$ROCM_VERSION]}/rhel/7.9/main/x86_64"
fi
fi
echo "[AMDGPU]" > /etc/yum.repos.d/amdgpu.repo
echo "name=AMDGPU" >> /etc/yum.repos.d/amdgpu.repo
echo "baseurl=${amdgpu_baseurl}" >> /etc/yum.repos.d/amdgpu.repo
echo "enabled=1" >> /etc/yum.repos.d/amdgpu.repo
echo "gpgcheck=1" >> /etc/yum.repos.d/amdgpu.repo
echo "gpgkey=http://repo.radeon.com/rocm/rocm.gpg.key" >> /etc/yum.repos.d/amdgpu.repo
# Add amdgpu repository
local amdgpu_baseurl
if [[ $OS_VERSION == 9 ]]; then
amdgpu_baseurl="https://repo.radeon.com/amdgpu/${ROCM_VERSION}/rhel/9.0/main/x86_64"
else
amdgpu_baseurl="https://repo.radeon.com/amdgpu/${ROCM_VERSION}/rhel/7.9/main/x86_64"
fi
echo "[AMDGPU]" > /etc/yum.repos.d/amdgpu.repo
echo "name=AMDGPU" >> /etc/yum.repos.d/amdgpu.repo
echo "baseurl=${amdgpu_baseurl}" >> /etc/yum.repos.d/amdgpu.repo
echo "enabled=1" >> /etc/yum.repos.d/amdgpu.repo
echo "gpgcheck=1" >> /etc/yum.repos.d/amdgpu.repo
echo "gpgkey=http://repo.radeon.com/rocm/rocm.gpg.key" >> /etc/yum.repos.d/amdgpu.repo

local rocm_baseurl="http://repo.radeon.com/rocm/yum/${ROCM_VERSION}"
echo "[ROCm]" > /etc/yum.repos.d/rocm.repo
Expand All @@ -147,29 +110,18 @@ install_centos() {

# precompiled miopen kernels; search for all unversioned packages
# a failed search would abort this script; append `|| true` so an empty result reaches the explicit check below
if [[ $(ver $ROCM_VERSION) -ge $(ver 5.5) ]]; then
MIOPENHIPGFX=$(yum -q search miopen-hip-gfx | grep miopen-hip-gfx | awk '{print $1}'| grep -F kdb. || true)
if [[ "x${MIOPENHIPGFX}" = x ]]; then
echo "miopen-hip-gfx package not available" && exit 1
else
yum install -y ${MIOPENHIPGFX}
fi
MIOPENHIPGFX=$(yum -q search miopen-hip-gfx | grep miopen-hip-gfx | awk '{print $1}'| grep -F kdb. || true)
if [[ "x${MIOPENHIPGFX}" = x ]]; then
echo "miopen-hip-gfx package not available" && exit 1
else
MIOPENKERNELS=$(yum -q search miopenkernels | grep miopenkernels- | awk '{print $1}'| grep -F kdb. || true)
if [[ "x${MIOPENKERNELS}" = x ]]; then
echo "miopenkernels package not available" && exit 1
else
yum install -y ${MIOPENKERNELS}
fi
yum install -y ${MIOPENHIPGFX}
fi

# ROCm 6.0 had a regression where journal_mode was enabled on the kdb files resulting in permission errors at runtime
if [[ $(ver $ROCM_VERSION) -ge $(ver 6.0) ]]; then
for kdb in /opt/rocm/share/miopen/db/*.kdb
do
sqlite3 $kdb "PRAGMA journal_mode=off; PRAGMA VACUUM;"
done
fi
for kdb in /opt/rocm/share/miopen/db/*.kdb
do
sqlite3 $kdb "PRAGMA journal_mode=off; PRAGMA VACUUM;"
done

# Cleanup
yum clean all
Expand Down
6 changes: 2 additions & 4 deletions .ci/docker/common/install_vision.sh
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,7 @@ set -ex
install_ubuntu() {
apt-get update
apt-get install -y --no-install-recommends \
libopencv-dev \
libavcodec-dev
libopencv-dev

# Cleanup
apt-get autoclean && apt-get clean
Expand All @@ -19,8 +18,7 @@ install_centos() {
yum --enablerepo=extras install -y epel-release

yum install -y \
opencv-devel \
ffmpeg-devel
opencv-devel

# Cleanup
yum clean all
Expand Down
2 changes: 2 additions & 0 deletions .ci/docker/requirements-ci.txt
Original file line number Diff line number Diff line change
Expand Up @@ -310,3 +310,5 @@ lxml==5.0.0.
#Description: This is a requirement of unittest-xml-reporting

# Python-3.9 binaries

PyGithub==2.3.0
2 changes: 1 addition & 1 deletion .ci/docker/ubuntu-cuda/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ RUN if [ -n "${DB}" ]; then bash ./install_db.sh; fi
RUN rm install_db.sh
ENV INSTALLED_DB ${DB}

# (optional) Install vision packages like OpenCV and ffmpeg
# (optional) Install vision packages like OpenCV
ARG VISION
COPY ./common/install_vision.sh ./common/cache_vision_models.sh ./common/common_utils.sh ./
RUN if [ -n "${VISION}" ]; then bash ./install_vision.sh; fi
Expand Down
2 changes: 1 addition & 1 deletion .ci/docker/ubuntu-rocm/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ RUN if [ -n "${DB}" ]; then bash ./install_db.sh; fi
RUN rm install_db.sh
ENV INSTALLED_DB ${DB}

# (optional) Install vision packages like OpenCV and ffmpeg
# (optional) Install vision packages like OpenCV
ARG VISION
COPY ./common/install_vision.sh ./common/cache_vision_models.sh ./common/common_utils.sh ./
RUN if [ -n "${VISION}" ]; then bash ./install_vision.sh; fi
Expand Down
2 changes: 1 addition & 1 deletion .ci/docker/ubuntu-xpu/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ RUN if [ -n "${DB}" ]; then bash ./install_db.sh; fi
RUN rm install_db.sh
ENV INSTALLED_DB ${DB}

# (optional) Install vision packages like OpenCV and ffmpeg
# (optional) Install vision packages like OpenCV
ARG VISION
COPY ./common/install_vision.sh ./common/cache_vision_models.sh ./common/common_utils.sh ./
RUN if [ -n "${VISION}" ]; then bash ./install_vision.sh; fi
Expand Down
2 changes: 1 addition & 1 deletion .ci/docker/ubuntu/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ RUN if [ -n "${DB}" ]; then bash ./install_db.sh; fi
RUN rm install_db.sh
ENV INSTALLED_DB ${DB}

# (optional) Install vision packages like OpenCV and ffmpeg
# (optional) Install vision packages like OpenCV
ARG VISION
COPY ./common/install_vision.sh ./common/cache_vision_models.sh ./common/common_utils.sh ./
RUN if [ -n "${VISION}" ]; then bash ./install_vision.sh; fi
Expand Down
4 changes: 2 additions & 2 deletions .ci/pytorch/test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -523,8 +523,8 @@ test_single_dynamo_benchmark() {
}

test_inductor_micro_benchmark() {
TEST_REPORTS_DIR=$(pwd)/test/test-micro-reports
python benchmarks/gpt_fast/benchmark.py
TEST_REPORTS_DIR=$(pwd)/test/test-reports
python benchmarks/gpt_fast/benchmark.py --output "${TEST_REPORTS_DIR}/gpt_fast_benchmark.csv"
}

test_dynamo_benchmark() {
Expand Down
9 changes: 7 additions & 2 deletions .circleci/scripts/binary_linux_test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -96,8 +96,13 @@ if [[ "$PACKAGE_TYPE" == conda ]]; then
conda install \${EXTRA_CONDA_FLAGS} -y "\$pkg" --offline
)
elif [[ "$PACKAGE_TYPE" != libtorch ]]; then
pip install "\$pkg" --index-url "https://download.pytorch.org/whl/\${CHANNEL}/${DESIRED_CUDA}"
retry pip install -q numpy protobuf typing-extensions
if [[ "\$BUILD_ENVIRONMENT" != *s390x* ]]; then
pip install "\$pkg" --index-url "https://download.pytorch.org/whl/\${CHANNEL}/${DESIRED_CUDA}"
retry pip install -q numpy protobuf typing-extensions
else
pip install "\$pkg"
retry pip install -q numpy protobuf typing-extensions
fi
fi
if [[ "$PACKAGE_TYPE" == libtorch ]]; then
pkg="\$(ls /final_pkgs/*-latest.zip)"
Expand Down
1 change: 0 additions & 1 deletion .clang-tidy
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,5 @@ readability-simplify-subscript-expr,
readability-string-compare,
'
HeaderFilterRegex: '^(aten/|c10/|torch/).*$'
AnalyzeTemporaryDtors: false
WarningsAsErrors: '*'
...
1 change: 1 addition & 0 deletions .github/actionlint.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ self-hosted-runner:
- linux.8xlarge.nvidia.gpu
- linux.16xlarge.nvidia.gpu
- linux.g5.4xlarge.nvidia.gpu
- linux.s390x
- windows.4xlarge.nonephemeral
- windows.8xlarge.nvidia.gpu
- windows.8xlarge.nvidia.gpu.nonephemeral
Expand Down
11 changes: 10 additions & 1 deletion .github/actions/test-pytorch-binary/action.yml
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ runs:
"${DOCKER_IMAGE}"
)
if [[ "${GPU_ARCH_TYPE}" != "rocm" && "${BUILD_ENVIRONMENT}" != "linux-aarch64-binary-manywheel" ]]; then
if [[ "${GPU_ARCH_TYPE}" != "rocm" && "${BUILD_ENVIRONMENT}" != "linux-aarch64-binary-manywheel" && "${BUILD_ENVIRONMENT}" != "linux-s390x-binary-manywheel" ]]; then
# Propagate download.pytorch.org IP to container. This is only needed on Linux runners other than ROCm, aarch64, and s390x
grep download.pytorch.org /etc/hosts | docker exec -i "${container_name}" bash -c "/bin/cat >> /etc/hosts"
fi
Expand All @@ -44,3 +44,12 @@ runs:
# Generate test script
docker exec -t -w "${PYTORCH_ROOT}" -e OUTPUT_SCRIPT="/run.sh" "${container_name}" bash -c "bash .circleci/scripts/binary_linux_test.sh"
docker exec -t "${container_name}" bash -c "source ${BINARY_ENV_FILE} && bash -x /run.sh"
- name: Cleanup docker
if: always() && env.BUILD_ENVIRONMENT == 'linux-s390x-binary-manywheel'
shell: bash
run: |
# On s390x, stop the running containers so the worker can shut down cleanly
# leave "docker ps -q" unquoted on purpose: its output may be empty
# shellcheck disable=SC2046
docker stop $(docker ps -q) || true
2 changes: 1 addition & 1 deletion .github/ci_commit_pins/xla.txt
Original file line number Diff line number Diff line change
@@ -1 +1 @@
e3fc03314dab5f44e3ed9ccbba6c15fbca3285cd
6f0b61e5d782913a0fc7743812f2a8e522189111
16 changes: 14 additions & 2 deletions .github/scripts/generate_binary_build_matrix.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,9 @@
CPU_AARCH64_ARCH = ["cpu-aarch64"]


CPU_S390X_ARCH = ["cpu-s390x"]


PYTORCH_EXTRA_INSTALL_REQUIREMENTS = {
"11.8": (
"nvidia-cuda-nvrtc-cu11==11.8.89; platform_system == 'Linux' and platform_machine == 'x86_64' | " # noqa: B950
Expand Down Expand Up @@ -130,6 +133,8 @@ def arch_type(arch_version: str) -> str:
return "cpu-cxx11-abi"
elif arch_version in CPU_AARCH64_ARCH:
return "cpu-aarch64"
elif arch_version in CPU_S390X_ARCH:
return "cpu-s390x"
else: # arch_version should always be "cpu" in this case
return "cpu"

Expand All @@ -149,6 +154,7 @@ def arch_type(arch_version: str) -> str:
"cpu": f"pytorch/manylinux-builder:cpu-{DEFAULT_TAG}",
"cpu-cxx11-abi": f"pytorch/manylinuxcxx11-abi-builder:cpu-cxx11-abi-{DEFAULT_TAG}",
"cpu-aarch64": f"pytorch/manylinuxaarch64-builder:cpu-aarch64-{DEFAULT_TAG}",
"cpu-s390x": f"pytorch/manylinuxs390x-builder:cpu-s390x-{DEFAULT_TAG}",
}

CONDA_CONTAINER_IMAGES = {
Expand Down Expand Up @@ -205,6 +211,7 @@ def translate_desired_cuda(gpu_arch_type: str, gpu_arch_version: str) -> str:
"cpu": "cpu",
"cpu-aarch64": "cpu",
"cpu-cxx11-abi": "cpu-cxx11-abi",
"cpu-s390x": "cpu",
"cuda": f"cu{gpu_arch_version.replace('.', '')}",
"rocm": f"rocm{gpu_arch_version}",
}.get(gpu_arch_type, gpu_arch_version)
Expand Down Expand Up @@ -306,8 +313,8 @@ def generate_wheels_matrix(
python_versions: Optional[List[str]] = None,
) -> List[Dict[str, str]]:
package_type = "wheel"
if os == "linux" or os == "linux-aarch64":
# NOTE: We only build manywheel packages for x86_64 and aarch64 linux
if os == "linux" or os == "linux-aarch64" or os == "linux-s390x":
# NOTE: We only build manywheel packages for x86_64, aarch64, and s390x linux
package_type = "manywheel"

if python_versions is None:
Expand All @@ -324,6 +331,10 @@ def generate_wheels_matrix(
# Only want the one arch as the CPU type is different and
# uses different build/test scripts
arches = ["cpu-aarch64"]
elif os == "linux-s390x":
# Only want the one arch as the CPU type is different and
# uses different build/test scripts
arches = ["cpu-s390x"]

ret: List[Dict[str, str]] = []
for python_version in python_versions:
Expand All @@ -334,6 +345,7 @@ def generate_wheels_matrix(
if arch_version == "cpu"
or arch_version == "cpu-cxx11-abi"
or arch_version == "cpu-aarch64"
or arch_version == "cpu-s390x"
else arch_version
)

Expand Down
Loading

0 comments on commit db38b81

Please sign in to comment.