Skip to content

Commit

Permalink
Update on "[inductor] add triton code to SchedulerNode.debug_str"
Browse files Browse the repository at this point in the history
Here is an example print: https://gist.github.com/shunting314/75c161368a833a535bd0d240b8099d7e 



cc ezyang msaroufim bdhirsh anijain2305 chauhang voznesenskym penguinwu EikanWang jgong5 Guobing-Chen XiaobingSuper zhuhaozhe blzheng wenzhe-nrv jiayisunx peterbell10 ipiszy yf225 chenyang78 kadeng muchulee8 ColinPeppler amjames desertfire

[ghstack-poisoned]
  • Loading branch information
shunting314 committed Apr 29, 2024
2 parents 9b13d45 + 31d3d64 commit 6e01b3c
Show file tree
Hide file tree
Showing 861 changed files with 23,933 additions and 46,543 deletions.
8 changes: 8 additions & 0 deletions .ci/docker/build.sh
Expand Up @@ -306,6 +306,12 @@ case "$image" in
DB=yes
VISION=yes
CONDA_CMAKE=yes
# snadampal: skipping sccache due to the following issue
# https://github.com/pytorch/pytorch/issues/121559
SKIP_SCCACHE_INSTALL=yes
# snadampal: skipping llvm src build install because the current version
# from pytorch/llvm:9.0.1 is x86 specific
SKIP_LLVM_SRC_BUILD_INSTALL=yes
;;
*)
# Catch-all for builds that are not hardcoded.
Expand Down Expand Up @@ -399,6 +405,8 @@ DOCKER_BUILDKIT=1 docker build \
--build-arg "EXECUTORCH=${EXECUTORCH}" \
--build-arg "BASEKIT_VERSION=${BASEKIT_VERSION}" \
--build-arg "ACL=${ACL:-}" \
--build-arg "SKIP_SCCACHE_INSTALL=${SKIP_SCCACHE_INSTALL:-}" \
--build-arg "SKIP_LLVM_SRC_BUILD_INSTALL=${SKIP_LLVM_SRC_BUILD_INSTALL:-}" \
-f $(dirname ${DOCKERFILE})/Dockerfile \
-t "$tmp_tag" \
"$@" \
Expand Down
2 changes: 1 addition & 1 deletion .ci/docker/ci_commit_pins/triton-rocm.txt
@@ -1 +1 @@
0a22a91d04c2b4a029a69a198eac390089c3e891
bbe6246e37d8aa791c67daaf9d9d61b26c9ccfdc
2 changes: 1 addition & 1 deletion .ci/docker/common/install_triton.sh
Expand Up @@ -13,7 +13,7 @@ conda_reinstall() {
}

if [ -n "${ROCM_VERSION}" ]; then
TRITON_REPO="https://github.com/ROCmSoftwarePlatform/triton"
TRITON_REPO="https://github.com/openai/triton"
TRITON_TEXT_FILE="triton-rocm"
elif [ -n "${BASEKIT_VERSION}" ]; then
TRITON_REPO="https://github.com/intel/intel-xpu-backend-for-triton"
Expand Down
15 changes: 7 additions & 8 deletions .ci/docker/requirements-ci.txt
Expand Up @@ -147,9 +147,9 @@ optree==0.11.0
#test_pointwise_ops.py, test_dtensor_ops.py, test_torchinductor.py, test_fx.py,
#test_fake_tensor.py, test_mps.py

pillow==10.2.0
pillow==10.3.0
#Description: Python Imaging Library fork
#Pinned versions: 10.2.0
#Pinned versions: 10.3.0
#test that import:

protobuf==3.20.2
Expand Down Expand Up @@ -228,13 +228,11 @@ scikit-image==0.20.0 ; python_version >= "3.10"
#Pinned versions: 0.20.3
#test that import:

scipy==1.6.3 ; python_version < "3.10"
scipy==1.8.1 ; python_version == "3.10"
scipy==1.10.1 ; python_version == "3.11"
scipy==1.10.1 ; python_version <= "3.11"
scipy==1.12.0 ; python_version == "3.12"
# Pin SciPy because of failing distribution tests (see #60347)
#Description: scientific python
#Pinned versions: 1.6.3
#Pinned versions: 1.10.1
#test that import: test_unary_ufuncs.py, test_torch.py,test_tensor_creation_ops.py
#test_spectral_ops.py, test_sparse_csr.py, test_reductions.py,test_nn.py
#test_linalg.py, test_binary_ufuncs.py
Expand Down Expand Up @@ -265,10 +263,11 @@ unittest-xml-reporting<=3.2.0,>=2.0.0
#Pinned versions:
#test that import:

#wheel not found on aarch64, and source build requires rust
lintrunner==0.10.7 ; platform_machine == "x86_64"
#lintrunner is supported on aarch64-linux only from 0.12.4 version
lintrunner==0.12.5 ; platform_machine == "aarch64"
#Description: all about linters!
#Pinned versions: 0.10.7
#Pinned versions: 0.10.7 on x86 and 0.12.5 on aarch64
#test that import:

rockset==1.0.3
Expand Down
6 changes: 5 additions & 1 deletion .ci/docker/ubuntu/Dockerfile
Expand Up @@ -169,9 +169,11 @@ RUN rm install_acl.sh
ENV INSTALLED_ACL ${ACL}

# Install ccache/sccache (do this last, so we get priority in PATH)
ARG SKIP_SCCACHE_INSTALL
COPY ./common/install_cache.sh install_cache.sh
ENV PATH /opt/cache/bin:$PATH
RUN bash ./install_cache.sh && rm install_cache.sh
RUN if [ -z "${SKIP_SCCACHE_INSTALL}" ]; then bash ./install_cache.sh; fi
RUN rm install_cache.sh

# Add jni.h for java host build
COPY ./common/install_jni.sh install_jni.sh
Expand All @@ -188,7 +190,9 @@ ARG BUILD_ENVIRONMENT
ENV BUILD_ENVIRONMENT ${BUILD_ENVIRONMENT}

# Install LLVM dev version (Defined in the pytorch/builder github repository)
ARG SKIP_LLVM_SRC_BUILD_INSTALL
COPY --from=pytorch/llvm:9.0.1 /opt/llvm /opt/llvm
RUN if [ -n "${SKIP_LLVM_SRC_BUILD_INSTALL}" ]; then set -eu; rm -rf /opt/llvm; fi

# AWS specific CUDA build guidance
ENV TORCH_CUDA_ARCH_LIST Maxwell
Expand Down
36 changes: 22 additions & 14 deletions .ci/pytorch/build.sh
Expand Up @@ -223,19 +223,23 @@ if [[ "${BUILD_ENVIRONMENT}" != *android* && "${BUILD_ENVIRONMENT}" != *cuda* ]]
export BUILD_STATIC_RUNTIME_BENCHMARK=ON
fi

# Workaround for dind-rootless userid mapping (https://github.com/pytorch/ci-infra/issues/96)
WORKSPACE_ORIGINAL_OWNER_ID=$(stat -c '%u' "/var/lib/jenkins/workspace")
cleanup_workspace() {
echo "sudo may print the following warning message that can be ignored. The chown command will still run."
echo " sudo: setrlimit(RLIMIT_STACK): Operation not permitted"
echo "For more details refer to https://github.com/sudo-project/sudo/issues/42"
sudo chown -R "$WORKSPACE_ORIGINAL_OWNER_ID" /var/lib/jenkins/workspace
}
# Disable shellcheck SC2064 as we want to parse the original owner immediately.
# shellcheck disable=SC2064
trap_add cleanup_workspace EXIT
sudo chown -R jenkins /var/lib/jenkins/workspace
git config --global --add safe.directory /var/lib/jenkins/workspace
# Do not change workspace permissions for ROCm CI jobs
# as it can leave workspace with bad permissions for cancelled jobs
if [[ "$BUILD_ENVIRONMENT" != *rocm* ]]; then
# Workaround for dind-rootless userid mapping (https://github.com/pytorch/ci-infra/issues/96)
WORKSPACE_ORIGINAL_OWNER_ID=$(stat -c '%u' "/var/lib/jenkins/workspace")
cleanup_workspace() {
echo "sudo may print the following warning message that can be ignored. The chown command will still run."
echo " sudo: setrlimit(RLIMIT_STACK): Operation not permitted"
echo "For more details refer to https://github.com/sudo-project/sudo/issues/42"
sudo chown -R "$WORKSPACE_ORIGINAL_OWNER_ID" /var/lib/jenkins/workspace
}
# Disable shellcheck SC2064 as we want to parse the original owner immediately.
# shellcheck disable=SC2064
trap_add cleanup_workspace EXIT
sudo chown -R jenkins /var/lib/jenkins/workspace
git config --global --add safe.directory /var/lib/jenkins/workspace
fi

if [[ "$BUILD_ENVIRONMENT" == *-bazel-* ]]; then
set -e
Expand Down Expand Up @@ -372,4 +376,8 @@ if [[ "$BUILD_ENVIRONMENT" != *libtorch* && "$BUILD_ENVIRONMENT" != *bazel* ]];
python tools/stats/export_test_times.py
fi

print_sccache_stats
# snadampal: skipping it till sccache support added for aarch64
# https://github.com/pytorch/pytorch/issues/121559
if [[ "$BUILD_ENVIRONMENT" != *aarch64* ]]; then
print_sccache_stats
fi
35 changes: 22 additions & 13 deletions .ci/pytorch/test.sh
Expand Up @@ -9,19 +9,23 @@ set -ex
# shellcheck source=./common.sh
source "$(dirname "${BASH_SOURCE[0]}")/common.sh"

# Workaround for dind-rootless userid mapping (https://github.com/pytorch/ci-infra/issues/96)
WORKSPACE_ORIGINAL_OWNER_ID=$(stat -c '%u' "/var/lib/jenkins/workspace")
cleanup_workspace() {
echo "sudo may print the following warning message that can be ignored. The chown command will still run."
echo " sudo: setrlimit(RLIMIT_STACK): Operation not permitted"
echo "For more details refer to https://github.com/sudo-project/sudo/issues/42"
sudo chown -R "$WORKSPACE_ORIGINAL_OWNER_ID" /var/lib/jenkins/workspace
}
# Disable shellcheck SC2064 as we want to parse the original owner immediately.
# shellcheck disable=SC2064
trap_add cleanup_workspace EXIT
sudo chown -R jenkins /var/lib/jenkins/workspace
git config --global --add safe.directory /var/lib/jenkins/workspace
# Do not change workspace permissions for ROCm CI jobs
# as it can leave workspace with bad permissions for cancelled jobs
if [[ "$BUILD_ENVIRONMENT" != *rocm* ]]; then
# Workaround for dind-rootless userid mapping (https://github.com/pytorch/ci-infra/issues/96)
WORKSPACE_ORIGINAL_OWNER_ID=$(stat -c '%u' "/var/lib/jenkins/workspace")
cleanup_workspace() {
echo "sudo may print the following warning message that can be ignored. The chown command will still run."
echo " sudo: setrlimit(RLIMIT_STACK): Operation not permitted"
echo "For more details refer to https://github.com/sudo-project/sudo/issues/42"
sudo chown -R "$WORKSPACE_ORIGINAL_OWNER_ID" /var/lib/jenkins/workspace
}
# Disable shellcheck SC2064 as we want to parse the original owner immediately.
# shellcheck disable=SC2064
trap_add cleanup_workspace EXIT
sudo chown -R jenkins /var/lib/jenkins/workspace
git config --global --add safe.directory /var/lib/jenkins/workspace
fi

echo "Environment variables:"
env
Expand Down Expand Up @@ -177,6 +181,11 @@ if [[ "$BUILD_ENVIRONMENT" != *-bazel-* ]] ; then
export PATH="$HOME/.local/bin:$PATH"
fi

if [[ "$BUILD_ENVIRONMENT" == *aarch64* ]]; then
# TODO: revisit this once the CI is stabilized on aarch64 linux
export VALGRIND=OFF
fi

install_tlparse

# DANGER WILL ROBINSON. The LD_PRELOAD here could cause you problems
Expand Down
1 change: 1 addition & 0 deletions .flake8
Expand Up @@ -54,6 +54,7 @@ per-file-ignores =
torch/ao/quantization/fx/_decomposed.py: TOR901
torch/distributed/_functional_collectives.py: TOR901
torch/distributed/_spmd/data_parallel.py: TOR901
torch/distributed/_tensor/_collective_utils.py: TOR901
optional-ascii-coding = True
exclude =
./.git,
Expand Down
2 changes: 1 addition & 1 deletion .github/actions/download-build-artifacts/action.yml
Expand Up @@ -25,7 +25,7 @@ runs:
s3-bucket: ${{ inputs.s3-bucket }}

- name: Download PyTorch Build Artifacts from GHA
if: inputs.use-gha
if: ${{ inputs.use-gha }}
uses: actions/download-artifact@v3
with:
name: ${{ inputs.name }}
Expand Down
8 changes: 8 additions & 0 deletions .github/actions/filter-test-configs/action.yml
Expand Up @@ -13,6 +13,13 @@ inputs:
required: true
type: string
description: JSON description of what test configs to run.
selected-test-configs:
required: false
type: string
description: |
A comma-separated list of test configurations from the test matrix to keep,
The empty list means we are going to keep every configurations by defaults
default: ""
job-name:
type: string
required: false
Expand Down Expand Up @@ -126,6 +133,7 @@ runs:
--workflow "${GITHUB_WORKFLOW}" \
--job-name "${JOB_NAME}" \
--test-matrix "${{ inputs.test-matrix }}" \
--selected-test-configs "${{ inputs.selected-test-configs }}" \
--pr-number "${PR_NUMBER}" \
--tag "${TAG}" \
--event-name "${EVENT_NAME}" \
Expand Down

0 comments on commit 6e01b3c

Please sign in to comment.