Skip to content

Commit

Permalink
Update on "Freeze fuse two mms"
Browse files Browse the repository at this point in the history
Improves llama_v2 perf locally from 1.48x to 1.55x.

A good future rewrite would be to unify the freezing batching with the other batching rules that yanboliang & co were working on. I want to wait for the forthcoming pre-dispatch changes to settle down first.

cc voznesenskym penguinwu EikanWang jgong5 Guobing-Chen XiaobingSuper zhuhaozhe blzheng Xia-Weiwen wenzhe-nrv jiayisunx peterbell10 ipiszy yf225 chenyang78 kadeng muchulee8 aakhundov ColinPeppler

[ghstack-poisoned]
  • Loading branch information
eellison committed Oct 18, 2023
2 parents bfccf13 + 8137b5c commit 0d8a9d4
Show file tree
Hide file tree
Showing 336 changed files with 7,350 additions and 4,666 deletions.
6 changes: 6 additions & 0 deletions .ci/docker/build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -249,6 +249,12 @@ case "$image" in
CONDA_CMAKE=yes
TRITON=yes
;;
pytorch-linux-jammy-py3-clang15-asan)
ANACONDA_PYTHON_VERSION=3.10
CLANG_VERSION=15
CONDA_CMAKE=yes
VISION=yes
;;
pytorch-linux-jammy-py3.8-gcc11)
ANACONDA_PYTHON_VERSION=3.8
GCC_VERSION=11
Expand Down
4 changes: 3 additions & 1 deletion .ci/docker/common/install_base.sh
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,9 @@ install_ubuntu() {
maybe_libiomp_dev="libiomp-dev"
fi

if [[ "$CLANG_VERSION" == 12 ]]; then
if [[ "$CLANG_VERSION" == 15 ]]; then
maybe_libomp_dev="libomp-15-dev"
elif [[ "$CLANG_VERSION" == 12 ]]; then
maybe_libomp_dev="libomp-12-dev"
elif [[ "$CLANG_VERSION" == 10 ]]; then
maybe_libomp_dev="libomp-10-dev"
Expand Down
4 changes: 2 additions & 2 deletions .ci/docker/requirements-ci.txt
Original file line number Diff line number Diff line change
Expand Up @@ -75,10 +75,10 @@ librosa>=0.6.2 ; python_version < "3.11"
#Pinned versions:
#test that import:

mypy==1.4.1
mypy==1.6.0
# Pin MyPy version because new errors are likely to appear with each release
#Description: linter
#Pinned versions: 1.4.1
#Pinned versions: 1.6.0
#test that import: test_typing.py, test_type_hints.py

networkx==2.8.8
Expand Down
4 changes: 2 additions & 2 deletions .ci/pytorch/test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -148,7 +148,7 @@ if [[ "$BUILD_ENVIRONMENT" == *asan* ]]; then
export PYTORCH_TEST_WITH_ASAN=1
export PYTORCH_TEST_WITH_UBSAN=1
# TODO: Figure out how to avoid hard-coding these paths
export ASAN_SYMBOLIZER_PATH=/usr/lib/llvm-12/bin/llvm-symbolizer
export ASAN_SYMBOLIZER_PATH=/usr/lib/llvm-15/bin/llvm-symbolizer
export TORCH_USE_RTLD_GLOBAL=1
# NB: We load libtorch.so with RTLD_GLOBAL for UBSAN, unlike our
# default behavior.
Expand Down Expand Up @@ -182,7 +182,7 @@ if [[ "$BUILD_ENVIRONMENT" == *asan* ]]; then
# have, and it applies to child processes.

# TODO: get rid of the hardcoded path
export LD_PRELOAD=/usr/lib/llvm-12/lib/clang/12.0.1/lib/linux/libclang_rt.asan-x86_64.so
export LD_PRELOAD=/usr/lib/llvm-15/lib/clang/15.0.7/lib/linux/libclang_rt.asan-x86_64.so
# Disable valgrind for asan
export VALGRIND=OFF
# Increase stack size, because ASAN red zones use more stack
Expand Down
1 change: 1 addition & 0 deletions .ci/pytorch/win-test-helpers/build_pytorch.bat
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,7 @@ python -c "import os, glob; os.system('python -mpip install --no-index --no-deps
python tools/stats/export_test_times.py
copy /Y ".pytorch-test-times.json" "%PYTORCH_FINAL_PACKAGE_DIR%"
copy /Y ".pytorch-test-file-ratings.json" "%PYTORCH_FINAL_PACKAGE_DIR%"
copy /Y ".pytorch-test-class-ratings.json" "%PYTORCH_FINAL_PACKAGE_DIR%"

:: Also save build/.ninja_log as an artifact
copy /Y "build\.ninja_log" "%PYTORCH_FINAL_PACKAGE_DIR%\"
Expand Down
1 change: 1 addition & 0 deletions .ci/pytorch/win-test-helpers/test_python_jit_legacy.bat
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ call %SCRIPT_HELPERS_DIR%\setup_pytorch_env.bat
echo Copying over test times file
copy /Y "%PYTORCH_FINAL_PACKAGE_DIR_WIN%\.pytorch-test-times.json" "%PROJECT_DIR_WIN%"
copy /Y "%PYTORCH_FINAL_PACKAGE_DIR_WIN%\.pytorch-test-file-ratings.json" "%PROJECT_DIR_WIN%"
copy /Y "%PYTORCH_FINAL_PACKAGE_DIR_WIN%\.pytorch-test-class-ratings.json" "%PROJECT_DIR_WIN%"

pushd test

Expand Down
1 change: 1 addition & 0 deletions .ci/pytorch/win-test-helpers/test_python_shard.bat
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ if "%SHARD_NUMBER%" == "1" (
echo Copying over test times file
copy /Y "%PYTORCH_FINAL_PACKAGE_DIR_WIN%\.pytorch-test-times.json" "%PROJECT_DIR_WIN%"
copy /Y "%PYTORCH_FINAL_PACKAGE_DIR_WIN%\.pytorch-test-file-ratings.json" "%PROJECT_DIR_WIN%"
copy /Y "%PYTORCH_FINAL_PACKAGE_DIR_WIN%\.pytorch-test-class-ratings.json" "%PROJECT_DIR_WIN%"

echo Run nn tests
python run_test.py --exclude-jit-executor --exclude-distributed-tests --shard "%SHARD_NUMBER%" "%NUM_TEST_SHARDS%" --verbose
Expand Down
2 changes: 1 addition & 1 deletion .circleci/config.yml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion .circleci/verbatim-sources/job-specs/job-specs-custom.yml
Original file line number Diff line number Diff line change
Expand Up @@ -177,7 +177,7 @@
- run:
name: Archive artifacts into zip
command: |
zip -1 -r artifacts.zip dist/ build/.ninja_log build/compile_commands.json .pytorch-test-times.json .pytorch-test-file-ratings.json
zip -1 -r artifacts.zip dist/ build/.ninja_log build/compile_commands.json .pytorch-test-times.json .pytorch-test-file-ratings.json .pytorch-test-class-ratings.json
cp artifacts.zip /Users/distiller/workspace
- persist_to_workspace:
Expand Down
2 changes: 1 addition & 1 deletion .github/ci_commit_pins/xla.txt
Original file line number Diff line number Diff line change
@@ -1 +1 @@
b84f79803f64354a8be3af9e5b53349f65bd7494
c9aa49c345266b3899318c2af5b3fb9aeca56069
1 change: 1 addition & 0 deletions .github/labeler.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
"ciflow/inductor":
- torch/_decomp/**
- torch/_dynamo/**
- torch/_export/**
- torch/_inductor/**
- benchmarks/dynamo/**
- torch/_subclasses/fake_tensor.py
Expand Down
3 changes: 0 additions & 3 deletions .github/merge_rules.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -125,9 +125,6 @@

- name: PrimTorch
patterns:
- aten/src/ATen/native_functions.yaml
- aten/src/ATen/native/**
- test/**
- torch/_meta_registrations.py
- torch/_decomp/**
- torch/_refs/**
Expand Down
File renamed without changes.
2 changes: 1 addition & 1 deletion .github/workflows/_linux-build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -170,7 +170,7 @@ jobs:
- name: Archive artifacts into zip
if: inputs.build-generates-artifacts && steps.build.outcome != 'skipped'
run: |
zip -1 -r artifacts.zip dist/ build/custom_test_artifacts build/lib build/bin .pytorch-test-times.json .pytorch-test-file-ratings.json
zip -1 -r artifacts.zip dist/ build/custom_test_artifacts build/lib build/bin .pytorch-test-times.json .pytorch-test-file-ratings.json .pytorch-test-class-ratings.json
- name: Store PyTorch Build Artifacts on S3
uses: seemethere/upload-artifact-s3@v5
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/_mac-build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -182,7 +182,7 @@ jobs:
- name: Archive artifacts into zip
if: inputs.build-generates-artifacts && steps.build.outcome != 'skipped'
run: |
zip -1 -r artifacts.zip dist/ build/.ninja_log build/compile_commands.json .pytorch-test-times.json .pytorch-test-file-ratings.json
zip -1 -r artifacts.zip dist/ build/.ninja_log build/compile_commands.json .pytorch-test-times.json .pytorch-test-file-ratings.json .pytorch-test-class-ratings.json
- name: Store PyTorch Build Artifacts on GHA
uses: actions/upload-artifact@v3
Expand Down
20 changes: 5 additions & 15 deletions .github/workflows/_run_android_tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -41,25 +41,17 @@ jobs:
strategy:
matrix: ${{ fromJSON(needs.filter.outputs.test-matrix) }}
fail-fast: false
# NB: This job can only run on GitHub Linux runner atm. This is an ok thing though
# because that runner is ephemeral and could access upload secrets
runs-on: ${{ matrix.runner }}
env:
# GitHub runner installs Android SDK on this path
ANDROID_ROOT: /usr/local/lib/android
ANDROID_NDK_VERSION: '21.4.7075529'
BUILD_LITE_INTERPRETER: ${{ matrix.use_lite_interpreter }}
# 4 of them are supported atm: armeabi-v7a, arm64-v8a, x86, x86_64
SUPPORT_ABI: '${{ matrix.support_abi }}'
steps:
# [see note: pytorch repo ref]
- name: Checkout PyTorch
uses: pytorch/pytorch/.github/actions/checkout-pytorch@main

- name: Setup miniconda
uses: pytorch/test-infra/.github/actions/setup-miniconda@main
with:
python-version: 3.8
environment-file: .github/requirements/conda-env-${{ runner.os }}-${{ runner.arch }}.txt
environment-file: .github/requirements/conda-env-${{ runner.os }}-${{ runner.arch }}

- name: Install NDK
uses: nick-fields/retry@v2.8.2
Expand All @@ -68,12 +60,12 @@ jobs:
max_attempts: 3
retry_wait_seconds: 90
command: |
set -eux
# Install NDK 21 after GitHub update
# https://github.com/actions/virtual-environments/issues/5595
ANDROID_ROOT="/usr/local/lib/android"
ANDROID_SDK_ROOT="${ANDROID_ROOT}/sdk"
ANDROID_NDK="${ANDROID_SDK_ROOT}/ndk-bundle"
ANDROID_NDK_VERSION="21.4.7075529"
SDKMANAGER="${ANDROID_SDK_ROOT}/cmdline-tools/latest/bin/sdkmanager"
# NB: This step downloads and installs NDK, thus it could be flaky.
Expand All @@ -94,10 +86,8 @@ jobs:
- name: Build PyTorch Android
run: |
set -eux
echo "CMAKE_PREFIX_PATH=${CONDA_PREFIX:-"$(dirname "$(which conda)")/../"}" >> "${GITHUB_ENV}"
${CONDA_RUN} ./scripts/build_pytorch_android.sh "${SUPPORT_ABI}"
${CONDA_RUN} ./scripts/build_pytorch_android.sh x86
- name: Run tests
uses: reactivecircus/android-emulator-runner@v2
Expand Down
48 changes: 0 additions & 48 deletions .github/workflows/build-android-binaries.yml

This file was deleted.

2 changes: 1 addition & 1 deletion .github/workflows/docker-builds.yml
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ jobs:
- docker-image-name: pytorch-linux-focal-py3-clang9-android-ndk-r19c
- docker-image-name: pytorch-linux-jammy-py3.8-gcc11
- docker-image-name: pytorch-linux-jammy-py3.8-gcc11-inductor-benchmarks
- docker-image-name: pytorch-linux-jammy-py3-clang12-asan
- docker-image-name: pytorch-linux-jammy-py3-clang15-asan
- docker-image-name: pytorch-linux-focal-py3-clang10-onnx
- docker-image-name: pytorch-linux-focal-linter
- docker-image-name: pytorch-linux-jammy-cuda11.8-cudnn8-py3.9-linter
Expand Down
9 changes: 1 addition & 8 deletions .github/workflows/periodic.yml
Original file line number Diff line number Diff line change
Expand Up @@ -189,14 +189,7 @@ jobs:
with:
test-matrix: |
{ include: [
{ config: 'default',
shard: 1,
num_shards: 1,
runner: 'ubuntu-20.04-16x',
use_lite_interpreter: 1,
# Just set x86 for testing here
support_abi: 'x86',
},
{ config: "default", shard: 1, num_shards: 1, runner: "ubuntu-20.04-16x" },
]}
linux-vulkan-focal-py3_11-clang10-build:
Expand Down
20 changes: 10 additions & 10 deletions .github/workflows/pull.yml
Original file line number Diff line number Diff line change
Expand Up @@ -75,12 +75,12 @@ jobs:
{ config: "default", shard: 1, num_shards: 1 },
]}
linux-jammy-py3_9-clang12-asan-build:
name: linux-jammy-py3.9-clang12-asan
linux-jammy-py3_10-clang15-asan-build:
name: linux-jammy-py3.10-clang15-asan
uses: ./.github/workflows/_linux-build.yml
with:
build-environment: linux-jammy-py3.9-clang12-asan
docker-image-name: pytorch-linux-jammy-py3-clang12-asan
build-environment: linux-jammy-py3.10-clang15-asan
docker-image-name: pytorch-linux-jammy-py3-clang15-asan
test-matrix: |
{ include: [
{ config: "default", shard: 1, num_shards: 6, runner: "linux.4xlarge" },
Expand All @@ -92,14 +92,14 @@ jobs:
]}
sync-tag: asan-build

linux-jammy-py3_9-clang12-asan-test:
name: linux-jammy-py3.9-clang12-asan
linux-jammy-py3_10-clang15-asan-test:
name: linux-jammy-py3.10-clang15-asan
uses: ./.github/workflows/_linux-test.yml
needs: linux-jammy-py3_9-clang12-asan-build
needs: linux-jammy-py3_10-clang15-asan-build
with:
build-environment: linux-jammy-py3.9-clang12-asan
docker-image: ${{ needs.linux-jammy-py3_9-clang12-asan-build.outputs.docker-image }}
test-matrix: ${{ needs.linux-jammy-py3_9-clang12-asan-build.outputs.test-matrix }}
build-environment: linux-jammy-py3.10-clang15-asan
docker-image: ${{ needs.linux-jammy-py3_10-clang15-asan-build.outputs.docker-image }}
test-matrix: ${{ needs.linux-jammy-py3_10-clang15-asan-build.outputs.test-matrix }}
sync-tag: asan-test

linux-focal-py3_8-clang10-onnx-build:
Expand Down
20 changes: 10 additions & 10 deletions .github/workflows/slow.yml
Original file line number Diff line number Diff line change
Expand Up @@ -104,25 +104,25 @@ jobs:
docker-image: ${{ needs.linux-focal-rocm5_6-py3_8-build.outputs.docker-image }}
test-matrix: ${{ needs.linux-focal-rocm5_6-py3_8-build.outputs.test-matrix }}

linux-jammy-py3_9-clang12-asan-build:
name: linux-jammy-py3.9-clang12-asan
linux-jammy-py3_10-clang15-asan-build:
name: linux-jammy-py3.10-clang15-asan
uses: ./.github/workflows/_linux-build.yml
with:
build-environment: linux-jammy-py3.9-clang12-asan
docker-image-name: pytorch-linux-jammy-py3-clang12-asan
build-environment: linux-jammy-py3.10-clang15-asan
docker-image-name: pytorch-linux-jammy-py3-clang15-asan
test-matrix: |
{ include: [
{ config: "slow", shard: 1, num_shards: 2, runner: "linux.4xlarge" },
{ config: "slow", shard: 2, num_shards: 2, runner: "linux.4xlarge" },
]}
sync-tag: asan-build

linux-jammy-py3_9-clang12-asan-test:
name: linux-jammy-py3.9-clang12-asan
linux-jammy-py3_10-clang15-asan-test:
name: linux-jammy-py3.10-clang15-asan
uses: ./.github/workflows/_linux-test.yml
needs: linux-jammy-py3_9-clang12-asan-build
needs: linux-jammy-py3_10-clang15-asan-build
with:
build-environment: linux-jammy-py3.9-clang12-asan
docker-image: ${{ needs.linux-jammy-py3_9-clang12-asan-build.outputs.docker-image }}
test-matrix: ${{ needs.linux-jammy-py3_9-clang12-asan-build.outputs.test-matrix }}
build-environment: linux-jammy-py3.10-clang15-asan
docker-image: ${{ needs.linux-jammy-py3_10-clang15-asan-build.outputs.docker-image }}
test-matrix: ${{ needs.linux-jammy-py3_10-clang15-asan-build.outputs.test-matrix }}
sync-tag: asan-test
5 changes: 1 addition & 4 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ coverage.xml
**/.pytorch-slow-tests.json
**/.pytorch-test-times.json
**/.pytorch-test-file-ratings.json
**/.pytorch-test-class-ratings.json
*/*.pyc
*/*.so*
*/**/__pycache__
Expand Down Expand Up @@ -364,7 +365,3 @@ venv/
# Log files
*.log
sweep/

# Android build artifacts
android/pytorch_android/.cxx
android/pytorch_android_torchvision/.cxx
5 changes: 2 additions & 3 deletions .lintrunner.toml
Original file line number Diff line number Diff line change
Expand Up @@ -162,7 +162,7 @@ init_command = [
'--dry-run={{DRYRUN}}',
'numpy==1.24.3',
'expecttest==0.1.6',
'mypy==1.4.1',
'mypy==1.6.0',
'types-requests==2.27.25',
'types-PyYAML==6.0.7',
'types-tabulate==0.8.8',
Expand Down Expand Up @@ -265,7 +265,6 @@ exclude_patterns = [
'torch/csrc/autograd/functions/**',
'torch/csrc/autograd/generated/**',
'torch/csrc/autograd/profiler_legacy.cpp',
'torch/csrc/CudaIPCTypes.cpp',
'torch/csrc/cuda/**',
'torch/csrc/dynamo/*',
'torch/csrc/distributed/**/*',
Expand Down Expand Up @@ -2660,7 +2659,7 @@ init_command = [
'python3',
'tools/linter/adapters/pip_init.py',
'--dry-run={{DRYRUN}}',
'ruff==0.0.291',
'ruff==0.1.0',
]
is_formatter = true

Expand Down
3 changes: 1 addition & 2 deletions BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -1643,8 +1643,7 @@ cc_library(
name = "shm",
srcs = glob(["torch/lib/libshm/*.cpp"]),
deps = [
":torch_headers",
"//c10",
":torch",
],
)

Expand Down
1 change: 0 additions & 1 deletion android/pytorch_android/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,6 @@ android {
println 'Build pytorch_jni'
exclude 'org/pytorch/LiteModuleLoader.java'
exclude 'org/pytorch/LiteNativePeer.java'
exclude 'org/pytorch/LitePyTorchAndroid.java'
} else {
println 'Build pytorch_jni_lite'
}
Expand Down

0 comments on commit 0d8a9d4

Please sign in to comment.