Skip to content

Commit

Permalink
Update on "fix aminmax output resize issue when input is a zero dimen…
Browse files Browse the repository at this point in the history
…sion tensor"


Fix #96042

### before
```
>>> torch.aminmax(torch.tensor(1, device='cpu'), dim=0, keepdim=True)
__main__:1: UserWarning: An output with one or more elements was resized since it had shape [], which does not match the required output shape [1]. This behavior is deprecated, and in a future PyTorch release outputs will not be resized unless they have zero elements. You can explicitly reuse an out tensor t by resizing it, inplace, to zero elements with t.resize_(0). (Triggered internally at ../aten/src/ATen/native/Resize.cpp:24.)
torch.return_types.aminmax(
min=tensor([1]),
max=tensor([1]))
>>> torch.aminmax(torch.tensor(1, device='cpu'), dim=0, keepdim=False)
torch.return_types.aminmax(
min=tensor(1),
max=tensor(1))
```
### after
```
>>> torch.aminmax(torch.tensor(1, device='cpu'), dim=0, keepdim=True)
torch.return_types.aminmax(
min=tensor(1),
max=tensor(1))
>>> torch.aminmax(torch.tensor(1, device='cpu'), dim=0, keepdim=False)
torch.return_types.aminmax(
min=tensor(1),
max=tensor(1))

```

Marked the following test as expected_fail:
`test_vmap.py TestVmapOperatorsOpInfoCPU.test_op_has_batch_rule_aminmax_cpu_float32`

Given an input of shape (2), the loop output has shape (2) while the batched vmap output has shape (2, 1), so the two shapes do not match.
The loop path computes aminmax twice, each time on a tensor of shape ( ): without this patch, each output has shape (1) and the results are stacked into (2, 1); with this patch, each output has shape ( ) and the results are stacked into (2).

cc jgong5 XiaobingSuper sanchitintel ashokei jingxu10

[ghstack-poisoned]
  • Loading branch information
mingfeima committed Mar 15, 2023
2 parents dc73df2 + 3082ece commit efc5fa6
Show file tree
Hide file tree
Showing 343 changed files with 16,415 additions and 11,558 deletions.
2 changes: 1 addition & 1 deletion .ci/docker/ci_commit_pins/triton.txt
Original file line number Diff line number Diff line change
@@ -1 +1 @@
b8b470bc597c1c5bd03682c09fe3e6b7c53787fd
2c32f4399986045ff25cae201ed3b16d922a9d3b
20 changes: 12 additions & 8 deletions .ci/docker/common/install_conda.sh
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ if [ -n "$ANACONDA_PYTHON_VERSION" ]; then
BASE_URL="https://repo.anaconda.com/miniconda"

MAJOR_PYTHON_VERSION=$(echo "$ANACONDA_PYTHON_VERSION" | cut -d . -f 1)
MINOR_PYTHON_VERSION=$(echo "$ANACONDA_PYTHON_VERSION" | cut -d . -f 2)

case "$MAJOR_PYTHON_VERSION" in
2)
Expand Down Expand Up @@ -52,23 +53,26 @@ if [ -n "$ANACONDA_PYTHON_VERSION" ]; then
# Install PyTorch conda deps, as per https://github.com/pytorch/pytorch README
CONDA_COMMON_DEPS="astunparse pyyaml mkl=2021.4.0 mkl-include=2021.4.0 setuptools"
if [ "$ANACONDA_PYTHON_VERSION" = "3.11" ]; then
# Install llvm-8 as it is required to compile llvmlite-0.30.0 from source
# TODO: Stop using `-c malfet`
conda_install numpy=1.23.5 ${CONDA_COMMON_DEPS} llvmdev=8.0.0 -c malfet
conda_install numpy=1.23.5 ${CONDA_COMMON_DEPS} -c malfet
elif [ "$ANACONDA_PYTHON_VERSION" = "3.10" ]; then
# Install llvm-8 as it is required to compile llvmlite-0.30.0 from source
conda_install numpy=1.21.2 ${CONDA_COMMON_DEPS} llvmdev=8.0.0
conda_install numpy=1.21.2 ${CONDA_COMMON_DEPS}
elif [ "$ANACONDA_PYTHON_VERSION" = "3.9" ]; then
# Install llvm-8 as it is required to compile llvmlite-0.30.0 from source
conda_install numpy=1.19.2 ${CONDA_COMMON_DEPS} llvmdev=8.0.0
conda_install numpy=1.19.2 ${CONDA_COMMON_DEPS}
elif [ "$ANACONDA_PYTHON_VERSION" = "3.8" ]; then
# Install llvm-8 as it is required to compile llvmlite-0.30.0 from source
conda_install numpy=1.18.5 ${CONDA_COMMON_DEPS} llvmdev=8.0.0
conda_install numpy=1.18.5 ${CONDA_COMMON_DEPS}
else
# Install `typing-extensions` for 3.7
conda_install numpy=1.18.5 ${CONDA_COMMON_DEPS} typing-extensions
fi

# This is only supported in 3.8 upward
if [ "$MINOR_PYTHON_VERSION" -gt "7" ]; then
# Install llvm-8 as it is required to compile llvmlite-0.30.0 from source
# and libpython-static for torch deploy
conda_install llvmdev=8.0.0 "libpython-static=${ANACONDA_PYTHON_VERSION}"
fi

# Use conda cmake in some cases. Conda cmake will be newer than our supported
# min version (3.5 for xenial and 3.10 for bionic), so we only do it in those
# following builds that we know should use conda. Specifically, Ubuntu bionic
Expand Down
1 change: 1 addition & 0 deletions .ci/docker/triton_version.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
2.1.0
3 changes: 2 additions & 1 deletion .ci/docker/ubuntu-cuda/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -91,8 +91,9 @@ ARG TRITON
COPY ./common/install_triton.sh install_triton.sh
COPY ./common/common_utils.sh common_utils.sh
COPY ci_commit_pins/triton.txt triton.txt
COPY triton_version.txt triton_version.txt
RUN if [ -n "${TRITON}" ]; then bash ./install_triton.sh; fi
RUN rm install_triton.sh common_utils.sh triton.txt
RUN rm install_triton.sh common_utils.sh triton.txt triton_version.txt

# Install ccache/sccache (do this last, so we get priority in PATH)
COPY ./common/install_cache.sh install_cache.sh
Expand Down
12 changes: 0 additions & 12 deletions .ci/pytorch/common_utils.sh
Original file line number Diff line number Diff line change
Expand Up @@ -157,21 +157,9 @@ function install_tabulate() {
pip_install tabulate
}

function setup_torchdeploy_deps(){
conda install -y -n "py_${ANACONDA_PYTHON_VERSION}" "libpython-static=${ANACONDA_PYTHON_VERSION}"
local CC
local CXX
CC="$(which gcc)"
CXX="$(which g++)"
export CC
export CXX
pip install --upgrade pip
}

function checkout_install_torchdeploy() {
local commit
commit=$(get_pinned_commit multipy)
setup_torchdeploy_deps
pushd ..
git clone --recurse-submodules https://github.com/pytorch/multipy.git
pushd multipy
Expand Down
3 changes: 0 additions & 3 deletions .ci/pytorch/multigpu-test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,6 @@ time python test/run_test.py --verbose -i distributed/checkpoint/test_checkpoint
time python test/run_test.py --verbose -i distributed/checkpoint/test_file_system_checkpoint
time python test/run_test.py --verbose -i distributed/_shard/sharding_spec/test_sharding_spec
time python test/run_test.py --verbose -i distributed/_shard/sharding_plan/test_sharding_plan
time python test/run_test.py --verbose -i distributed/_shard/sharded_tensor/test_megatron_prototype
time python test/run_test.py --verbose -i distributed/_shard/sharded_tensor/test_sharded_tensor
time python test/run_test.py --verbose -i distributed/_shard/sharded_tensor/test_sharded_tensor_reshard
time python test/run_test.py --verbose -i distributed/_shard/sharded_tensor/ops/test_chunk
Expand All @@ -36,12 +35,10 @@ time python test/run_test.py --verbose -i distributed/_shard/sharded_tensor/ops/
time python test/run_test.py --verbose -i distributed/_shard/sharded_tensor/ops/test_embedding_bag
time python test/run_test.py --verbose -i distributed/_shard/sharded_tensor/ops/test_binary_cmp
time python test/run_test.py --verbose -i distributed/_shard/sharded_tensor/ops/test_init
time python test/run_test.py --verbose -i distributed/_shard/sharded_tensor/ops/test_linear
time python test/run_test.py --verbose -i distributed/_shard/sharded_tensor/ops/test_math_ops
time python test/run_test.py --verbose -i distributed/_shard/sharded_tensor/ops/test_matrix_ops
time python test/run_test.py --verbose -i distributed/_shard/sharded_tensor/ops/test_softmax
time python test/run_test.py --verbose -i distributed/_shard/sharded_optim/test_sharded_optim
time python test/run_test.py --verbose -i distributed/_shard/test_partial_tensor
# Other tests
time python test/run_test.py --verbose -i test_cuda_primary_ctx
time python test/run_test.py --verbose -i test_optim -- -k optimizers_with_varying_tensors
Expand Down
9 changes: 5 additions & 4 deletions .ci/pytorch/test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -310,7 +310,7 @@ test_single_dynamo_benchmark() {
elif [[ "${TEST_CONFIG}" == *perf* ]]; then
# MKL_THREADING_LAYER=GNU to mitigate https://github.com/pytorch/pytorch/issues/37377
MKL_THREADING_LAYER=GNU python benchmarks/dynamo/runner.py --suites="$suite" \
--base-sha="$BASE_SHA" --output-dir="$TEST_REPORTS_DIR" "${partition_flags[@]}" \
--base-sha="$BASE_SHA" "${partition_flags[@]}" \
--no-graphs --no-update-archive --no-gh-comment "$@"
else
python "benchmarks/dynamo/$suite.py" \
Expand All @@ -325,6 +325,7 @@ test_single_dynamo_benchmark() {

test_dynamo_benchmark() {
# Usage: test_dynamo_benchmark huggingface 0
TEST_REPORTS_DIR=$(pwd)/test/test-reports

local suite="$1"
shift
Expand All @@ -335,8 +336,8 @@ test_dynamo_benchmark() {
test_single_dynamo_benchmark "amp" "$suite" "$shard_id" --training --amp "$@"
elif [[ "${TEST_CONFIG}" == *perf* ]]; then
# Performance test training only, for float32 and amp
test_single_dynamo_benchmark "amp" "$suite" "$shard_id" --training --dtypes=amp "$@"
test_single_dynamo_benchmark "float32" "$suite" "$shard_id" --training --dtypes=float32 "$@"
test_single_dynamo_benchmark "amp" "$suite" "$shard_id" --training --dtypes=amp --output-dir="$TEST_REPORTS_DIR"/amp "$@"
test_single_dynamo_benchmark "float32" "$suite" "$shard_id" --training --dtypes=float32 --output-dir="$TEST_REPORTS_DIR"/float32 "$@"
else
# Check inference with --float32
test_single_dynamo_benchmark "inference" "$suite" "$shard_id" --float32 "$@"
Expand Down Expand Up @@ -647,7 +648,7 @@ build_xla() {
apply_patches
SITE_PACKAGES="$(python -c 'from distutils.sysconfig import get_python_lib; print(get_python_lib())')"
# These functions are defined in .circleci/common.sh in pytorch/xla repo
install_deps_pytorch_xla $XLA_DIR $USE_CACHE
retry install_deps_pytorch_xla $XLA_DIR $USE_CACHE
CMAKE_PREFIX_PATH="${SITE_PACKAGES}/torch:${CMAKE_PREFIX_PATH}" XLA_SANDBOX_BUILD=1 build_torch_xla $XLA_DIR
assert_git_not_dirty
}
Expand Down
10 changes: 7 additions & 3 deletions .clang-format
Original file line number Diff line number Diff line change
Expand Up @@ -60,9 +60,6 @@ MacroBlockBegin: ''
MacroBlockEnd: ''
MaxEmptyLinesToKeep: 1
NamespaceIndentation: None
ObjCBlockIndentWidth: 2
ObjCSpaceAfterProperty: false
ObjCSpaceBeforeProtocolList: false
PenaltyBreakBeforeFirstCallParameter: 1
PenaltyBreakComment: 300
PenaltyBreakFirstLessLess: 120
Expand All @@ -85,4 +82,11 @@ SpacesInSquareBrackets: false
Standard: Cpp11
TabWidth: 8
UseTab: Never
---
Language: ObjC
ColumnLimit: 120
AlignAfterOpenBracket: Align
ObjCBlockIndentWidth: 2
ObjCSpaceAfterProperty: false
ObjCSpaceBeforeProtocolList: false
...
2 changes: 1 addition & 1 deletion .github/ci_commit_pins/vision.txt
Original file line number Diff line number Diff line change
@@ -1 +1 @@
5850f370c03d941f97c7bd53f99a83abb0b9dd01
7d2acaa7d7fc600fa08fca18e9230f8651147025
3 changes: 3 additions & 0 deletions .github/labeler.yml
Original file line number Diff line number Diff line change
Expand Up @@ -54,3 +54,6 @@
- aten/src/ATen/quantized/**
- aten/src/ATen/native/quantized/cpu/**
- test/quantization/**

"ciflow/trunk":
- .ci/docker/ci_commit_pins/triton.txt
26 changes: 26 additions & 0 deletions .github/merge_rules.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -243,6 +243,8 @@
- fegin
- kumpera
- yhcharles
- kiukchung
- d4l3k
mandatory_checks_name:
- EasyCLA
- Lint
Expand Down Expand Up @@ -351,6 +353,30 @@
- Lint
- pull

- name: functorch
patterns:
- functorch/**
- test/functorch/**
- torch/_C/__init__.pyi.in
- torch/__init__.py
- torch/csrc/functorch/**
- torch/_functorch/**
- torch/func/**
- aten/src/ATen/functorch/**
- docs/source/func**
- '**vmap**'
- '**functorch**'
- '**pytree**'
approved_by:
- kshiteej12345
- srossross
- chillee
- zou3519
mandatory_checks_name:
- EasyCLA
- Lint
- pull

- name: ROCm
patterns:
- '**rocm**'
Expand Down
37 changes: 29 additions & 8 deletions .github/scripts/build_triton_wheel.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,15 @@
import sys
import shutil
SCRIPT_DIR = Path(__file__).parent
REPO_DIR = SCRIPT_DIR.parent.parent

def read_triton_pin() -> str:
with open(SCRIPT_DIR.parent / "ci_commit_pins" / "triton.txt") as f:
with open(REPO_DIR / ".ci" / "docker" / "ci_commit_pins" / "triton.txt") as f:
return f.read().strip()


def read_triton_version() -> str:
    """Return the contents of ``.ci/docker/triton_version.txt``.

    The file is looked up relative to ``REPO_DIR`` and the text is returned
    with leading and trailing whitespace stripped.
    """
    version_file = REPO_DIR / ".ci" / "docker" / "triton_version.txt"
    return version_file.read_text().strip()


Expand All @@ -19,26 +25,35 @@ def check_and_replace(inp: str, src: str, dst: str) -> str:
return inp.replace(src, dst)


def patch_setup_py(path: Path, *, version: str = "2.0.0", name: str = "triton") -> None:
def patch_setup_py(path: Path, *, version: str, name: str = "triton") -> None:
with open(path) as f:
orig = f.read()
# Replace name
orig = check_and_replace(orig, "name=\"triton\",", f"name=\"{name}\",")
# Replace version
orig = check_and_replace(orig, "version=\"2.0.0\",", f"version=\"{version}\",")
orig = check_and_replace(orig, f"version=\"{read_triton_version()}\",", f"version=\"{version}\",")
with open(path, "w") as f:
f.write(orig)


def patch_init_py(path: Path, *, version: str) -> None:
    """Rewrite the ``__version__`` assignment in the file at *path*.

    The file is read, the literal ``__version__ = '2.0.0'`` is swapped for an
    assignment of *version* through ``check_and_replace``, and the result is
    written back to the same location.
    """
    target = Path(path)
    current = target.read_text()
    # Replace version
    updated = check_and_replace(current, "__version__ = '2.0.0'", f"__version__ = \"{version}\"")
    target.write_text(updated)


def build_triton(commit_hash: str, build_conda: bool = False, py_version : Optional[str] = None) -> Path:
def build_triton(*, version: str, commit_hash: str, build_conda: bool = False, py_version : Optional[str] = None) -> Path:
with TemporaryDirectory() as tmpdir:
triton_basedir = Path(tmpdir) / "triton"
triton_pythondir = triton_basedir / "python"
check_call(["git", "clone", "https://github.com/openai/triton"], cwd=tmpdir)
check_call(["git", "checkout", commit_hash], cwd=triton_basedir)
if build_conda:
with open(triton_basedir / "meta.yaml", "w") as meta:
print(f"package:\n name: torchtriton\n version: 2.0.0+{commit_hash[:10]}\n", file=meta)
print(f"package:\n name: torchtriton\n version: {version}+{commit_hash[:10]}\n", file=meta)
print("source:\n path: .\n", file=meta)
print("build:\n string: py{{py}}\n number: 1\n script: cd python; "
"python setup.py install --single-version-externally-managed --record=record.txt\n", file=meta)
Expand All @@ -47,6 +62,7 @@ def build_triton(commit_hash: str, build_conda: bool = False, py_version : Optio
print("about:\n home: https://github.com/openai/triton\n license: MIT\n summary:"
" 'A language and compiler for custom Deep Learning operation'", file=meta)

patch_init_py(triton_pythondir / "triton" / "__init__.py", version=f"{version}+{commit_hash[:10]}")
if py_version is None:
py_version = f"{sys.version_info.major}.{sys.version_info.minor}"
check_call(["conda", "build", "--python", py_version,
Expand All @@ -55,7 +71,8 @@ def build_triton(commit_hash: str, build_conda: bool = False, py_version : Optio
shutil.copy(conda_path, Path.cwd())
return Path.cwd() / conda_path.name

patch_setup_py(triton_pythondir / "setup.py", name="pytorch-triton", version=f"2.0.0+{commit_hash[:10]}")
patch_setup_py(triton_pythondir / "setup.py", name="pytorch-triton", version=f"{version}+{commit_hash[:10]}")
patch_init_py(triton_pythondir / "triton" / "__init__.py", version=f"{version}+{commit_hash[:10]}")
check_call([sys.executable, "setup.py", "bdist_wheel"], cwd=triton_pythondir)
whl_path = list((triton_pythondir / "dist").glob("*.whl"))[0]
shutil.copy(whl_path, Path.cwd())
Expand All @@ -67,9 +84,13 @@ def main() -> None:
parser = ArgumentParser("Build Triton binaries")
parser.add_argument("--build-conda", action="store_true")
parser.add_argument("--py-version", type=str)
parser.add_argument("--commit-hash", type=str, default=read_triton_pin())
parser.add_argument("--triton-version", type=str, default=read_triton_version())
args = parser.parse_args()
pin = read_triton_pin()
build_triton(pin, build_conda=args.build_conda, py_version=args.py_version)
build_triton(commit_hash=args.commit_hash,
version=args.triton_version,
build_conda=args.build_conda,
py_version=args.py_version)


if __name__ == "__main__":
Expand Down
35 changes: 3 additions & 32 deletions .github/scripts/test_trymerge.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,6 @@

from trymerge import (
find_matching_merge_rule,
get_land_checkrun_conclusions,
validate_land_time_checks,
gh_graphql,
gh_get_team_members,
read_merge_rules,
Expand All @@ -25,7 +23,6 @@
PostCommentError,
FlakyRule,
categorize_checks,
get_combined_checks_from_pr_and_land_validation,
get_rockset_results,
main as trymerge_main,
get_classifications,
Expand Down Expand Up @@ -111,9 +108,6 @@ def __init__(self) -> None:
self.pr_num = 76123
self.dry_run = True
self.comment_id = 0
self.on_mandatory = False
self.on_green = False
self.land_checks = False
self.reason = 'this is for testing'

return Object()
Expand All @@ -128,9 +122,6 @@ def mock_merge(pr_num: int, repo: GitRepo,
dry_run: bool = False,
skip_mandatory_checks: bool = False,
comment_id: Optional[int] = None,
mandatory_only: bool = False,
on_green: bool = False,
land_checks: bool = False,
timeout_minutes: int = 400,
stale_pr_days: int = 3) -> None:
pass
Expand Down Expand Up @@ -346,20 +337,6 @@ def test_cancelled_gets_ignored(self, *args: Any) -> None:
self.assertTrue(len(lint_checks) > 0)
self.assertTrue(all([conclusions[name].status == "SUCCESS" for name in lint_checks]))

def test_get_many_land_checks(self, *args: Any) -> None:
""" Tests that all checkruns can be fetched for a commit
"""
conclusions = get_land_checkrun_conclusions('pytorch', 'pytorch', '6882717f73deffb692219ccd1fd6db258d8ed684')
self.assertEqual(len(conclusions), 98)
self.assertTrue("pull / linux-docs / build-docs (cpp)" in conclusions.keys())

def test_failed_land_checks(self, *args: Any) -> None:
""" Tests that PR with Land Checks fail with a RunTime error
"""
self.assertRaisesRegex(RuntimeError,
".*Failed to merge; some land checks failed.*",
lambda: validate_land_time_checks('pytorch', 'pytorch', '6882717f73deffb692219ccd1fd6db258d8ed684'))

@mock.patch('trymerge.gh_get_pr_info', return_value=mock_gh_get_info())
@mock.patch('trymerge.parse_args', return_value=mock_parse_args(True, False))
@mock.patch('trymerge.try_revert', side_effect=mock_revert)
Expand All @@ -376,10 +353,7 @@ def test_main_force(self, mock_merge: Any, mock_parse_args: Any, *args: Any) ->
mock.ANY,
dry_run=mock.ANY,
skip_mandatory_checks=True,
comment_id=mock.ANY,
on_green=False,
land_checks=False,
mandatory_only=False)
comment_id=mock.ANY)

@mock.patch('trymerge.gh_get_pr_info', return_value=mock_gh_get_info())
@mock.patch('trymerge.parse_args', return_value=mock_parse_args(False, False))
Expand All @@ -390,10 +364,7 @@ def test_main_merge(self, mock_merge: Any, *args: Any) -> None:
mock.ANY,
dry_run=mock.ANY,
skip_mandatory_checks=False,
comment_id=mock.ANY,
on_green=False,
land_checks=False,
mandatory_only=False)
comment_id=mock.ANY)

@mock.patch('trymerge.read_merge_rules', side_effect=mocked_read_merge_rules)
def test_revert_rules(self, *args: Any) -> None:
Expand Down Expand Up @@ -453,7 +424,7 @@ class TestBypassFailures(TestCase):
def test_get_classifications(self, *args: Any) -> None:
flaky_rules = [FlakyRule("distributed", ["##[error]The operation was canceled."])]
pr = GitHubPR("pytorch", "pytorch", 92863)
checks = get_combined_checks_from_pr_and_land_validation(pr, None)
checks = pr.get_checkrun_conclusions()
checks = get_classifications(pr.last_commit()['oid'], pr.get_merge_base(), checks, flaky_rules)
self.assertTrue(
checks[
Expand Down

0 comments on commit efc5fa6

Please sign in to comment.