Update on "Support randn_like() for NT"
To satisfy an internal ask.

[ghstack-poisoned]
jbschlosser committed Mar 13, 2023 · 2 parents bf54268 + 3c0f0b0 · commit 2b4d3c2
Showing 400 changed files with 19,507 additions and 14,236 deletions.
2 changes: 1 addition & 1 deletion .ci/docker/ci_commit_pins/triton.txt
@@ -1 +1 @@
-b8b470bc597c1c5bd03682c09fe3e6b7c53787fd
+2c32f4399986045ff25cae201ed3b16d922a9d3b
6 changes: 3 additions & 3 deletions .ci/docker/requirements-ci.txt
@@ -244,9 +244,9 @@ unittest-xml-reporting<=3.2.0,>=2.0.0
 #Pinned versions:
 #test that import:

-lintrunner==0.9.2
-#Description: all about linters
-#Pinned versions: 0.9.2
+lintrunner==0.10.7
+#Description: all about linters!
+#Pinned versions: 0.10.7
 #test that import:

 rockset==1.0.3
1 change: 1 addition & 0 deletions .ci/docker/triton_version.txt
@@ -0,0 +1 @@
+2.1.0
3 changes: 2 additions & 1 deletion .ci/docker/ubuntu-cuda/Dockerfile
@@ -91,8 +91,9 @@ ARG TRITON
 COPY ./common/install_triton.sh install_triton.sh
 COPY ./common/common_utils.sh common_utils.sh
 COPY ci_commit_pins/triton.txt triton.txt
+COPY triton_version.txt triton_version.txt
 RUN if [ -n "${TRITON}" ]; then bash ./install_triton.sh; fi
-RUN rm install_triton.sh common_utils.sh triton.txt
+RUN rm install_triton.sh common_utils.sh triton.txt triton_version.txt

 # Install ccache/sccache (do this last, so we get priority in PATH)
 COPY ./common/install_cache.sh install_cache.sh
5 changes: 2 additions & 3 deletions .ci/onnx/test.sh
@@ -61,10 +61,9 @@ if [[ "$BUILD_ENVIRONMENT" == *onnx* ]]; then
   pip install -q --user --no-use-pep517 "git+https://github.com/pytorch/vision.git@$(cat .github/ci_commit_pins/vision.txt)"
   pip install -q --user transformers==4.25.1
   pip install -q --user ninja flatbuffers==2.0 numpy==1.22.4 onnxruntime==1.14.0 beartype==0.10.4
-  # TODO: change this when onnx 1.13.1 is released.
-  pip install --no-use-pep517 'onnx @ git+https://github.com/onnx/onnx@e192ba01e438d22ca2dedd7956e28e3551626c91'
+  pip install -q --user onnx==1.13.1
   # TODO: change this when onnx-script is on testPypi
-  pip install 'onnx-script @ git+https://github.com/microsoft/onnx-script@0298154caf6b46fc4e30abba034095c1290c26e3'
+  pip install 'onnx-script @ git+https://github.com/microsoft/onnx-script@29241e15f5182be1384f1cf6ba203d7e2e125196'
   # numba requires numpy <= 1.20, onnxruntime requires numpy >= 1.21.
   # We don't actually need it for our tests, but it's imported if it's present, so uninstall.
   pip uninstall -q --yes numba
3 changes: 0 additions & 3 deletions .ci/pytorch/multigpu-test.sh
@@ -27,7 +27,6 @@ time python test/run_test.py --verbose -i distributed/checkpoint/test_checkpoint
 time python test/run_test.py --verbose -i distributed/checkpoint/test_file_system_checkpoint
 time python test/run_test.py --verbose -i distributed/_shard/sharding_spec/test_sharding_spec
 time python test/run_test.py --verbose -i distributed/_shard/sharding_plan/test_sharding_plan
-time python test/run_test.py --verbose -i distributed/_shard/sharded_tensor/test_megatron_prototype
 time python test/run_test.py --verbose -i distributed/_shard/sharded_tensor/test_sharded_tensor
 time python test/run_test.py --verbose -i distributed/_shard/sharded_tensor/test_sharded_tensor_reshard
 time python test/run_test.py --verbose -i distributed/_shard/sharded_tensor/ops/test_chunk
@@ -36,12 +35,10 @@ time python test/run_test.py --verbose -i distributed/_shard/sharded_tensor/ops/
 time python test/run_test.py --verbose -i distributed/_shard/sharded_tensor/ops/test_embedding_bag
 time python test/run_test.py --verbose -i distributed/_shard/sharded_tensor/ops/test_binary_cmp
 time python test/run_test.py --verbose -i distributed/_shard/sharded_tensor/ops/test_init
-time python test/run_test.py --verbose -i distributed/_shard/sharded_tensor/ops/test_linear
 time python test/run_test.py --verbose -i distributed/_shard/sharded_tensor/ops/test_math_ops
 time python test/run_test.py --verbose -i distributed/_shard/sharded_tensor/ops/test_matrix_ops
 time python test/run_test.py --verbose -i distributed/_shard/sharded_tensor/ops/test_softmax
 time python test/run_test.py --verbose -i distributed/_shard/sharded_optim/test_sharded_optim
-time python test/run_test.py --verbose -i distributed/_shard/test_partial_tensor
 # Other tests
 time python test/run_test.py --verbose -i test_cuda_primary_ctx
 time python test/run_test.py --verbose -i test_optim -- -k optimizers_with_varying_tensors
25 changes: 17 additions & 8 deletions .ci/pytorch/test.sh
@@ -298,13 +298,19 @@ test_single_dynamo_benchmark() {

   local partition_flags=()
   if [[ -n "$NUM_TEST_SHARDS" && -n "$shard_id" ]]; then
-    partition_flags=( --total-partitions 2 --partition-id "$shard_id" )
+    partition_flags=( --total-partitions "$NUM_TEST_SHARDS" --partition-id "$shard_id" )
   fi

-  if [[ "${TEST_CONFIG}" == *perf* ]]; then
+  if [[ "${TEST_CONFIG}" == *perf_compare* ]]; then
+    python "benchmarks/dynamo/$suite.py" \
+      --ci --performance --disable-cudagraphs \
+      "${DYNAMO_BENCHMARK_FLAGS[@]}" \
+      "$@" "${partition_flags[@]}" \
+      --output "$TEST_REPORTS_DIR/${name}_${suite}.csv"
+  elif [[ "${TEST_CONFIG}" == *perf* ]]; then
     # MKL_THREADING_LAYER=GNU to mitigate https://github.com/pytorch/pytorch/issues/37377
     MKL_THREADING_LAYER=GNU python benchmarks/dynamo/runner.py --suites="$suite" \
-      --base-sha="$BASE_SHA" --output-dir="$TEST_REPORTS_DIR" "${partition_flags[@]}" \
+      --base-sha="$BASE_SHA" "${partition_flags[@]}" \
       --no-graphs --no-update-archive --no-gh-comment "$@"
   else
     python "benchmarks/dynamo/$suite.py" \
@@ -319,21 +325,24 @@

 test_dynamo_benchmark() {
   # Usage: test_dynamo_benchmark huggingface 0
+  TEST_REPORTS_DIR=$(pwd)/test/test-reports

   local suite="$1"
   shift
   local shard_id="$1"
   shift

-  if [[ "${TEST_CONFIG}" == *perf* ]]; then
+  if [[ "${TEST_CONFIG}" == *perf_compare* ]]; then
+    test_single_dynamo_benchmark "amp" "$suite" "$shard_id" --training --amp "$@"
+  elif [[ "${TEST_CONFIG}" == *perf* ]]; then
     # Performance test training only, for float32 and amp
-    test_single_dynamo_benchmark "amp" "$suite" "$shard_id" --training --dtypes=amp "$@"
-    test_single_dynamo_benchmark "float32" "$suite" "$shard_id" --training --dtypes=float32 "$@"
+    test_single_dynamo_benchmark "amp" "$suite" "$shard_id" --training --dtypes=amp --output-dir="$TEST_REPORTS_DIR"/amp "$@"
+    test_single_dynamo_benchmark "float32" "$suite" "$shard_id" --training --dtypes=float32 --output-dir="$TEST_REPORTS_DIR"/float32 "$@"
   else
     # Check inference with --float32
     test_single_dynamo_benchmark "inference" "$suite" "$shard_id" --float32 "$@"

-    if [[ "${TEST_CONFIG}" != *cpu_accuracy* && "${TEST_CONFIG}" != *dynamic* ]]; then
+    if [[ "${TEST_CONFIG}" != *cpu_accuracy* ]]; then
       # Check training with --amp
       test_single_dynamo_benchmark "training" "$suite" "$shard_id" --training --amp "$@"
     fi
@@ -639,7 +648,7 @@ build_xla() {
   apply_patches
   SITE_PACKAGES="$(python -c 'from distutils.sysconfig import get_python_lib; print(get_python_lib())')"
   # These functions are defined in .circleci/common.sh in pytorch/xla repo
-  install_deps_pytorch_xla $XLA_DIR $USE_CACHE
+  retry install_deps_pytorch_xla $XLA_DIR $USE_CACHE
   CMAKE_PREFIX_PATH="${SITE_PACKAGES}/torch:${CMAKE_PREFIX_PATH}" XLA_SANDBOX_BUILD=1 build_torch_xla $XLA_DIR
   assert_git_not_dirty
 }
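Note on the sharding fix above: the old code hard-coded --total-partitions 2, so any NUM_TEST_SHARDS other than 2 would silently run the wrong slice of the suite. A minimal Python sketch of the contract the --total-partitions/--partition-id flags imply (the helper and model names are illustrative, not taken from the benchmark scripts):

from typing import List

def select_partition(models: List[str], total_partitions: int, partition_id: int) -> List[str]:
    """Return the slice of `models` that shard `partition_id` should run."""
    if not 0 <= partition_id < total_partitions:
        raise ValueError("partition_id must be in [0, total_partitions)")
    # Round-robin assignment keeps shard sizes within one model of each other.
    return [m for i, m in enumerate(models) if i % total_partitions == partition_id]

# With 2 shards, shard 1 runs every other model; with 3 shards the split changes,
# which is why the flag must track NUM_TEST_SHARDS instead of a constant.
print(select_partition(["bert", "gpt2", "resnet50", "t5"], 2, 1))  # ['gpt2', 't5']
print(select_partition(["bert", "gpt2", "resnet50", "t5"], 3, 1))  # ['gpt2']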
10 changes: 7 additions & 3 deletions .clang-format
@@ -60,9 +60,6 @@ MacroBlockBegin: ''
 MacroBlockEnd: ''
 MaxEmptyLinesToKeep: 1
 NamespaceIndentation: None
-ObjCBlockIndentWidth: 2
-ObjCSpaceAfterProperty: false
-ObjCSpaceBeforeProtocolList: false
 PenaltyBreakBeforeFirstCallParameter: 1
 PenaltyBreakComment: 300
 PenaltyBreakFirstLessLess: 120
@@ -85,4 +82,11 @@ SpacesInSquareBrackets: false
 Standard: Cpp11
 TabWidth: 8
 UseTab: Never
+---
+Language: ObjC
+ColumnLimit: 120
+AlignAfterOpenBracket: Align
+ObjCBlockIndentWidth: 2
+ObjCSpaceAfterProperty: false
+ObjCSpaceBeforeProtocolList: false
 ...
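Note: the --- and ... lines are YAML document markers; clang-format treats each document in the file as a separate per-language configuration selected by its Language: key. The ObjC options deleted from the shared section above are therefore not lost; they move into an ObjC-only document with its own 120-column limit.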
8 changes: 7 additions & 1 deletion .github/actions/setup-linux/action.yml
@@ -13,7 +13,13 @@ runs:
   # Pulled from instance metadata endpoint for EC2
   # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
   category=$1
-  curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
+  # If it is GCP runner (runner name contains gcp), do not run this
+  runner_name_str=${{ runner.name }}
+  if [[ $runner_name_str != *"gcp"* ]]; then
+    curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
+  else
+    echo "Runner is from Google Cloud Platform, No info on ec2 metadata"
+  fi
 }
 echo "ami-id: $(get_ec2_metadata ami-id)"
 echo "instance-id: $(get_ec2_metadata instance-id)"
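For anyone reproducing the guard outside of CI, a rough Python equivalent follows (assumptions: GitHub Actions mirrors runner.name into the RUNNER_NAME environment variable, and 169.254.169.254 is the standard EC2 instance-metadata endpoint):

import os
import urllib.request

def get_ec2_metadata(category: str) -> str:
    """Fetch an EC2 metadata field, skipping the lookup on GCP-hosted runners."""
    if "gcp" in os.environ.get("RUNNER_NAME", ""):
        return "Runner is from Google Cloud Platform, no EC2 metadata"
    url = f"http://169.254.169.254/latest/meta-data/{category}"
    with urllib.request.urlopen(url, timeout=2) as resp:  # link-local address, fail fast
        return resp.read().decode()

print("ami-id:", get_ec2_metadata("ami-id"))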
2 changes: 1 addition & 1 deletion .github/ci_commit_pins/vision.txt
@@ -1 +1 @@
-beb4bb706b5e13009cb5d5586505c6d2896d184a
+7d2acaa7d7fc600fa08fca18e9230f8651147025
1 change: 1 addition & 0 deletions .github/pytorch-probot.yml
@@ -6,6 +6,7 @@ ciflow_push_tags:
 - ciflow/binaries_libtorch
 - ciflow/binaries_wheel
 - ciflow/inductor
+- ciflow/inductor-perf-compare
 - ciflow/inductor-perf-test-nightly
 - ciflow/mps
 - ciflow/nightly
2 changes: 1 addition & 1 deletion .github/requirements-gha-cache.txt
@@ -7,7 +7,7 @@
 # .ci/docker/requirements-ci.txt
 boto3==1.19.12
 jinja2==3.0.1
-lintrunner==0.9.2
+lintrunner==0.10.7
 ninja==1.10.0.post1
 nvidia-ml-py==11.525.84
 pyyaml==6.0
37 changes: 29 additions & 8 deletions .github/scripts/build_triton_wheel.py
@@ -6,9 +6,15 @@
 import sys
 import shutil
 SCRIPT_DIR = Path(__file__).parent
+REPO_DIR = SCRIPT_DIR.parent.parent

 def read_triton_pin() -> str:
-    with open(SCRIPT_DIR.parent / "ci_commit_pins" / "triton.txt") as f:
+    with open(REPO_DIR / ".ci" / "docker" / "ci_commit_pins" / "triton.txt") as f:
         return f.read().strip()


+def read_triton_version() -> str:
+    with open(REPO_DIR / ".ci" / "docker" / "triton_version.txt") as f:
+        return f.read().strip()


@@ -19,26 +25,35 @@ def check_and_replace(inp: str, src: str, dst: str) -> str:
     return inp.replace(src, dst)


-def patch_setup_py(path: Path, *, version: str = "2.0.0", name: str = "triton") -> None:
+def patch_setup_py(path: Path, *, version: str, name: str = "triton") -> None:
     with open(path) as f:
         orig = f.read()
     # Replace name
     orig = check_and_replace(orig, "name=\"triton\",", f"name=\"{name}\",")
     # Replace version
-    orig = check_and_replace(orig, "version=\"2.0.0\",", f"version=\"{version}\",")
+    orig = check_and_replace(orig, f"version=\"{read_triton_version()}\",", f"version=\"{version}\",")
     with open(path, "w") as f:
         f.write(orig)


+def patch_init_py(path: Path, *, version: str) -> None:
+    with open(path) as f:
+        orig = f.read()
+    # Replace version
+    orig = check_and_replace(orig, "__version__ = '2.0.0'", f"__version__ = \"{version}\"")
+    with open(path, "w") as f:
+        f.write(orig)


-def build_triton(commit_hash: str, build_conda: bool = False, py_version : Optional[str] = None) -> Path:
+def build_triton(*, version: str, commit_hash: str, build_conda: bool = False, py_version : Optional[str] = None) -> Path:
     with TemporaryDirectory() as tmpdir:
         triton_basedir = Path(tmpdir) / "triton"
         triton_pythondir = triton_basedir / "python"
         check_call(["git", "clone", "https://github.com/openai/triton"], cwd=tmpdir)
         check_call(["git", "checkout", commit_hash], cwd=triton_basedir)
         if build_conda:
             with open(triton_basedir / "meta.yaml", "w") as meta:
-                print(f"package:\n  name: torchtriton\n  version: 2.0.0+{commit_hash[:10]}\n", file=meta)
+                print(f"package:\n  name: torchtriton\n  version: {version}+{commit_hash[:10]}\n", file=meta)
                 print("source:\n  path: .\n", file=meta)
                 print("build:\n  string: py{{py}}\n  number: 1\n  script: cd python; "
                       "python setup.py install --single-version-externally-managed --record=record.txt\n", file=meta)
@@ -47,6 +62,7 @@ def build_triton(commit_hash: str, build_conda: bool = False, py_version : Optio
                 print("about:\n  home: https://github.com/openai/triton\n  license: MIT\n  summary:"
                       " 'A language and compiler for custom Deep Learning operation'", file=meta)

+            patch_init_py(triton_pythondir / "triton" / "__init__.py", version=f"{version}+{commit_hash[:10]}")
             if py_version is None:
                 py_version = f"{sys.version_info.major}.{sys.version_info.minor}"
             check_call(["conda", "build", "--python", py_version,
@@ -55,7 +71,8 @@ def build_triton(commit_hash: str, build_conda: bool = False, py_version : Optio
             shutil.copy(conda_path, Path.cwd())
             return Path.cwd() / conda_path.name

-        patch_setup_py(triton_pythondir / "setup.py", name="pytorch-triton", version=f"2.0.0+{commit_hash[:10]}")
+        patch_setup_py(triton_pythondir / "setup.py", name="pytorch-triton", version=f"{version}+{commit_hash[:10]}")
+        patch_init_py(triton_pythondir / "triton" / "__init__.py", version=f"{version}+{commit_hash[:10]}")
         check_call([sys.executable, "setup.py", "bdist_wheel"], cwd=triton_pythondir)
         whl_path = list((triton_pythondir / "dist").glob("*.whl"))[0]
         shutil.copy(whl_path, Path.cwd())
@@ -67,9 +84,13 @@ def main() -> None:
     parser = ArgumentParser("Build Triton binaries")
     parser.add_argument("--build-conda", action="store_true")
     parser.add_argument("--py-version", type=str)
+    parser.add_argument("--commit-hash", type=str, default=read_triton_pin())
+    parser.add_argument("--triton-version", type=str, default=read_triton_version())
     args = parser.parse_args()
-    pin = read_triton_pin()
-    build_triton(pin, build_conda=args.build_conda, py_version=args.py_version)
+    build_triton(commit_hash=args.commit_hash,
+                 version=args.triton_version,
+                 build_conda=args.build_conda,
+                 py_version=args.py_version)


 if __name__ == "__main__":
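The version stamping in this script hinges on check_and_replace finding an exact pinned string, so if upstream triton ever changes its hard-coded version the build fails loudly rather than shipping a mislabeled wheel. A simplified, self-contained stand-in for the helper showing what patch_init_py effectively does (the raise message is assumed; the replaced strings come from the diff above):

def check_and_replace(inp: str, src: str, dst: str) -> str:
    """Replace src with dst, failing loudly if src is absent (simplified stand-in)."""
    if src not in inp:
        raise RuntimeError(f"Can't find {src} in the input")  # message assumed
    return inp.replace(src, dst)

# What patch_init_py effectively does to triton/__init__.py, using the new
# pinned version (2.1.0) and the first 10 characters of the new commit pin:
orig = "__version__ = '2.0.0'\n"
patched = check_and_replace(orig, "__version__ = '2.0.0'",
                            '__version__ = "2.1.0+2c32f43999"')
print(patched)  # __version__ = "2.1.0+2c32f43999"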
12 changes: 0 additions & 12 deletions .github/scripts/generate_ci_workflows.py
@@ -271,18 +271,6 @@ class OperatingSystem:
             isolated_workflow=True,
         ),
     ),
-    BinaryBuildWorkflow(
-        os=OperatingSystem.MACOS,
-        package_type="libtorch",
-        abi_version=generate_binary_build_matrix.PRE_CXX11_ABI,
-        build_configs=generate_binary_build_matrix.generate_libtorch_matrix(
-            OperatingSystem.MACOS, generate_binary_build_matrix.PRE_CXX11_ABI
-        ),
-        ciflow_config=CIFlowConfig(
-            labels={LABEL_CIFLOW_BINARIES, LABEL_CIFLOW_BINARIES_LIBTORCH},
-            isolated_workflow=True,
-        ),
-    ),
     BinaryBuildWorkflow(
         os=OperatingSystem.MACOS_ARM64,
         package_type="wheel",
35 changes: 3 additions & 32 deletions .github/scripts/test_trymerge.py
@@ -13,8 +13,6 @@

 from trymerge import (
     find_matching_merge_rule,
-    get_land_checkrun_conclusions,
-    validate_land_time_checks,
     gh_graphql,
     gh_get_team_members,
     read_merge_rules,
@@ -25,7 +23,6 @@
     PostCommentError,
     FlakyRule,
     categorize_checks,
-    get_combined_checks_from_pr_and_land_validation,
     get_rockset_results,
     main as trymerge_main,
     get_classifications,
@@ -111,9 +108,6 @@ def __init__(self) -> None:
         self.pr_num = 76123
         self.dry_run = True
         self.comment_id = 0
-        self.on_mandatory = False
-        self.on_green = False
-        self.land_checks = False
         self.reason = 'this is for testing'

     return Object()
@@ -128,9 +122,6 @@ def mock_merge(pr_num: int, repo: GitRepo,
                dry_run: bool = False,
                skip_mandatory_checks: bool = False,
                comment_id: Optional[int] = None,
-               mandatory_only: bool = False,
-               on_green: bool = False,
-               land_checks: bool = False,
                timeout_minutes: int = 400,
                stale_pr_days: int = 3) -> None:
     pass
@@ -346,20 +337,6 @@ def test_cancelled_gets_ignored(self, *args: Any) -> None:
         self.assertTrue(len(lint_checks) > 0)
         self.assertTrue(all([conclusions[name].status == "SUCCESS" for name in lint_checks]))

-    def test_get_many_land_checks(self, *args: Any) -> None:
-        """ Tests that all checkruns can be fetched for a commit
-        """
-        conclusions = get_land_checkrun_conclusions('pytorch', 'pytorch', '6882717f73deffb692219ccd1fd6db258d8ed684')
-        self.assertEqual(len(conclusions), 98)
-        self.assertTrue("pull / linux-docs / build-docs (cpp)" in conclusions.keys())
-
-    def test_failed_land_checks(self, *args: Any) -> None:
-        """ Tests that PR with Land Checks fail with a RunTime error
-        """
-        self.assertRaisesRegex(RuntimeError,
-                               ".*Failed to merge; some land checks failed.*",
-                               lambda: validate_land_time_checks('pytorch', 'pytorch', '6882717f73deffb692219ccd1fd6db258d8ed684'))
-
     @mock.patch('trymerge.gh_get_pr_info', return_value=mock_gh_get_info())
     @mock.patch('trymerge.parse_args', return_value=mock_parse_args(True, False))
     @mock.patch('trymerge.try_revert', side_effect=mock_revert)
@@ -376,10 +353,7 @@ def test_main_force(self, mock_merge: Any, mock_parse_args: Any, *args: Any) ->
             mock.ANY,
             dry_run=mock.ANY,
             skip_mandatory_checks=True,
-            comment_id=mock.ANY,
-            on_green=False,
-            land_checks=False,
-            mandatory_only=False)
+            comment_id=mock.ANY)

     @mock.patch('trymerge.gh_get_pr_info', return_value=mock_gh_get_info())
     @mock.patch('trymerge.parse_args', return_value=mock_parse_args(False, False))
@@ -390,10 +364,7 @@ def test_main_merge(self, mock_merge: Any, *args: Any) -> None:
             mock.ANY,
             dry_run=mock.ANY,
             skip_mandatory_checks=False,
-            comment_id=mock.ANY,
-            on_green=False,
-            land_checks=False,
-            mandatory_only=False)
+            comment_id=mock.ANY)

     @mock.patch('trymerge.read_merge_rules', side_effect=mocked_read_merge_rules)
     def test_revert_rules(self, *args: Any) -> None:
@@ -453,7 +424,7 @@ class TestBypassFailures(TestCase):
     def test_get_classifications(self, *args: Any) -> None:
         flaky_rules = [FlakyRule("distributed", ["##[error]The operation was canceled."])]
         pr = GitHubPR("pytorch", "pytorch", 92863)
-        checks = get_combined_checks_from_pr_and_land_validation(pr, None)
+        checks = pr.get_checkrun_conclusions()
         checks = get_classifications(pr.last_commit()['oid'], pr.get_merge_base(), checks, flaky_rules)
         self.assertTrue(
             checks[
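The rewritten test builds FlakyRule("distributed", ["##[error]The operation was canceled."]) and hands it to get_classifications. One plausible reading of that shape is a job-name substring plus a list of log captures, any of which marks a matching failure as flaky; the sketch below is self-contained (the real class lives in trymerge.py, and this reimplementation, its matches method, and the sample job name are assumptions for illustration):

from dataclasses import dataclass
from typing import List

@dataclass
class FlakyRule:
    name: str            # substring matched against the failing job's name
    captures: List[str]  # log snippets; any hit classifies the failure as flaky

    def matches(self, job_name: str, failure_log: str) -> bool:
        return self.name in job_name and any(c in failure_log for c in self.captures)

rule = FlakyRule("distributed", ["##[error]The operation was canceled."])
print(rule.matches("pull / linux-focal-py3 / test (distributed)",
                   "##[error]The operation was canceled."))  # True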
