Update on "Fix hybrid sparse COO tensor conversion to meta tensor"
As in the title.

Addresses a bug reported in #117907 (comment)
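
For context, a minimal sketch of the conversion being fixed — a hybrid sparse COO tensor (one with both sparse and dense dimensions) moved to the meta device. The shapes and values here are illustrative, assuming post-fix behavior; the exact reproducer is in the linked issue.

import torch

# A "hybrid" sparse COO tensor: 2 sparse dims plus 1 trailing dense dim.
indices = torch.tensor([[0, 1], [1, 0]])          # (sparse_dim, nnz)
values = torch.tensor([[1.0, 2.0], [3.0, 4.0]])   # (nnz, dense dims...)
x = torch.sparse_coo_tensor(indices, values, size=(2, 2, 2))
assert x.sparse_dim() == 2 and x.dense_dim() == 1

# The conversion this commit repairs: with the fix, the hybrid layout
# (including the dense dims) should survive the move to meta.
m = x.to(device="meta")
assert m.is_sparse and m.device.type == "meta"
assert m.dense_dim() == 1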




cc alexsamardzic nikitaved cpuhrsch amjames bhosmer jcaip

[ghstack-poisoned]
pearu committed Apr 29, 2024
2 parents 9510ac7 + db204db commit 3bb888e
Showing 256 changed files with 3,161 additions and 38,971 deletions.
8 changes: 8 additions & 0 deletions .ci/docker/build.sh
@@ -306,6 +306,12 @@ case "$image" in
DB=yes
VISION=yes
CONDA_CMAKE=yes
# snadampal: skipping sccache due to the following issue
# https://github.com/pytorch/pytorch/issues/121559
SKIP_SCCACHE_INSTALL=yes
# snadampal: skipping the llvm source build install because the current
# version from pytorch/llvm:9.0.1 is x86-specific
SKIP_LLVM_SRC_BUILD_INSTALL=yes
;;
*)
# Catch-all for builds that are not hardcoded.
@@ -399,6 +405,8 @@ DOCKER_BUILDKIT=1 docker build \
--build-arg "EXECUTORCH=${EXECUTORCH}" \
--build-arg "BASEKIT_VERSION=${BASEKIT_VERSION}" \
--build-arg "ACL=${ACL:-}" \
--build-arg "SKIP_SCCACHE_INSTALL=${SKIP_SCCACHE_INSTALL:-}" \
--build-arg "SKIP_LLVM_SRC_BUILD_INSTALL=${SKIP_LLVM_SRC_BUILD_INSTALL:-}" \
-f $(dirname ${DOCKERFILE})/Dockerfile \
-t "$tmp_tag" \
"$@" \
5 changes: 3 additions & 2 deletions .ci/docker/requirements-ci.txt
@@ -263,10 +263,11 @@ unittest-xml-reporting<=3.2.0,>=2.0.0
#Pinned versions:
#test that import:

#wheel not found on aarch64, and the source build requires rust
lintrunner==0.10.7 ; platform_machine == "x86_64"
#lintrunner is supported on aarch64-linux only from version 0.12.4
lintrunner==0.12.5 ; platform_machine == "aarch64"
#Description: all about linters!
#Pinned versions: 0.10.7
#Pinned versions: 0.10.7 on x86 and 0.12.5 on aarch64
#test that import:

rockset==1.0.3
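
As an aside, the platform guards on the lintrunner pins above are standard PEP 508 environment markers. A small illustrative sketch of how they evaluate, using the packaging library that pip builds on:

from packaging.markers import Marker

# The two guards from requirements-ci.txt; on any given host exactly one
# evaluates to True, so only one lintrunner version gets installed.
for spec in ('platform_machine == "x86_64"', 'platform_machine == "aarch64"'):
    print(spec, "->", Marker(spec).evaluate())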
6 changes: 5 additions & 1 deletion .ci/docker/ubuntu/Dockerfile
@@ -169,9 +169,11 @@ RUN rm install_acl.sh
ENV INSTALLED_ACL ${ACL}

# Install ccache/sccache (do this last, so we get priority in PATH)
ARG SKIP_SCCACHE_INSTALL
COPY ./common/install_cache.sh install_cache.sh
ENV PATH /opt/cache/bin:$PATH
RUN bash ./install_cache.sh && rm install_cache.sh
RUN if [ -z "${SKIP_SCCACHE_INSTALL}" ]; then bash ./install_cache.sh; fi
RUN rm install_cache.sh

# Add jni.h for java host build
COPY ./common/install_jni.sh install_jni.sh
@@ -188,7 +190,9 @@ ARG BUILD_ENVIRONMENT
ENV BUILD_ENVIRONMENT ${BUILD_ENVIRONMENT}

# Install LLVM dev version (Defined in the pytorch/builder github repository)
ARG SKIP_LLVM_SRC_BUILD_INSTALL
COPY --from=pytorch/llvm:9.0.1 /opt/llvm /opt/llvm
RUN if [ -n "${SKIP_LLVM_SRC_BUILD_INSTALL}" ]; then set -eu; rm -rf /opt/llvm; fi

# AWS specific CUDA build guidance
ENV TORCH_CUDA_ARCH_LIST Maxwell
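
A quick, purely illustrative way to check the effect of the two skip flags inside a built image (not part of the commit):

import shutil

# /opt/cache/bin is prepended to PATH by the Dockerfile above; when
# SKIP_SCCACHE_INSTALL was unset, sccache resolves from there, and when
# it was set, the lookup returns None.
print(shutil.which("sccache"))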
6 changes: 5 additions & 1 deletion .ci/pytorch/build.sh
@@ -376,4 +376,8 @@ if [[ "$BUILD_ENVIRONMENT" != *libtorch* && "$BUILD_ENVIRONMENT" != *bazel* ]];
python tools/stats/export_test_times.py
fi

print_sccache_stats
# snadampal: skipping this until sccache support is added for aarch64
# https://github.com/pytorch/pytorch/issues/121559
if [[ "$BUILD_ENVIRONMENT" != *aarch64* ]]; then
  print_sccache_stats
fi
5 changes: 5 additions & 0 deletions .ci/pytorch/test.sh
@@ -181,6 +181,11 @@ if [[ "$BUILD_ENVIRONMENT" != *-bazel-* ]] ; then
export PATH="$HOME/.local/bin:$PATH"
fi

if [[ "$BUILD_ENVIRONMENT" == *aarch64* ]]; then
# TODO: revisit this once the CI is stabilized on aarch64 linux
export VALGRIND=OFF
fi

install_tlparse

# DANGER WILL ROBINSON. The LD_PRELOAD here could cause you problems
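
Returning to the VALGRIND toggle added above: the consuming side lives elsewhere in the CI scripts, but a hypothetical sketch of the contract looks like this (the variable name is from the diff; the consumer logic is assumed):

import os

# Hypothetical consumer: treat anything other than "ON" as disabled.
run_valgrind_tests = os.environ.get("VALGRIND", "ON") == "ON"
print("valgrind-backed tests enabled:", run_valgrind_tests)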
1 change: 1 addition & 0 deletions .github/pytorch-probot.yml
@@ -8,6 +8,7 @@ ciflow_push_tags:
- ciflow/binaries_wheel
- ciflow/inductor
- ciflow/inductor-perf-compare
- ciflow/linux-aarch64
- ciflow/mps
- ciflow/nightly
- ciflow/periodic
38 changes: 38 additions & 0 deletions .github/workflows/linux-aarch64.yml
@@ -0,0 +1,38 @@
name: linux-aarch64

on:
  # For testing purposes; remove me later
  pull_request:
  push:
    tags:
      - ciflow/linux-aarch64/*
  workflow_dispatch:

concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref_name }}-${{ github.ref_type == 'branch' && github.sha }}-${{ github.event_name == 'workflow_dispatch' }}-${{ github.event_name == 'schedule' }}
  cancel-in-progress: true

jobs:
  linux-jammy-aarch64-py3_10-build:
    name: linux-jammy-aarch64-py3.10
    uses: ./.github/workflows/_linux-build.yml
    with:
      build-environment: linux-jammy-aarch64-py3.10
      docker-image-name: pytorch-linux-jammy-aarch64-py3.10-gcc11
      runner: linux.arm64.2xlarge
      test-matrix: |
        { include: [
          { config: "default", shard: 1, num_shards: 1, runner: "linux.arm64.2xlarge" },
        ]}

  linux-jammy-aarch64-py3_10-test:
    name: linux-jammy-aarch64-py3.10
    uses: ./.github/workflows/_linux-test.yml
    needs: linux-jammy-aarch64-py3_10-build
    permissions:
      id-token: write
      contents: read
    with:
      build-environment: linux-jammy-aarch64-py3.10
      docker-image: ${{ needs.linux-jammy-aarch64-py3_10-build.outputs.docker-image }}
      test-matrix: ${{ needs.linux-jammy-aarch64-py3_10-build.outputs.test-matrix }}
3 changes: 0 additions & 3 deletions .lintrunner.toml
@@ -1051,8 +1051,6 @@ exclude_patterns = [
'test/quantization/fx/test_numeric_suite_fx.py',
'test/quantization/fx/test_quantize_fx.py',
'test/quantization/fx/test_subgraph_rewriter.py',
'test/test_custom_op_testing.py',
'test/test_dataloader.py',
'test/test_datapipe.py',
'test/test_decomp.py',
'test/test_deploy.py',
@@ -1065,7 +1063,6 @@ exclude_patterns = [
'test/test_function_schema.py',
'test/test_functional_autograd_benchmark.py',
'test/test_functional_optim.py',
'test/test_functionalization.py',
'test/test_functionalization_of_rng_ops.py',
'test/test_futures.py',
'test/test_fx.py',
52 changes: 1 addition & 51 deletions BUILD.bazel
@@ -446,30 +446,13 @@ cu_library(
# caffe2
CAFFE2_COPTS = COMMON_COPTS + [
"-Dcaffe2_EXPORTS",
"-DCAFFE2_USE_GLOO",
"-DCAFFE2_USE_CUDNN",
"-DCAFFE2_BUILD_MAIN_LIB",
"-fvisibility-inlines-hidden",
"-fno-math-errno",
"-fno-trapping-math",
]

filegroup(
name = "caffe2_contrib_srcs",
srcs = [
"caffe2/contrib/aten/aten_op.cc",
"caffe2/contrib/gloo/allgather_ops.cc",
"caffe2/contrib/gloo/allreduce_ops.cc",
"caffe2/contrib/gloo/barrier_ops.cc",
"caffe2/contrib/gloo/broadcast_ops.cc",
"caffe2/contrib/gloo/common.cc",
"caffe2/contrib/gloo/common_world_ops.cc",
"caffe2/contrib/gloo/context.cc",
"caffe2/contrib/gloo/reduce_scatter_ops.cc",
"caffe2/contrib/gloo/store_handler.cc",
],
)

filegroup(
name = "caffe2_core_srcs",
srcs = [
@@ -1024,10 +1007,6 @@ filegroup(
filegroup(
name = "caffe2_cuda_cpp_srcs",
srcs = [
"caffe2/contrib/aten/aten_op_gpu.cc",
"caffe2/contrib/gloo/allreduce_ops_gpu.cc",
"caffe2/contrib/gloo/broadcast_ops_gpu.cc",
"caffe2/contrib/gloo/common_world_ops_gpu.cc",
"caffe2/core/blob_serialization_gpu.cc",
"caffe2/core/common_cudnn.cc",
"caffe2/core/common_gpu.cc",
@@ -1271,35 +1250,10 @@ cc_library(
],
)

py_binary(
name = "gen_op",
srcs = ["caffe2/contrib/aten/gen_op.py"],
deps = ["//torchgen"],
)

genrule(
name = "generated_caffe2_aten_op_headers",
srcs = [
"caffe2/contrib/aten/aten_op_template.h",
"aten/src/ATen/Declarations.yaml",
],
outs = ["caffe2/caffe2/contrib/aten/gen_aten_op.h"],
cmd = """
$(location :gen_op) \
--output_prefix gen_ \
--install_dir $(@D) \
--aten_root `dirname $(location aten/src/ATen/Declarations.yaml)`/../.. \
--template_dir `dirname $(location caffe2/contrib/aten/aten_op_template.h)` \
--yaml_dir `dirname $(location aten/src/ATen/Declarations.yaml)`""",
tools = [":gen_op"],
)

cc_library(
name = "caffe2_headers",
hdrs = glob(
[
"caffe2/contrib/aten/*.h",
"caffe2/contrib/gloo/*.h",
"caffe2/core/*.h",
"caffe2/core/nomnigraph/include/nomnigraph/Converters/*.h",
"caffe2/core/nomnigraph/include/nomnigraph/Generated/*.h",
@@ -1338,10 +1292,9 @@ cc_library(
) + if_cuda(glob([
"caffe2/**/*.cuh",
"caffe2/image/*.h",
])) + [":generated_caffe2_aten_op_headers"],
])),
copts = CAFFE2_COPTS,
includes = [
"caffe2/contrib/aten",
"caffe2/core/nomnigraph/include",
],
visibility = ["//visibility:public"],
@@ -1385,7 +1338,6 @@ cc_library(
"caffe2/db/create_db_op.cc",
"caffe2/db/protodb.cc",
"caffe2/share/contrib/depthwise/depthwise3x3_conv_op.cc",
":caffe2_contrib_srcs",
":caffe2_core_srcs",
":caffe2_distributed_srcs",
":caffe2_ideep_srcs",
@@ -1419,7 +1371,6 @@ cc_library(
"@fbgemm//:fbgemm_src_headers",
"@fmt",
"@foxi",
"@gloo",
"@onnx",
] + if_cuda(
[
@@ -1467,7 +1418,6 @@ cu_library(
"@cuda//:curand",
"@cudnn",
"@eigen",
"@gloo",
"@tensorpipe//:tensorpipe_cuda",
],
alwayslink = True,
8 changes: 4 additions & 4 deletions benchmarks/dynamo/common.py
@@ -2478,7 +2478,7 @@ def record_status(accuracy_status, dynamo_start_stats):
if isinstance(e, torch.cuda.OutOfMemoryError)
else "eager_1st_run_fail"
)
log.exception(e)
log.exception("")
return record_status(accuracy_status, dynamo_start_stats=start_stats)
finally:
del model_copy
@@ -2499,7 +2499,7 @@ def record_status(accuracy_status, dynamo_start_stats):
if isinstance(e, torch.cuda.OutOfMemoryError)
else "eager_2nd_run_fail"
)
log.exception(e)
log.exception("")
return record_status(accuracy_status, dynamo_start_stats=start_stats)
finally:
del model_copy
@@ -2551,7 +2551,7 @@ def record_status(accuracy_status, dynamo_start_stats):
with maybe_enable_compiled_autograd(self.args.compiled_autograd):
new_result = optimized_model_iter_fn(model_copy, example_inputs)
except Exception as e:
log.exception(e)
log.exception("")
print(
"TorchDynamo optimized model failed to run because of following error"
)
@@ -2653,7 +2653,7 @@ def check_tolerance(
optimized_model_iter_fn = optimize_ctx(self.run_n_iterations)
new_result = optimized_model_iter_fn(model, example_inputs)
except Exception as e:
log.exception(e)
log.exception("")
print(
"TorchDynamo optimized model failed to run because of following error"
)
2 changes: 1 addition & 1 deletion benchmarks/dynamo/runner.py
@@ -1452,7 +1452,7 @@ def update(self):
try:
RegressionTracker(self.args).diff()
except Exception as e:
logging.exception(e)
logging.exception("")
with open(f"{self.args.output_dir}/gh_regression.txt", "w") as gh_fh:
gh_fh.write("")

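The pattern behind the log.exception changes above, for reference: logging.exception already logs at ERROR level and appends the active traceback on its own, so passing the exception object as the message only duplicates information. A minimal sketch:

import logging

logging.basicConfig(level=logging.INFO)
log = logging.getLogger(__name__)

try:
    1 / 0
except Exception:
    # The traceback is attached automatically; no need to pass `e`.
    log.exception("")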
18 changes: 0 additions & 18 deletions caffe2/CMakeLists.txt
@@ -59,23 +59,7 @@ if(INTERN_BUILD_ATEN_OPS)

# Generate the headers wrapped by our operator
file(GLOB_RECURSE torchgen_python "${PROJECT_SOURCE_DIR}/torchgen/*.py")
add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/contrib/aten/aten_op.h
COMMAND
"${PYTHON_EXECUTABLE}" ${CMAKE_CURRENT_SOURCE_DIR}/contrib/aten/gen_op.py
--aten_root=${CMAKE_CURRENT_SOURCE_DIR}/../aten
--template_dir=${CMAKE_CURRENT_SOURCE_DIR}/contrib/aten
--yaml_dir=${CMAKE_BINARY_DIR}/aten/src/ATen
--install_dir=${CMAKE_CURRENT_BINARY_DIR}/contrib/aten
DEPENDS
${torchgen_python}
${CMAKE_BINARY_DIR}/aten/src/ATen/Declarations.yaml
${CMAKE_CURRENT_SOURCE_DIR}/contrib/aten/gen_op.py
${CMAKE_CURRENT_SOURCE_DIR}/contrib/aten/aten_op_template.h)

add_custom_target(__aten_op_header_gen
DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/contrib/aten/aten_op.h)
add_library(aten_op_header_gen INTERFACE)
add_dependencies(aten_op_header_gen __aten_op_header_gen)

# Add source, includes, and libs to lists
list(APPEND Caffe2_CPU_SRCS ${ATen_CPU_SRCS})
@@ -132,7 +116,6 @@ endif()

# Skip modules that are not used by libtorch mobile yet.
if(BUILD_CAFFE2 AND NOT INTERN_BUILD_MOBILE)
add_subdirectory(contrib)
add_subdirectory(predictor)
add_subdirectory(predictor/emulator)
add_subdirectory(core/nomnigraph)
@@ -141,7 +124,6 @@ if(BUILD_CAFFE2 AND NOT INTERN_BUILD_MOBILE)
endif()
add_subdirectory(db)
add_subdirectory(distributed)
# add_subdirectory(experiments) # note, we may remove this folder at some point
add_subdirectory(ideep)
add_subdirectory(image)
add_subdirectory(video)
37 changes: 0 additions & 37 deletions caffe2/contrib/CMakeLists.txt

This file was deleted.

Empty file removed: caffe2/contrib/__init__.py
12 changes: 0 additions & 12 deletions caffe2/contrib/aten/CMakeLists.txt

This file was deleted.
