adjust package name; remove xfail for unexpected pass on "[ONNX] Re-purpose 'name' field of GraphProto"

Previously, the top-level GraphProto was hardcoded with the name "torch_jit", and the subgraphs with "torch_jit_{count}". That name offers no insight into the graph; it merely encodes the graph producer as jit (TorchScript). This is no longer accurate now that the graph can also be produced from dynamo. As a naive first step, this PR re-purposes the name to "main_graph" and "sub_graph_{count}", respectively. More delicate processing could name the subgraphs with respect to their parent node or module; this can be done in follow-ups. [ghstack-poisoned]
pytorch · Aug 22, 2023 · 51e2e68 · 51e2e68
2 parents db3f4c0 + ca292c4
commit 51e2e68
Show file tree

Hide file tree

Showing 327 changed files with 6,390 additions and 6,075 deletions.
diff --git a/.ci/pytorch/multigpu-test.sh b/.ci/pytorch/multigpu-test.sh
@@ -36,8 +36,9 @@ time python test/run_test.py --verbose -i distributed/test_functional_api
 
 
 # DTensor tests
-time python test/run_test.py --verbose -i distributed/_tensor/test_device_mesh.py
-time python test/run_test.py --verbose -i distributed/_tensor/test_random_ops.py
+time python test/run_test.py --verbose -i distributed/_tensor/test_device_mesh
+time python test/run_test.py --verbose -i distributed/_tensor/test_random_ops
+time python test/run_test.py --verbose -i distributed/_tensor/test_dtensor_compile
 
 # DTensor/TP tests
 time python test/run_test.py --verbose -i distributed/tensor/parallel/test_ddp_2d_parallel

diff --git a/.github/ci_commit_pins/vision.txt b/.github/ci_commit_pins/vision.txt
@@ -1 +1 @@
-a7501e13087ec74af9f52ec155ec1948f6318c90
+2c44ebaeece31b0cc9a7385e406312f741333ab5
diff --git a/.github/ci_commit_pins/xla.txt b/.github/ci_commit_pins/xla.txt
@@ -1 +1 @@
-709c75a04d9b35d83a9509e1534a8aa2046b8912
+c3c16ccac41cb2db6ba88fb31342f4af62c7e15a
diff --git a/.github/scripts/comment_on_pr.py b/.github/scripts/comment_on_pr.py
@@ -24,7 +24,7 @@ def main() -> None:
     job_link = f"[job]({run_url})" if run_url is not None else "job"
     msg = (
         f"The {args.action} {job_link} was canceled. If you believe this is a mistake,"
-        + f"then you can re trigger it through [pytorch-bot]({BOT_COMMANDS_WIKI})."
+        + f" then you can re trigger it through [pytorch-bot]({BOT_COMMANDS_WIKI})."
     )
 
     gh_post_pr_comment(org, project, args.pr_num, msg)

diff --git a/.github/workflows/upload-test-stats.yml b/.github/workflows/upload-test-stats.yml
@@ -85,7 +85,7 @@ jobs:
 
   check-api-rate:
     if: ${{ always() }}
-    runs-on: [self-hosted, linux.2xlarge]
+    runs-on: ubuntu-latest
     continue-on-error: true
     environment: pytorchbot-env
     steps:

diff --git a/.lintrunner.toml b/.lintrunner.toml
@@ -197,6 +197,7 @@ include_patterns = [
     'torch/_inductor/lowering.py',
     'torch/_inductor/metrics.py',
     'torch/_inductor/select_algorithm.py',
+    'torch/_inductor/triton_heuristics.py',
     'torch/_inductor/wrapper_benchmark.py',
     'torch/_inductor/fx_passes/post_grad.py',
     'torch/_inductor/kernel/mm_common.py',
@@ -1214,6 +1215,7 @@ exclude_patterns = [
     'test/fx/test_source_matcher_utils.py',
     'test/fx/test_subgraph_rewriter.py',
     'test/fx/test_z3_gradual_types.py',
+    'test/fx/test_fx_split.py',
     'test/jit/__init__.py',
     'test/jit/_imported_class_test/__init__.py',
     'test/jit/_imported_class_test/bar.py',
@@ -2642,7 +2644,7 @@ command = [
 
 [[linter]]
 code = 'RUFF'
-include_patterns = ['**/*.py']
+include_patterns = ['**/*.py', '**/*.pyi']
 exclude_patterns = [
     'caffe2/**',
     'functorch/docs/**',
@@ -2664,6 +2666,6 @@ init_command = [
     'python3',
     'tools/linter/adapters/pip_init.py',
     '--dry-run={{DRYRUN}}',
-    'ruff==0.0.280',
+    'ruff==0.0.285',
 ]
 is_formatter = true
diff --git a/aten/src/ATen/DLConvertor.cpp b/aten/src/ATen/DLConvertor.cpp
@@ -1,9 +1,6 @@
 #include <ATen/DLConvertor.h>
 #include <ATen/Functions.h>
 
-#include <iostream>
-#include <sstream>
-
 using namespace std;
 namespace at {
 

diff --git a/aten/src/ATen/autocast_mode.cpp b/aten/src/ATen/autocast_mode.cpp
@@ -1,6 +1,5 @@
 #include <ATen/autocast_mode.h>
 
-#include <iostream>
 #include <exception>
 #include <mutex>
 #include <ATen/CachedTensorUtils.h>

diff --git a/aten/src/ATen/core/ATen_pch.h b/aten/src/ATen/core/ATen_pch.h
@@ -24,20 +24,12 @@
 //    Where the threshold can be tweaked until c10 and some of ATen
 //    core are included but TORCH_ASSERT_NO_OPERATORS still passes.
 
-#include <cassert>
-#include <cctype>
 #include <cerrno>
-#include <climits>
-#include <clocale>
 #include <cmath>
 #include <cstddef>
 #include <cstdint>
-#include <cstdio>
 #include <cstdlib>
 #include <cstring>
-#include <ctime>
-#include <cwchar>
-#include <cwctype>
 
 #include <algorithm>
 #include <array>
@@ -46,31 +38,25 @@
 #include <complex>
 #include <deque>
 #include <exception>
-#include <fstream>
 #include <functional>
 #include <initializer_list>
 #include <iomanip>
-#include <ios>
 #include <iosfwd>
-#include <istream>
 #include <iterator>
 #include <limits>
-#include <locale>
+#include <list>
 #include <map>
 #include <memory>
 #include <mutex>
 #include <new>
 #include <numeric>
 #include <ostream>
-#include <ratio>
-#include <set>
 #include <sstream>
 #include <stdexcept>
-#include <streambuf>
 #include <string>
-#include <system_error>
 #include <tuple>
 #include <type_traits>
+#include <typeindex>
 #include <typeinfo>
 #include <unordered_map>
 #include <unordered_set>
@@ -80,51 +66,51 @@
 #include <c10/core/Allocator.h>
 #include <c10/core/AutogradState.h>
 #include <c10/core/Backend.h>
-#include <c10/core/CopyBytes.h>
 #include <c10/core/DefaultDtype.h>
 #include <c10/core/Device.h>
-#include <c10/core/DeviceGuard.h>
 #include <c10/core/DeviceType.h>
 #include <c10/core/DispatchKey.h>
 #include <c10/core/DispatchKeySet.h>
 #include <c10/core/GeneratorImpl.h>
-#include <c10/core/GradMode.h>
 #include <c10/core/InferenceMode.h>
 #include <c10/core/Layout.h>
 #include <c10/core/MemoryFormat.h>
+#include <c10/core/OptionalRef.h>
 #include <c10/core/QScheme.h>
+#include <c10/core/Scalar.h>
 #include <c10/core/ScalarType.h>
 #include <c10/core/ScalarTypeToTypeMeta.h>
 #include <c10/core/Storage.h>
 #include <c10/core/StorageImpl.h>
-#include <c10/core/Stream.h>
+#include <c10/core/SymBool.h>
+#include <c10/core/SymFloat.h>
 #include <c10/core/SymInt.h>
 #include <c10/core/SymIntArrayRef.h>
+#include <c10/core/SymNodeImpl.h>
 #include <c10/core/TensorImpl.h>
 #include <c10/core/TensorOptions.h>
 #include <c10/core/UndefinedTensorImpl.h>
 #include <c10/core/WrapDimMinimal.h>
-#include <c10/core/impl/DeviceGuardImplInterface.h>
-#include <c10/core/impl/InlineDeviceGuard.h>
 #include <c10/core/impl/LocalDispatchKeySet.h>
 #include <c10/core/impl/PyInterpreter.h>
 #include <c10/core/impl/SizesAndStrides.h>
-#include <c10/core/impl/VirtualGuardImpl.h>
 
 #include <c10/macros/Export.h>
 #include <c10/macros/Macros.h>
 
 #include <c10/util/AlignOf.h>
-#include <c10/util/Array.h>
 #include <c10/util/ArrayRef.h>
 #include <c10/util/BFloat16.h>
-#include <c10/util/Backtrace.h>
 #include <c10/util/C++17.h>
 #include <c10/util/ConstexprCrc.h>
 #include <c10/util/Deprecated.h>
+#include <c10/util/DimVector.h>
 #include <c10/util/Exception.h>
 #include <c10/util/ExclusivelyOwned.h>
 #include <c10/util/Flags.h>
+#include <c10/util/Float8_e4m3fn.h>
+#include <c10/util/Float8_e5m2.h>
+#include <c10/util/FunctionRef.h>
 #include <c10/util/Half.h>
 #include <c10/util/IdWrapper.h>
 #include <c10/util/Logging.h>
@@ -143,8 +129,10 @@
 #include <c10/util/TypeTraits.h>
 #include <c10/util/UniqueVoidPtr.h>
 #include <c10/util/accumulate.h>
+#include <c10/util/bit_cast.h>
+#include <c10/util/bits.h>
 #include <c10/util/complex.h>
-#include <c10/util/flat_hash_map.h>
+#include <c10/util/floating_point_utils.h>
 #include <c10/util/in_place.h>
 #include <c10/util/intrusive_ptr.h>
 #include <c10/util/irange.h>
@@ -160,7 +148,10 @@
 #include <c10/util/string_utils.h>
 #include <c10/util/string_view.h>
 #include <c10/util/typeid.h>
+#include <c10/util/variant.h>
 
+#include <ATen/StorageUtils.h>
+#include <ATen/core/ATen_fwd.h>
 #include <ATen/core/DeprecatedTypeProperties.h>
 #include <ATen/core/DeprecatedTypePropertiesRegistry.h>
 #include <ATen/core/DimVector.h>

diff --git a/aten/src/ATen/core/Range.cpp b/aten/src/ATen/core/Range.cpp
@@ -1,6 +1,6 @@
 #include <ATen/core/Range.h>
 
-#include <iostream>
+#include <ostream>
 
 namespace at {
 

diff --git a/aten/src/ATen/core/dispatch/Dispatcher.h b/aten/src/ATen/core/dispatch/Dispatcher.h
@@ -18,6 +18,10 @@
 #include <ATen/core/grad_mode.h>
 #include <ATen/core/enum_tag.h>
 
+#ifndef NDEBUG
+#include <iostream>
+#endif
+
 namespace c10 {
 
 TORCH_API bool show_dispatch_trace();

diff --git a/aten/src/ATen/core/function_schema_inl.h b/aten/src/ATen/core/function_schema_inl.h
@@ -1,5 +1,6 @@
 #pragma once
-#include <iostream>
+#include <ostream>
+#include <sstream>
 
 // note: windows build doesn't find symbols in operator files unless
 // this is a header file

diff --git a/aten/src/ATen/core/interned_strings.cpp b/aten/src/ATen/core/interned_strings.cpp
@@ -4,7 +4,6 @@
 #include <ATen/core/interned_strings.h>
 #include <cstdint>
 #include <cstring>
-#include <iostream>
 #include <mutex>
 #include <sstream>
 #include <string>

diff --git a/aten/src/ATen/core/union_type.cpp b/aten/src/ATen/core/union_type.cpp
@@ -7,7 +7,8 @@
 #include <ATen/core/type_factory.h>
 #include <c10/macros/Macros.h>
 #include <c10/util/irange.h>
-#include <iostream>
+#include <ostream>
+#include <sstream>
 #include <utility>
 
 namespace c10 {

diff --git a/aten/src/ATen/cpu/vec/functional_bfloat16.h b/aten/src/ATen/cpu/vec/functional_bfloat16.h
@@ -404,6 +404,41 @@ inline void map(
   }
 }
 
+template <typename scalar_t, typename Op,
+          typename std::enable_if_t<is_reduced_floating_point_v<scalar_t>, int> = 0>
+inline void map(
+    const Op& vec_fun,
+    scalar_t* output_data,
+    const float* input_data,
+    int64_t size) {
+  using bVec = vec::Vectorized<scalar_t>;
+  using fVec = vec::Vectorized<float>;
+  int64_t d = 0;
+  for (; d < size - (size % bVec::size()); d += bVec::size()) {
+    fVec data_fvec0 = fVec::loadu(input_data + d);
+    fVec data_fvec1 = fVec::loadu(input_data + d + fVec::size());
+    fVec output_fvec0 = vec_fun(data_fvec0);
+    fVec output_fvec1 = vec_fun(data_fvec1);
+    bVec output_bvec = convert_from_float<scalar_t>(output_fvec0, output_fvec1);
+    output_bvec.store(output_data + d);
+  }
+  if (size - d > 0) {
+    fVec data_fvec0, data_fvec1;
+    if (size - d > fVec::size()) {
+      data_fvec0 = fVec::loadu(input_data + d);
+      data_fvec1 = fVec::loadu(input_data + d + fVec::size(), size - d - fVec::size());
+    } else {
+      // choose to align with behaviour of bVec::loadu(ptr, size),
+      // which leaves data_fvec1 uninitialized
+      data_fvec0 = fVec::loadu(input_data + d, size - d);
+    }
+    fVec output_fvec0 = vec_fun(data_fvec0);
+    fVec output_fvec1 = vec_fun(data_fvec1);
+    bVec output_bvec = convert_from_float<scalar_t>(output_fvec0, output_fvec1);
+    output_bvec.store(output_data + d, size - d);
+  }
+}
+
 template <typename scalar_t, typename Op,
           typename std::enable_if_t<is_reduced_floating_point_v<scalar_t>, int> = 0>
 inline void map2(

diff --git a/aten/src/ATen/cpu/vec/vec256/vec256_int.h b/aten/src/ATen/cpu/vec/vec256/vec256_int.h
@@ -7,7 +7,6 @@
 #include <ATen/cpu/vec/vec_base.h>
 #include <c10/macros/Macros.h>
 #include <c10/util/irange.h>
-#include <iostream>
 
 namespace at {
 namespace vec {

diff --git a/aten/src/ATen/cpu/vec/vec256/vec256_qint.h b/aten/src/ATen/cpu/vec/vec256/vec256_qint.h
@@ -14,7 +14,6 @@
 
 #include <array>
 #include <cmath>
-#include <iostream>
 
 // This file defines Vectorized<> for the quantized types.
 //

diff --git a/aten/src/ATen/cpu/vec/vec512/vec512.h b/aten/src/ATen/cpu/vec/vec512/vec512.h
@@ -18,7 +18,7 @@
 #include <cstddef>
 #include <cstdint>
 #include <cstring>
-#include <iostream>
+#include <ostream>
 
 namespace at {
 namespace vec {

diff --git a/aten/src/ATen/cpu/vec/vec_base.h b/aten/src/ATen/cpu/vec/vec_base.h
@@ -21,7 +21,6 @@
 #include <functional>
 #include <cmath>
 #include <type_traits>
-#include <bitset>
 #include <climits>
 
 #include <ATen/cpu/vec/intrinsics.h>

diff --git a/aten/src/ATen/cpu/vec/vec_half.h b/aten/src/ATen/cpu/vec/vec_half.h
diff --git a/aten/src/ATen/functorch/TensorWrapper.cpp b/aten/src/ATen/functorch/TensorWrapper.cpp
@@ -11,6 +11,8 @@
 #include <torch/library.h>
 #include <ATen/core/dispatch/Dispatcher.h>
 
+#include <iostream>
+
 namespace at {
 namespace functorch {