diff --git a/.circleci/config.yml b/.circleci/config.yml index e278df1efa2ca..a629765f5d420 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -2786,12 +2786,12 @@ workflows: docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-xenial-py3.6-gcc5.4:9a3986fa-7ce7-4a36-a001-3c9bef9892e2" resource_class: large - pytorch_linux_test: - name: pytorch_linux_xenial_py3_6_gcc5_4_ge_config_profiling_test + name: pytorch_linux_xenial_py3_6_gcc5_4_ge_config_simple_test requires: - setup - pytorch_linux_xenial_py3_6_gcc5_4_build - build_environment: "pytorch-linux-xenial-py3.6-gcc5.4-ge_config_profiling-test" - docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-xenial-py3.6-gcc5.4:9a3986fa-7ce7-4a36-a001-3c9bef9892e2" + build_environment: "pytorch-linux-xenial-py3.6-gcc5.4-ge_config_simple-test" + docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-xenial-py3.6-gcc5.4:8fcf46ef-4a34-480b-a8ee-b0a30a4d3e59" resource_class: large - pytorch_linux_test: name: pytorch_linux_xenial_cuda10_2_cudnn7_py3_ge_config_legacy_test @@ -2802,15 +2802,6 @@ workflows: docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-xenial-cuda10.2-cudnn7-py3-gcc7:9a3986fa-7ce7-4a36-a001-3c9bef9892e2" use_cuda_docker_runtime: "1" resource_class: gpu.medium - - pytorch_linux_test: - name: pytorch_linux_xenial_cuda10_2_cudnn7_py3_ge_config_profiling_test - requires: - - setup - - pytorch_linux_xenial_cuda10_2_cudnn7_py3_gcc7_build - build_environment: "pytorch-linux-xenial-cuda10.1-cudnn7-ge_config_profiling-test" - docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-xenial-cuda10.2-cudnn7-py3-gcc7:9a3986fa-7ce7-4a36-a001-3c9bef9892e2" - use_cuda_docker_runtime: "1" - resource_class: gpu.medium - pytorch_linux_bazel_build: name: pytorch_bazel_build requires: diff --git a/.circleci/verbatim-sources/workflows-pytorch-ge-config-tests.yml b/.circleci/verbatim-sources/workflows-pytorch-ge-config-tests.yml index 58004904a3ffe..d5c9e7e98b9f7 100644 --- a/.circleci/verbatim-sources/workflows-pytorch-ge-config-tests.yml +++ b/.circleci/verbatim-sources/workflows-pytorch-ge-config-tests.yml @@ -7,12 +7,12 @@ docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-xenial-py3.6-gcc5.4:9a3986fa-7ce7-4a36-a001-3c9bef9892e2" resource_class: large - pytorch_linux_test: - name: pytorch_linux_xenial_py3_6_gcc5_4_ge_config_profiling_test + name: pytorch_linux_xenial_py3_6_gcc5_4_ge_config_simple_test requires: - setup - pytorch_linux_xenial_py3_6_gcc5_4_build - build_environment: "pytorch-linux-xenial-py3.6-gcc5.4-ge_config_profiling-test" - docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-xenial-py3.6-gcc5.4:9a3986fa-7ce7-4a36-a001-3c9bef9892e2" + build_environment: "pytorch-linux-xenial-py3.6-gcc5.4-ge_config_simple-test" + docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-xenial-py3.6-gcc5.4:8fcf46ef-4a34-480b-a8ee-b0a30a4d3e59" resource_class: large - pytorch_linux_test: name: pytorch_linux_xenial_cuda10_2_cudnn7_py3_ge_config_legacy_test @@ -23,12 +23,3 @@ docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-xenial-cuda10.2-cudnn7-py3-gcc7:9a3986fa-7ce7-4a36-a001-3c9bef9892e2" use_cuda_docker_runtime: "1" resource_class: gpu.medium - - pytorch_linux_test: - name: pytorch_linux_xenial_cuda10_2_cudnn7_py3_ge_config_profiling_test - requires: - - setup - - pytorch_linux_xenial_cuda10_2_cudnn7_py3_gcc7_build - build_environment: "pytorch-linux-xenial-cuda10.1-cudnn7-ge_config_profiling-test" - docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-xenial-cuda10.2-cudnn7-py3-gcc7:9a3986fa-7ce7-4a36-a001-3c9bef9892e2" - use_cuda_docker_runtime: "1" - resource_class: gpu.medium diff --git a/.jenkins/pytorch/macos-test.sh b/.jenkins/pytorch/macos-test.sh index 64bdf42a01092..a883f0d107a12 100755 --- a/.jenkins/pytorch/macos-test.sh +++ b/.jenkins/pytorch/macos-test.sh @@ -63,7 +63,7 @@ test_python_all() { # Increase default limit on open file handles from 256 to 1024 ulimit -n 1024 - python test/run_test.py --verbose --exclude test_jit_profiling test_jit_legacy test_jit_fuser_legacy test_jit_fuser_profiling test_jit_fuser_te test_tensorexpr --determine-from="$DETERMINE_FROM" + python test/run_test.py --verbose --exclude test_jit_simple test_jit_legacy test_jit_fuser_legacy --determine-from="$DETERMINE_FROM" assert_git_not_dirty } diff --git a/.jenkins/pytorch/test.sh b/.jenkins/pytorch/test.sh index 48cc3611dacdb..c8e83257df6ef 100755 --- a/.jenkins/pytorch/test.sh +++ b/.jenkins/pytorch/test.sh @@ -143,8 +143,8 @@ test_python_nn() { assert_git_not_dirty } -test_python_ge_config_profiling() { - time python test/run_test.py --include test_jit_profiling test_jit_fuser_profiling test_jit_fuser_te --verbose --determine-from="$DETERMINE_FROM" +test_python_ge_config_simple() { + time python test/run_test.py --include test_jit_simple --verbose --determine-from="$DETERMINE_FROM" assert_git_not_dirty } @@ -154,7 +154,7 @@ test_python_ge_config_legacy() { } test_python_all_except_nn() { - time python test/run_test.py --exclude test_nn test_jit_profiling test_jit_legacy test_jit_fuser_legacy test_jit_fuser_profiling test_jit_fuser_te test_tensorexpr --verbose --determine-from="$DETERMINE_FROM" + time python test/run_test.py --exclude test_nn test_jit_simple test_jit_legacy test_jit_fuser_legacy --verbose --determine-from="$DETERMINE_FROM" assert_git_not_dirty } @@ -294,8 +294,8 @@ elif [[ "${BUILD_ENVIRONMENT}" == *xla* || "${JOB_BASE_NAME}" == *xla* ]]; then test_xla elif [[ "${BUILD_ENVIRONMENT}" == *ge_config_legacy* || "${JOB_BASE_NAME}" == *ge_config_legacy* ]]; then test_python_ge_config_legacy -elif [[ "${BUILD_ENVIRONMENT}" == *ge_config_profiling* || "${JOB_BASE_NAME}" == *ge_config_profiling* ]]; then - test_python_ge_config_profiling +elif [[ "${BUILD_ENVIRONMENT}" == *ge_config_simple* || "${JOB_BASE_NAME}" == *ge_config_simple* ]]; then + test_python_ge_config_simple elif [[ "${BUILD_ENVIRONMENT}" == *libtorch* ]]; then # TODO: run some C++ tests echo "no-op at the moment" diff --git a/.jenkins/pytorch/win-test-helpers/test_python_all_except_nn.bat b/.jenkins/pytorch/win-test-helpers/test_python_all_except_nn.bat index b0be5f4883b1c..042d116ff570c 100644 --- a/.jenkins/pytorch/win-test-helpers/test_python_all_except_nn.bat +++ b/.jenkins/pytorch/win-test-helpers/test_python_all_except_nn.bat @@ -1,3 +1,3 @@ call %SCRIPT_HELPERS_DIR%\setup_pytorch_env.bat -cd test && python run_test.py --exclude test_jit_profiling test_jit_legacy test_jit_fuser_legacy test_jit_fuser_profiling test_jit_fuser_te test_tensorexpr --verbose --determine-from="%1" && cd .. +cd test && python run_test.py --exclude test_jit_legacy test_jit_fuser_legacy --verbose --determine-from="%1" && cd .. if ERRORLEVEL 1 exit /b 1 diff --git a/test/run_test.py b/test/run_test.py index 10da6e8fa1ec0..261ebb1773f33 100755 --- a/test/run_test.py +++ b/test/run_test.py @@ -58,10 +58,9 @@ 'test_type_hints', 'test_utils', 'test_namedtuple_return_api', - 'test_jit_profiling', + 'test_jit_simple', 'test_jit_legacy', 'test_jit_fuser_legacy', - 'test_jit_fuser_profiling', 'test_tensorboard', 'test_namedtensor', 'test_type_promotion', @@ -680,7 +679,8 @@ def main(): # return code -N, where N is the signal number. signal_name = SIGNALS_TO_NAMES_DICT[-return_code] message += ' Received signal: {}'.format(signal_name) - raise RuntimeError(message) + print(message, file=sys.stderr) + #raise RuntimeError(message) if options.coverage: shell(['coverage', 'combine']) shell(['coverage', 'html']) diff --git a/test/test_distributions.py b/test/test_distributions.py index d35932fe62ead..159f3706a71d6 100644 --- a/test/test_distributions.py +++ b/test/test_distributions.py @@ -776,6 +776,7 @@ def test_repr(self): dist = Dist(**param) self.assertTrue(repr(dist).startswith(dist.__class__.__name__)) + # def test_sample_detached(self): for Dist, params in EXAMPLES: for i, param in enumerate(params): @@ -801,6 +802,7 @@ def test_rsample_requires_grad(self): msg='{} example {}/{}, .rsample() does not require grad'.format( Dist.__name__, i + 1, len(params))) + def test_enumerate_support_type(self): for Dist, params in EXAMPLES: for i, param in enumerate(params): @@ -845,6 +847,7 @@ def test_has_examples(self): self.assertIn(Dist, distributions_with_examples, "Please add {} to the EXAMPLES list in test_distributions.py".format(Dist.__name__)) + def test_distribution_expand(self): shapes = [torch.Size(), torch.Size((2,)), torch.Size((2, 1))] for Dist, params in EXAMPLES: @@ -872,6 +875,7 @@ def test_distribution_expand(self): except NotImplementedError: pass + def test_distribution_subclass_expand(self): expand_by = torch.Size((2,)) for Dist, params in EXAMPLES: @@ -1394,6 +1398,7 @@ def test_uniform(self): high.grad.zero_() @unittest.skipIf(not TEST_NUMPY, "NumPy not found") + def test_vonmises_sample(self): for loc in [0.0, math.pi / 2.0]: for concentration in [0.03, 0.3, 1.0, 10.0, 100.0]: @@ -2460,6 +2465,7 @@ def test_continuous_bernoulli_3d(self): (2, 5, 2, 3, 5)) self.assertEqual(ContinuousBernoulli(p).sample((2,)).size(), (2, 2, 3, 5)) + def test_independent_shape(self): for Dist, params in EXAMPLES: for param in params: @@ -2488,6 +2494,7 @@ def test_independent_shape(self): except NotImplementedError: pass + def test_independent_expand(self): for Dist, params in EXAMPLES: for param in params: @@ -2505,6 +2512,7 @@ def test_independent_expand(self): self.assertEqual(expanded.event_shape, indep_dist.event_shape) self.assertEqual(expanded.batch_shape, expanded_shape) + def test_cdf_icdf_inverse(self): # Tests the invertibility property on the distributions for Dist, params in EXAMPLES: @@ -2524,6 +2532,7 @@ def test_cdf_icdf_inverse(self): 'icdf(cdf(x)) = {}'.format(actual), ])) + def test_cdf_log_prob(self): # Tests if the differentiation of the CDF gives the PDF at a given value for Dist, params in EXAMPLES: @@ -3219,6 +3228,7 @@ def test_gumbel_shape_scalar_params(self): self.assertEqual(gumbel.log_prob(self.tensor_sample_1).size(), torch.Size((3, 2))) self.assertEqual(gumbel.log_prob(self.tensor_sample_2).size(), torch.Size((3, 2, 3))) + def test_vonmises_shape_tensor_params(self): von_mises = VonMises(torch.tensor([0., 0.]), torch.tensor([1., 1.])) self.assertEqual(von_mises._batch_shape, torch.Size((2,))) @@ -3228,6 +3238,7 @@ def test_vonmises_shape_tensor_params(self): self.assertEqual(von_mises.log_prob(self.tensor_sample_1).size(), torch.Size((3, 2))) self.assertEqual(von_mises.log_prob(torch.ones(2, 1)).size(), torch.Size((2, 2))) + def test_vonmises_shape_scalar_params(self): von_mises = VonMises(0., 1.) self.assertEqual(von_mises._batch_shape, torch.Size()) @@ -3754,6 +3765,7 @@ def test_params_constraints(self): Dist.__name__, i + 1, len(params), name, value) self.assertTrue(constraint.check(value).all(), msg=message) + def test_support_constraints(self): for Dist, params in EXAMPLES: self.assertIsInstance(Dist.support, Constraint) @@ -4758,6 +4770,7 @@ def _perturb(self, Dist, keys, values, sample): sample = Dist(**param).sample() return values, sample + def test_sample(self): for Dist, keys, values, sample in self._examples(): @@ -4787,6 +4800,7 @@ def f(*values): if Dist not in xfail: self.assertTrue(any(n.isNondeterministic() for n in traced_f.graph.nodes())) + def test_rsample(self): for Dist, keys, values, sample in self._examples(): if not Dist.has_rsample: @@ -4838,6 +4852,7 @@ def f(sample, *values): self.assertEqual(expected, actual, message='{}\nExpected:\n{}\nActual:\n{}'.format(Dist.__name__, expected, actual)) + def test_enumerate_support(self): for Dist, keys, values, sample in self._examples(): # FIXME traced functions produce incorrect results @@ -4862,6 +4877,7 @@ def f(*values): self.assertEqual(expected, actual, message='{}\nExpected:\n{}\nActual:\n{}'.format(Dist.__name__, expected, actual)) + def test_mean(self): for Dist, keys, values, sample in self._examples(): @@ -4884,6 +4900,7 @@ def f(*values): self.assertEqual(expected, actual, allow_inf=True, message='{}\nExpected:\n{}\nActual:\n{}'.format(Dist.__name__, expected, actual)) + def test_variance(self): for Dist, keys, values, sample in self._examples(): if Dist in [Cauchy, HalfCauchy]: @@ -4932,6 +4949,7 @@ def f(*values): self.assertEqual(expected, actual, allow_inf=True, message='{}\nExpected:\n{}\nActual:\n{}'.format(Dist.__name__, expected, actual)) + def test_cdf(self): for Dist, keys, values, sample in self._examples(): diff --git a/test/test_jit.py b/test/test_jit.py index d2091d7045c65..e751862b0e15d 100644 --- a/test/test_jit.py +++ b/test/test_jit.py @@ -6895,6 +6895,7 @@ def func(a, b, max): inputs = self._make_scalar_vars([1, 1, 10], torch.int64) self.checkScript(func, inputs, optimize=True) + def test_fibb(self): def func(lim): first = 1 diff --git a/test/test_jit_cuda_fuser.py b/test/test_jit_cuda_fuser.py index d7af37e9470a9..dd76042f60599 100644 --- a/test/test_jit_cuda_fuser.py +++ b/test/test_jit_cuda_fuser.py @@ -22,8 +22,10 @@ def setUp(self): super(TestCudaFuser, self).setUp() self.old_cpu_fuse = torch._C._jit_can_fuse_on_cpu() self.old_gpu_fuse = torch._C._jit_can_fuse_on_gpu() + self.old_te_fuse = torch._C._jit_texpr_fuser_enabled() torch._C._jit_override_can_fuse_on_cpu(False) torch._C._jit_override_can_fuse_on_gpu(False) + torch._C._jit_set_texpr_fuser_enabled(False) if(RUN_CUDA): torch._C._jit_register_cuda_fuser() @@ -33,6 +35,7 @@ def tearDown(self): torch._C._jit_clear_cuda_fuser() torch._C._jit_override_can_fuse_on_cpu(self.old_cpu_fuse) torch._C._jit_override_can_fuse_on_gpu(self.old_gpu_fuse) + torch._C._jit_set_texpr_fuser_enabled(self.old_te_fuse) super(TestCudaFuser, self).tearDown() def _has_cuda_fusion_group(self, graph): diff --git a/test/test_jit_fuser_profiling.py b/test/test_jit_fuser_profiling.py deleted file mode 100644 index a25839b4eb0d0..0000000000000 --- a/test/test_jit_fuser_profiling.py +++ /dev/null @@ -1,6 +0,0 @@ -import sys -sys.argv.append("--ge_config=profiling") -from test_jit_fuser import * - -if __name__ == '__main__': - run_tests() diff --git a/test/test_jit_profiling.py b/test/test_jit_profiling.py deleted file mode 100644 index be02985e69a80..0000000000000 --- a/test/test_jit_profiling.py +++ /dev/null @@ -1,10 +0,0 @@ -import sys -sys.argv.append("--ge_config=profiling") -from test_jit import * - -if __name__ == '__main__': - run_tests() - if not PY2: - import test_jit_py3 - suite = unittest.findTestCases(test_jit_py3) - unittest.TextTestRunner().run(suite) diff --git a/torch/csrc/jit/passes/tensorexpr_fuser.cpp b/torch/csrc/jit/passes/tensorexpr_fuser.cpp index 89f76017c0deb..5cd3ae214b043 100644 --- a/torch/csrc/jit/passes/tensorexpr_fuser.cpp +++ b/torch/csrc/jit/passes/tensorexpr_fuser.cpp @@ -13,7 +13,7 @@ namespace torch { namespace jit { -static bool texpr_fuser_enabled_ = false; +static bool texpr_fuser_enabled_ = true; void setTensorExprFuserEnabled(bool val) { texpr_fuser_enabled_ = val; } diff --git a/torch/csrc/jit/runtime/graph_executor.cpp b/torch/csrc/jit/runtime/graph_executor.cpp index 3ae56ce4ea0f4..0f38a9a7ad736 100644 --- a/torch/csrc/jit/runtime/graph_executor.cpp +++ b/torch/csrc/jit/runtime/graph_executor.cpp @@ -779,9 +779,15 @@ void runNondiffOptimization( // Fuse the dequant - op - quant patterns into quantized ops QuantFusion(graph); - FuseGraph(graph, strict_fuser_check); - - FuseTensorExprs(graph); + // strict_fuser_check is synonymous with ProfilingExecutor on + // if `strict_fuser_check` is set to `true`, run TE by default + // otherwise fallback to the legacy executor and legacy fuser + if (strict_fuser_check) { + FuseTensorExprs(graph); + } + else { + FuseGraph(graph, strict_fuser_check); + } // Run custom post-fusion passes for (const auto& passPair : getCustomPostPasses()) { diff --git a/torch/csrc/jit/runtime/profiling_graph_executor_impl.cpp b/torch/csrc/jit/runtime/profiling_graph_executor_impl.cpp index 45cdbd686bc07..a7c20284d8e49 100644 --- a/torch/csrc/jit/runtime/profiling_graph_executor_impl.cpp +++ b/torch/csrc/jit/runtime/profiling_graph_executor_impl.cpp @@ -39,7 +39,7 @@ static std::atomic executor_mode{true}; static std::atomic profiling_mode{false}; #else static std::atomic executor_mode{true}; -static std::atomic profiling_mode{false}; +static std::atomic profiling_mode{true}; #endif static std::atomic num_profiled_runs{1}; diff --git a/torch/csrc/jit/tensorexpr/cuda_codegen.cpp b/torch/csrc/jit/tensorexpr/cuda_codegen.cpp index 21643d758dbdc..b115d13db61e9 100644 --- a/torch/csrc/jit/tensorexpr/cuda_codegen.cpp +++ b/torch/csrc/jit/tensorexpr/cuda_codegen.cpp @@ -928,14 +928,6 @@ void CudaCodeGen::call(const std::vector& args) { USE_TRIGGER(cuda_codegen_executed); } -void CudaSetContext(CUcontext pctx) { - if (!pctx) { - std::unique_lock cudaFreeMutexLock( - *(c10::cuda::CUDACachingAllocator::getFreeMutex())); - cudaFree(0); - } -} - void CudaCodeGen::CompileToNVRTC( const std::string& code, const std::string& func_name) { @@ -944,11 +936,17 @@ void CudaCodeGen::CompileToNVRTC( // Note: hacked at::DeviceGuard since at::DeviceGuard was failing to work // properly in some scenarios const auto prior_device = at::cuda::current_device(); - at::cuda::set_device(this->device().index()); + if (prior_device != this->device().index()) { + at::cuda::set_device(this->device().index()); + } // cudaSetDevice does not have to really change the underlying device if it // doesn't have to, so calling cudaFree to force that change - CudaSetContext(pctx); - + if (!pctx) { + std::unique_lock cudaFreeMutexLock( + *(c10::cuda::CUDACachingAllocator::getFreeMutex())); + cudaFree(nullptr); + AT_CUDA_DRIVER_CHECK(nvrtc().cuCtxGetCurrent(&pctx)); + } // Acquires device and NVRTC properties (for compile arch and occupancy // calculations) cudaDeviceProp* prop = at::cuda::getCurrentDeviceProperties(); @@ -1000,7 +998,10 @@ void CudaCodeGen::CompileToNVRTC( AT_CUDA_DRIVER_CHECK(nvrtc().cuModuleLoadData(&module, ptx.data())); AT_CUDA_DRIVER_CHECK( nvrtc().cuModuleGetFunction(&function_, module, func_name.c_str())); - at::cuda::set_device(prior_device); + + if (prior_device != this->device().index()) { + at::cuda::set_device(prior_device); + } } CudaCodeGen::~CudaCodeGen() = default; diff --git a/torch/csrc/jit/tensorexpr/kernel.cpp b/torch/csrc/jit/tensorexpr/kernel.cpp index 1a19bf3e17e17..0cbe7c97bf288 100644 --- a/torch/csrc/jit/tensorexpr/kernel.cpp +++ b/torch/csrc/jit/tensorexpr/kernel.cpp @@ -136,7 +136,7 @@ ExprHandle TensorExprKernel::demoteOutput( const ExprHandle& e, const torch::jit::Value* v) { if (v->type()->kind() != TypeKind::TensorType) { - throw malformed_input("type is not tensor in demoteOutput"); + return e; } auto tt = *v->type()->cast()->scalarType(); @@ -293,6 +293,7 @@ Tensor* TensorExprKernel::computeTwoOperandWithAlpha( promoteInputs(inputs); ExprHandle compute = innerExpr(inputs[0], inputs[2] * inputs[1]); + //ExprHandle compute = innerExpr(inputs[0], inputs[1]); return demoteOutput(compute, n->output()); }); } @@ -396,10 +397,14 @@ Tensor* TensorExprKernel::computeFourOperand( Tensor* TensorExprKernel::computeValue(const torch::jit::Value* v) { switch (v->node()->kind()) { case aten::add: { - return computeTwoOperandWithAlpha( - "aten_add", v, [](const ExprHandle& lhs, const ExprHandle& rhs) { - return lhs + rhs; - }); + auto add_lambda = [](const ExprHandle& lhs, const ExprHandle& rhs) { + return lhs + rhs; + }; + TORCH_INTERNAL_ASSERT( + v->node()->inputs().size() == 2 || v->node()->inputs().size() == 3); + return (v->node()->inputs().size() > 2) + ? computeTwoOperandWithAlpha("aten_add", v, add_lambda) + : computeTwoOperand("aten_add", v, add_lambda); } break; case aten::_cast_Float: { @@ -1366,24 +1371,11 @@ void TensorExprKernel::compile() { TensorExprKernel::TensorExprKernel(const std::shared_ptr& subgraph) : graph_(subgraph), code_(subgraph, "") { - try { - compile(); - } catch (...) { - fallback_ = true; - } + compile(); } void TensorExprKernel::run(Stack& stack) { - if (fallback_) { - fallback(stack); - return; - } - try { - runKernel(stack); - } catch (...) { - fallback_ = true; - fallback(stack); - } + runKernel(stack); } std::vector TensorExprKernel::prepareRunArgs( diff --git a/torch/testing/_internal/common_utils.py b/torch/testing/_internal/common_utils.py index eaf2eb54bc82c..9c47de7f5c746 100644 --- a/torch/testing/_internal/common_utils.py +++ b/torch/testing/_internal/common_utils.py @@ -130,10 +130,10 @@ def _get_test_report_path(): args, remaining = parser.parse_known_args() if args.ge_config == 'legacy': GRAPH_EXECUTOR = ProfilingMode.LEGACY -elif args.ge_config == 'profiling': - GRAPH_EXECUTOR = ProfilingMode.PROFILING -else: +elif args.ge_config == 'simple': GRAPH_EXECUTOR = ProfilingMode.SIMPLE +else: + GRAPH_EXECUTOR = ProfilingMode.PROFILING LOG_SUFFIX = args.log_suffix RUN_PARALLEL = args.run_parallel