From b404ae5df86314497a6d2c267cd0a5b490a091ac Mon Sep 17 00:00:00 2001
From: Simon Fan
Date: Mon, 13 May 2024 21:46:15 -0700
Subject: [PATCH 1/2] [compiled autograd] Fix flaky tests

[ghstack-poisoned]
---
 test/inductor/test_compiled_autograd.py   | 59 +++++++++++++++++++++++
 torch/_dynamo/compiled_autograd.py        |  7 +++
 torch/testing/_internal/logging_tensor.py |  4 +-
 3 files changed, 68 insertions(+), 2 deletions(-)

diff --git a/test/inductor/test_compiled_autograd.py b/test/inductor/test_compiled_autograd.py
index 074d075fc848c..7379df6b8a802 100644
--- a/test/inductor/test_compiled_autograd.py
+++ b/test/inductor/test_compiled_autograd.py
@@ -51,6 +51,14 @@ def hook3(gI, gO):
 
 
 class TestCompiledAutograd(TestCase):
+    def setUp(self):
+        super().setUp()
+        compiled_autograd.reset()
+
+    def tearDown(self):
+        super().tearDown()
+        compiled_autograd.reset()
+
     def check_output_and_recompiles(
         self, fn, count=1, compiler_fn=compiler_fn, compile_fn=False
     ):
@@ -322,6 +330,9 @@ def bytecode_hook(code, out_code):
         handle.remove()
 
     def test_inputs_aliasing_bytecode_stack_restore(self):
+        import logging
+
+        logging.getLogger().setLevel(logging.WARNING)
         from torch.testing._internal.logging_tensor import LoggingTensor
 
         # Create a graph that allows inputs stealing
@@ -752,6 +763,54 @@ def backward(ctx, gO_1, gO_2, gO_3):
 
         self.check_output_and_recompiles(fn, count=2)
 
+    @unittest.skipIf(not HAS_CUDA, "requires cuda")
+    def test_logging_tensor_flaky(self):
+        # running some test that uses triton and then running test_inputs_aliasing_bytecode_stack_restore
+        # results in:
+        #   - pytest: `TypeError: unsupported operand type(s) for +: 'Tensor' and 'LoggingTensor'`
+        #   - python: `TypeError: not all arguments converted during string formatting`
+
+        # 1. some test involving triton
+        def fn():
+            def _fn(x):
+                return x
+
+            x = torch.arange(
+                1, 10, requires_grad=True, dtype=torch.float16, device="cuda"
+            )
+            out = _fn(x)
+            loss = out.sum()
+            loss.backward()
+
+        with compiled_autograd.enable(compiler_fn):
+            fn()
+
+        import logging
+
+        logging.getLogger().setLevel(
+            logging.WARNING
+        )  # triton setup overwrote it to INFO
+        # 2. test_inputs_aliasing_bytecode_stack_restore
+        from torch.testing._internal.logging_tensor import LoggingTensor
+
+        def forward(inputs):
+            add = inputs[0] + 1
+            add_1 = add + inputs[1]
+            out = add_1.cpu()
+            return (out,)
+
+        gm = torch.fx.symbolic_trace(forward)
+        print(gm.print_readable())
+        torch._dynamo.utils.set_locals_to_steal(gm, ["inputs"])
+        compiled_fn = torch.compile(gm)
+
+        inputs = [
+            torch.ones(1000000, dtype=torch.float32),
+            LoggingTensor(torch.ones(1)),
+        ]
+
+        compiled_fn(inputs)
+
     @unittest.skipIf(not HAS_CUDA, "requires cuda")
     def test_custom_fn_output_metadata(self):
         def my_compiler_fn(gm):
diff --git a/torch/_dynamo/compiled_autograd.py b/torch/_dynamo/compiled_autograd.py
index e8e61042d4746..f630547ead73b 100644
--- a/torch/_dynamo/compiled_autograd.py
+++ b/torch/_dynamo/compiled_autograd.py
@@ -319,3 +319,10 @@ def disable():
     if prior:
         compiled_autograd_enabled = True
     torch._C._dynamo.compiled_autograd.set_autograd_compiler(prior)
+
+
+# return to starting state of a new process
+def reset():
+    compiled_autograd_enable = False
+    assert compiled_autograd_enabled_count == 0
+    torch._C._dynamo.compiled_autograd.set_autograd_compiler(None)
diff --git a/torch/testing/_internal/logging_tensor.py b/torch/testing/_internal/logging_tensor.py
index 5ddd537474404..840f5bd2709bb 100644
--- a/torch/testing/_internal/logging_tensor.py
+++ b/torch/testing/_internal/logging_tensor.py
@@ -11,6 +11,7 @@
 import functools
 from torch._C._profiler import gather_traceback, symbolize_tracebacks
 
+logger = logging.getLogger("LoggingTensor")
 
 _dtype_abbrs = {
     torch.bfloat16: "bf16",
@@ -136,7 +137,7 @@ def emit(self, record):
             self.tracebacks_list.append(record.traceback)
 
 def log_input(name: str, var: object):
-    logging.getLogger("LoggingTensor").info("input", (name,), {}, var)  # noqa: PLE1205
+    logger.info("input", (name,), {}, var)  # noqa: PLE1205
 
 class GatherTraceback(logging.Filter):
     def __init__(self, python=True, script=True, cpp=False):
@@ -151,7 +152,6 @@ def filter(self, record):
 @contextlib.contextmanager
 def capture_logs(is_mode=False, python_tb=False, script_tb=False, cpp_tb=False) -> Iterator[List[str]]:
     collect_traceback = python_tb or script_tb or cpp_tb
-    logger = logging.getLogger("LoggingTensor")
     log_list: List[str] = []
     tracebacks_list: List[str] = []
     handler = LoggingTensorHandler(

From de06f1652d5c7bc197e5eba84243302051ae727c Mon Sep 17 00:00:00 2001
From: Simon Fan
Date: Tue, 14 May 2024 08:30:57 -0700
Subject: [PATCH 2/2] Update on "[compiled autograd] Fix LoggingTensor flaky test"

LoggingTensor fails consistently when the root logger level is INFO or lower.

By default, the root logger should be WARNING.
But triton driver initialization will overwrite the root logger to INFO, which causes flakiness: https://github.com/pytorch/pytorch/issues/126143

cc voznesenskym penguinwu EikanWang jgong5 Guobing-Chen XiaobingSuper zhuhaozhe blzheng wenzhe-nrv jiayisunx peterbell10 ipiszy yf225 chenyang78 kadeng muchulee8 ColinPeppler amjames desertfire chauhang

[ghstack-poisoned]
---
 test/inductor/test_compiled_autograd.py   | 11 ++++-------
 torch/_dynamo/compiled_autograd.py        |  2 +-
 torch/testing/_internal/logging_tensor.py |  2 +-
 3 files changed, 6 insertions(+), 9 deletions(-)

diff --git a/test/inductor/test_compiled_autograd.py b/test/inductor/test_compiled_autograd.py
index 7379df6b8a802..201dd4a3c77d0 100644
--- a/test/inductor/test_compiled_autograd.py
+++ b/test/inductor/test_compiled_autograd.py
@@ -1,5 +1,6 @@
 # Owner(s): ["module: inductor"]
 import functools
+import logging
 import re
 import sys
 import unittest
@@ -51,11 +52,11 @@ def hook3(gI, gO):
 
 
 class TestCompiledAutograd(TestCase):
-    def setUp(self):
+    def setUp(self) -> None:
         super().setUp()
         compiled_autograd.reset()
 
-    def tearDown(self):
+    def tearDown(self) -> None:
         super().tearDown()
         compiled_autograd.reset()
 
@@ -330,8 +331,6 @@ def bytecode_hook(code, out_code):
         handle.remove()
 
     def test_inputs_aliasing_bytecode_stack_restore(self):
-        import logging
-
         logging.getLogger().setLevel(logging.WARNING)
         from torch.testing._internal.logging_tensor import LoggingTensor
 
@@ -764,7 +763,7 @@ def backward(ctx, gO_1, gO_2, gO_3):
         self.check_output_and_recompiles(fn, count=2)
 
     @unittest.skipIf(not HAS_CUDA, "requires cuda")
-    def test_logging_tensor_flaky(self):
+    def test_logging_tensor_flaky(self) -> None:
         # running some test that uses triton and then running test_inputs_aliasing_bytecode_stack_restore
         # results in:
         #   - pytest: `TypeError: unsupported operand type(s) for +: 'Tensor' and 'LoggingTensor'`
         #   - python: `TypeError: not all arguments converted during string formatting`
@@ -785,8 +784,6 @@ def _fn(x):
         with compiled_autograd.enable(compiler_fn):
             fn()
 
-        import logging
-
         logging.getLogger().setLevel(
             logging.WARNING
         )  # triton setup overwrote it to INFO
diff --git a/torch/_dynamo/compiled_autograd.py b/torch/_dynamo/compiled_autograd.py
index f630547ead73b..386d0b4dd4ae0 100644
--- a/torch/_dynamo/compiled_autograd.py
+++ b/torch/_dynamo/compiled_autograd.py
@@ -322,7 +322,7 @@ def disable():
 
 
 # return to starting state of a new process
-def reset():
+def reset() -> None:
     compiled_autograd_enable = False
     assert compiled_autograd_enabled_count == 0
     torch._C._dynamo.compiled_autograd.set_autograd_compiler(None)
diff --git a/torch/testing/_internal/logging_tensor.py b/torch/testing/_internal/logging_tensor.py
index 840f5bd2709bb..8b7faf45b3c3c 100644
--- a/torch/testing/_internal/logging_tensor.py
+++ b/torch/testing/_internal/logging_tensor.py
@@ -136,7 +136,7 @@ def emit(self, record):
         if self.tracebacks_list is not None:
             self.tracebacks_list.append(record.traceback)
 
-def log_input(name: str, var: object):
+def log_input(name: str, var: object) -> None:
     logger.info("input", (name,), {}, var)  # noqa: PLE1205
 
 class GatherTraceback(logging.Filter):
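
As a footnote to the behavior described in the second commit message, here is a minimal, self-contained sketch of the root-logger interaction involved: the root logger defaults to WARNING, a library's initialization (here, triton's driver setup) can raise it to INFO process-wide, and a test that depends on the default can snapshot and restore the level. The preserve_root_log_level helper is illustrative only and is not part of the patches; only standard-library logging behavior is relied on.

import contextlib
import logging


@contextlib.contextmanager
def preserve_root_log_level():
    # Snapshot the root logger level and restore it on exit
    # (hypothetical helper, for illustration only).
    root = logging.getLogger()
    prior = root.level
    try:
        yield
    finally:
        root.setLevel(prior)


# In a fresh process the root level is WARNING, which keeps
# LoggingTensor's INFO-level records disabled.
assert logging.getLogger().getEffectiveLevel() == logging.WARNING

with preserve_root_log_level():
    # Stand-in for what triton driver initialization ends up doing process-wide.
    logging.getLogger().setLevel(logging.INFO)

# After the block, later tests again see the default level.
assert logging.getLogger().getEffectiveLevel() == logging.WARNING

The patches take the simpler route of resetting the level to WARNING directly inside the affected test; a context manager like the one above is just one way to make that restore automatic.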