
Conversation

@narendasan
Collaborator

Description

Adds a number of test cases for the unowned-tensor feature for pre-allocated output tensors, including both C++ and Python API tests.
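
For context, the Python side of these tests drives the runtime through torchtrt.runtime.enable_pre_allocated_outputs and then queries each TRT submodule's ownership flag via the new are_output_tensors_unowned() API (see the diffs below). A minimal sketch of that pattern, with illustrative input shapes and compile settings that are not taken from the PR (submodule names like _run_on_acc_0 come from the partitioner and depend on how the graph is split):

import torch
import torch_tensorrt as torchtrt


class SampleModel(torch.nn.Module):
    def forward(self, x):
        return torch.softmax(x * 7 + 2, dim=0)


model = SampleModel().eval().cuda()
inputs = [torch.randn(2, 3).cuda()]  # illustrative shape, not from the PR

# Compile with the Python runtime so the ownership query is reachable
# directly on the accelerated submodule; the C++ runtime exposes the same
# query on the submodule's .engine attribute, as the tests show.
optimized_model = torchtrt.compile(
    model,
    ir="dynamo",
    inputs=inputs,
    use_python_runtime=True,
    min_block_size=1,
)

with torchtrt.runtime.enable_pre_allocated_outputs(optimized_model):
    _ = optimized_model(inputs[0])
    # After a warm-up run, each TRT submodule reports whether its output
    # tensors are unowned, i.e. pre-allocated outside the engine.
    print(optimized_model._run_on_acc_0.are_output_tensors_unowned())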

@cehongwang Feel free to change names or tests as you see fit

Fixes # (issue)

Type of change

Please delete options that are not relevant and/or add your own.

  • Test cases

Checklist:

  • My code follows the style guidelines of this project (You can use the linters)
  • I have performed a self-review of my own code
  • I have commented my code, particularly in hard-to-understand areas and hacks
  • I have made corresponding changes to the documentation
  • I have added tests to verify my fix or my feature
  • New and existing unit tests pass locally with my changes
  • I have added the relevant labels to my PR so that relevant reviewers are notified

@meta-cla meta-cla bot added the cla signed label Jan 6, 2026
@narendasan narendasan requested a review from cehongwang January 6, 2026 00:41

@github-actions github-actions bot left a comment

There are some changes that do not conform to Python style guidelines:

--- /home/runner/work/TensorRT/TensorRT/tests/py/dynamo/runtime/test_pre_allocated_outputs.py	2026-01-06 00:40:57.487398+00:00
+++ /home/runner/work/TensorRT/TensorRT/tests/py/dynamo/runtime/test_pre_allocated_outputs.py	2026-01-06 00:41:41.621584+00:00
@@ -163,16 +163,18 @@
            self.assertTrue(output_tensors[0] is new_output_tensors[0])
            # Run to run, output of output engine is reallocated
            self.assertTrue(output_tensors[1] is not new_output_tensors[1])

    @parameterized.expand(
-       [
-           ("python_runtime", True),
-           ("cpp_runtime", False),
-       ]
-    )
-    def test_pre_allocated_outputs_unowned_outputs_api_check(self, _, use_python_runtime):
+        [
+            ("python_runtime", True),
+            ("cpp_runtime", False),
+        ]
+    )
+    def test_pre_allocated_outputs_unowned_outputs_api_check(
+        self, _, use_python_runtime
+    ):
        class SampleModel(torch.nn.Module):
            def forward(self, x):
                return torch.softmax(x * 7 + 2, dim=0)

        model = SampleModel().eval().cuda()
@@ -191,20 +193,36 @@
        )

        with torchtrt.runtime.enable_pre_allocated_outputs(optimized_model):
            _ = optimized_model(inputs[0])
            if use_python_runtime:
-                self.assertTrue(all(seen == expected for seen, expected in zip([
-                    optimized_model._run_on_acc_0.are_output_tensors_unowned(),
-                    optimized_model._run_on_acc_2.are_output_tensors_unowned()
-                ], [False, True])))
+                self.assertTrue(
+                    all(
+                        seen == expected
+                        for seen, expected in zip(
+                            [
+                                optimized_model._run_on_acc_0.are_output_tensors_unowned(),
+                                optimized_model._run_on_acc_2.are_output_tensors_unowned(),
+                            ],
+                            [False, True],
+                        )
+                    )
+                )

            else:
-                self.assertTrue(all(seen == expected for seen, expected in zip([
-                    optimized_model._run_on_acc_0.engine.are_output_tensors_unowned(),
-                    optimized_model._run_on_acc_2.engine.are_output_tensors_unowned()
-                ], [False, True])))
+                self.assertTrue(
+                    all(
+                        seen == expected
+                        for seen, expected in zip(
+                            [
+                                optimized_model._run_on_acc_0.engine.are_output_tensors_unowned(),
+                                optimized_model._run_on_acc_2.engine.are_output_tensors_unowned(),
+                            ],
+                            [False, True],
+                        )
+                    )
+                )

    @parameterized.expand(
        [
            ("python_runtime", True),
            ("cpp_runtime", False),
@@ -256,12 +274,13 @@
            check_dtype=True,
        )

        torch._dynamo.reset()

-
-    def test_pre_allocated_outputs_unowned_outputs_multiple_outputs_py_api_check_no_realloc(self):
+    def test_pre_allocated_outputs_unowned_outputs_multiple_outputs_py_api_check_no_realloc(
+        self,
+    ):
        class SampleModel(torch.nn.Module):
            def forward(self, x):
                y = torch.ops.aten.mul(x, 7)
                z = torch.ops.aten.add(y, 2)
                a = torch.ops.aten.softmax(z, dim=0)
@@ -306,11 +325,13 @@
        [
            ("python_runtime", True),
            ("cpp_runtime", False),
        ]
    )
-    def test_pre_allocated_outputs_unowned_outputs_multiple_outputs_api_check(self, _, use_python_runtime):
+    def test_pre_allocated_outputs_unowned_outputs_multiple_outputs_api_check(
+        self, _, use_python_runtime
+    ):
        class SampleModel(torch.nn.Module):
            def forward(self, x):
                y = torch.ops.aten.mul(x, 7)
                z = torch.ops.aten.add(y, 2)
                a = torch.ops.aten.softmax(z, dim=0)
@@ -332,28 +353,46 @@
        )

        with torchtrt.runtime.enable_pre_allocated_outputs(optimized_model):
            _ = optimized_model(inputs[0])
            if use_python_runtime:
-                self.assertTrue(all(seen == expected for seen, expected in zip([
-                    optimized_model._run_on_acc_0.are_output_tensors_unowned(),
-                    optimized_model._run_on_acc_2.are_output_tensors_unowned()
-                ], [True, True])))
+                self.assertTrue(
+                    all(
+                        seen == expected
+                        for seen, expected in zip(
+                            [
+                                optimized_model._run_on_acc_0.are_output_tensors_unowned(),
+                                optimized_model._run_on_acc_2.are_output_tensors_unowned(),
+                            ],
+                            [True, True],
+                        )
+                    )
+                )

            else:
-                self.assertTrue(all(seen == expected for seen, expected in zip([
-                    optimized_model._run_on_acc_0.engine.are_output_tensors_unowned(),
-                    optimized_model._run_on_acc_2.engine.are_output_tensors_unowned()
-                ], [True, True])))
-
-    @parameterized.expand(
-        [
-            ("python_runtime", True),
-            ("cpp_runtime", False),
-        ]
-    )
-    def test_pre_allocated_outputs_unowned_outputs_multi_outputs(self, _, use_python_runtime):
+                self.assertTrue(
+                    all(
+                        seen == expected
+                        for seen, expected in zip(
+                            [
+                                optimized_model._run_on_acc_0.engine.are_output_tensors_unowned(),
+                                optimized_model._run_on_acc_2.engine.are_output_tensors_unowned(),
+                            ],
+                            [True, True],
+                        )
+                    )
+                )
+
+    @parameterized.expand(
+        [
+            ("python_runtime", True),
+            ("cpp_runtime", False),
+        ]
+    )
+    def test_pre_allocated_outputs_unowned_outputs_multi_outputs(
+        self, _, use_python_runtime
+    ):
        class SampleModel(torch.nn.Module):
            def forward(self, x):
                y = torch.ops.aten.mul(x, 7)
                z = torch.ops.aten.add(y, 2)
                a = torch.ops.aten.softmax(z, dim=0)
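
Stripped of the test harness, the assertion that the formatter reflows above is just a pairwise comparison of observed flags against expected flags. A self-contained sketch of the idiom (the flag values are stand-ins for the are_output_tensors_unowned() queries):

# Ownership flags as each TRT submodule would report them; the values here
# are stand-ins for the per-engine queries made in the tests.
seen_flags = [False, True]
expected_flags = [False, True]

# The tests wrap this in self.assertTrue(...); a plain assert shows the idiom.
assert all(seen == expected for seen, expected in zip(seen_flags, expected_flags))

The formatter (black, by the look of the reflow) only spreads the nested call over multiple lines once it exceeds the line-length limit; the logic is unchanged.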

@cehongwang cehongwang merged commit 0bef55b into graph-break-overhead Jan 6, 2026
7 of 8 checks passed
