
Conversation

@narendasan
Collaborator

Description

Adds a number of test cases for the unowned-tensor feature for pre-allocated output tensors, including both C++ and Python API tests.
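
For context, the Python side of these tests drives the runtime through torchtrt.runtime.enable_pre_allocated_outputs and then queries each TRT submodule's ownership flag via the new are_output_tensors_unowned() API (see the diffs below). A minimal sketch of that pattern, with illustrative input shapes and compile settings that are not taken from the PR (submodule names like _run_on_acc_0 come from the partitioner and depend on how the graph is split):

import torch
import torch_tensorrt as torchtrt


class SampleModel(torch.nn.Module):
    def forward(self, x):
        return torch.softmax(x * 7 + 2, dim=0)


model = SampleModel().eval().cuda()
inputs = [torch.randn(2, 3).cuda()]  # illustrative shape, not from the PR

# Compile with the Python runtime so the ownership query is reachable
# directly on the accelerated submodule; the C++ runtime exposes the same
# query on the submodule's .engine attribute, as the tests show.
optimized_model = torchtrt.compile(
    model,
    ir="dynamo",
    inputs=inputs,
    use_python_runtime=True,
    min_block_size=1,
)

with torchtrt.runtime.enable_pre_allocated_outputs(optimized_model):
    _ = optimized_model(inputs[0])
    # After a warm-up run, each TRT submodule reports whether its output
    # tensors are unowned, i.e. pre-allocated outside the engine.
    print(optimized_model._run_on_acc_0.are_output_tensors_unowned())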

@cehongwang Feel free to change names or tests as you see fit

Fixes # (issue)

Type of change

Please delete options that are not relevant and/or add your own.

  • Test cases

Checklist:

  • My code follows the style guidelines of this project (You can use the linters)
  • I have performed a self-review of my own code
  • I have commented my code, particularly in hard-to-understand areas and hacks
  • I have made corresponding changes to the documentation
  • I have added tests to verify my fix or my feature
  • New and existing unit tests pass locally with my changes
  • I have added the relevant labels to my PR so that relevant reviewers are notified

@meta-cla meta-cla bot added the cla signed label Jan 6, 2026
@narendasan narendasan requested a review from cehongwang January 6, 2026 00:41

@github-actions github-actions bot left a comment

There are some changes that do not conform to Python style guidelines:

--- /home/runner/work/TensorRT/TensorRT/tests/py/dynamo/runtime/test_pre_allocated_outputs.py	2026-01-06 00:40:57.487398+00:00
+++ /home/runner/work/TensorRT/TensorRT/tests/py/dynamo/runtime/test_pre_allocated_outputs.py	2026-01-06 00:41:41.621584+00:00
@@ -163,16 +163,18 @@
            self.assertTrue(output_tensors[0] is new_output_tensors[0])
            # Run to run, output of output engine is reallocated
            self.assertTrue(output_tensors[1] is not new_output_tensors[1])

    @parameterized.expand(
-       [
-           ("python_runtime", True),
-           ("cpp_runtime", False),
-       ]
-    )
-    def test_pre_allocated_outputs_unowned_outputs_api_check(self, _, use_python_runtime):
+        [
+            ("python_runtime", True),
+            ("cpp_runtime", False),
+        ]
+    )
+    def test_pre_allocated_outputs_unowned_outputs_api_check(
+        self, _, use_python_runtime
+    ):
        class SampleModel(torch.nn.Module):
            def forward(self, x):
                return torch.softmax(x * 7 + 2, dim=0)

        model = SampleModel().eval().cuda()
@@ -191,20 +193,36 @@
        )

        with torchtrt.runtime.enable_pre_allocated_outputs(optimized_model):
            _ = optimized_model(inputs[0])
            if use_python_runtime:
-                self.assertTrue(all(seen == expected for seen, expected in zip([
-                    optimized_model._run_on_acc_0.are_output_tensors_unowned(),
-                    optimized_model._run_on_acc_2.are_output_tensors_unowned()
-                ], [False, True])))
+                self.assertTrue(
+                    all(
+                        seen == expected
+                        for seen, expected in zip(
+                            [
+                                optimized_model._run_on_acc_0.are_output_tensors_unowned(),
+                                optimized_model._run_on_acc_2.are_output_tensors_unowned(),
+                            ],
+                            [False, True],
+                        )
+                    )
+                )

            else:
-                self.assertTrue(all(seen == expected for seen, expected in zip([
-                    optimized_model._run_on_acc_0.engine.are_output_tensors_unowned(),
-                    optimized_model._run_on_acc_2.engine.are_output_tensors_unowned()
-                ], [False, True])))
+                self.assertTrue(
+                    all(
+                        seen == expected
+                        for seen, expected in zip(
+                            [
+                                optimized_model._run_on_acc_0.engine.are_output_tensors_unowned(),
+                                optimized_model._run_on_acc_2.engine.are_output_tensors_unowned(),
+                            ],
+                            [False, True],
+                        )
+                    )
+                )

    @parameterized.expand(
        [
            ("python_runtime", True),
            ("cpp_runtime", False),
@@ -256,12 +274,13 @@
            check_dtype=True,
        )

        torch._dynamo.reset()

-
-    def test_pre_allocated_outputs_unowned_outputs_multiple_outputs_py_api_check_no_realloc(self):
+    def test_pre_allocated_outputs_unowned_outputs_multiple_outputs_py_api_check_no_realloc(
+        self,
+    ):
        class SampleModel(torch.nn.Module):
            def forward(self, x):
                y = torch.ops.aten.mul(x, 7)
                z = torch.ops.aten.add(y, 2)
                a = torch.ops.aten.softmax(z, dim=0)
@@ -306,11 +325,13 @@
        [
            ("python_runtime", True),
            ("cpp_runtime", False),
        ]
    )
-    def test_pre_allocated_outputs_unowned_outputs_multiple_outputs_api_check(self, _, use_python_runtime):
+    def test_pre_allocated_outputs_unowned_outputs_multiple_outputs_api_check(
+        self, _, use_python_runtime
+    ):
        class SampleModel(torch.nn.Module):
            def forward(self, x):
                y = torch.ops.aten.mul(x, 7)
                z = torch.ops.aten.add(y, 2)
                a = torch.ops.aten.softmax(z, dim=0)
@@ -332,28 +353,46 @@
        )

        with torchtrt.runtime.enable_pre_allocated_outputs(optimized_model):
            _ = optimized_model(inputs[0])
            if use_python_runtime:
-                self.assertTrue(all(seen == expected for seen, expected in zip([
-                    optimized_model._run_on_acc_0.are_output_tensors_unowned(),
-                    optimized_model._run_on_acc_2.are_output_tensors_unowned()
-                ], [True, True])))
+                self.assertTrue(
+                    all(
+                        seen == expected
+                        for seen, expected in zip(
+                            [
+                                optimized_model._run_on_acc_0.are_output_tensors_unowned(),
+                                optimized_model._run_on_acc_2.are_output_tensors_unowned(),
+                            ],
+                            [True, True],
+                        )
+                    )
+                )

            else:
-                self.assertTrue(all(seen == expected for seen, expected in zip([
-                    optimized_model._run_on_acc_0.engine.are_output_tensors_unowned(),
-                    optimized_model._run_on_acc_2.engine.are_output_tensors_unowned()
-                ], [True, True])))
-
-    @parameterized.expand(
-        [
-            ("python_runtime", True),
-            ("cpp_runtime", False),
-        ]
-    )
-    def test_pre_allocated_outputs_unowned_outputs_multi_outputs(self, _, use_python_runtime):
+                self.assertTrue(
+                    all(
+                        seen == expected
+                        for seen, expected in zip(
+                            [
+                                optimized_model._run_on_acc_0.engine.are_output_tensors_unowned(),
+                                optimized_model._run_on_acc_2.engine.are_output_tensors_unowned(),
+                            ],
+                            [True, True],
+                        )
+                    )
+                )
+
+    @parameterized.expand(
+        [
+            ("python_runtime", True),
+            ("cpp_runtime", False),
+        ]
+    )
+    def test_pre_allocated_outputs_unowned_outputs_multi_outputs(
+        self, _, use_python_runtime
+    ):
        class SampleModel(torch.nn.Module):
            def forward(self, x):
                y = torch.ops.aten.mul(x, 7)
                z = torch.ops.aten.add(y, 2)
                a = torch.ops.aten.softmax(z, dim=0)
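
Stripped of the test harness, the assertion that the formatter reflows above is just a pairwise comparison of observed flags against expected flags. A self-contained sketch of the idiom (the flag values are stand-ins for the are_output_tensors_unowned() queries):

# Ownership flags as each TRT submodule would report them; the values here
# are stand-ins for the per-engine queries made in the tests.
seen_flags = [False, True]
expected_flags = [False, True]

# The tests wrap this in self.assertTrue(...); a plain assert shows the idiom.
assert all(seen == expected for seen, expected in zip(seen_flags, expected_flags))

The formatter (black, by the look of the reflow) only spreads the nested call over multiple lines once it exceeds the line-length limit; the logic is unchanged.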

@cehongwang cehongwang merged commit 0bef55b into graph-break-overhead Jan 6, 2026
7 of 8 checks passed
