Add CVCUDA backend for horizontal and vertical flip transforms

zy1git · zy1git · commit 02c320a747da · 2025-11-19T15:08:27.000-08:00
Summary:
Implemented the _horizontal_flip_image_cvcuda and _vertical_flip_image_cvcuda
kernels using the cvcuda.flip operator (flipCode=1 and flipCode=0, respectively).
When CVCUDA is available, the kernels are registered for cvcuda.Tensor on
horizontal_flip and vertical_flip, so cvcuda.Tensor inputs are dispatched to them.
Test Plan:
- Added test_functional_cvcuda and test_image_correctness_cvcuda tests for both
  horizontal and vertical flip (skipped when CVCUDA is not available)
- Verified parity between the PyTorch and CVCUDA implementations
- All tests pass with the CVCUDA backend
Reviewers:

Subscribers:

Tasks:

Tags:
diff --git a/test/test_transforms_v2.py b/test/test_transforms_v2.py
@@ -1249,6 +1249,10 @@ def test_kernel_video(self):
     def test_functional(self, make_input):
         check_functional(F.horizontal_flip, make_input())
 
+    @pytest.mark.skipif(not CVCUDA_AVAILABLE, reason="CVCUDA is not available")
+    def test_functional_cvcuda(self):
+        check_functional(F.horizontal_flip, make_image_cvcuda())
+
     @pytest.mark.parametrize(
         ("kernel", "input_type"),
         [
@@ -1291,6 +1295,20 @@ def test_image_correctness(self, fn):
 
         torch.testing.assert_close(actual, expected)
 
+    @pytest.mark.skipif(not CVCUDA_AVAILABLE, reason="CVCUDA is not available")
+    @pytest.mark.parametrize(
+        "fn", [F.horizontal_flip, transform_cls_to_functional(transforms.RandomHorizontalFlip, p=1)]
+    )
+    def test_image_correctness_cvcuda(self, fn):
+        # Test parity between cvcuda and torchvision transforms
+        image = make_image_cvcuda()
+
+        actual = fn(image)
+        # Expected: apply fn to the torch-tensor image, then convert the result back to a CVCUDA tensor
+        expected = F.to_cvcuda_tensor(fn(F.cvcuda_to_tensor(image)))
+
+        torch.testing.assert_close(F.cvcuda_to_tensor(actual), F.cvcuda_to_tensor(expected))
+
     def _reference_horizontal_flip_bounding_boxes(self, bounding_boxes: tv_tensors.BoundingBoxes):
         affine_matrix = np.array(
             [
@@ -1865,6 +1883,10 @@ def test_kernel_video(self):
     def test_functional(self, make_input):
         check_functional(F.vertical_flip, make_input())
 
+    @pytest.mark.skipif(not CVCUDA_AVAILABLE, reason="CVCUDA is not available")
+    def test_functional_cvcuda(self):
+        check_functional(F.vertical_flip, make_image_cvcuda())
+
     @pytest.mark.parametrize(
         ("kernel", "input_type"),
         [
@@ -1905,6 +1927,18 @@ def test_image_correctness(self, fn):
 
         torch.testing.assert_close(actual, expected)
 
+    @pytest.mark.skipif(not CVCUDA_AVAILABLE, reason="CVCUDA is not available")
+    @pytest.mark.parametrize("fn", [F.vertical_flip, transform_cls_to_functional(transforms.RandomVerticalFlip, p=1)])
+    def test_image_correctness_cvcuda(self, fn):
+        # Test parity between cvcuda and torchvision transforms
+        image = make_image_cvcuda()
+
+        actual = fn(image)
+        # Expected: apply fn to the torch-tensor image, then convert the result back to a CVCUDA tensor
+        expected = F.to_cvcuda_tensor(fn(F.cvcuda_to_tensor(image)))
+
+        torch.testing.assert_close(F.cvcuda_to_tensor(actual), F.cvcuda_to_tensor(expected))
+
     def _reference_vertical_flip_bounding_boxes(self, bounding_boxes: tv_tensors.BoundingBoxes):
         affine_matrix = np.array(
             [
diff --git a/torchvision/transforms/v2/functional/_geometry.py b/torchvision/transforms/v2/functional/_geometry.py
@@ -2,7 +2,7 @@
 import numbers
 import warnings
 from collections.abc import Sequence
-from typing import Any, Optional, Union
+from typing import Any, Optional, TYPE_CHECKING, Union
 
 import PIL.Image
 import torch
@@ -26,7 +26,13 @@
 
 from ._meta import _get_size_image_pil, clamp_bounding_boxes, convert_bounding_box_format
 
-from ._utils import _FillTypeJIT, _get_kernel, _register_five_ten_crop_kernel_internal, _register_kernel_internal
+from ._utils import _FillTypeJIT, _get_kernel, _import_cvcuda, _is_cvcuda_available, _register_five_ten_crop_kernel_internal, _register_kernel_internal
+
+CVCUDA_AVAILABLE = _is_cvcuda_available()
+if TYPE_CHECKING:
+    import cvcuda
+if CVCUDA_AVAILABLE:
+    cvcuda = _import_cvcuda()
 
 
 def _check_interpolation(interpolation: Union[InterpolationMode, int]) -> InterpolationMode:
@@ -61,6 +67,12 @@ def horizontal_flip_image(image: torch.Tensor) -> torch.Tensor:
 def _horizontal_flip_image_pil(image: PIL.Image.Image) -> PIL.Image.Image:
     return _FP.hflip(image)
 
+def _horizontal_flip_image_cvcuda(image: "cvcuda.Tensor") -> "cvcuda.Tensor":
+    return cvcuda.flip(image, flipCode=1)
+
+
+if CVCUDA_AVAILABLE:
+    _horizontal_flip_image_cvcuda_registered = _register_kernel_internal(horizontal_flip, cvcuda.Tensor)(_horizontal_flip_image_cvcuda)
 
 @_register_kernel_internal(horizontal_flip, tv_tensors.Mask)
 def horizontal_flip_mask(mask: torch.Tensor) -> torch.Tensor:
@@ -150,6 +162,14 @@ def _vertical_flip_image_pil(image: PIL.Image.Image) -> PIL.Image.Image:
     return _FP.vflip(image)
 
 
+def _vertical_flip_image_cvcuda(image: "cvcuda.Tensor") -> "cvcuda.Tensor":
+    return cvcuda.flip(image, flipCode=0)
+
+
+if CVCUDA_AVAILABLE:
+    _vertical_flip_image_cvcuda_registered = _register_kernel_internal(vertical_flip, cvcuda.Tensor)(_vertical_flip_image_cvcuda)
+
+
 @_register_kernel_internal(vertical_flip, tv_tensors.Mask)
 def vertical_flip_mask(mask: torch.Tensor) -> torch.Tensor:
     return vertical_flip_image(mask)