26 changes: 26 additions & 0 deletions test/test_transforms_v2.py
@@ -1249,6 +1249,10 @@ def test_kernel_video(self):
    def test_functional(self, make_input):
        check_functional(F.horizontal_flip, make_input())

    @pytest.mark.skipif(not CVCUDA_AVAILABLE, reason="CVCUDA is not available")
    def test_functional_cvcuda(self):
        check_functional(F.horizontal_flip, make_image_cvcuda(batch_dims=(1,)))

Comment on lines +1253 to +1255

You should be able to include this test as part of the generic test_functional test above, which already goes through check_functional. You can simply add make_image_cvcuda to the list of possible make_input values in the @pytest.mark.parametrize decorator.


We should also add a test for the kernel (similar to test_kernel_image)
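
For illustration, a rough sketch of such a kernel test might look like this (assuming the check_kernel helper used by test_kernel_image can accept CV-CUDA tensors; some of its optional checks may need to be disabled):

    @pytest.mark.skipif(not CVCUDA_AVAILABLE, reason="CVCUDA is not available")
    def test_kernel_image_cvcuda(self):
        # Exercise the CV-CUDA kernel directly, mirroring test_kernel_image.
        from torchvision.transforms.v2.functional._geometry import _horizontal_flip_image_cvcuda

        check_kernel(_horizontal_flip_image_cvcuda, make_image_cvcuda(batch_dims=(1,)))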


Similarly, let's make sure we add make_image_cvcuda to the list of possible make_input values in the @pytest.mark.parametrize decorator for the following tests: test_functional_signature, test_transform, and test_transform_noop.


Agreed it should be parametrized. We still want to skip the test if CV-CUDA isn't available, so I think it will look like:

            # ...
            make_image,
            pytest.param(make_image_cvcuda, marks=pytest.mark.skipif(not CVCUDA_AVAILABLE, reason="CVCUDA is not available")),
            make_bounding_boxes,
            # ...
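
Spelled out, the full decorator for test_functional would then look roughly like this (a sketch only; the elided entries stand for whatever make_* helpers the existing parametrization already lists):

    @pytest.mark.parametrize(
        "make_input",
        [
            # ... existing make_* helpers ...
            make_image,
            pytest.param(
                make_image_cvcuda,
                marks=pytest.mark.skipif(not CVCUDA_AVAILABLE, reason="CVCUDA is not available"),
            ),
            make_bounding_boxes,
            # ... remaining make_* helpers ...
        ],
    )
    def test_functional(self, make_input):
        check_functional(F.horizontal_flip, make_input())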

    @pytest.mark.parametrize(
        ("kernel", "input_type"),
        [
@@ -1291,6 +1295,15 @@ def test_image_correctness(self, fn):

        torch.testing.assert_close(actual, expected)

    @pytest.mark.skipif(not CVCUDA_AVAILABLE, reason="CVCUDA is not available")
    def test_image_correctness_cvcuda(self):

We should be able to parametrize this test to control both the class and functional implementations, as done for the test_image_correctness test above.

    @pytest.mark.parametrize(
        "fn", [F.horizontal_flip, transform_cls_to_functional(transforms.RandomHorizontalFlip, p=1)]
    )


We could also re-use the test_image_correctness test above and add some extra parametrization. This would avoid defining a new test.

    @pytest.mark.parametrize(
        "make_input",
        [
            make_image,
            make_image_cvcuda,
        ],
    )
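
Stacking both parametrizations, the merged test could look roughly like the sketch below. This is illustrative only: the CV-CUDA branch converts the result back with F.cvcuda_to_tensor and compares it against the plain tensor path, while the other branch is a placeholder for whatever reference the existing test_image_correctness already computes.

    @pytest.mark.parametrize(
        "make_input",
        [
            make_image,
            pytest.param(
                make_image_cvcuda,
                marks=pytest.mark.skipif(not CVCUDA_AVAILABLE, reason="CVCUDA is not available"),
            ),
        ],
    )
    @pytest.mark.parametrize(
        "fn", [F.horizontal_flip, transform_cls_to_functional(transforms.RandomHorizontalFlip, p=1)]
    )
    def test_image_correctness(self, make_input, fn):
        image = make_input()
        actual = fn(image)

        if make_input is make_image_cvcuda:
            # Compare the CV-CUDA output against the plain tensor kernel.
            actual = F.cvcuda_to_tensor(actual)
            expected = fn(F.cvcuda_to_tensor(image))
        else:
            expected = F.horizontal_flip(image)  # placeholder for the existing reference computation

        assert_equal(actual, expected)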

        image = make_image_cvcuda(batch_dims=(1,))

        actual = F.horizontal_flip(image)
        expected_torch = F.horizontal_flip(F.cvcuda_to_tensor(image))

        assert torch.equal(F.cvcuda_to_tensor(actual), expected_torch)

We can use torch.testing.assert_close with rtol=0, atol=0. This will preserve consistency with other tests in torchvision, and it will also provide more detailed logging.

@NicolasHug (Nov 20, 2025):

Agreed, and for that we just use the plain assert_equal util (see other usage in this file). It's our alias for torch.testing.assert_close with rtol=0, atol=0.
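
Applied to the new test, the final assertion would then simply read:

        assert_equal(F.cvcuda_to_tensor(actual), expected_torch)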


    def _reference_horizontal_flip_bounding_boxes(self, bounding_boxes: tv_tensors.BoundingBoxes):
        affine_matrix = np.array(
            [
@@ -1865,6 +1878,10 @@ def test_kernel_video(self):
    def test_functional(self, make_input):
        check_functional(F.vertical_flip, make_input())

    @pytest.mark.skipif(not CVCUDA_AVAILABLE, reason="CVCUDA is not available")
    def test_functional_cvcuda(self):
        check_functional(F.vertical_flip, make_image_cvcuda())

    @pytest.mark.parametrize(
        ("kernel", "input_type"),
        [
@@ -1905,6 +1922,15 @@ def test_image_correctness(self, fn):

        torch.testing.assert_close(actual, expected)

    @pytest.mark.skipif(not CVCUDA_AVAILABLE, reason="CVCUDA is not available")
    def test_image_correctness_cvcuda(self):
        image = make_image_cvcuda(batch_dims=(1,))

        actual = F.vertical_flip(image)
        expected_torch = F.vertical_flip(F.cvcuda_to_tensor(image))

        assert torch.equal(F.cvcuda_to_tensor(actual), expected_torch)

    def _reference_vertical_flip_bounding_boxes(self, bounding_boxes: tv_tensors.BoundingBoxes):
        affine_matrix = np.array(
            [
24 changes: 22 additions & 2 deletions torchvision/transforms/v2/functional/_geometry.py
@@ -2,7 +2,7 @@
import numbers
import warnings
from collections.abc import Sequence
from typing import Any, Optional, Union
from typing import Any, Optional, TYPE_CHECKING, Union

import PIL.Image
import torch
@@ -26,7 +26,13 @@

from ._meta import _get_size_image_pil, clamp_bounding_boxes, convert_bounding_box_format

from ._utils import _FillTypeJIT, _get_kernel, _register_five_ten_crop_kernel_internal, _register_kernel_internal
from ._utils import _FillTypeJIT, _get_kernel, _import_cvcuda, _is_cvcuda_available, _register_five_ten_crop_kernel_internal, _register_kernel_internal

CVCUDA_AVAILABLE = _is_cvcuda_available()
if TYPE_CHECKING:
    import cvcuda
if CVCUDA_AVAILABLE:
    cvcuda = _import_cvcuda()


def _check_interpolation(interpolation: Union[InterpolationMode, int]) -> InterpolationMode:
@@ -61,6 +67,12 @@ def horizontal_flip_image(image: torch.Tensor) -> torch.Tensor:
def _horizontal_flip_image_pil(image: PIL.Image.Image) -> PIL.Image.Image:
    return _FP.hflip(image)

def _horizontal_flip_image_cvcuda(image: "cvcuda.Tensor") -> "cvcuda.Tensor":

Should we expose this in "vision/torchvision/transforms/v2/functional/__init__.py"?


We don't need to. See how we're not exposing the PIL kernels there either. Users can just rely on the functionals for PIL and for CV-CUDA. The main reasons we expose the kernels at all are:

  • users have pure tensors (instead of e.g. a BoundingBoxes class); they can't use the functional in that case, they need the kernel
  • torch.compile support

None of these apply to CV-CUDA (or to PIL).
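
In other words, CV-CUDA users stay at the functional level and let dispatch pick the registered kernel; a hypothetical usage (cvcuda_image here is assumed to be a cvcuda.Tensor obtained elsewhere):

# The functional dispatches on the input type and picks the CV-CUDA kernel automatically.
flipped = F.horizontal_flip(cvcuda_image)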

    return cvcuda.flip(image, flipCode=1)

Just going back on #9277 (comment)

We'll want to explicitly use _import_cvcuda() here. The reason is twofold:

  • for users, it gives them a nice error message with instructions if this function gets accidentally called. Right now it cannot really be accidentally called, but we may change our code in the future and we might miss that.
  • for us, it makes it very clear, just looking at this kernel, that cvcuda is needed and that it's an optional dependency, not something that is expected to be available at all times.
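
Concretely, the kernel would then look something like this sketch:

def _horizontal_flip_image_cvcuda(image: "cvcuda.Tensor") -> "cvcuda.Tensor":
    # Resolve the optional dependency lazily; _import_cvcuda() raises a clear
    # error with install instructions if CV-CUDA isn't installed.
    cvcuda = _import_cvcuda()
    return cvcuda.flip(image, flipCode=1)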



if CVCUDA_AVAILABLE:
    _horizontal_flip_image_cvcuda_registered = _register_kernel_internal(horizontal_flip, cvcuda.Tensor)(_horizontal_flip_image_cvcuda)

To be extra safe this could be

Suggested change:
-    _horizontal_flip_image_cvcuda_registered = _register_kernel_internal(horizontal_flip, cvcuda.Tensor)(_horizontal_flip_image_cvcuda)
+    _horizontal_flip_image_cvcuda_registered = _register_kernel_internal(horizontal_flip, _import_cvcuda().Tensor)(_horizontal_flip_image_cvcuda)


@_register_kernel_internal(horizontal_flip, tv_tensors.Mask)
def horizontal_flip_mask(mask: torch.Tensor) -> torch.Tensor:
@@ -150,6 +162,14 @@ def _vertical_flip_image_pil(image: PIL.Image.Image) -> PIL.Image.Image:
    return _FP.vflip(image)


def _vertical_flip_image_cvcuda(image: "cvcuda.Tensor") -> "cvcuda.Tensor":
    return cvcuda.flip(image, flipCode=0)


if CVCUDA_AVAILABLE:
    _vertical_flip_image_cvcuda_registered = _register_kernel_internal(vertical_flip, cvcuda.Tensor)(_vertical_flip_image_cvcuda)


@_register_kernel_internal(vertical_flip, tv_tensors.Mask)
def vertical_flip_mask(mask: torch.Tensor) -> torch.Tensor:
    return vertical_flip_image(mask)