diff --git a/test/test_transforms_tensor.py b/test/test_transforms_tensor.py index ba2321ec455..3dc8813b9f8 100644 --- a/test/test_transforms_tensor.py +++ b/test/test_transforms_tensor.py @@ -447,10 +447,17 @@ def test_resize_save(self, tmpdir): ], ) @pytest.mark.parametrize("interpolation", [NEAREST, BILINEAR, BICUBIC]) - def test_resized_crop(self, scale, ratio, size, interpolation, device): + @pytest.mark.parametrize("antialias", [None, True, False]) + def test_resized_crop(self, scale, ratio, size, interpolation, antialias, device): + + if antialias and interpolation == NEAREST: + pytest.skip("Can not resize if interpolation mode is NEAREST and antialias=True") + tensor = torch.randint(0, 256, size=(3, 44, 56), dtype=torch.uint8, device=device) batch_tensors = torch.randint(0, 256, size=(4, 3, 44, 56), dtype=torch.uint8, device=device) - transform = T.RandomResizedCrop(size=size, scale=scale, ratio=ratio, interpolation=interpolation) + transform = T.RandomResizedCrop( + size=size, scale=scale, ratio=ratio, interpolation=interpolation, antialias=antialias + ) s_transform = torch.jit.script(transform) _test_transform_vs_scripted(transform, s_transform, tensor) _test_transform_vs_scripted_on_batch(transform, s_transform, batch_tensors) diff --git a/torchvision/transforms/functional.py b/torchvision/transforms/functional.py index c40ae1eb92b..fb4c7e6677d 100644 --- a/torchvision/transforms/functional.py +++ b/torchvision/transforms/functional.py @@ -555,6 +555,7 @@ def resized_crop( width: int, size: List[int], interpolation: InterpolationMode = InterpolationMode.BILINEAR, + antialias: Optional[bool] = None, ) -> Tensor: """Crop the given image and resize it to desired size. If the image is torch Tensor, it is expected @@ -575,13 +576,17 @@ def resized_crop( ``InterpolationMode.BILINEAR`` and ``InterpolationMode.BICUBIC`` are supported. For backward compatibility integer values (e.g. 
``PIL.Image[.Resampling].NEAREST``) are still accepted, but deprecated since 0.13 and will be removed in 0.15. Please use InterpolationMode enum. + antialias (bool, optional): antialias flag. If ``img`` is PIL Image, the flag is ignored and anti-alias + is always used. If ``img`` is Tensor, the flag is False by default and can be set to True for + ``InterpolationMode.BILINEAR`` and ``InterpolationMode.BICUBIC`` modes. + This can help making the output for PIL images and tensors closer. Returns: PIL Image or Tensor: Cropped image. """ if not torch.jit.is_scripting() and not torch.jit.is_tracing(): _log_api_usage_once(resized_crop) img = crop(img, top, left, height, width) - img = resize(img, size, interpolation) + img = resize(img, size, interpolation, antialias=antialias) return img diff --git a/torchvision/transforms/transforms.py b/torchvision/transforms/transforms.py index 921e122bf5f..56f69e82033 100644 --- a/torchvision/transforms/transforms.py +++ b/torchvision/transforms/transforms.py @@ -310,12 +310,8 @@ class Resize(torch.nn.Module): mode). antialias (bool, optional): antialias flag. If ``img`` is PIL Image, the flag is ignored and anti-alias is always used. If ``img`` is Tensor, the flag is False by default and can be set to True for - ``InterpolationMode.BILINEAR`` only mode. This can help making the output for PIL images and tensors - closer. - - .. warning:: - There is no autodiff support for ``antialias=True`` option with input ``img`` as Tensor. - + ``InterpolationMode.BILINEAR`` and ``InterpolationMode.BICUBIC`` modes. + This can help making the output for PIL images and tensors closer. """ def __init__(self, size, interpolation=InterpolationMode.BILINEAR, max_size=None, antialias=None): @@ -873,9 +869,20 @@ class RandomResizedCrop(torch.nn.Module): ``InterpolationMode.BICUBIC`` are supported. For backward compatibility integer values (e.g. ``PIL.Image[.Resampling].NEAREST``) are still accepted, but deprecated since 0.13 and will be removed in 0.15. 
Please use InterpolationMode enum. +        antialias (bool, optional): antialias flag. If ``img`` is PIL Image, the flag is ignored and anti-alias +            is always used. If ``img`` is Tensor, the flag is False by default and can be set to True for +            ``InterpolationMode.BILINEAR`` and ``InterpolationMode.BICUBIC`` modes. +            This can help making the output for PIL images and tensors closer. """ -    def __init__(self, size, scale=(0.08, 1.0), ratio=(3.0 / 4.0, 4.0 / 3.0), interpolation=InterpolationMode.BILINEAR): +    def __init__( +        self, +        size, +        scale=(0.08, 1.0), +        ratio=(3.0 / 4.0, 4.0 / 3.0), +        interpolation=InterpolationMode.BILINEAR, +        antialias: Optional[bool] = None, +    ): super().__init__() _log_api_usage_once(self) self.size = _setup_size(size, error_msg="Please provide only two dimensions (h, w) for size.") @@ -896,6 +903,7 @@ def __init__(self, size, scale=(0.08, 1.0), ratio=(3.0 / 4.0, 4.0 / 3.0), interp interpolation = _interpolation_modes_from_int(interpolation) self.interpolation = interpolation +        self.antialias = antialias self.scale = scale self.ratio = ratio @@ -952,7 +960,7 @@ def forward(self, img): PIL Image or Tensor: Randomly cropped and resized image. """ i, j, h, w = self.get_params(img, self.scale, self.ratio) -        return F.resized_crop(img, i, j, h, w, self.size, self.interpolation) +        return F.resized_crop(img, i, j, h, w, self.size, self.interpolation, antialias=self.antialias) def __repr__(self) -> str: interpolate_str = self.interpolation.value @@ -960,6 +968,7 @@ def __repr__(self) -> str: format_string += f", scale={tuple(round(s, 4) for s in self.scale)}" format_string += f", ratio={tuple(round(r, 4) for r in self.ratio)}" -        format_string += f", interpolation={interpolate_str})" +        format_string += f", interpolation={interpolate_str}" +        format_string += f", antialias={self.antialias})" return format_string