diff --git a/test/test_transforms_tensor.py b/test/test_transforms_tensor.py index 4a2f59c6a9b..2c598e90833 100644 --- a/test/test_transforms_tensor.py +++ b/test/test_transforms_tensor.py @@ -188,6 +188,22 @@ def test_crop(self): 'crop', 'RandomCrop', fn_kwargs=fn_kwargs, meth_kwargs=meth_kwargs ) + # Test transforms.functional.crop including outside the image area + fn_kwargs = {"top": -2, "left": 3, "height": 4, "width": 5} # top + self._test_functional_op('crop', fn_kwargs=fn_kwargs) + + fn_kwargs = {"top": 1, "left": -3, "height": 4, "width": 5} # left + self._test_functional_op('crop', fn_kwargs=fn_kwargs) + + fn_kwargs = {"top": 7, "left": 3, "height": 4, "width": 5} # bottom + self._test_functional_op('crop', fn_kwargs=fn_kwargs) + + fn_kwargs = {"top": 3, "left": 8, "height": 4, "width": 5} # right + self._test_functional_op('crop', fn_kwargs=fn_kwargs) + + fn_kwargs = {"top": -3, "left": -3, "height": 15, "width": 15} # all + self._test_functional_op('crop', fn_kwargs=fn_kwargs) + sizes = [5, [5, ], [6, 6]] padding_configs = [ {"padding_mode": "constant", "fill": 0}, diff --git a/torchvision/transforms/functional.py b/torchvision/transforms/functional.py index 17dd649159e..5bbd91b3fd8 100644 --- a/torchvision/transforms/functional.py +++ b/torchvision/transforms/functional.py @@ -463,7 +463,8 @@ def pad(img: Tensor, padding: List[int], fill: int = 0, padding_mode: str = "con def crop(img: Tensor, top: int, left: int, height: int, width: int) -> Tensor: """Crop the given image at specified location and output size. If the image is torch Tensor, it is expected - to have [..., H, W] shape, where ... means an arbitrary number of leading dimensions + to have [..., H, W] shape, where ... means an arbitrary number of leading dimensions. + If image size is smaller than output size along any edge, image is padded with 0 and then cropped. Args: img (PIL Image or Tensor): Image to be cropped. (0,0) denotes the top left corner of the image. 
def crop(img: Tensor, top: int, left: int, height: int, width: int) -> Tensor:
    """Crop ``img`` to the window ``[top:top + height, left:left + width]``.

    Any part of the requested window lying outside the image is filled
    with 0 by padding, so the output always has shape
    ``[..., height, width]`` (matching PIL's out-of-bounds crop behavior).
    """
    _assert_image_tensor(img)

    img_w, img_h = _get_image_size(img)
    crop_right = left + width
    crop_bottom = top + height

    # Fast path: the window lies entirely inside the image.
    fully_inside = (
        left >= 0 and top >= 0 and crop_right <= img_w and crop_bottom <= img_h
    )
    if fully_inside:
        return img[..., top:crop_bottom, left:crop_right]

    # Otherwise clamp the window to the image, then zero-pad the clipped
    # region back out to the requested (height, width).
    padding_ltrb = [
        max(-left, 0),
        max(-top, 0),
        max(crop_right - img_w, 0),
        max(crop_bottom - img_h, 0),
    ]
    visible = img[..., max(top, 0):crop_bottom, max(left, 0):crop_right]
    return pad(visible, padding_ltrb, fill=0)