Skip to content

Commit

Permalink
Unified inputs for F.rotate (#2495)
Browse files Browse the repository at this point in the history
* Added code for F_t.rotate with test
- updated F.affine tests

* Rotate test tolerance to 2%

* Fixes failing test

* Optimized _expanded_affine_grid with a single matmul op

* Recoded _compute_output_size
  • Loading branch information
vfdev-5 committed Aug 5, 2020
1 parent 23295fb commit 7666252
Show file tree
Hide file tree
Showing 5 changed files with 252 additions and 72 deletions.
57 changes: 49 additions & 8 deletions test/test_functional_tensor.py
Expand Up @@ -435,7 +435,7 @@ def test_affine(self):
)
# 3) Test translation
test_configs = [
[10, 12], (12, 13)
[10, 12], (-12, -13)
]
for t in test_configs:
for fn in [F.affine, scripted_affine]:
Expand All @@ -447,21 +447,21 @@ def test_affine(self):
test_configs = [
(45, [5, 6], 1.0, [0.0, 0.0]),
(33, (5, -4), 1.0, [0.0, 0.0]),
(45, [5, 4], 1.2, [0.0, 0.0]),
(33, (4, 8), 2.0, [0.0, 0.0]),
(45, [-5, 4], 1.2, [0.0, 0.0]),
(33, (-4, -8), 2.0, [0.0, 0.0]),
(85, (10, -10), 0.7, [0.0, 0.0]),
(0, [0, 0], 1.0, [35.0, ]),
(25, [0, 0], 1.2, [0.0, 15.0]),
(45, [10, 0], 0.7, [2.0, 5.0]),
(45, [10, -10], 1.2, [4.0, 5.0]),
(45, [-10, 0], 0.7, [2.0, 5.0]),
(45, [-10, -10], 1.2, [4.0, 5.0]),
]
for r in [0, ]:
for a, t, s, sh in test_configs:
out_pil_img = F.affine(pil_img, angle=a, translate=t, scale=s, shear=sh, resample=r)
out_pil_tensor = torch.from_numpy(np.array(out_pil_img).transpose((2, 0, 1)))

for fn in [F.affine, scripted_affine]:
out_tensor = fn(tensor, angle=a, translate=t, scale=s, shear=sh, resample=r)
out_pil_img = F.affine(pil_img, angle=a, translate=t, scale=s, shear=sh, resample=r)
out_pil_tensor = torch.from_numpy(np.array(out_pil_img).transpose((2, 0, 1)))

num_diff_pixels = (out_tensor != out_pil_tensor).sum().item() / 3.0
ratio_diff_pixels = num_diff_pixels / out_tensor.shape[-1] / out_tensor.shape[-2]
# Tolerance : less than 5% of different pixels
Expand All @@ -473,6 +473,47 @@ def test_affine(self):
)
)

def test_rotate(self):
# Tests on square image
tensor, pil_img = self._create_data(26, 26)
scripted_rotate = torch.jit.script(F.rotate)

img_size = pil_img.size

centers = [
None,
(int(img_size[0] * 0.3), int(img_size[0] * 0.4)),
[int(img_size[0] * 0.5), int(img_size[0] * 0.6)]
]

for r in [0, ]:
for a in range(-120, 120, 23):
for e in [True, False]:
for c in centers:

out_pil_img = F.rotate(pil_img, angle=a, resample=r, expand=e, center=c)
out_pil_tensor = torch.from_numpy(np.array(out_pil_img).transpose((2, 0, 1)))
for fn in [F.rotate, scripted_rotate]:
out_tensor = fn(tensor, angle=a, resample=r, expand=e, center=c)

self.assertEqual(
out_tensor.shape,
out_pil_tensor.shape,
msg="{}: {} vs {}".format(
(r, a, e, c), out_tensor.shape, out_pil_tensor.shape
)
)
num_diff_pixels = (out_tensor != out_pil_tensor).sum().item() / 3.0
ratio_diff_pixels = num_diff_pixels / out_tensor.shape[-1] / out_tensor.shape[-2]
# Tolerance : less than 2% of different pixels
self.assertLess(
ratio_diff_pixels,
0.02,
msg="{}: {}\n{} vs \n{}".format(
(r, a, e, c), ratio_diff_pixels, out_tensor[0, :7, :7], out_pil_tensor[0, :7, :7]
)
)


if __name__ == '__main__':
unittest.main()
2 changes: 1 addition & 1 deletion test/test_transforms.py
Expand Up @@ -1266,7 +1266,7 @@ def test_rotate(self):
x = np.zeros((100, 100, 3), dtype=np.uint8)
x[40, 40] = [255, 255, 255]

with self.assertRaises(TypeError):
with self.assertRaisesRegex(TypeError, r"img should be PIL Image"):
F.rotate(x, 10)

img = F.to_pil_image(x)
Expand Down
88 changes: 53 additions & 35 deletions torchvision/transforms/functional.py
Expand Up @@ -756,40 +756,8 @@ def adjust_gamma(img: Tensor, gamma: float, gain: float = 1) -> Tensor:
return F_t.adjust_gamma(img, gamma, gain)


def rotate(img, angle, resample=False, expand=False, center=None, fill=None):
"""Rotate the image by angle.
Args:
img (PIL Image): PIL Image to be rotated.
angle (float or int): In degrees degrees counter clockwise order.
resample (``PIL.Image.NEAREST`` or ``PIL.Image.BILINEAR`` or ``PIL.Image.BICUBIC``, optional):
An optional resampling filter. See `filters`_ for more information.
If omitted, or if the image has mode "1" or "P", it is set to ``PIL.Image.NEAREST``.
expand (bool, optional): Optional expansion flag.
If true, expands the output image to make it large enough to hold the entire rotated image.
If false or omitted, make the output image the same size as the input image.
Note that the expand flag assumes rotation around the center and no translation.
center (2-tuple, optional): Optional center of rotation.
Origin is the upper left corner.
Default is the center of the image.
fill (n-tuple or int or float): Pixel fill value for area outside the rotated
image. If int or float, the value is used for all bands respectively.
Defaults to 0 for all bands. This option is only available for ``pillow>=5.2.0``.
.. _filters: https://pillow.readthedocs.io/en/latest/handbook/concepts.html#filters
"""
if not F_pil._is_pil_image(img):
raise TypeError('img should be PIL Image. Got {}'.format(type(img)))

opts = _parse_fill(fill, img, '5.2.0')

return img.rotate(angle, resample, expand, center, **opts)


def _get_inverse_affine_matrix(
center: List[int], angle: float, translate: List[float], scale: float, shear: List[float]
center: List[float], angle: float, translate: List[float], scale: float, shear: List[float]
) -> List[float]:
# Helper method to compute inverse matrix for affine transformation

Expand Down Expand Up @@ -838,6 +806,56 @@ def _get_inverse_affine_matrix(
return matrix


def rotate(
img: Tensor, angle: float, resample: int = 0, expand: bool = False,
center: Optional[List[int]] = None, fill: Optional[int] = None
) -> Tensor:
"""Rotate the image by angle.
The image can be a PIL Image or a Tensor, in which case it is expected
to have [..., H, W] shape, where ... means an arbitrary number of leading dimensions.
Args:
img (PIL Image or Tensor): image to be rotated.
angle (float or int): rotation angle value in degrees, counter-clockwise.
resample (``PIL.Image.NEAREST`` or ``PIL.Image.BILINEAR`` or ``PIL.Image.BICUBIC``, optional):
An optional resampling filter. See `filters`_ for more information.
If omitted, or if the image has mode "1" or "P", it is set to ``PIL.Image.NEAREST``.
expand (bool, optional): Optional expansion flag.
If true, expands the output image to make it large enough to hold the entire rotated image.
If false or omitted, make the output image the same size as the input image.
Note that the expand flag assumes rotation around the center and no translation.
center (list or tuple, optional): Optional center of rotation. Origin is the upper left corner.
Default is the center of the image.
fill (n-tuple or int or float): Pixel fill value for area outside the rotated
image. If int or float, the value is used for all bands respectively.
Defaults to 0 for all bands. This option is only available for ``pillow>=5.2.0``.
Returns:
PIL Image or Tensor: Rotated image.
.. _filters: https://pillow.readthedocs.io/en/latest/handbook/concepts.html#filters
"""
if not isinstance(angle, (int, float)):
raise TypeError("Argument angle should be int or float")

if center is not None and not isinstance(center, (list, tuple)):
raise TypeError("Argument center should be a sequence")

if not isinstance(img, torch.Tensor):
return F_pil.rotate(img, angle=angle, resample=resample, expand=expand, center=center, fill=fill)

center_f = [0.0, 0.0]
if center is not None:
img_size = _get_image_size(img)
# Center is normalized to [-1, +1]
center_f = [2.0 * t / s - 1.0 for s, t in zip(img_size, center)]
# due to current incoherence of rotation angle direction between affine and rotate implementations
# we need to set -angle.
matrix = _get_inverse_affine_matrix(center_f, -angle, [0.0, 0.0], 1.0, [0.0, 0.0])
return F_t.rotate(img, matrix=matrix, resample=resample, expand=expand, fill=fill)


def affine(
img: Tensor, angle: float, translate: List[int], scale: float, shear: List[float],
resample: int = 0, fillcolor: Optional[int] = None
Expand All @@ -847,7 +865,7 @@ def affine(
to have [..., H, W] shape, where ... means an arbitrary number of leading dimensions.
Args:
img (PIL Image or Tensor): image to be rotated.
img (PIL Image or Tensor): image to transform.
angle (float or int): rotation angle in degrees between -180 and 180, clockwise direction.
translate (list or tuple of integers): horizontal and vertical translations (post-rotation translation)
scale (float): overall scale
Expand Down Expand Up @@ -911,7 +929,7 @@ def affine(
# we need to rescale translate by image size / 2 as its values can be between -1 and 1
translate = [2.0 * t / s for s, t in zip(img_size, translate)]

matrix = _get_inverse_affine_matrix([0, 0], angle, translate, scale, shear)
matrix = _get_inverse_affine_matrix([0.0, 0.0], angle, translate, scale, shear)
return F_t.affine(img, matrix=matrix, resample=resample, fillcolor=fillcolor)


Expand Down
34 changes: 34 additions & 0 deletions torchvision/transforms/functional_pil.py
Expand Up @@ -422,3 +422,37 @@ def affine(img, matrix, resample=0, fillcolor=None):
output_size = img.size
opts = _parse_fill(fillcolor, img, '5.0.0')
return img.transform(output_size, Image.AFFINE, matrix, resample, **opts)


@torch.jit.unused
def rotate(img, angle, resample=0, expand=False, center=None, fill=None):
"""Rotate PIL image by angle.
Args:
img (PIL Image): image to be rotated.
angle (float or int): rotation angle value in degrees, counter-clockwise.
resample (``PIL.Image.NEAREST`` or ``PIL.Image.BILINEAR`` or ``PIL.Image.BICUBIC``, optional):
An optional resampling filter. See `filters`_ for more information.
If omitted, or if the image has mode "1" or "P", it is set to ``PIL.Image.NEAREST``.
expand (bool, optional): Optional expansion flag.
If true, expands the output image to make it large enough to hold the entire rotated image.
If false or omitted, make the output image the same size as the input image.
Note that the expand flag assumes rotation around the center and no translation.
center (2-tuple, optional): Optional center of rotation.
Origin is the upper left corner.
Default is the center of the image.
fill (n-tuple or int or float): Pixel fill value for area outside the rotated
image. If int or float, the value is used for all bands respectively.
Defaults to 0 for all bands. This option is only available for ``pillow>=5.2.0``.
Returns:
PIL Image: Rotated image.
.. _filters: https://pillow.readthedocs.io/en/latest/handbook/concepts.html#filters
"""
if not _is_pil_image(img):
raise TypeError("img should be PIL Image. Got {}".format(type(img)))

opts = _parse_fill(fill, img, '5.2.0')
return img.rotate(angle, resample, expand, center, **opts)

0 comments on commit 7666252

Please sign in to comment.