From f49b4d28b1e607d772a3f9ed77a90f7f52d22f85 Mon Sep 17 00:00:00 2001 From: Philip Meier Date: Fri, 23 Sep 2022 21:43:53 +0200 Subject: [PATCH 01/10] refactor RandomCrop --- torchvision/prototype/transforms/_geometry.py | 109 ++++++++++-------- 1 file changed, 63 insertions(+), 46 deletions(-) diff --git a/torchvision/prototype/transforms/_geometry.py b/torchvision/prototype/transforms/_geometry.py index babcb83af04..6c96aaf0a74 100644 --- a/torchvision/prototype/transforms/_geometry.py +++ b/torchvision/prototype/transforms/_geometry.py @@ -414,75 +414,92 @@ def __init__( _check_padding_mode_arg(padding_mode) self.padding = padding + self._parsed_padding = F._geometry._parse_pad_padding(padding) if padding else None self.pad_if_needed = pad_if_needed self.fill = _setup_fill_arg(fill) self.padding_mode = padding_mode def _get_params(self, sample: Any) -> Dict[str, Any]: - _, height, width = query_chw(sample) + _, padded_height, padded_width = query_chw(sample) if self.padding is not None: - # update height, width with static padding data - padding = self.padding - if isinstance(padding, Sequence): - padding = list(padding) - pad_left, pad_right, pad_top, pad_bottom = F._geometry._parse_pad_padding(padding) - height += pad_top + pad_bottom - width += pad_left + pad_right - - output_height, output_width = self.size - # We have to store maybe padded image size for pad_if_needed branch in _transform - input_height, input_width = height, width + pad_left, pad_right, pad_top, pad_bottom = self._parsed_padding + padded_height += pad_top + pad_bottom + padded_width += pad_left + pad_right + else: + pad_left = pad_right = pad_top = pad_bottom = 0 + + cropped_height, cropped_width = self.size + height_diff = padded_height - cropped_height + width_diff = padded_width - cropped_width if self.pad_if_needed: - # pad width if needed - if width < output_width: - width += 2 * (output_width - width) - # pad height if needed - if height < output_height: - height += 2 * (output_height - height) - - if height < output_height or width < output_width: + if height_diff < 0: + # This is a micro optimization that avoids some duplicate computation down the line. + # + # Here, we want to pad the top and bottom by the difference of the `cropped_height` (output) and the + # `padded_height` (input). Since the definition of `height_diff` is inverted we check for a negative + # value. + # + # Since we are padding the height by exactly twice the difference, the resulting `height_diff`, if we + # were to compute it again after the padding, is its exact inverse: + # + # new_height_diff = new_padded_height - cropped_height + # = old_padded_height + 2 * (cropped_height - old_padded_height) - cropped_height + # = - old_padded_height + cropped_height + # = -(old_padded_height - cropped_height) + # = - old_height_diff + # + # Without this trick, all `+=` would need to be changed to `-=` and the re-definition of `height_diff` + # would need to moved below the re-definition of `padded_height` and be: + # + # height_diff = padded_height - cropped_height + # + # Although this only replaces one subtraction with the unary `-` in this branch, this trick also avoids + # re-defining `height_diff` anywhere else. + height_diff = -height_diff + + pad_top += height_diff + pad_bottom += height_diff + padded_height += 2 * height_diff + + if width_diff < 0: + # Same as above by substituting 'width' for 'height'. + width_diff = -width_diff + + pad_left += width_diff + pad_right += width_diff + padded_width += 2 * width_diff + + if height_diff < 0 or width_diff < 0: raise ValueError( - f"Required crop size {(output_height, output_width)} is larger then input image size {(height, width)}" + f"Required crop size {(cropped_height, cropped_width)} is larger then " + f"{'padded ' if self.padding is not None else ''}input image size {(padded_height, cropped_width)}." ) - if width == output_width and height == output_height: - return dict(top=0, left=0, height=height, width=width, input_width=input_width, input_height=input_height) + # We need a different order here than we have in self._parsed_padding, since this padding will be parsed again + # in self._transform() + padding = [pad_left, pad_top, pad_right, pad_bottom] + needs_pad = any(padding) - top = torch.randint(0, height - output_height + 1, size=(1,)).item() - left = torch.randint(0, width - output_width + 1, size=(1,)).item() + top = int(torch.randint(0, height_diff + 1, size=())) if height_diff > 0 else 0 + left = int(torch.randint(0, width_diff + 1, size=())) if width_diff > 0 else 0 return dict( top=top, left=left, - height=output_height, - width=output_width, - input_width=input_width, - input_height=input_height, + height=cropped_height, + width=cropped_width, + needs_pad=needs_pad, + padding=padding, ) def _transform(self, inpt: Any, params: Dict[str, Any]) -> Any: - # TODO: (PERF) check for speed optimization if we avoid repeated pad calls fill = self.fill[type(inpt)] fill = F._geometry._convert_fill_arg(fill) - if self.padding is not None: - # This cast does Sequence[int] -> List[int] and is required to make mypy happy - padding = self.padding - if not isinstance(padding, int): - padding = list(padding) - - inpt = F.pad(inpt, padding=padding, fill=fill, padding_mode=self.padding_mode) - - if self.pad_if_needed: - input_width, input_height = params["input_width"], params["input_height"] - if input_width < self.size[1]: - padding = [self.size[1] - input_width, 0] - inpt = F.pad(inpt, padding=padding, fill=fill, padding_mode=self.padding_mode) - if input_height < self.size[0]: - padding = [0, self.size[0] - input_height] - inpt = F.pad(inpt, padding=padding, fill=fill, padding_mode=self.padding_mode) + if params["needs_pad"]: + inpt = F.pad(inpt, padding=params["padding"], fill=fill, padding_mode=self.padding_mode) return F.crop(inpt, top=params["top"], left=params["left"], height=params["height"], width=params["width"]) From a684ac71fd7d83609baa62ee279cc7fa601549af Mon Sep 17 00:00:00 2001 From: Philip Meier Date: Fri, 23 Sep 2022 22:09:26 +0200 Subject: [PATCH 02/10] mypy --- torchvision/prototype/transforms/_geometry.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/torchvision/prototype/transforms/_geometry.py b/torchvision/prototype/transforms/_geometry.py index 6c96aaf0a74..e232e677d7d 100644 --- a/torchvision/prototype/transforms/_geometry.py +++ b/torchvision/prototype/transforms/_geometry.py @@ -414,7 +414,7 @@ def __init__( _check_padding_mode_arg(padding_mode) self.padding = padding - self._parsed_padding = F._geometry._parse_pad_padding(padding) if padding else None + self._parsed_padding = F._geometry._parse_pad_padding(padding) if padding else None # type: ignore[arg-type] self.pad_if_needed = pad_if_needed self.fill = _setup_fill_arg(fill) self.padding_mode = padding_mode @@ -422,7 +422,7 @@ def __init__( def _get_params(self, sample: Any) -> Dict[str, Any]: _, padded_height, padded_width = query_chw(sample) - if self.padding is not None: + if self._parsed_padding is not None: pad_left, pad_right, pad_top, pad_bottom = self._parsed_padding padded_height += pad_top + pad_bottom padded_width += pad_left + pad_right From 80dd0be282bc52a458ae74be9e3ed085d7c19e5a Mon Sep 17 00:00:00 2001 From: Philip Meier Date: Fri, 23 Sep 2022 22:44:10 +0200 Subject: [PATCH 03/10] fix test --- test/test_prototype_transforms.py | 32 +++++++++++++++++++------------ 1 file changed, 20 insertions(+), 12 deletions(-) diff --git a/test/test_prototype_transforms.py b/test/test_prototype_transforms.py index b9c89b2b76a..2d324bb22f5 100644 --- a/test/test_prototype_transforms.py +++ b/test/test_prototype_transforms.py @@ -715,30 +715,38 @@ def test__get_params(self, padding, pad_if_needed, size, mocker): if padding is not None: if isinstance(padding, int): - h += 2 * padding - w += 2 * padding + pad_top = pad_bottom = pad_left = pad_right = padding elif isinstance(padding, list) and len(padding) == 2: - w += 2 * padding[0] - h += 2 * padding[1] + pad_left = pad_right = padding[0] + pad_top = pad_bottom = padding[1] elif isinstance(padding, list) and len(padding) == 4: - w += padding[0] + padding[2] - h += padding[1] + padding[3] + pad_left, pad_top, pad_right, pad_bottom = padding - expected_input_width = w - expected_input_height = h + h += pad_top + pad_bottom + w += pad_left + pad_right + else: + pad_left = pad_right = pad_top = pad_bottom = 0 if pad_if_needed: if w < size[1]: - w += 2 * (size[1] - w) + diff = size[1] - w + pad_left += diff + pad_right += diff + w += 2 * diff if h < size[0]: - h += 2 * (size[0] - h) + diff = size[0] - h + pad_top += diff + pad_bottom += diff + h += 2 * diff + + padding = [pad_left, pad_top, pad_right, pad_bottom] assert 0 <= params["top"] <= h - size[0] + 1 assert 0 <= params["left"] <= w - size[1] + 1 assert params["height"] == size[0] assert params["width"] == size[1] - assert params["input_width"] == expected_input_width - assert params["input_height"] == expected_input_height + assert params["needs_pad"] is any(padding) + assert params["padding"] == padding @pytest.mark.parametrize("padding", [None, 1, [2, 3], [1, 2, 3, 4]]) @pytest.mark.parametrize("pad_if_needed", [False, True]) From 64014dc7900abb0dcce0c6b10119420c19b44ccc Mon Sep 17 00:00:00 2001 From: Philip Meier Date: Tue, 27 Sep 2022 11:35:21 +0200 Subject: [PATCH 04/10] use padding directly rather than private attribute --- torchvision/prototype/transforms/_geometry.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/torchvision/prototype/transforms/_geometry.py b/torchvision/prototype/transforms/_geometry.py index e232e677d7d..54fb0e4b5cf 100644 --- a/torchvision/prototype/transforms/_geometry.py +++ b/torchvision/prototype/transforms/_geometry.py @@ -413,8 +413,7 @@ def __init__( _check_padding_arg(padding) _check_padding_mode_arg(padding_mode) - self.padding = padding - self._parsed_padding = F._geometry._parse_pad_padding(padding) if padding else None # type: ignore[arg-type] + self.padding = F._geometry._parse_pad_padding(padding) if padding else None # type: ignore[arg-type] self.pad_if_needed = pad_if_needed self.fill = _setup_fill_arg(fill) self.padding_mode = padding_mode @@ -422,8 +421,8 @@ def __init__( def _get_params(self, sample: Any) -> Dict[str, Any]: _, padded_height, padded_width = query_chw(sample) - if self._parsed_padding is not None: - pad_left, pad_right, pad_top, pad_bottom = self._parsed_padding + if self.padding is not None: + pad_left, pad_right, pad_top, pad_bottom = self.padding padded_height += pad_top + pad_bottom padded_width += pad_left + pad_right else: From b7c8dc43c1fd03a297a1e7c86f9a6acb1239faf3 Mon Sep 17 00:00:00 2001 From: Philip Meier Date: Tue, 27 Sep 2022 11:35:58 +0200 Subject: [PATCH 05/10] only compute type specific fill if padding is needed --- torchvision/prototype/transforms/_geometry.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/torchvision/prototype/transforms/_geometry.py b/torchvision/prototype/transforms/_geometry.py index 54fb0e4b5cf..29dac308677 100644 --- a/torchvision/prototype/transforms/_geometry.py +++ b/torchvision/prototype/transforms/_geometry.py @@ -494,10 +494,10 @@ def _get_params(self, sample: Any) -> Dict[str, Any]: ) def _transform(self, inpt: Any, params: Dict[str, Any]) -> Any: - fill = self.fill[type(inpt)] - fill = F._geometry._convert_fill_arg(fill) - if params["needs_pad"]: + fill = self.fill[type(inpt)] + fill = F._geometry._convert_fill_arg(fill) + inpt = F.pad(inpt, padding=params["padding"], fill=fill, padding_mode=self.padding_mode) return F.crop(inpt, top=params["top"], left=params["left"], height=params["height"], width=params["width"]) From 5aff3b759078228ef79e094e54650efcff8772c2 Mon Sep 17 00:00:00 2001 From: Philip Meier Date: Tue, 27 Sep 2022 16:02:22 +0200 Subject: [PATCH 06/10] [DRAFT] don't use the diff trick --- torchvision/prototype/transforms/_geometry.py | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/torchvision/prototype/transforms/_geometry.py b/torchvision/prototype/transforms/_geometry.py index 29dac308677..64ed84f1b3f 100644 --- a/torchvision/prototype/transforms/_geometry.py +++ b/torchvision/prototype/transforms/_geometry.py @@ -429,11 +429,10 @@ def _get_params(self, sample: Any) -> Dict[str, Any]: pad_left = pad_right = pad_top = pad_bottom = 0 cropped_height, cropped_width = self.size - height_diff = padded_height - cropped_height - width_diff = padded_width - cropped_width if self.pad_if_needed: - if height_diff < 0: + height_diff = cropped_height - padded_height + if height_diff > 0: # This is a micro optimization that avoids some duplicate computation down the line. # # Here, we want to pad the top and bottom by the difference of the `cropped_height` (output) and the @@ -456,19 +455,23 @@ def _get_params(self, sample: Any) -> Dict[str, Any]: # # Although this only replaces one subtraction with the unary `-` in this branch, this trick also avoids # re-defining `height_diff` anywhere else. - height_diff = -height_diff - pad_top += height_diff pad_bottom += height_diff padded_height += 2 * height_diff + else: + height_diff = padded_height - cropped_height - if width_diff < 0: + width_diff = cropped_width - padded_width + if width_diff > 0: # Same as above by substituting 'width' for 'height'. - width_diff = -width_diff - pad_left += width_diff pad_right += width_diff padded_width += 2 * width_diff + else: + width_diff = padded_width - cropped_width + else: + height_diff = padded_height - cropped_height + width_diff = padded_width - cropped_width if height_diff < 0 or width_diff < 0: raise ValueError( From f8a841eb32e85fa30065125b4bdb1f266571f169 Mon Sep 17 00:00:00 2001 From: Philip Meier Date: Tue, 27 Sep 2022 16:06:23 +0200 Subject: [PATCH 07/10] fix error message Co-authored-by: vfdev --- torchvision/prototype/transforms/_geometry.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/torchvision/prototype/transforms/_geometry.py b/torchvision/prototype/transforms/_geometry.py index 64ed84f1b3f..df6a0ea902c 100644 --- a/torchvision/prototype/transforms/_geometry.py +++ b/torchvision/prototype/transforms/_geometry.py @@ -476,7 +476,7 @@ def _get_params(self, sample: Any) -> Dict[str, Any]: if height_diff < 0 or width_diff < 0: raise ValueError( f"Required crop size {(cropped_height, cropped_width)} is larger then " - f"{'padded ' if self.padding is not None else ''}input image size {(padded_height, cropped_width)}." + f"{'padded ' if self.padding is not None else ''}input image size {(padded_height, padded_width)}." ) # We need a different order here than we have in self._parsed_padding, since this padding will be parsed again From 3d2fc44c1e1361811701c8218f2deec8c879eeb1 Mon Sep 17 00:00:00 2001 From: Philip Meier Date: Tue, 27 Sep 2022 16:56:46 +0200 Subject: [PATCH 08/10] remove height and width diff --- torchvision/prototype/transforms/_geometry.py | 66 ++++++------------- 1 file changed, 21 insertions(+), 45 deletions(-) diff --git a/torchvision/prototype/transforms/_geometry.py b/torchvision/prototype/transforms/_geometry.py index 64ed84f1b3f..4ae0df1698b 100644 --- a/torchvision/prototype/transforms/_geometry.py +++ b/torchvision/prototype/transforms/_geometry.py @@ -431,51 +431,23 @@ def _get_params(self, sample: Any) -> Dict[str, Any]: cropped_height, cropped_width = self.size if self.pad_if_needed: - height_diff = cropped_height - padded_height - if height_diff > 0: - # This is a micro optimization that avoids some duplicate computation down the line. - # - # Here, we want to pad the top and bottom by the difference of the `cropped_height` (output) and the - # `padded_height` (input). Since the definition of `height_diff` is inverted we check for a negative - # value. - # - # Since we are padding the height by exactly twice the difference, the resulting `height_diff`, if we - # were to compute it again after the padding, is its exact inverse: - # - # new_height_diff = new_padded_height - cropped_height - # = old_padded_height + 2 * (cropped_height - old_padded_height) - cropped_height - # = - old_padded_height + cropped_height - # = -(old_padded_height - cropped_height) - # = - old_height_diff - # - # Without this trick, all `+=` would need to be changed to `-=` and the re-definition of `height_diff` - # would need to moved below the re-definition of `padded_height` and be: - # - # height_diff = padded_height - cropped_height - # - # Although this only replaces one subtraction with the unary `-` in this branch, this trick also avoids - # re-defining `height_diff` anywhere else. - pad_top += height_diff - pad_bottom += height_diff - padded_height += 2 * height_diff - else: - height_diff = padded_height - cropped_height - - width_diff = cropped_width - padded_width - if width_diff > 0: - # Same as above by substituting 'width' for 'height'. - pad_left += width_diff - pad_right += width_diff - padded_width += 2 * width_diff - else: - width_diff = padded_width - cropped_width - else: - height_diff = padded_height - cropped_height - width_diff = padded_width - cropped_width + if padded_height < cropped_height: + diff = cropped_height - padded_height + + pad_top += diff + pad_bottom += diff + padded_height += 2 * diff + + if padded_width < cropped_width: + diff = cropped_width - padded_width - if height_diff < 0 or width_diff < 0: + pad_left += diff + pad_right += diff + padded_width += 2 * diff + + if padded_height < cropped_height or padded_width < cropped_width: raise ValueError( - f"Required crop size {(cropped_height, cropped_width)} is larger then " + f"Required crop size {(cropped_height, cropped_width)} is larger then the " f"{'padded ' if self.padding is not None else ''}input image size {(padded_height, cropped_width)}." ) @@ -484,8 +456,12 @@ def _get_params(self, sample: Any) -> Dict[str, Any]: padding = [pad_left, pad_top, pad_right, pad_bottom] needs_pad = any(padding) - top = int(torch.randint(0, height_diff + 1, size=())) if height_diff > 0 else 0 - left = int(torch.randint(0, width_diff + 1, size=())) if width_diff > 0 else 0 + top = ( + int(torch.randint(0, padded_height - cropped_height + 1, size=())) if padded_height > cropped_height else 0 + ) + left = ( + int(torch.randint(0, padded_width - cropped_width + 1, size=())) if padded_width > cropped_width > 0 else 0 + ) return dict( top=top, From 749b2c966e40e78bcffc76e5c963e6812bb33936 Mon Sep 17 00:00:00 2001 From: Philip Meier Date: Fri, 30 Sep 2022 14:16:46 +0200 Subject: [PATCH 09/10] reinstate separate diff checking --- test/test_prototype_transforms_consistency.py | 2 +- torchvision/prototype/transforms/_geometry.py | 9 ++++----- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/test/test_prototype_transforms_consistency.py b/test/test_prototype_transforms_consistency.py index 9e2e3051189..c8debe1e293 100644 --- a/test/test_prototype_transforms_consistency.py +++ b/test/test_prototype_transforms_consistency.py @@ -966,7 +966,7 @@ def _transform(self, inpt, params): class TestRefSegTransforms: def make_datapoints(self, supports_pil=True, image_dtype=torch.uint8): - size = (256, 640) + size = (256, 460) num_categories = 21 conv_fns = [] diff --git a/torchvision/prototype/transforms/_geometry.py b/torchvision/prototype/transforms/_geometry.py index 1b19766f36e..b88d4252aa1 100644 --- a/torchvision/prototype/transforms/_geometry.py +++ b/torchvision/prototype/transforms/_geometry.py @@ -456,11 +456,10 @@ def _get_params(self, sample: Any) -> Dict[str, Any]: padding = [pad_left, pad_top, pad_right, pad_bottom] needs_pad = any(padding) - if padded_height > cropped_height or padded_width > cropped_width: - top = int(torch.randint(0, padded_height - cropped_height + 1, size=())) - left = int(torch.randint(0, padded_width - cropped_width + 1, size=())) - else: - top = left = 0 + top = ( + int(torch.randint(0, padded_height - cropped_height + 1, size=())) if padded_height > cropped_height else 0 + ) + left = int(torch.randint(0, padded_width - cropped_width + 1, size=())) if padded_width > cropped_width else 0 return dict( top=top, From a63df01eda2370f931a6f0be909b51bdbd9dc9d4 Mon Sep 17 00:00:00 2001 From: Philip Meier Date: Mon, 3 Oct 2022 07:52:58 +0200 Subject: [PATCH 10/10] introduce needs_crop flag --- torchvision/prototype/transforms/_geometry.py | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/torchvision/prototype/transforms/_geometry.py b/torchvision/prototype/transforms/_geometry.py index b88d4252aa1..008d4d195cb 100644 --- a/torchvision/prototype/transforms/_geometry.py +++ b/torchvision/prototype/transforms/_geometry.py @@ -456,12 +456,19 @@ def _get_params(self, sample: Any) -> Dict[str, Any]: padding = [pad_left, pad_top, pad_right, pad_bottom] needs_pad = any(padding) - top = ( - int(torch.randint(0, padded_height - cropped_height + 1, size=())) if padded_height > cropped_height else 0 + needs_vert_crop, top = ( + (True, int(torch.randint(0, padded_height - cropped_height + 1, size=()))) + if padded_height > cropped_height + else (False, 0) + ) + needs_horz_crop, left = ( + (True, int(torch.randint(0, padded_width - cropped_width + 1, size=()))) + if padded_width > cropped_width + else (False, 0) ) - left = int(torch.randint(0, padded_width - cropped_width + 1, size=())) if padded_width > cropped_width else 0 return dict( + needs_crop=needs_vert_crop or needs_horz_crop, top=top, left=left, height=cropped_height, @@ -477,7 +484,10 @@ def _transform(self, inpt: Any, params: Dict[str, Any]) -> Any: inpt = F.pad(inpt, padding=params["padding"], fill=fill, padding_mode=self.padding_mode) - return F.crop(inpt, top=params["top"], left=params["left"], height=params["height"], width=params["width"]) + if params["needs_crop"]: + inpt = F.crop(inpt, top=params["top"], left=params["left"], height=params["height"], width=params["width"]) + + return inpt class RandomPerspective(_RandomApplyTransform):