From 3e26e7078cdf66184e8f012e4ed6321fec5cdbad Mon Sep 17 00:00:00 2001 From: Vasilis Vryniotis Date: Fri, 13 May 2022 10:39:54 +0100 Subject: [PATCH 1/4] Adding `__repr__` in presets --- torchvision/transforms/_presets.py | 83 +++++++++++++++++++++--------- 1 file changed, 59 insertions(+), 24 deletions(-) diff --git a/torchvision/transforms/_presets.py b/torchvision/transforms/_presets.py index 4d503f44cc5..09bcf52df09 100644 --- a/torchvision/transforms/_presets.py +++ b/torchvision/transforms/_presets.py @@ -25,6 +25,9 @@ def forward(self, img: Tensor) -> Tensor: img = F.pil_to_tensor(img) return F.convert_image_dtype(img, torch.float) + def __repr__(self) -> str: + return self.__class__.__name__ + "()" + class ImageClassification(nn.Module): def __init__( @@ -37,21 +40,31 @@ def __init__( interpolation: InterpolationMode = InterpolationMode.BILINEAR, ) -> None: super().__init__() - self._crop_size = [crop_size] - self._size = [resize_size] - self._mean = list(mean) - self._std = list(std) - self._interpolation = interpolation + self.crop_size = [crop_size] + self.resize_size = [resize_size] + self.mean = list(mean) + self.std = list(std) + self.interpolation = interpolation def forward(self, img: Tensor) -> Tensor: - img = F.resize(img, self._size, interpolation=self._interpolation) - img = F.center_crop(img, self._crop_size) + img = F.resize(img, self.resize_size, interpolation=self.interpolation) + img = F.center_crop(img, self.crop_size) if not isinstance(img, Tensor): img = F.pil_to_tensor(img) img = F.convert_image_dtype(img, torch.float) - img = F.normalize(img, mean=self._mean, std=self._std) + img = F.normalize(img, mean=self.mean, std=self.std) return img + def __repr__(self) -> str: + format_string = self.__class__.__name__ + "(" + format_string += f"\n crop_size={self.crop_size}" + format_string += f"\n resize_size={self.resize_size}" + format_string += f"\n mean={self.mean}" + format_string += f"\n std={self.std}" + format_string += f"\n interpolation={self.interpolation}" + format_string += "\n)" + return format_string + class VideoClassification(nn.Module): def __init__( @@ -64,11 +77,11 @@ def __init__( interpolation: InterpolationMode = InterpolationMode.BILINEAR, ) -> None: super().__init__() - self._crop_size = list(crop_size) - self._size = list(resize_size) - self._mean = list(mean) - self._std = list(std) - self._interpolation = interpolation + self.crop_size = list(crop_size) + self.resize_size = list(resize_size) + self.mean = list(mean) + self.std = list(std) + self.interpolation = interpolation def forward(self, vid: Tensor) -> Tensor: need_squeeze = False @@ -79,11 +92,11 @@ def forward(self, vid: Tensor) -> Tensor: vid = vid.permute(0, 1, 4, 2, 3) # (N, T, H, W, C) => (N, T, C, H, W) N, T, C, H, W = vid.shape vid = vid.view(-1, C, H, W) - vid = F.resize(vid, self._size, interpolation=self._interpolation) - vid = F.center_crop(vid, self._crop_size) + vid = F.resize(vid, self.resize_size, interpolation=self.interpolation) + vid = F.center_crop(vid, self.crop_size) vid = F.convert_image_dtype(vid, torch.float) - vid = F.normalize(vid, mean=self._mean, std=self._std) - H, W = self._crop_size + vid = F.normalize(vid, mean=self.mean, std=self.std) + H, W = self.crop_size vid = vid.view(N, T, C, H, W) vid = vid.permute(0, 2, 1, 3, 4) # (N, T, C, H, W) => (N, C, T, H, W) @@ -91,6 +104,16 @@ def forward(self, vid: Tensor) -> Tensor: vid = vid.squeeze(dim=0) return vid + def __repr__(self) -> str: + format_string = self.__class__.__name__ + "(" + format_string += f"\n crop_size={self.crop_size}" + format_string += f"\n resize_size={self.resize_size}" + format_string += f"\n mean={self.mean}" + format_string += f"\n std={self.std}" + format_string += f"\n interpolation={self.interpolation}" + format_string += "\n)" + return format_string + class SemanticSegmentation(nn.Module): def __init__( @@ -102,20 +125,29 @@ def __init__( interpolation: InterpolationMode = InterpolationMode.BILINEAR, ) -> None: super().__init__() - self._size = [resize_size] if resize_size is not None else None - self._mean = list(mean) - self._std = list(std) - self._interpolation = interpolation + self.resize_size = [resize_size] if resize_size is not None else None + self.mean = list(mean) + self.std = list(std) + self.interpolation = interpolation def forward(self, img: Tensor) -> Tensor: - if isinstance(self._size, list): - img = F.resize(img, self._size, interpolation=self._interpolation) + if isinstance(self.resize_size, list): + img = F.resize(img, self.resize_size, interpolation=self.interpolation) if not isinstance(img, Tensor): img = F.pil_to_tensor(img) img = F.convert_image_dtype(img, torch.float) - img = F.normalize(img, mean=self._mean, std=self._std) + img = F.normalize(img, mean=self.mean, std=self.std) return img + def __repr__(self) -> str: + format_string = self.__class__.__name__ + "(" + format_string += f"\n resize_size={self.resize_size}" + format_string += f"\n mean={self.mean}" + format_string += f"\n std={self.std}" + format_string += f"\n interpolation={self.interpolation}" + format_string += "\n)" + return format_string + class OpticalFlow(nn.Module): def forward(self, img1: Tensor, img2: Tensor) -> Tuple[Tensor, Tensor]: @@ -135,3 +167,6 @@ def forward(self, img1: Tensor, img2: Tensor) -> Tuple[Tensor, Tensor]: img2 = img2.contiguous() return img1, img2 + + def __repr__(self) -> str: + return self.__class__.__name__ + "()" From 9b9e4785ed04f3ba233949445250f05e210eb00e Mon Sep 17 00:00:00 2001 From: Vasilis Vryniotis Date: Fri, 13 May 2022 10:59:42 +0100 Subject: [PATCH 2/4] Adds `describe()` methods to all presets. --- torchvision/transforms/_presets.py | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/torchvision/transforms/_presets.py b/torchvision/transforms/_presets.py index 09bcf52df09..7eeeeb24417 100644 --- a/torchvision/transforms/_presets.py +++ b/torchvision/transforms/_presets.py @@ -28,6 +28,9 @@ def forward(self, img: Tensor) -> Tensor: def __repr__(self) -> str: return self.__class__.__name__ + "()" + def describe(self) -> str: + return "Rescales the values of the image to [0.0, 1.0]." + class ImageClassification(nn.Module): def __init__( @@ -65,6 +68,13 @@ def __repr__(self) -> str: format_string += "\n)" return format_string + def describe(self) -> str: + return ( + f"Resizes the image to resize_size={self.resize_size} using interpolation={self.interpolation}, " + f"followed by a central crop of crop_size={self.crop_size}. Then the values are rescaled to " + f"[0.0, 1.0] and normalized using mean={self.mean} and std={self.std}." + ) + class VideoClassification(nn.Module): def __init__( @@ -114,6 +124,13 @@ def __repr__(self) -> str: format_string += "\n)" return format_string + def describe(self) -> str: + return ( + f"Resizes the video frames to resize_size={self.resize_size} using interpolation={self.interpolation}, " + f"followed by a central crop of crop_size={self.crop_size}. Then the values are rescaled to " + f"[0.0, 1.0] and normalized using mean={self.mean} and std={self.std}." + ) + class SemanticSegmentation(nn.Module): def __init__( @@ -148,6 +165,12 @@ def __repr__(self) -> str: format_string += "\n)" return format_string + def describe(self) -> str: + return ( + f"Resizes the image to resize_size={self.resize_size} using interpolation={self.interpolation}. " + f"Then the values are rescaled to [0.0, 1.0] and normalized using mean={self.mean} and std={self.std}." + ) + class OpticalFlow(nn.Module): def forward(self, img1: Tensor, img2: Tensor) -> Tuple[Tensor, Tensor]: @@ -170,3 +193,6 @@ def forward(self, img1: Tensor, img2: Tensor) -> Tuple[Tensor, Tensor]: def __repr__(self) -> str: return self.__class__.__name__ + "()" + + def describe(self) -> str: + return "Rescales the values of the images to [-1.0, 1.0]." From 812a7048d13261cb1beac0d37d4775e9e7f03f10 Mon Sep 17 00:00:00 2001 From: Vasilis Vryniotis Date: Fri, 13 May 2022 11:30:12 +0100 Subject: [PATCH 3/4] Adding transform descriptions in the documentation. --- docs/source/conf.py | 5 +++++ torchvision/transforms/_presets.py | 20 ++++++++++---------- 2 files changed, 15 insertions(+), 10 deletions(-) diff --git a/docs/source/conf.py b/docs/source/conf.py index 258bbf6b5f2..946ca5e699d 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -366,6 +366,11 @@ def inject_weight_metadata(app, what, name, obj, options, lines): lines += [".. table::", ""] lines += textwrap.indent(table, " " * 4).split("\n") lines.append("") + lines.append( + f"The preprocessing transforms are available at ``{str(field)}.transforms`` and " + f"perform the following operations: {field.transforms().describe()}" + ) + lines.append("") def generate_weights_table(module, table_name, metrics, include_patterns=None, exclude_patterns=None): diff --git a/torchvision/transforms/_presets.py b/torchvision/transforms/_presets.py index 7eeeeb24417..b009d45f1a4 100644 --- a/torchvision/transforms/_presets.py +++ b/torchvision/transforms/_presets.py @@ -29,7 +29,7 @@ def __repr__(self) -> str: return self.__class__.__name__ + "()" def describe(self) -> str: - return "Rescales the values of the image to [0.0, 1.0]." + return "The images are rescaled to ``[0.0, 1.0]``." class ImageClassification(nn.Module): @@ -70,9 +70,9 @@ def __repr__(self) -> str: def describe(self) -> str: return ( - f"Resizes the image to resize_size={self.resize_size} using interpolation={self.interpolation}, " - f"followed by a central crop of crop_size={self.crop_size}. Then the values are rescaled to " - f"[0.0, 1.0] and normalized using mean={self.mean} and std={self.std}." + f"The images are resized to ``resize_size={self.resize_size}`` using ``interpolation={self.interpolation}``, " + f"followed by a central crop of ``crop_size={self.crop_size}``. Then the values are rescaled to " + f"``[0.0, 1.0]`` and normalized using ``mean={self.mean}`` and ``std={self.std}``." ) @@ -126,9 +126,9 @@ def __repr__(self) -> str: def describe(self) -> str: return ( - f"Resizes the video frames to resize_size={self.resize_size} using interpolation={self.interpolation}, " - f"followed by a central crop of crop_size={self.crop_size}. Then the values are rescaled to " - f"[0.0, 1.0] and normalized using mean={self.mean} and std={self.std}." + f"The video frames are resized to ``resize_size={self.resize_size}`` using ``interpolation={self.interpolation}``, " + f"followed by a central crop of ``crop_size={self.crop_size}``. Then the values are rescaled to " + f"``[0.0, 1.0]`` and normalized using ``mean={self.mean}`` and ``std={self.std}``." ) @@ -167,8 +167,8 @@ def __repr__(self) -> str: def describe(self) -> str: return ( - f"Resizes the image to resize_size={self.resize_size} using interpolation={self.interpolation}. " - f"Then the values are rescaled to [0.0, 1.0] and normalized using mean={self.mean} and std={self.std}." + f"The images are resized to ``resize_size={self.resize_size}`` using ``interpolation={self.interpolation}``. " + f"Then the values are rescaled to ``[0.0, 1.0]`` and normalized using ``mean={self.mean}`` and ``std={self.std}``." ) @@ -195,4 +195,4 @@ def __repr__(self) -> str: return self.__class__.__name__ + "()" def describe(self) -> str: - return "Rescales the values of the images to [-1.0, 1.0]." + return "The images are rescaled to ``[-1.0, 1.0]``." From 7106810b57c545762850600a109931393d363600 Mon Sep 17 00:00:00 2001 From: Vasilis Vryniotis Date: Fri, 13 May 2022 11:40:09 +0100 Subject: [PATCH 4/4] Change "preprocessing" to "inference" --- docs/source/conf.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/conf.py b/docs/source/conf.py index 946ca5e699d..b0fe63cb288 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -367,7 +367,7 @@ def inject_weight_metadata(app, what, name, obj, options, lines): lines += textwrap.indent(table, " " * 4).split("\n") lines.append("") lines.append( - f"The preprocessing transforms are available at ``{str(field)}.transforms`` and " + f"The inference transforms are available at ``{str(field)}.transforms`` and " f"perform the following operations: {field.transforms().describe()}" ) lines.append("")