From 6ea5ef4671f332ac38c970a93e630ce01934bb26 Mon Sep 17 00:00:00 2001 From: Prabhat Roy Date: Tue, 28 Sep 2021 16:57:43 +0100 Subject: [PATCH 1/4] Added PILToTensor and ConvertImageDtype classes in reference scripts --- references/detection/presets.py | 9 ++++++--- references/detection/transforms.py | 18 ++++++++++++++++-- references/segmentation/presets.py | 6 ++++-- references/segmentation/transforms.py | 11 ++++++++--- 4 files changed, 34 insertions(+), 10 deletions(-) diff --git a/references/detection/presets.py b/references/detection/presets.py index 1fac69ae356..b8419deb553 100644 --- a/references/detection/presets.py +++ b/references/detection/presets.py @@ -6,7 +6,8 @@ def __init__(self, data_augmentation, hflip_prob=0.5, mean=(123., 117., 104.)): if data_augmentation == 'hflip': self.transforms = T.Compose([ T.RandomHorizontalFlip(p=hflip_prob), - T.ToTensor(), + T.PILToTensor(), + T.ConvertImageDtype(), ]) elif data_augmentation == 'ssd': self.transforms = T.Compose([ @@ -14,13 +15,15 @@ def __init__(self, data_augmentation, hflip_prob=0.5, mean=(123., 117., 104.)): T.RandomZoomOut(fill=list(mean)), T.RandomIoUCrop(), T.RandomHorizontalFlip(p=hflip_prob), - T.ToTensor(), + T.PILToTensor(), + T.ConvertImageDtype(), ]) elif data_augmentation == 'ssdlite': self.transforms = T.Compose([ T.RandomIoUCrop(), T.RandomHorizontalFlip(p=hflip_prob), - T.ToTensor(), + T.PILToTensor(), + T.ConvertImageDtype(), ]) else: raise ValueError(f'Unknown data augmentation policy "{data_augmentation}"') diff --git a/references/detection/transforms.py b/references/detection/transforms.py index 6659e82f01c..19932702c22 100644 --- a/references/detection/transforms.py +++ b/references/detection/transforms.py @@ -1,10 +1,10 @@ +from typing import List, Tuple, Dict, Optional + import torch import torchvision - from torch import nn, Tensor from torchvision.transforms import functional as F from torchvision.transforms import transforms as T -from typing import List, Tuple, Dict, Optional def _flip_coco_person_keypoints(kps, width): @@ -52,6 +52,20 @@ def forward(self, image: Tensor, return image, target +class PILToTensor(nn.Module): + def forward(self, image: Tensor, + target: Optional[Dict[str, Tensor]] = None) -> Tuple[Tensor, Optional[Dict[str, Tensor]]]: + image = F.pil_to_tensor(image) + return image, target + + +class ConvertImageDtype(nn.Module): + def forward(self, image: Tensor, + target: Optional[Dict[str, Tensor]] = None) -> Tuple[Tensor, Optional[Dict[str, Tensor]]]: + image = F.convert_image_dtype(image) + return image, target + + class RandomIoUCrop(nn.Module): def __init__(self, min_scale: float = 0.3, max_scale: float = 1.0, min_aspect_ratio: float = 0.5, max_aspect_ratio: float = 2.0, sampler_options: Optional[List[float]] = None, trials: int = 40): diff --git a/references/segmentation/presets.py b/references/segmentation/presets.py index 3bf29c23751..15f59b3526c 100644 --- a/references/segmentation/presets.py +++ b/references/segmentation/presets.py @@ -11,7 +11,8 @@ def __init__(self, base_size, crop_size, hflip_prob=0.5, mean=(0.485, 0.456, 0.4 trans.append(T.RandomHorizontalFlip(hflip_prob)) trans.extend([ T.RandomCrop(crop_size), - T.ToTensor(), + T.PILToTensor(), + T.ConvertImageDtype(), T.Normalize(mean=mean, std=std), ]) self.transforms = T.Compose(trans) @@ -24,7 +25,8 @@ class SegmentationPresetEval: def __init__(self, base_size, mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)): self.transforms = T.Compose([ T.RandomResize(base_size, base_size), - T.ToTensor(), + T.PILToTensor(), + T.ConvertImageDtype(), T.Normalize(mean=mean, std=std), ]) diff --git a/references/segmentation/transforms.py b/references/segmentation/transforms.py index 943694d3a5c..51d7eed28ba 100644 --- a/references/segmentation/transforms.py +++ b/references/segmentation/transforms.py @@ -1,7 +1,6 @@ -import numpy as np -from PIL import Image import random +import numpy as np import torch from torchvision import transforms as T from torchvision.transforms import functional as F @@ -75,9 +74,15 @@ def __call__(self, image, target): return image, target -class ToTensor(object): +class PILToTensor: def __call__(self, image, target): image = F.pil_to_tensor(image) + target = torch.as_tensor(np.array(target), dtype=torch.int64) + return image, target + + +class ConvertImageDtype: + def __call__(self, image, target): image = F.convert_image_dtype(image) target = torch.as_tensor(np.array(target), dtype=torch.int64) return image, target From 14bdad711fcbe023bb20817f7b415621f2ce142e Mon Sep 17 00:00:00 2001 From: Prabhat Roy Date: Wed, 29 Sep 2021 23:21:38 +0100 Subject: [PATCH 2/4] Addressed review comments --- references/detection/presets.py | 8 +++++--- references/detection/transforms.py | 6 +++++- references/segmentation/presets.py | 6 ++++-- references/segmentation/transforms.py | 10 ++++++---- 4 files changed, 20 insertions(+), 10 deletions(-) diff --git a/references/detection/presets.py b/references/detection/presets.py index b8419deb553..04e0680043a 100644 --- a/references/detection/presets.py +++ b/references/detection/presets.py @@ -1,3 +1,5 @@ +import torch + import transforms as T @@ -7,7 +9,7 @@ def __init__(self, data_augmentation, hflip_prob=0.5, mean=(123., 117., 104.)): self.transforms = T.Compose([ T.RandomHorizontalFlip(p=hflip_prob), T.PILToTensor(), - T.ConvertImageDtype(), + T.ConvertImageDtype(torch.float), ]) elif data_augmentation == 'ssd': self.transforms = T.Compose([ @@ -16,14 +18,14 @@ def __init__(self, data_augmentation, hflip_prob=0.5, mean=(123., 117., 104.)): T.RandomIoUCrop(), T.RandomHorizontalFlip(p=hflip_prob), T.PILToTensor(), - T.ConvertImageDtype(), + T.ConvertImageDtype(torch.float), ]) elif data_augmentation == 'ssdlite': self.transforms = T.Compose([ T.RandomIoUCrop(), T.RandomHorizontalFlip(p=hflip_prob), T.PILToTensor(), - T.ConvertImageDtype(), + T.ConvertImageDtype(torch.float), ]) else: raise ValueError(f'Unknown data augmentation policy "{data_augmentation}"') diff --git a/references/detection/transforms.py b/references/detection/transforms.py index 19932702c22..c65535750b5 100644 --- a/references/detection/transforms.py +++ b/references/detection/transforms.py @@ -60,9 +60,13 @@ def forward(self, image: Tensor, class ConvertImageDtype(nn.Module): + def __init__(self, dtype: torch.dtype) -> None: + super().__init__() + self.dtype = dtype + def forward(self, image: Tensor, target: Optional[Dict[str, Tensor]] = None) -> Tuple[Tensor, Optional[Dict[str, Tensor]]]: - image = F.convert_image_dtype(image) + image = F.convert_image_dtype(image, self.dtype) return image, target diff --git a/references/segmentation/presets.py b/references/segmentation/presets.py index 15f59b3526c..96334356fcb 100644 --- a/references/segmentation/presets.py +++ b/references/segmentation/presets.py @@ -1,3 +1,5 @@ +import torch + import transforms as T @@ -12,7 +14,7 @@ def __init__(self, base_size, crop_size, hflip_prob=0.5, mean=(0.485, 0.456, 0.4 trans.extend([ T.RandomCrop(crop_size), T.PILToTensor(), - T.ConvertImageDtype(), + T.ConvertImageDtype(torch.float), T.Normalize(mean=mean, std=std), ]) self.transforms = T.Compose(trans) @@ -26,7 +28,7 @@ def __init__(self, base_size, mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.2 self.transforms = T.Compose([ T.RandomResize(base_size, base_size), T.PILToTensor(), - T.ConvertImageDtype(), + T.ConvertImageDtype(torch.float), T.Normalize(mean=mean, std=std), ]) diff --git a/references/segmentation/transforms.py b/references/segmentation/transforms.py index 51d7eed28ba..82dbb7007ee 100644 --- a/references/segmentation/transforms.py +++ b/references/segmentation/transforms.py @@ -1,6 +1,5 @@ import random -import numpy as np import torch from torchvision import transforms as T from torchvision.transforms import functional as F @@ -77,14 +76,17 @@ def __call__(self, image, target): class PILToTensor: def __call__(self, image, target): image = F.pil_to_tensor(image) - target = torch.as_tensor(np.array(target), dtype=torch.int64) + target = torch.as_tensor(target, dtype=torch.int64) return image, target class ConvertImageDtype: + def __init__(self, dtype): + self.dtype = dtype + def __call__(self, image, target): - image = F.convert_image_dtype(image) - target = torch.as_tensor(np.array(target), dtype=torch.int64) + image = F.convert_image_dtype(image, self.dtype) + target = torch.as_tensor(target, dtype=torch.int64) return image, target From cadfd116afb5a68cc7e6819f03ed479542f7c8a5 Mon Sep 17 00:00:00 2001 From: Prabhat Roy Date: Thu, 30 Sep 2021 00:23:05 +0100 Subject: [PATCH 3/4] Fixed TypeError --- references/segmentation/transforms.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/references/segmentation/transforms.py b/references/segmentation/transforms.py index 82dbb7007ee..21b8c512299 100644 --- a/references/segmentation/transforms.py +++ b/references/segmentation/transforms.py @@ -1,5 +1,6 @@ import random +import numpy as np import torch from torchvision import transforms as T from torchvision.transforms import functional as F @@ -76,7 +77,7 @@ def __call__(self, image, target): class PILToTensor: def __call__(self, image, target): image = F.pil_to_tensor(image) - target = torch.as_tensor(target, dtype=torch.int64) + target = torch.as_tensor(np.array(target), dtype=torch.int64) return image, target From 61ac7686cd9fc1130f714560bcd5418bde1f6d1c Mon Sep 17 00:00:00 2001 From: Prabhat Roy Date: Thu, 30 Sep 2021 11:37:51 +0100 Subject: [PATCH 4/4] Addressed review comment --- references/segmentation/transforms.py | 1 - 1 file changed, 1 deletion(-) diff --git a/references/segmentation/transforms.py b/references/segmentation/transforms.py index 21b8c512299..cf4846a1c27 100644 --- a/references/segmentation/transforms.py +++ b/references/segmentation/transforms.py @@ -87,7 +87,6 @@ def __init__(self, dtype): def __call__(self, image, target): image = F.convert_image_dtype(image, self.dtype) - target = torch.as_tensor(target, dtype=torch.int64) return image, target