pytorch · datumbox · Sep 23, 2022 · Sep 22, 2022 · Sep 22, 2022 · Sep 22, 2022
diff --git a/test/test_prototype_transforms.py b/test/test_prototype_transforms.py
@@ -132,7 +132,7 @@ def test_mixup_cutmix(self, transform, input):
         transform(input_copy)
 
         # Check if we raise an error if sample contains bbox or mask or label
-        err_msg = "does not support bounding boxes, masks and plain labels"
+        err_msg = "does not support PIL images, bounding boxes, masks and plain labels"
         input_copy = dict(input)
         for unsup_data in [
             make_label(),

diff --git a/torchvision/prototype/features/__init__.py b/torchvision/prototype/features/__init__.py
@@ -1,6 +1,15 @@
 from ._bounding_box import BoundingBox, BoundingBoxFormat
 from ._encoded import EncodedData, EncodedImage, EncodedVideo
-from ._feature import _Feature, DType, is_simple_tensor
-from ._image import ColorSpace, Image, ImageType
+from ._feature import _Feature, FillType, FillTypeJIT, InputType, InputTypeJIT, is_simple_tensor
+from ._image import (
+    ColorSpace,
+    Image,
+    ImageType,
+    ImageTypeJIT,
+    LegacyImageType,
+    LegacyImageTypeJIT,
+    TensorImageType,
+    TensorImageTypeJIT,
+)
 from ._label import Label, OneHotLabel
 from ._mask import Mask
diff --git a/torchvision/prototype/features/_bounding_box.py b/torchvision/prototype/features/_bounding_box.py
@@ -6,7 +6,7 @@
 from torchvision._utils import StrEnum
 from torchvision.transforms import InterpolationMode  # TODO: this needs to be moved out of transforms
 
-from ._feature import _Feature
+from ._feature import _Feature, FillTypeJIT
 
 
 class BoundingBoxFormat(StrEnum):
@@ -115,7 +115,7 @@ def resized_crop(
     def pad(
         self,
         padding: Union[int, Sequence[int]],
-        fill: Optional[Union[int, float, List[float]]] = None,
+        fill: FillTypeJIT = None,
         padding_mode: str = "constant",
     ) -> BoundingBox:
         # This cast does Sequence[int] -> List[int] and is required to make mypy happy
@@ -137,7 +137,7 @@ def rotate(
         angle: float,
         interpolation: InterpolationMode = InterpolationMode.NEAREST,
         expand: bool = False,
-        fill: Optional[Union[int, float, List[float]]] = None,
+        fill: FillTypeJIT = None,
         center: Optional[List[float]] = None,
     ) -> BoundingBox:
         output = self._F.rotate_bounding_box(
@@ -165,7 +165,7 @@ def affine(
         scale: float,
         shear: List[float],
         interpolation: InterpolationMode = InterpolationMode.NEAREST,
-        fill: Optional[Union[int, float, List[float]]] = None,
+        fill: FillTypeJIT = None,
         center: Optional[List[float]] = None,
     ) -> BoundingBox:
         output = self._F.affine_bounding_box(
@@ -184,7 +184,7 @@ def perspective(
         self,
         perspective_coeffs: List[float],
         interpolation: InterpolationMode = InterpolationMode.BILINEAR,
-        fill: Optional[Union[int, float, List[float]]] = None,
+        fill: FillTypeJIT = None,
     ) -> BoundingBox:
         output = self._F.perspective_bounding_box(self, self.format, perspective_coeffs)
         return BoundingBox.new_like(self, output, dtype=output.dtype)
@@ -193,7 +193,7 @@ def elastic(
         self,
         displacement: torch.Tensor,
         interpolation: InterpolationMode = InterpolationMode.BILINEAR,
-        fill: Optional[Union[int, float, List[float]]] = None,
+        fill: FillTypeJIT = None,
     ) -> BoundingBox:
         output = self._F.elastic_bounding_box(self, self.format, displacement)
         return BoundingBox.new_like(self, output, dtype=output.dtype)
diff --git a/torchvision/prototype/features/_feature.py b/torchvision/prototype/features/_feature.py
@@ -3,16 +3,14 @@
 from types import ModuleType
 from typing import Any, Callable, cast, List, Mapping, Optional, Sequence, Tuple, Type, TypeVar, Union
 
+import PIL.Image
 import torch
 from torch._C import _TensorBase, DisableTorchFunction
 from torchvision.transforms import InterpolationMode
 
 F = TypeVar("F", bound="_Feature")
-
-
-# Due to torch.jit.script limitation we keep DType as torch.Tensor
-# instead of Union[torch.Tensor, PIL.Image.Image, features._Feature]
-DType = torch.Tensor
+FillType = Union[int, float, Sequence[int], Sequence[float], None]
+FillTypeJIT = Union[int, float, List[float], None]
 
 
 def is_simple_tensor(inpt: Any) -> bool:
@@ -154,7 +152,7 @@ def resized_crop(
     def pad(
         self,
         padding: Union[int, List[int]],
-        fill: Optional[Union[int, float, List[float]]] = None,
+        fill: FillTypeJIT = None,
         padding_mode: str = "constant",
     ) -> _Feature:
         return self
@@ -164,7 +162,7 @@ def rotate(
         angle: float,
         interpolation: InterpolationMode = InterpolationMode.NEAREST,
         expand: bool = False,
-        fill: Optional[Union[int, float, List[float]]] = None,
+        fill: FillTypeJIT = None,
         center: Optional[List[float]] = None,
     ) -> _Feature:
         return self
@@ -176,7 +174,7 @@ def affine(
         scale: float,
         shear: List[float],
         interpolation: InterpolationMode = InterpolationMode.NEAREST,
-        fill: Optional[Union[int, float, List[float]]] = None,
+        fill: FillTypeJIT = None,
         center: Optional[List[float]] = None,
     ) -> _Feature:
         return self
@@ -185,15 +183,15 @@ def perspective(
         self,
         perspective_coeffs: List[float],
         interpolation: InterpolationMode = InterpolationMode.BILINEAR,
-        fill: Optional[Union[int, float, List[float]]] = None,
+        fill: FillTypeJIT = None,
     ) -> _Feature:
         return self
 
     def elastic(
         self,
         displacement: torch.Tensor,
         interpolation: InterpolationMode = InterpolationMode.BILINEAR,
-        fill: Optional[Union[int, float, List[float]]] = None,
+        fill: FillTypeJIT = None,
     ) -> _Feature:
         return self
 
@@ -232,3 +230,7 @@ def invert(self) -> _Feature:
 
     def gaussian_blur(self, kernel_size: List[int], sigma: Optional[List[float]] = None) -> _Feature:
         return self
+
+
+InputType = Union[torch.Tensor, PIL.Image.Image, _Feature]
+InputTypeJIT = torch.Tensor
diff --git a/torchvision/prototype/features/_image.py b/torchvision/prototype/features/_image.py
@@ -3,18 +3,14 @@
 import warnings
 from typing import Any, cast, List, Optional, Tuple, Union
 
+import PIL.Image
 import torch
 from torchvision._utils import StrEnum
 from torchvision.transforms.functional import InterpolationMode, to_pil_image
 from torchvision.utils import draw_bounding_boxes, make_grid
 
 from ._bounding_box import BoundingBox
-from ._feature import _Feature
-
-
-# Due to torch.jit.script limitation we keep ImageType as torch.Tensor
-# instead of Union[torch.Tensor, PIL.Image.Image, features.Image]
-ImageType = torch.Tensor
+from ._feature import _Feature, FillTypeJIT
 
 
 class ColorSpace(StrEnum):
@@ -181,7 +177,7 @@ def resized_crop(
     def pad(
         self,
         padding: Union[int, List[int]],
-        fill: Optional[Union[int, float, List[float]]] = None,
+        fill: FillTypeJIT = None,
         padding_mode: str = "constant",
     ) -> Image:
         output = self._F.pad_image_tensor(self, padding, fill=fill, padding_mode=padding_mode)
@@ -192,7 +188,7 @@ def rotate(
         angle: float,
         interpolation: InterpolationMode = InterpolationMode.NEAREST,
         expand: bool = False,
-        fill: Optional[Union[int, float, List[float]]] = None,
+        fill: FillTypeJIT = None,
         center: Optional[List[float]] = None,
     ) -> Image:
         output = self._F._geometry.rotate_image_tensor(
@@ -207,7 +203,7 @@ def affine(
         scale: float,
         shear: List[float],
         interpolation: InterpolationMode = InterpolationMode.NEAREST,
-        fill: Optional[Union[int, float, List[float]]] = None,
+        fill: FillTypeJIT = None,
         center: Optional[List[float]] = None,
     ) -> Image:
         output = self._F._geometry.affine_image_tensor(
@@ -226,7 +222,7 @@ def perspective(
         self,
         perspective_coeffs: List[float],
         interpolation: InterpolationMode = InterpolationMode.BILINEAR,
-        fill: Optional[Union[int, float, List[float]]] = None,
+        fill: FillTypeJIT = None,
     ) -> Image:
         output = self._F._geometry.perspective_image_tensor(
             self, perspective_coeffs, interpolation=interpolation, fill=fill
@@ -237,7 +233,7 @@ def elastic(
         self,
         displacement: torch.Tensor,
         interpolation: InterpolationMode = InterpolationMode.BILINEAR,
-        fill: Optional[Union[int, float, List[float]]] = None,
+        fill: FillTypeJIT = None,
     ) -> Image:
         output = self._F._geometry.elastic_image_tensor(self, displacement, interpolation=interpolation, fill=fill)
         return Image.new_like(self, output)
@@ -289,3 +285,11 @@ def invert(self) -> Image:
     def gaussian_blur(self, kernel_size: List[int], sigma: Optional[List[float]] = None) -> Image:
         output = self._F.gaussian_blur_image_tensor(self, kernel_size=kernel_size, sigma=sigma)
         return Image.new_like(self, output)
+
+
+ImageType = Union[torch.Tensor, PIL.Image.Image, Image]
+ImageTypeJIT = torch.Tensor
+LegacyImageType = Union[torch.Tensor, PIL.Image.Image]
+LegacyImageTypeJIT = torch.Tensor
+TensorImageType = Union[torch.Tensor, Image]
+TensorImageTypeJIT = torch.Tensor
diff --git a/torchvision/prototype/features/_mask.py b/torchvision/prototype/features/_mask.py
@@ -5,7 +5,7 @@
 import torch
 from torchvision.transforms import InterpolationMode
 
-from ._feature import _Feature
+from ._feature import _Feature, FillTypeJIT
 
 
 class Mask(_Feature):
@@ -51,7 +51,7 @@ def resized_crop(
     def pad(
         self,
         padding: Union[int, List[int]],
-        fill: Optional[Union[int, float, List[float]]] = None,
+        fill: FillTypeJIT = None,
         padding_mode: str = "constant",
     ) -> Mask:
         output = self._F.pad_mask(self, padding, padding_mode=padding_mode, fill=fill)
@@ -62,7 +62,7 @@ def rotate(
         angle: float,
         interpolation: InterpolationMode = InterpolationMode.NEAREST,
         expand: bool = False,
-        fill: Optional[Union[int, float, List[float]]] = None,
+        fill: FillTypeJIT = None,
         center: Optional[List[float]] = None,
     ) -> Mask:
         output = self._F.rotate_mask(self, angle, expand=expand, center=center, fill=fill)
@@ -75,7 +75,7 @@ def affine(
         scale: float,
         shear: List[float],
         interpolation: InterpolationMode = InterpolationMode.NEAREST,
-        fill: Optional[Union[int, float, List[float]]] = None,
+        fill: FillTypeJIT = None,
         center: Optional[List[float]] = None,
     ) -> Mask:
         output = self._F.affine_mask(
@@ -93,7 +93,7 @@ def perspective(
         self,
         perspective_coeffs: List[float],
         interpolation: InterpolationMode = InterpolationMode.NEAREST,
-        fill: Optional[Union[int, float, List[float]]] = None,
+        fill: FillTypeJIT = None,
     ) -> Mask:
         output = self._F.perspective_mask(self, perspective_coeffs, fill=fill)
         return Mask.new_like(self, output)
@@ -102,7 +102,7 @@ def elastic(
         self,
         displacement: torch.Tensor,
         interpolation: InterpolationMode = InterpolationMode.NEAREST,
-        fill: Optional[Union[int, float, List[float]]] = None,
+        fill: FillTypeJIT = None,
     ) -> Mask:
         output = self._F.elastic_mask(self, displacement, fill=fill)
         return Mask.new_like(self, output, dtype=output.dtype)
diff --git a/torchvision/prototype/transforms/_augment.py b/torchvision/prototype/transforms/_augment.py
@@ -1,7 +1,7 @@
 import math
 import numbers
 import warnings
-from typing import Any, Dict, List, Optional, Tuple, Union
+from typing import Any, cast, Dict, List, Optional, Tuple
 
 import PIL.Image
 import torch
@@ -92,9 +92,7 @@ def _get_params(self, sample: Any) -> Dict[str, Any]:
 
         return dict(i=i, j=j, h=h, w=w, v=v)
 
-    def _transform(
-        self, inpt: Union[torch.Tensor, features.Image, PIL.Image.Image], params: Dict[str, Any]
-    ) -> Union[torch.Tensor, features.Image, PIL.Image.Image]:
+    def _transform(self, inpt: features.ImageType, params: Dict[str, Any]) -> features.ImageType:
         if params["v"] is not None:
             inpt = F.erase(inpt, **params, inplace=self.inplace)
 
@@ -110,8 +108,10 @@ def __init__(self, alpha: float, p: float = 0.5) -> None:
     def forward(self, *inputs: Any) -> Any:
         if not (has_any(inputs, features.Image, features.is_simple_tensor) and has_any(inputs, features.OneHotLabel)):
             raise TypeError(f"{type(self).__name__}() is only defined for tensor images and one-hot labels.")
-        if has_any(inputs, features.BoundingBox, features.Mask, features.Label):
-            raise TypeError(f"{type(self).__name__}() does not support bounding boxes, masks and plain labels.")
+        if has_any(inputs, PIL.Image.Image, features.BoundingBox, features.Mask, features.Label):
+            raise TypeError(
+                f"{type(self).__name__}() does not support PIL images, bounding boxes, masks and plain labels."
+            )
         return super().forward(*inputs)
 
     def _mixup_onehotlabel(self, inpt: features.OneHotLabel, lam: float) -> features.OneHotLabel:
@@ -203,15 +203,15 @@ def __init__(
 
     def _copy_paste(
         self,
-        image: Any,
+        image: features.TensorImageType,
         target: Dict[str, Any],
-        paste_image: Any,
+        paste_image: features.TensorImageType,
         paste_target: Dict[str, Any],
         random_selection: torch.Tensor,
         blending: bool,
         resize_interpolation: F.InterpolationMode,
         antialias: Optional[bool],
-    ) -> Tuple[Any, Dict[str, Any]]:
+    ) -> Tuple[features.TensorImageType, Dict[str, Any]]:
 
         paste_masks = paste_target["masks"].new_like(paste_target["masks"], paste_target["masks"][random_selection])
         paste_boxes = paste_target["boxes"].new_like(paste_target["boxes"], paste_target["boxes"][random_selection])
@@ -223,7 +223,7 @@ def _copy_paste(
         # This is something different to TF implementation we introduced here as
         # originally the algorithm works on equal-sized data
         # (for example, coming from LSJ data augmentations)
-        size1 = image.shape[-2:]
+        size1 = cast(List[int], image.shape[-2:])
         size2 = paste_image.shape[-2:]
         if size1 != size2:
             paste_image = F.resize(paste_image, size=size1, interpolation=resize_interpolation, antialias=antialias)
@@ -278,7 +278,9 @@ def _copy_paste(
 
         return image, out_target
 
-    def _extract_image_targets(self, flat_sample: List[Any]) -> Tuple[List[Any], List[Dict[str, Any]]]:
+    def _extract_image_targets(
+        self, flat_sample: List[Any]
+    ) -> Tuple[List[features.TensorImageType], List[Dict[str, Any]]]:
         # fetch all images, bboxes, masks and labels from unstructured input
         # with List[image], List[BoundingBox], List[Mask], List[Label]
         images, bboxes, masks, labels = [], [], [], []
@@ -307,7 +309,10 @@ def _extract_image_targets(self, flat_sample: List[Any]) -> Tuple[List[Any], Lis
         return images, targets
 
     def _insert_outputs(
-        self, flat_sample: List[Any], output_images: List[Any], output_targets: List[Dict[str, Any]]
+        self,
+        flat_sample: List[Any],
+        output_images: List[features.TensorImageType],
+        output_targets: List[Dict[str, Any]],
     ) -> None:
         c0, c1, c2, c3 = 0, 0, 0, 0
         for i, obj in enumerate(flat_sample):