open-mmlab · innerlee · Jan 20, 2021 · Dec 25, 2020 · Dec 26, 2020 · Dec 26, 2020
diff --git a/docs/changelog.md b/docs/changelog.md
@@ -6,6 +6,8 @@
 
 **New Features**
 
+- Support [imgaug](https://imgaug.readthedocs.io/en/latest/index.html) for augmentations in the data pipeline ([#492](https://github.com/open-mmlab/mmaction2/pull/492))
+
 **Improvements**
 
 - Support setting `max_testing_views` for extremely large models to save GPU memory used ([#511](https://github.com/open-mmlab/mmaction2/pull/511))

diff --git a/mmaction/datasets/pipelines/__init__.py b/mmaction/datasets/pipelines/__init__.py
@@ -1,6 +1,6 @@
 from .augmentations import (AudioAmplify, CenterCrop, ColorJitter,
                             EntityBoxCrop, EntityBoxFlip, EntityBoxRescale,
-                            Flip, Fuse, MelSpectrogram, MultiGroupCrop,
+                            Flip, Fuse, Imgaug, MelSpectrogram, MultiGroupCrop,
                             MultiScaleCrop, Normalize, RandomCrop,
                             RandomRescale, RandomResizedCrop, RandomScale,
                             Resize, TenCrop, ThreeCrop)
@@ -31,5 +31,5 @@
     'FormatAudioShape', 'LoadAudioFeature', 'AudioFeatureSelector',
     'AudioDecodeInit', 'EntityBoxFlip', 'EntityBoxCrop', 'EntityBoxRescale',
     'RandomScale', 'ImageDecode', 'BuildPseudoClip', 'RandomRescale',
-    'PyAVDecodeMotionVector', 'Rename'
+    'PyAVDecodeMotionVector', 'Rename', 'Imgaug'
 ]
diff --git a/mmaction/datasets/pipelines/augmentations.py b/mmaction/datasets/pipelines/augmentations.py
@@ -42,6 +42,209 @@ def _init_lazy_if_proper(results, lazy):
         assert 'lazy' not in results, 'Use Fuse after lazy operations'
 
 
+@PIPELINES.register_module()
+class Imgaug:
+    """Imgaug augmentation.
+
+    Adds custom transformations from imgaug library.
+    Please visit `https://imgaug.readthedocs.io/en/latest/index.html`
+    to get more information. An example of ``transforms`` can be found
+    in :meth:`default_transforms`.
+
+    Required keys are "imgs" and "img_shape" (if "gt_bboxes" is not None),
+    added or modified keys are "imgs", "img_shape", "gt_bboxes", "proposals".
+
+    One deterministic copy of the augmenter is created per call and applied
+    to every frame in "imgs" as well as to the boxes. "proposals" are only
+    processed when "gt_bboxes" is given. Augmented boxes are clipped to the
+    augmented frame and written back as a list of ``[x1, y1, x2, y2]`` lists.
+
+    It is worth mentioning that `Imgaug` will NOT create custom keys like
+    "interpolation", "crop_bbox", "flip_direction", etc. So when using
+    `Imgaug` along with other mmaction2 pipelines, we should pay more attention
+    to required keys.
+
+    Two steps to use `Imgaug` pipeline:
+    1. Create initialization parameter `transforms`. There are three ways
+        to create `transforms`.
+        1) string: only support `default` for now.
+            e.g. `transforms='default'`
+        2) list[dict]: create a list of augmenters by a list of dicts, each
+            dict corresponds to one augmenter. Every dict MUST contain a key
+            named `type`. `type` should be a string(iaa.Augmenter's name) or
+            an iaa.Augmenter subclass.
+            e.g. `transforms=[dict(type='Rotate', rotate=(-20, 20))]`
+            e.g. `transforms=[dict(type=iaa.Rotate, rotate=(-20, 20))]`
+        3) iaa.Augmenter: create an imgaug.Augmenter object.
+            e.g. `transforms=iaa.Rotate(rotate=(-20, 20))`
+    2. Add `Imgaug` in dataset pipeline. It is recommended to insert imgaug
+        pipeline before `FormatShape`. A demo pipeline is listed as follows.
+        ```
+        pipeline = [
+            dict(
+                type='SampleFrames',
+                clip_len=1,
+                frame_interval=1,
+                num_clips=16,
+            ),
+            dict(type='RawFrameDecode'),
+            dict(type='Resize', scale=(-1, 256)),
+            dict(
+                type='MultiScaleCrop',
+                input_size=224,
+                scales=(1, 0.875, 0.75, 0.66),
+                random_crop=False,
+                max_wh_scale_gap=1,
+                num_fixed_crops=13),
+            dict(type='Resize', scale=(224, 224), keep_ratio=False),
+            dict(type='Flip', flip_ratio=0.5),
+            dict(type='Imgaug', transforms='default'),
+            # dict(type='Imgaug', transforms=[
+            #     dict(type='Rotate', rotate=(-20, 20))
+            # ]),
+            # dict(type='Imgaug',transforms=iaa.Rotate(rotate=(-20, 20))),
+            dict(type='FormatShape', input_format='NCHW'),
+            dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
+            dict(type='ToTensor', keys=['imgs', 'label'])
+        ]
+        ```
+
+    Args:
+        transforms (str | list[dict] | :obj:`iaa.Augmenter`): Three different
+            ways to create imgaug augmenter.
+
+    Raises:
+        ImportError: If imgaug is not installed.
+        ValueError: If ``transforms`` is not ``'default'``, a list of dicts
+            or an ``iaa.Augmenter`` object.
+    """
+
+    def __init__(self, transforms):
+        # Resolving the module here keeps the fail-fast ImportError at
+        # construction time when imgaug is not installed.
+        iaa = self.iaa
+
+        if isinstance(transforms, str) and transforms == 'default':
+            self.transforms = self.default_transforms()
+        elif isinstance(transforms, list):
+            assert all(isinstance(trans, dict) for trans in transforms)
+            self.transforms = transforms
+        elif isinstance(transforms, iaa.Augmenter):
+            self.aug = self.transforms = transforms
+        else:
+            raise ValueError('transforms must be `default` or a list of dicts'
+                             ' or iaa.Augmenter object')
+
+        # Config-style inputs still have to be turned into a real augmenter.
+        if not isinstance(transforms, iaa.Augmenter):
+            self.aug = iaa.Sequential(
+                [self.imgaug_builder(t) for t in self.transforms])
+
+    # The imgaug modules are exposed as properties instead of being stored
+    # in ``__init__``: module objects cannot be pickled, so keeping them in
+    # the instance ``__dict__`` would make the whole transform unpicklable.
+    @property
+    def iaa(self):
+        """module: ``imgaug.augmenters``, imported on access."""
+        import imgaug.augmenters as iaa
+        return iaa
+
+    @property
+    def bbs(self):
+        """module: ``imgaug.augmentables.bbs``, imported on access."""
+        import imgaug.augmentables.bbs as bbs
+        return bbs
+
+    def default_transforms(self):
+        """Default transforms for imgaug.
+
+        Returns:
+            list[dict]: Config dicts accepted by :meth:`imgaug_builder`: a
+                ``Rotate`` followed by a ``SomeOf`` that picks between zero
+                and three of blur, dropout and additive Gaussian noise.
+        """
+
+        return [
+            dict(type='Rotate', rotate=(-30, 30)),
+            dict(
+                type='SomeOf',
+                n=(0, 3),
+                children=[
+                    dict(
+                        type='OneOf',
+                        children=[
+                            dict(type='GaussianBlur', sigma=(0, 0.5)),
+                            dict(type='AverageBlur', k=(2, 7)),
+                            dict(type='MedianBlur', k=(3, 11))
+                        ]),
+                    dict(
+                        type='OneOf',
+                        children=[
+                            dict(
+                                type='Dropout', p=(0.01, 0.1),
+                                per_channel=0.5),
+                            dict(
+                                type='CoarseDropout',
+                                p=(0.03, 0.15),
+                                size_percent=(0.02, 0.05),
+                                per_channel=0.2),
+                        ]),
+                    dict(
+                        type='AdditiveGaussianNoise',
+                        loc=0,
+                        scale=(0.0, 0.05 * 255),
+                        per_channel=0.5),
+                ]),
+        ]
+
+    def imgaug_builder(self, cfg):
+        """Import a module from imgaug.
+
+        It follows the logic of :func:`build_from_cfg`. Use a dict object to
+        create an iaa.Augmenter object. A ``children`` entry, if present, is
+        built recursively from its own list of config dicts.
+
+        Args:
+            cfg (dict): Config dict. It should at least contain the key "type".
+
+        Returns:
+            obj:`iaa.Augmenter`: The constructed imgaug augmenter.
+
+        Raises:
+            AssertionError: If ``cfg`` is not a dict or has no "type" key.
+            AttributeError: If "type" is a string that does not name an
+                attribute of ``imgaug.augmenters``.
+            TypeError: If "type" is neither a string nor an
+                ``iaa.Augmenter`` subclass.
+        """
+        assert isinstance(cfg, dict) and 'type' in cfg
+        # Work on a shallow copy so the caller's config is left untouched.
+        args = cfg.copy()
+        iaa = self.iaa
+
+        obj_type = args.pop('type')
+        if mmcv.is_str(obj_type):
+            obj_cls = getattr(iaa, obj_type)
+        elif isinstance(obj_type, type) and issubclass(
+                obj_type, iaa.Augmenter):
+            # The ``isinstance`` guard keeps ``issubclass`` from being called
+            # on a non-class object.
+            obj_cls = obj_type
+        else:
+            raise TypeError('type must be a str or an iaa.Augmenter '
+                            f'subclass, but got {obj_type!r}')
+
+        if 'children' in args:
+            args['children'] = [
+                self.imgaug_builder(child) for child in args['children']
+            ]
+
+        return obj_cls(**args)
+
+    def __repr__(self):
+        repr_str = self.__class__.__name__ + f'(transforms={self.aug})'
+        return repr_str
+
+    def _augment_bboxes(self, augmenter, bboxes, in_shape, out_shape):
+        """Apply ``augmenter`` to boxes and clip them to the output frame.
+
+        Args:
+            augmenter (:obj:`iaa.Augmenter`): Augmenter used for the frames.
+            bboxes (iterable): Boxes indexable as ``(x1, y1, x2, y2)``.
+            in_shape (tuple): Frame shape the boxes currently refer to.
+            out_shape (tuple[int]): ``(height, width)`` of augmented frames.
+
+        Returns:
+            list[list]: Augmented boxes as ``[x1, y1, x2, y2]``.
+        """
+        bbs = self.bbs
+        boxes_on_image = bbs.BoundingBoxesOnImage([
+            bbs.BoundingBox(x1=box[0], y1=box[1], x2=box[2], y2=box[3])
+            for box in bboxes
+        ], shape=in_shape)
+        augmented, *_ = augmenter.augment_bounding_boxes([boxes_on_image])
+        out_h, out_w = out_shape
+        return [[
+            max(box.x1, 0),
+            max(box.y1, 0),
+            min(box.x2, out_w),
+            min(box.y2, out_h)
+        ] for box in augmented.items]
+
+    def __call__(self, results):
+        """Augment the frames and, when given, the boxes in ``results``.
+
+        Args:
+            results (dict): Pipeline dict. Reads "imgs" and, when
+                "gt_bboxes" is not None, "img_shape", "gt_bboxes" and
+                "proposals".
+
+        Returns:
+            dict: The same dict with "imgs" and "img_shape" updated, plus
+                "gt_bboxes" / "proposals" when they were processed.
+        """
+        # Freeze the random state so frames and boxes share one transform.
+        cur_aug = self.aug.to_deterministic()
+
+        results['imgs'] = [
+            cur_aug.augment_image(frame) for frame in results['imgs']
+        ]
+        # Only height and width are needed, whatever follows in the shape.
+        img_h, img_w = results['imgs'][0].shape[:2]
+
+        # ``None`` means "no boxes" and is left as it is.
+        if results.get('gt_bboxes') is not None:
+            in_shape = results['img_shape']
+            results['gt_bboxes'] = self._augment_bboxes(
+                cur_aug, results['gt_bboxes'], in_shape, (img_h, img_w))
+            if results.get('proposals') is not None:
+                results['proposals'] = self._augment_bboxes(
+                    cur_aug, results['proposals'], in_shape, (img_h, img_w))
+
+        results['img_shape'] = (img_h, img_w)
+
+        return results
+
+
 @PIPELINES.register_module()
 class Fuse:
     """Fuse lazy operations.

diff --git a/requirements/optional.txt b/requirements/optional.txt
@@ -1,5 +1,6 @@
 av
 decord >= 0.4.1
+imgaug
 moviepy
 onnx
 onnxruntime

diff --git a/tests/test_data/test_augmentations.py b/tests/test_data/test_augmentations.py
@@ -8,7 +8,7 @@
 # yapf: disable
 from mmaction.datasets.pipelines import (AudioAmplify, CenterCrop, ColorJitter,
                                          EntityBoxCrop, EntityBoxFlip,
-                                         EntityBoxRescale, Flip, Fuse,
+                                         EntityBoxRescale, Flip, Fuse, Imgaug,
                                          MelSpectrogram, MultiGroupCrop,
                                          MultiScaleCrop, Normalize, RandomCrop,
                                          RandomRescale, RandomResizedCrop,
@@ -1230,3 +1230,90 @@ def test_box_flip(self):
         results_ = box_flip(results_)
         assert results_['proposals'] is None
         assert repr(box_flip) == f'EntityBoxFlip(img_shape={img_shape})'
+
+    def test_imgaug(self):
+        """Check `Imgaug` argument validation and its effect on results."""
+
+        with pytest.raises(ValueError):
+            # transforms only support one string, 'default'
+            Imgaug(transforms='test')
+
+        with pytest.raises(ValueError):
+            # transforms only support string or list of dicts
+            # or iaa.Augmenter object
+            Imgaug(transforms=dict(type='Rotate'))
+
+        with pytest.raises(AssertionError):
+            # each dict must have a `type` key
+            Imgaug(transforms=[dict(rotate=(-30, 30))])
+
+        with pytest.raises(AttributeError):
+            # `type` must be available in imgaug
+            Imgaug(transforms=[dict(type='BlaBla')])
+
+        with pytest.raises(TypeError):
+            # `type` must be str or iaa available type
+            Imgaug(transforms=[dict(type=CenterCrop)])
+
+        from imgaug import augmenters as iaa
+
+        # check default configs
+        target_keys = ['imgs', 'img_shape']
+        imgs = list(np.random.randint(0, 255, (1, 64, 64, 3)).astype(np.uint8))
+        results = dict(imgs=imgs)
+        default_imgaug = Imgaug(transforms='default')
+        default_results = default_imgaug(results)
+        # the helper's result has to be asserted, otherwise nothing is checked
+        assert self.check_keys_contain(default_results.keys(), target_keys)
+        assert default_results['img_shape'] == (64, 64)
+
+        # check flip (both images and bboxes)
+        target_keys = ['imgs', 'gt_bboxes', 'proposals', 'img_shape']
+        imgs = list(np.random.rand(1, 64, 64, 3).astype(np.float32))
+        results = dict(
+            imgs=imgs,
+            proposals=np.array([[0, 0, 25, 35]]),
+            img_shape=(64, 64),
+            gt_bboxes=np.array([[0, 0, 25, 35]]))
+        imgaug_flip = Imgaug(transforms=[dict(type='Fliplr')])
+        flip_results = imgaug_flip(results)
+        assert self.check_keys_contain(flip_results.keys(), target_keys)
+        assert self.check_flip(imgs, flip_results['imgs'], 'horizontal')
+        assert_array_almost_equal(flip_results['gt_bboxes'],
+                                  np.array([[39, 0, 64, 35]]))
+        assert_array_almost_equal(flip_results['proposals'],
+                                  np.array([[39, 0, 64, 35]]))
+        transforms = iaa.Sequential([iaa.Fliplr()])
+        assert repr(imgaug_flip) == f'Imgaug(transforms={transforms})'
+
+        # check crop (both images and bboxes)
+        # `Imgaug` does not create `crop_bbox`, so it is not an expected key
+        target_keys = ['gt_bboxes', 'imgs', 'img_shape']
+        imgs = list(np.random.rand(1, 122, 122, 3))
+        results = dict(
+            imgs=imgs,
+            img_shape=(122, 122),
+            gt_bboxes=np.array([[1.5, 2.5, 110, 64]]))
+        imgaug_center_crop = Imgaug(transforms=[
+            dict(
+                type=iaa.CropToFixedSize,
+                width=100,
+                height=100,
+                position='center')
+        ])
+        crop_results = imgaug_center_crop(results)
+        assert self.check_keys_contain(crop_results.keys(), target_keys)
+        assert_array_almost_equal(crop_results['gt_bboxes'],
+                                  np.array([[0., 0., 99., 53.]]))
+        assert 'proposals' not in crop_results
+        transforms = iaa.Sequential(
+            [iaa.CropToFixedSize(width=100, height=100, position='center')])
+        assert repr(imgaug_center_crop) == f'Imgaug(transforms={transforms})'
+
+        # check resize (images only)
+        target_keys = ['imgs', 'img_shape']
+        imgs = list(np.random.rand(1, 64, 64, 3))
+        results = dict(imgs=imgs)
+        transforms = iaa.Resize(32)
+        imgaug_resize = Imgaug(transforms=transforms)
+        resize_results = imgaug_resize(results)
+        assert self.check_keys_contain(resize_results.keys(), target_keys)
+        assert resize_results['img_shape'] == (32, 32)
+        assert repr(imgaug_resize) == f'Imgaug(transforms={transforms})'