Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Feature] Support Imgaug for augmentations in the data pipeline. #492

Merged
merged 13 commits into from
Jan 20, 2021
2 changes: 2 additions & 0 deletions docs/changelog.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@

**New Features**

- Support [imgaug](https://imgaug.readthedocs.io/en/latest/index.html) for augmentations in the data pipeline ([#492](https://github.com/open-mmlab/mmaction2/pull/492))

**Improvements**

- Support setting `max_testing_views` for extremely large models to save GPU memory used ([#511](https://github.com/open-mmlab/mmaction2/pull/511))
Expand Down
4 changes: 2 additions & 2 deletions mmaction/datasets/pipelines/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from .augmentations import (AudioAmplify, CenterCrop, ColorJitter,
EntityBoxCrop, EntityBoxFlip, EntityBoxRescale,
Flip, Fuse, MelSpectrogram, MultiGroupCrop,
Flip, Fuse, Imgaug, MelSpectrogram, MultiGroupCrop,
MultiScaleCrop, Normalize, RandomCrop,
RandomRescale, RandomResizedCrop, RandomScale,
Resize, TenCrop, ThreeCrop)
Expand Down Expand Up @@ -31,5 +31,5 @@
'FormatAudioShape', 'LoadAudioFeature', 'AudioFeatureSelector',
'AudioDecodeInit', 'EntityBoxFlip', 'EntityBoxCrop', 'EntityBoxRescale',
'RandomScale', 'ImageDecode', 'BuildPseudoClip', 'RandomRescale',
'PyAVDecodeMotionVector', 'Rename'
'PyAVDecodeMotionVector', 'Rename', 'Imgaug'
]
203 changes: 203 additions & 0 deletions mmaction/datasets/pipelines/augmentations.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,209 @@ def _init_lazy_if_proper(results, lazy):
assert 'lazy' not in results, 'Use Fuse after lazy operations'


@PIPELINES.register_module()
class Imgaug:
    """Imgaug augmentation.

    Adds custom transformations from imgaug library.
    Please visit `https://imgaug.readthedocs.io/en/latest/index.html`
    to get more information. An example of ``transforms`` could be found
    in `default_transforms`

    Required keys are "imgs" and "img_shape"(if "gt_bboxes" is not None),
    added or modified keys are "imgs", "img_shape", "gt_bboxes", "proposals".

    "gt_bboxes" and "proposals" are only transformed when they are present
    and not None; a None value is left untouched.

    It is worth mentioning that `Imgaug` will NOT create custom keys like
    "interpolation", "crop_bbox", "flip_direction", etc. So when using
    `Imgaug` along with other mmaction2 pipelines, we should pay more attention
    to required keys.

    Two steps to use `Imgaug` pipeline:
    1. Create initialization parameter `transforms`. There are three ways
        to create `transforms`.
        1) string: only support `default` for now.
            e.g. `transforms='default'`
        2) list[dict]: create a list of augmenters by a list of dicts, each
            dict corresponds to one augmenter. Every dict MUST contain a key
            named `type`. `type` should be a string(iaa.Augmenter's name) or
            an iaa.Augmenter subclass.
            e.g. `transforms=[dict(type='Rotate', rotate=(-20, 20))]`
            e.g. `transforms=[dict(type=iaa.Rotate, rotate=(-20, 20))]`
        3) iaa.Augmenter: create an imgaug.Augmenter object.
            e.g. `transforms=iaa.Rotate(rotate=(-20, 20))`
    2. Add `Imgaug` in dataset pipeline. It is recommended to insert imgaug
        pipeline before `FormatShape`. A demo pipeline is listed as follows.
        ```
        pipeline = [
            dict(
                type='SampleFrames',
                clip_len=1,
                frame_interval=1,
                num_clips=16,
            ),
            dict(type='RawFrameDecode'),
            dict(type='Resize', scale=(-1, 256)),
            dict(
                type='MultiScaleCrop',
                input_size=224,
                scales=(1, 0.875, 0.75, 0.66),
                random_crop=False,
                max_wh_scale_gap=1,
                num_fixed_crops=13),
            dict(type='Resize', scale=(224, 224), keep_ratio=False),
            dict(type='Flip', flip_ratio=0.5),
            dict(type='Imgaug', transforms='default'),
            # dict(type='Imgaug', transforms=[
            #     dict(type='Rotate', rotate=(-20, 20))
            # ]),
            # dict(type='Imgaug',transforms=iaa.Rotate(rotate=(-20, 20))),
            dict(type='FormatShape', input_format='NCHW'),
            dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
            dict(type='ToTensor', keys=['imgs', 'label'])
        ]
        ```

    Args:
        transforms (str | list[dict] | :obj:`iaa.Augmenter`): Three different
            ways to create imgaug augmenter.
    """

    def __init__(self, transforms):
        # Imported lazily: imgaug is listed as an optional requirement.
        import imgaug

        self.iaa = imgaug.augmenters
        self.bbs = imgaug.augmentables.bbs

        if isinstance(transforms, self.iaa.Augmenter):
            # A ready-made augmenter is used as is.
            self.aug = self.transforms = transforms
            return

        # The isinstance guard keeps non-string inputs (e.g. arrays) from
        # being compared element-wise against 'default'.
        if isinstance(transforms, str) and transforms == 'default':
            self.transforms = self.default_transforms()
        elif isinstance(transforms, list):
            assert all(isinstance(trans, dict) for trans in transforms)
            self.transforms = transforms
        else:
            raise ValueError('transforms must be `default` or a list of dicts'
                             ' or iaa.Augmenter object')

        self.aug = self.iaa.Sequential(
            [self.imgaug_builder(t) for t in self.transforms])

    def default_transforms(self):
        """Default transforms for imgaug.

        Returns:
            list[dict]: Augmenter configs accepted by :meth:`imgaug_builder`.
        """

        return [
            dict(type='Rotate', rotate=(-30, 30)),
            dict(
                type='SomeOf',
                n=(0, 3),
                children=[
                    dict(
                        type='OneOf',
                        children=[
                            dict(type='GaussianBlur', sigma=(0, 0.5)),
                            dict(type='AverageBlur', k=(2, 7)),
                            dict(type='MedianBlur', k=(3, 11))
                        ]),
                    dict(
                        type='OneOf',
                        children=[
                            dict(
                                type='Dropout', p=(0.01, 0.1),
                                per_channel=0.5),
                            dict(
                                type='CoarseDropout',
                                p=(0.03, 0.15),
                                size_percent=(0.02, 0.05),
                                per_channel=0.2),
                        ]),
                    dict(
                        type='AdditiveGaussianNoise',
                        loc=0,
                        scale=(0.0, 0.05 * 255),
                        per_channel=0.5),
                ]),
        ]

    def imgaug_builder(self, cfg):
        """Import a module from imgaug.

        It follows the logic of :func:`build_from_cfg`. Use a dict object to
        create an iaa.Augmenter object. A "children" entry, if present, is
        built recursively into a list of augmenters.

        Args:
            cfg (dict): Config dict. It should at least contain the key "type".

        Returns:
            obj:`iaa.Augmenter`: The constructed imgaug augmenter.

        Raises:
            AssertionError: If ``cfg`` is not a dict or has no "type" key.
            AttributeError: If "type" is a string that names nothing in
                ``imgaug.augmenters``.
            TypeError: If "type" is neither a string nor an
                ``iaa.Augmenter`` subclass.
        """
        assert isinstance(cfg, dict) and 'type' in cfg
        # Work on a copy so the caller's config is not mutated by `pop`.
        args = cfg.copy()

        obj_type = args.pop('type')
        if mmcv.is_str(obj_type):
            obj_cls = getattr(self.iaa, obj_type)
        elif isinstance(obj_type, type) and issubclass(
                obj_type, self.iaa.Augmenter):
            # The isinstance guard keeps non-class values from tripping
            # issubclass before the explicit error below can be raised.
            obj_cls = obj_type
        else:
            raise TypeError(
                f'type must be a str or valid type, but got {type(obj_type)}')

        if 'children' in args:
            args['children'] = [
                self.imgaug_builder(child) for child in args['children']
            ]

        return obj_cls(**args)

    def __repr__(self):
        repr_str = self.__class__.__name__ + f'(transforms={self.aug})'
        return repr_str

    def _augment_bboxes(self, aug, bboxes, in_shape, out_hw):
        """Apply ``aug`` to boxes and clip them to the augmented frame size.

        Args:
            aug (:obj:`iaa.Augmenter`): Augmenter to apply.
            bboxes (iterable): Boxes indexed as ``(x1, y1, x2, y2)``.
            in_shape (tuple): Image shape the input boxes refer to.
            out_hw (tuple[int]): ``(height, width)`` of the augmented frames.

        Returns:
            list[list]: Augmented boxes as ``[x1, y1, x2, y2]``.
        """
        out_h, out_w = out_hw
        boxes_on_image = self.bbs.BoundingBoxesOnImage(
            [
                self.bbs.BoundingBox(
                    x1=bbox[0], y1=bbox[1], x2=bbox[2], y2=bbox[3])
                for bbox in bboxes
            ],
            shape=in_shape)
        augmented, *_ = aug.augment_bounding_boxes([boxes_on_image])
        return [[
            max(bbox.x1, 0),
            max(bbox.y1, 0),
            min(bbox.x2, out_w),
            min(bbox.y2, out_h)
        ] for bbox in augmented.items]

    def __call__(self, results):
        """Augment the frames and, when available, their boxes.

        Args:
            results (dict): Pipeline dict. Reads "imgs", plus "img_shape"
                when "gt_bboxes" or "proposals" holds boxes.

        Returns:
            dict: The same dict with "imgs", "img_shape" and any non-None
                "gt_bboxes" / "proposals" updated.
        """
        # One frozen augmenter is reused for every frame and box so they
        # stay aligned (see imgaug's `to_deterministic`).
        cur_aug = self.aug.to_deterministic()

        results['imgs'] = [
            cur_aug.augment_image(frame) for frame in results['imgs']
        ]
        # Slice instead of 3-way unpacking so 2-D frames work as well.
        img_h, img_w = results['imgs'][0].shape[:2]

        for key in ('gt_bboxes', 'proposals'):
            # A key may be present but None, meaning there is nothing to
            # transform.
            if results.get(key) is not None:
                results[key] = self._augment_bboxes(
                    cur_aug, results[key], results['img_shape'],
                    (img_h, img_w))

        results['img_shape'] = (img_h, img_w)

        return results


@PIPELINES.register_module()
class Fuse:
"""Fuse lazy operations.
Expand Down
1 change: 1 addition & 0 deletions requirements/optional.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
av
decord >= 0.4.1
imgaug
moviepy
onnx
onnxruntime
Expand Down
89 changes: 88 additions & 1 deletion tests/test_data/test_augmentations.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
# yapf: disable
from mmaction.datasets.pipelines import (AudioAmplify, CenterCrop, ColorJitter,
EntityBoxCrop, EntityBoxFlip,
EntityBoxRescale, Flip, Fuse,
EntityBoxRescale, Flip, Fuse, Imgaug,
MelSpectrogram, MultiGroupCrop,
MultiScaleCrop, Normalize, RandomCrop,
RandomRescale, RandomResizedCrop,
Expand Down Expand Up @@ -1230,3 +1230,90 @@ def test_box_flip(self):
results_ = box_flip(results_)
assert results_['proposals'] is None
assert repr(box_flip) == f'EntityBoxFlip(img_shape={img_shape})'

def test_imgaug(self):
    """Check `Imgaug` argument validation and its effect on pipeline keys.

    Covers invalid ``transforms`` values, the default transform set, and
    flip / crop / resize augmenters built from dicts or imgaug objects.
    """

    with pytest.raises(ValueError):
        # transforms only support one string, 'default'
        Imgaug(transforms='test')

    with pytest.raises(ValueError):
        # transforms only support string or list of dicts
        # or iaa.Augmenter object
        Imgaug(transforms=dict(type='Rotate'))

    with pytest.raises(AssertionError):
        # each dict must have a `type` key
        Imgaug(transforms=[dict(rotate=(-30, 30))])

    with pytest.raises(AttributeError):
        # `type` must be available in imgaug
        Imgaug(transforms=[dict(type='BlaBla')])

    with pytest.raises(TypeError):
        # `type` must be str or iaa available type
        Imgaug(transforms=[dict(type=CenterCrop)])

    from imgaug import augmenters as iaa

    # check default configs
    target_keys = ['imgs', 'img_shape']
    imgs = list(np.random.randint(0, 255, (1, 64, 64, 3)).astype(np.uint8))
    results = dict(imgs=imgs)
    default_imgaug = Imgaug(transforms='default')
    default_results = default_imgaug(results)
    # The helper's return value must be asserted, otherwise nothing is
    # checked.
    assert self.check_keys_contain(default_results.keys(), target_keys)
    assert default_results['img_shape'] == (64, 64)

    # check flip (both images and bboxes)
    target_keys = ['imgs', 'gt_bboxes', 'proposals', 'img_shape']
    imgs = list(np.random.rand(1, 64, 64, 3).astype(np.float32))
    results = dict(
        imgs=imgs,
        proposals=np.array([[0, 0, 25, 35]]),
        img_shape=(64, 64),
        gt_bboxes=np.array([[0, 0, 25, 35]]))
    imgaug_flip = Imgaug(transforms=[dict(type='Fliplr')])
    flip_results = imgaug_flip(results)
    assert self.check_keys_contain(flip_results.keys(), target_keys)
    assert self.check_flip(imgs, flip_results['imgs'], 'horizontal')
    assert_array_almost_equal(flip_results['gt_bboxes'],
                              np.array([[39, 0, 64, 35]]))
    assert_array_almost_equal(flip_results['proposals'],
                              np.array([[39, 0, 64, 35]]))
    transforms = iaa.Sequential([iaa.Fliplr()])
    assert repr(imgaug_flip) == f'Imgaug(transforms={transforms})'

    # check crop (both images and bboxes)
    # `Imgaug` does not create 'crop_bbox', so it is not an expected key.
    target_keys = ['gt_bboxes', 'imgs', 'img_shape']
    imgs = list(np.random.rand(1, 122, 122, 3))
    results = dict(
        imgs=imgs,
        img_shape=(122, 122),
        gt_bboxes=np.array([[1.5, 2.5, 110, 64]]))
    imgaug_center_crop = Imgaug(transforms=[
        dict(
            type=iaa.CropToFixedSize,
            width=100,
            height=100,
            position='center')
    ])
    crop_results = imgaug_center_crop(results)
    assert self.check_keys_contain(crop_results.keys(), target_keys)
    assert_array_almost_equal(crop_results['gt_bboxes'],
                              np.array([[0., 0., 99., 53.]]))
    assert 'proposals' not in results
    transforms = iaa.Sequential(
        [iaa.CropToFixedSize(width=100, height=100, position='center')])
    assert repr(imgaug_center_crop) == f'Imgaug(transforms={transforms})'

    # check resize (images only)
    target_keys = ['imgs', 'img_shape']
    imgs = list(np.random.rand(1, 64, 64, 3))
    results = dict(imgs=imgs)
    transforms = iaa.Resize(32)
    imgaug_resize = Imgaug(transforms=transforms)
    resize_results = imgaug_resize(results)
    assert self.check_keys_contain(resize_results.keys(), target_keys)
    assert resize_results['img_shape'] == (32, 32)
    assert repr(imgaug_resize) == f'Imgaug(transforms={transforms})'