Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add Shear augmentation. #3656

Merged
merged 25 commits into from
Sep 23, 2020
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
6050308
add Shear augmentation.
v-qjqs Aug 31, 2020
f03200b
Merge branch 'master' of https://github.com/open-mmlab/mmdetection in…
v-qjqs Aug 31, 2020
948ff53
remove duplicated warpAffine declaration.
v-qjqs Aug 31, 2020
e4cea56
fix shear bboxes
v-qjqs Aug 31, 2020
35c3a8d
Merge branch 'master' of https://github.com/open-mmlab/mmdetection in…
v-qjqs Sep 3, 2020
b5f294d
re-implements Shear augmentation using mmcv.imshear
v-qjqs Sep 3, 2020
b640827
fix the name of some variables
v-qjqs Sep 4, 2020
0f5b249
Merge branch 'master' of https://github.com/open-mmlab/mmdetection in…
v-qjqs Sep 4, 2020
b3b5b30
handle conflicts and merge from upstream master
v-qjqs Sep 8, 2020
cb3c75b
reformat
v-qjqs Sep 8, 2020
0159a98
add shear abstractmethod for BaseInstanceMasks
v-qjqs Sep 8, 2020
da0294c
Merge branch 'master' of https://github.com/open-mmlab/mmdetection in…
v-qjqs Sep 16, 2020
47129f0
supports for PolygonMasks
v-qjqs Sep 16, 2020
f524b76
Merge branch 'master' of https://github.com/open-mmlab/mmdetection in…
v-qjqs Sep 16, 2020
7ee5442
add docstring
v-qjqs Sep 16, 2020
4cf52a2
add more unit test
v-qjqs Sep 18, 2020
79ee7b1
Merge branch 'master' of https://github.com/open-mmlab/mmdetection in…
v-qjqs Sep 18, 2020
e4b1998
reformat assertion message
v-qjqs Sep 18, 2020
fed138c
remove unnecessary comma in function
v-qjqs Sep 18, 2020
425a6c4
refactor
v-qjqs Sep 22, 2020
695710e
remove unnecessary
v-qjqs Sep 22, 2020
ae600c9
Merge branch 'master' of https://github.com/open-mmlab/mmdetection in…
v-qjqs Sep 22, 2020
91b69fe
remove unnecessary copy
v-qjqs Sep 22, 2020
6cdd325
merge shear.py into auto_augment.py
v-qjqs Sep 23, 2020
d7d6da3
add unit test for autoaugment equipped with shear
v-qjqs Sep 23, 2020
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 30 additions & 0 deletions mmdet/core/mask/structures.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from abc import ABCMeta, abstractmethod

import cv2
import mmcv
import numpy as np
import pycocotools.mask as maskUtils
Expand Down Expand Up @@ -297,6 +298,27 @@ def expand(self, expanded_h, expanded_w, top, left):
left:left + self.width] = self.masks
return BitmapMasks(expanded_mask, expanded_h, expanded_w)

def shear(self,
          shear_matrix,
          out_shape,
          fill_val=0,
          flags=cv2.INTER_NEAREST,
          borderMode=cv2.BORDER_CONSTANT):
    """Shear every bitmap mask with ``cv2.warpAffine``.

    Args:
        shear_matrix (ndarray): Affine matrix forwarded to
            ``cv2.warpAffine`` as the transformation.
        out_shape (tuple[int]): Size of the output masks, as (h, w).
        fill_val (int | float): Border value forwarded as ``borderValue``.
            Default 0.
        flags: Interpolation flag forwarded to ``cv2.warpAffine``.
        borderMode: Pixel extrapolation mode forwarded to
            ``cv2.warpAffine``.

    Returns:
        BitmapMasks: The sheared masks, sized ``out_shape``.
    """
    # Nothing to warp: hand back an empty container of the requested size.
    if len(self.masks) == 0:
        empty_masks = np.empty((0, *out_shape), dtype=np.uint8)
        return BitmapMasks(empty_masks, *out_shape)

    # cv2 expects dsize as (w, h) while out_shape is (h, w).
    dsize = out_shape[::-1]
    warped = [
        cv2.warpAffine(
            mask,
            shear_matrix,
            dsize=dsize,
            borderValue=fill_val,
            flags=flags,
            borderMode=borderMode) for mask in self.masks
    ]
    # Keep the dtype of the stored masks.
    sheared_masks = np.stack(warped).astype(self.masks.dtype)
    return BitmapMasks(sheared_masks, *out_shape)

@property
def areas(self):
"""See :py:attr:`BaseInstanceMasks.areas`."""
Expand Down Expand Up @@ -498,6 +520,14 @@ def crop_and_resize(self,
resized_masks.append(resized_mask)
return PolygonMasks(resized_masks, *out_shape)

def shear(self,
          shear_matrix,
          out_shape,
          fill_val=0,
          flags=cv2.INTER_NEAREST,
          borderMode=cv2.BORDER_CONSTANT):
    """Placeholder: shearing is not implemented for polygon masks.

    The parameters are accepted but unused.

    Raises:
        NotImplementedError: Always.
    """
    raise NotImplementedError

def to_bitmap(self):
"""convert polygon masks to bitmap masks."""
bitmap_masks = self.to_ndarray()
Expand Down
36 changes: 29 additions & 7 deletions mmdet/datasets/pipelines/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,17 +5,39 @@
from .instaboost import InstaBoost
from .loading import (LoadAnnotations, LoadImageFromFile, LoadImageFromWebcam,
LoadMultiChannelImageFromFiles, LoadProposals)
from .shear import Shear
from .test_time_aug import MultiScaleFlipAug
from .transforms import (Albu, CutOut, Expand, MinIoURandomCrop, Normalize,
Pad, PhotoMetricDistortion, RandomCenterCropPad,
RandomCrop, RandomFlip, Resize, SegRescale)

# Public names exported by this package. One entry per line, each with a
# trailing comma, so a forgotten comma cannot silently fuse two adjacent
# string literals into a single bogus name.
__all__ = [
    'Compose',
    'to_tensor',
    'ToTensor',
    'ImageToTensor',
    'ToDataContainer',
    'Transpose',
    'Collect',
    'LoadAnnotations',
    'LoadImageFromFile',
    'LoadImageFromWebcam',
    'LoadMultiChannelImageFromFiles',
    'LoadProposals',
    'MultiScaleFlipAug',
    'Resize',
    'RandomFlip',
    'Pad',
    'RandomCrop',
    'Normalize',
    'SegRescale',
    'MinIoURandomCrop',
    'Expand',
    'PhotoMetricDistortion',
    'Albu',
    'InstaBoost',
    'RandomCenterCropPad',
    'AutoAugment',
    'CutOut',
    'Shear',
]
249 changes: 249 additions & 0 deletions mmdet/datasets/pipelines/shear.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,249 @@
import cv2
import numpy as np

from mmdet.core.mask import BitmapMasks, PolygonMasks
from ..builder import PIPELINES

# Upper end of the augmentation ``level`` scale used by level_to_value.
_MAX_LEVEL = 10


def level_to_value(level, max_value):
    """Scale ``level`` linearly so that ``_MAX_LEVEL`` maps to ``max_value``."""
    fraction = level / _MAX_LEVEL
    return fraction * max_value


def random_negative(value, random_negative_prob):
    """Flip the sign of ``value`` with probability ``random_negative_prob``."""
    # A single uniform draw decides whether the sign is flipped.
    if np.random.rand() < random_negative_prob:
        return -value
    return value


def bbox2fields():
    """Return the lookup tables that tie each bbox field to its companions.

    Returns:
        tuple[dict]: Three dicts keyed by bbox field name, giving the
            matching label field, mask field and semantic segmentation
            field respectively.
    """
    return (
        # bbox field -> label field
        {
            'gt_bboxes': 'gt_labels',
            'gt_bboxes_ignore': 'gt_labels_ignore'
        },
        # bbox field -> mask field
        {
            'gt_bboxes': 'gt_masks',
            'gt_bboxes_ignore': 'gt_masks_ignore'
        },
        # bbox field -> semantic segmentation field
        {
            'gt_bboxes': 'gt_semantic_seg',
        },
    )


@PIPELINES.register_module()
class Shear(object):
    """Apply Shear Transformation to image (and its corresponding bbox, mask,
    segmentation).

    Args:
        level (int | float): The level should be in range [0, _MAX_LEVEL].
        img_fill_val (int | float | tuple): The filled values for image border.
            If int or float, the same fill value will be used for all the
            three channels of image. If tuple, it should have 3 elements.
            Default 128.
        seg_ignore_label (int): The fill value used for segmentation map.
            Note this value must equals ``ignore_label`` in ``semantic_head``
            of the corresponding config. Default 255.
        prob (float): The probability for performing Shear and should be in
            range [0, 1]. Default 0.5.
        axis (str): Shear images along with x-axis or y-axis. The option
            of axis is 'x' or 'y'. Default 'x'.
        max_shear_magnitude (float): The maximum magnitude for Shear
            transformation, in range [0, 1). Default 0.3.
    """

    def __init__(self,
                 level,
                 img_fill_val=128,
                 seg_ignore_label=255,
                 prob=0.5,
                 axis='x',
                 max_shear_magnitude=0.3,
                 *args,
                 **kwargs):
        assert isinstance(level, (int, float)), \
            'The level must be type int or float.'
        assert 0 <= level <= _MAX_LEVEL, \
            'The level used for calculating Shear\'s magnitude should be ' \
            'in range [0,_MAX_LEVEL]'
        assert 0 <= prob <= 1.0, \
            'The probability of shear should be in range 0 to 1.'
        if isinstance(img_fill_val, (float, int)):
            # Replicate the scalar for all three channels, as documented.
            # A 1-element border value would leave the remaining channels
            # to OpenCV's default of 0.
            img_fill_val = tuple([float(img_fill_val)] * 3)
        elif isinstance(img_fill_val, tuple):
            assert len(img_fill_val) == 3, \
                'img_fill_val as tuple must have 3 elements.'
            img_fill_val = tuple([float(val) for val in img_fill_val])
        else:
            raise ValueError(
                'img_fill_val must be float or tuple with 3 elements.')
        assert np.all([0 <= val <= 255 for val in img_fill_val]), \
            'all elements of img_fill_val should between range [0,255].'
        assert axis in ('x', 'y'), \
            'Shear should be along the x-axis or y-axis.'
        assert isinstance(max_shear_magnitude, float), \
            'max_shear_magnitude should be type float.'
        assert 0. <= max_shear_magnitude < 1., \
            'Defaultly max_shear_magnitude should be in range [0,1).'
        self.level = level
        # Unsigned shear factor; the sign is drawn on every call.
        self.magnitude = level_to_value(level, max_shear_magnitude)
        self.img_fill_val = img_fill_val
        self.seg_ignore_label = seg_ignore_label
        self.prob = prob
        self.axis = axis
        self.max_shear_magnitude = max_shear_magnitude

    @staticmethod
    def _get_shear_matrix(magnitude, axis='x'):
        """Generates the transformation matrix for Shear augmentation.

        Args:
            magnitude (int | float): Signed shear factor.
            axis (str): 'x' or 'y'.

        Returns:
            np.ndarray: float32 affine matrix with shape (2, 3).

        Raises:
            ValueError: If ``axis`` is neither 'x' nor 'y'.
        """
        if axis == 'x':
            shear_matrix = np.float32([[1, magnitude, 0], [0, 1, 0]])
        elif axis == 'y':
            shear_matrix = np.float32([[1, 0, 0], [magnitude, 1, 0]])
        else:
            # Fail with a clear message instead of an unbound local.
            raise ValueError(f"axis must be 'x' or 'y', but got {axis!r}.")
        return shear_matrix

    def _shear_img(self,
                   results,
                   shear_matrix,
                   fill_val,
                   flags=cv2.INTER_NEAREST,
                   borderMode=cv2.BORDER_CONSTANT):
        """Shear the image.

        Args:
            results (dict): Result dict from loading pipeline.
            shear_matrix (np.ndarray): Shear matrix with shape (2, 3).
            fill_val (int | float | tuple): Value used in case of a constant
                border. Same in ``cv2.warpAffine``.
            flags: Interpolation methods used in ``cv2.warpAffine``.
            borderMode: Pixel extrapolation method used in ``cv2.warpAffine``.
        """
        for key in results.get('img_fields', ['img']):
            img = results[key]
            # dsize should be in type tuple[int] with format (w, h)
            results[key] = cv2.warpAffine(
                img,
                shear_matrix,
                dsize=img.shape[:2][::-1],
                borderValue=fill_val,
                flags=flags,
                borderMode=borderMode).astype(img.dtype)

    def _shear_bboxes(self, results, magnitude):
        """Shear the bboxes.

        The four corners of every box are sheared, then the axis-aligned
        box enclosing them is clipped to the image.

        Args:
            results (dict): Result dict from loading pipeline.
            magnitude (int | float): Signed shear factor.
        """
        # Only the height and width are needed; slicing also accepts a
        # 2-element img_shape.
        h, w = results['img_shape'][:2]
        # Linear 2x2 part of the affine matrix that is applied to the image.
        shear_matrix = self._get_shear_matrix(magnitude,
                                              self.axis)[:, :2]  # [2, 2]
        for key in results.get('bbox_fields', []):
            bboxes = results[key]
            min_x, min_y, max_x, max_y = np.split(
                bboxes, bboxes.shape[-1], axis=-1)
            coordinates = np.stack([[min_x, min_y], [max_x, min_y],
                                    [min_x, max_y],
                                    [max_x, max_y]])  # [4, 2, nb_box, 1]
            coordinates = coordinates[..., 0].transpose(
                (2, 1, 0)).astype(np.float32)  # [nb_box, 2, 4]
            new_coords = np.matmul(shear_matrix[None, :, :],
                                   coordinates)  # [nb_box, 2, 4]
            min_x = np.min(new_coords[:, 0, :], axis=-1)
            min_y = np.min(new_coords[:, 1, :], axis=-1)
            max_x = np.max(new_coords[:, 0, :], axis=-1)
            max_y = np.max(new_coords[:, 1, :], axis=-1)
            min_x = np.clip(min_x, a_min=0, a_max=w)
            min_y = np.clip(min_y, a_min=0, a_max=h)
            # The lower bound keeps max >= min after clipping.
            max_x = np.clip(max_x, a_min=min_x, a_max=w)
            max_y = np.clip(max_y, a_min=min_y, a_max=h)
            results[key] = np.stack([min_x, min_y, max_x, max_y],
                                    axis=-1).astype(bboxes.dtype)

    def _shear_masks(self, results, shear_matrix, fill_val=0):
        """Shear the masks.

        Args:
            results (dict): Result dict from loading pipeline.
            shear_matrix (np.ndarray): Shear matrix with shape (2, 3).
            fill_val (int | float): Border value for the masks. Default 0.

        Raises:
            NotImplementedError: If a mask field holds ``PolygonMasks``.
        """
        h, w = results['img_shape'][:2]
        for key in results.get('mask_fields', []):
            masks = results[key]
            if isinstance(masks, PolygonMasks):
                raise NotImplementedError
            elif isinstance(masks, BitmapMasks):
                results[key] = masks.shear(
                    shear_matrix, out_shape=(h, w), fill_val=fill_val)

    def _shear_seg(self,
                   results,
                   shear_matrix,
                   fill_val=255,
                   flags=cv2.INTER_NEAREST,
                   borderMode=cv2.BORDER_CONSTANT):
        """Shear the segmentation maps.

        Args:
            results (dict): Result dict from loading pipeline.
            shear_matrix (np.ndarray): Shear matrix with shape (2, 3).
            fill_val (int): Border value for the maps. Default 255.
            flags: Interpolation methods used in ``cv2.warpAffine``.
            borderMode: Pixel extrapolation method used in ``cv2.warpAffine``.
        """
        for key in results.get('seg_fields', []):
            seg = results[key]
            # dsize should be in type tuple[int] with format (w, h)
            results[key] = cv2.warpAffine(
                seg,
                shear_matrix,
                dsize=seg.shape[:2][::-1],
                borderValue=fill_val,
                flags=flags,
                borderMode=borderMode).astype(seg.dtype)

    def _filter_invalid(self, results, min_bbox_size=0):
        """Filter bboxes and corresponding masks too small after shear
        augmentation.

        Args:
            results (dict): Result dict from loading pipeline.
            min_bbox_size (int | float): Boxes whose width or height is not
                larger than this value are removed. Default 0.
        """
        # The key correspondence from bboxes to labels and masks.
        bbox2label, bbox2mask, _ = bbox2fields()
        for key in results.get('bbox_fields', []):
            bbox_w = results[key][:, 2] - results[key][:, 0]
            bbox_h = results[key][:, 3] - results[key][:, 1]
            valid_inds = (bbox_w > min_bbox_size) & (bbox_h > min_bbox_size)
            results[key] = results[key][valid_inds]
            # label fields. e.g. gt_labels and gt_labels_ignore
            label_key = bbox2label.get(key)
            if label_key in results:
                results[label_key] = results[label_key][valid_inds]
            # mask fields, e.g. gt_masks and gt_masks_ignore
            mask_key = bbox2mask.get(key)
            if mask_key in results:
                results[mask_key] = results[mask_key][valid_inds]

    def __call__(self, results, random_negative_prob=0.5):
        """Call function to shear images, bounding boxes, masks and semantic
        segmentation maps.

        Args:
            results (dict): Result dict from loading pipeline.
            random_negative_prob (float): The probability that turns the
                shear magnitude negative.

        Returns:
            dict: Sheared results.
        """
        if np.random.rand() > self.prob:
            return results
        magnitude = random_negative(self.magnitude, random_negative_prob)
        # the shear matrix used for transformation
        shear_matrix = self._get_shear_matrix(magnitude, self.axis)
        self._shear_img(results, shear_matrix, fill_val=self.img_fill_val)
        self._shear_bboxes(results, magnitude)
        # fill_val set to 0 for background of mask.
        self._shear_masks(results, shear_matrix, fill_val=0)
        self._shear_seg(results, shear_matrix, fill_val=self.seg_ignore_label)
        self._filter_invalid(results)
        return results

    def __repr__(self):
        repr_str = self.__class__.__name__
        repr_str += f'(level={self.level}, '
        repr_str += f'img_fill_val={self.img_fill_val}, '
        repr_str += f'seg_ignore_label={self.seg_ignore_label}, '
        repr_str += f'prob={self.prob}, '
        repr_str += f'axis={self.axis}, '
        repr_str += f'max_shear_magnitude={self.max_shear_magnitude})'
        return repr_str
Loading