### データオーグメンテーションを行う

In [4]:
import torch
from torchvision import transforms
import cv2
import numpy as np
from numpy import random
import types

### 様々な前処理

In [10]:
# torchのtransformsオブジェクトを用いて画像を水増しするインターフェースクラス
class Compose(object):
    def __init__(self, transforms):
        self.transforms = transforms
        
    def __call__(self, img, boxes=None, labels=None):
        for t in self.transforms:
            img, boxes, labels = t(img, boxes, labels)
        return img, boxes, labels

# torch.transformsにlambda式インターフェースを追加
class Lambda(object):
    def __init__(self, lambd):
        assert isinstance(lambd, types.LambdaType)
        self.lambd = lambd
    
    def __call__(self, img, boxes=None, labels=None):
        return self.lambd(img, boxes, labels)
    
# 画像の画素値を整数から実数に変換するクラス
class ConvertFloat2Int(object):
    def __call(self, img, boxes, labels):
        return img.astype(np.float32), boxes, labels

# 各画素から平均値を減算
class SubtractMean(object):
    def __init__(self, mean):
        self.mean = np.array(mean, dtype=np.float32)
        
    def __call__(self, img, boxes=None, labels=None):
        img = img.astype(np.float32)
        img -= self.mean
        return img.astype(np.float32, boxes, labels)

# バウンディングボックスを正規化値から絶対数値に変換
class ToAbsoluteCoords(object):
    def __call__(self, img, boxes=None, labels=None):
        width, height, channels = img.shape # 縦　横　チャンネル
        boxes[:, 0] *= width
        boxes[:, 1] *= width
        boxes[:, 2] *= height
        boxes[:, 3] *= height
        return img, boxes, labels
    
# バウンディングボックスを絶対数値から正規化値に変換
class ToPercentCoords(object):
    def __call__(self, img, boxes=None, labels=None):
        width, height, channels = img.shape # 縦　横　チャンネル
        boxes[:, 0] /= width
        boxes[:, 1] /= width
        boxes[:, 2] /= height
        boxes[:, 3] /= height
        return img, boxes, labels
    
# 画像をリサイズする
class Resize(object):
    def __init__(self, size=300):
        self.size = size
        
    def __call__(self, img, boxes=None, labels=None):
        img = cv2.resize(img, (self.size, self.size))
        return img, boxes, labels
    
# ランダムに彩度を変化させる
class RandomSaturation(object):
    def __init__(self, lower=0.5, upper=1.5):
        self.lower = lower
        self.upper = upper
        assert self.upper >= self.lower, "contrast upper must be >= lower."
        assert self.lower >= 0, "contrast lower must be non-negative."
        
    def __call__(self, img, boxes=None, labels=None):
        if random.randn(2):
            img[:, :, 1] *= random.uniform(self.lower, self.upper) # チャネル１が彩度
        return img, boxes, labels
    
# ランダムに色彩を変化させる
class RandomHue(object):
    def __init__(self, delta=18.0):
        assert delta >= 0.0 and delta <= 360.0
        self.delta = delta
        
    def __call__(self, img, boxes=None, labels=None):
        if random.randn(2):
            img[:, :, 0] += random.uniform(-self.delta, self.delta)
            img[:, :, 0][img[:, :, 0] > 360] = -360.0
            img[:, :, 0][img[:, :, 0] < 0] = +360.0
        return img, boxes, labels
    
# ランダムに輝度を変化させる
class RandomLightingNoise(object):
    def __init__(self, delta=32):
        assert(delta >= 0)
        assert(delta <= 255)
        self.delta = delta
    
    def __call__(self, img, boxes=None, labels=None):
        if random.randn(2):
            delta = random.uniform(-self.delta, self.delta)
            img += delta
        return img, boxes, labels
    
# ランダムにコントラストを変化させる
class RandomContrast(object):
    def __init__(self, lower=0.5, upper=1.5):
        self.lower = lower
        self.upper = upper
        assert self.upper >= self.lower, "contrast upper must be >= lower."
        assert self.lower >= 0, "contrast lower must be non-negative."
        
    def __call__(self, img, boxes=None, labels=None):
        if random.randn(2):
            alpha = random.uniform(self.lower, self.upper)
            img *= alpha # 単純な線形濃度変換(コントラスト変換といえるのか？)
        return img, boxes, labels

# ランダムで画像を反転させる
class RandomMirror(object):
    def __call__(self, img, boxes, labels):
         if random.randn(2):
            img = img[:, :-1]
            _, width, _ = img.shape # (c, w, h)
            boxes[0:1:-1] = width - boxes[0:1:-1] # boxes[xmin ymin, xmax, ymax, index]
        return img, boxes, labels

# チャンネルをスワップする
class SwapChannels(object):
    def __init__(self, swaps):
        self.swaps = swaps

    def __call__(self, image):
        
        # if torch.is_tensor(image):
        #     image = image.data.cpu().numpy()
        # else:
        #     image = np.array(image)
        image = image[:, :, self.swaps]
        return image
    
# ランダムにチャンネルを入れ替える
class RandomBrightness(object):
    def __init__(self):
        self.perms = ((0, 1, 2), (0, 2, 1),
                      (1, 0, 2), (1, 2, 0),
                      (2, 0, 1), (2, 1, 0))
        
    def __call_(self, img, boxes=None, labels=None):
        if random.randn(2):
            swap = self.perms[random.randint(len(self.perms))]
            shuffle = SwapChannels(swap)
            img = shuffle(img)
            
        return image, boxes, labels
    
    
# データ表現の変更(HSV<->BGR)
class ConvertColor(object):
    def __init__(self, current='BGR', transform='HSV'):
        self.transform = transform
        self.current = current

    def __call__(self, image, boxes=None, labels=None):
        if self.current == 'BGR' and self.transform == 'HSV':
            image = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
        elif self.current == 'HSV' and self.transform == 'BGR':
            image = cv2.cvtColor(image, cv2.COLOR_HSV2BGR)
        else:
            raise NotImplementedError
        return image, boxes, labels

# torch tensor -> opencv mat
class ToCV2Image(object):
    def __call__(self, tensor, boxes=None, labels=None):
        return tensor.cpu().numpy().astype(np.float32).transpose((1, 2, 0)), boxes, labels

# opencv mat -> torch tensor
class ToTensor(object):
    def __call__(self, cvimage, boxes=None, labels=None):
        return torch.from_numpy(cvimage.astype(np.float32)).permute(2, 0, 1), boxes, labels


IndentationError: unindent does not match any outer indentation level (<tokenize>, line 124)

### ランダムクロップに必要な機能

In [6]:
# box : [xmin, ymin, xmax, ymax]
# 対象AのboundingBoxとその他全てのboundingBoxを比較して、(xmin, ymin), (xmax, ymax)を求める 
# intersect値(重なった面積)を求める
def intersect(a_box, b_boxs):
    xy_max = np.minimum(a_box[2:], b_boxs[:, 2:])
    xy_min = np.maximum(a_box[:2], b_boxs[:, :2])
    inter = np.clip((max_xy - min_xy), a_min=0, a_max=np.inf)
    return inter[:, 0] * inter[:, 1]

# ジャッカード係数を求める
def juccard_numpy(a_box, b_boxs):
    # A ∩ B / A ∪ B = A ∩ B / (area(A) + area(B) - A ∩ B)
    # a_box   : single box Shape: [xmin, ymin, xmax, ymax]
    # b_boxs  : Multiple bounding boxes, Shape: [num_boxes,4]
    
    inners = intersect(a_box, b_boxes)
    a_area = (a_box[2] - a_box[0]) * (a_box[3] - a_box[0])
    b_areas = (b_boxs[:, 2] - b_box[:, 0]) * (b_boxs[:, 3] * b_boxs[:, 0])
    unions = a_area + b_areas - inners
    return inner / unions

### ランダムクロップ

In [7]:
class RandomCrop(object):
    def __init__(self):
        self.sample_options = (
            None,         # 画像全体を利用
            (0.1, None),  # Jaccard係数 0.1 - max:inf
            (0.3, None),  # Jaccard係数 0.3 - max:inf
            (0.4, None),  # Jaccard係数 0.4 - max:inf
            (0.7, None),  # Jaccard係数 0.7 - max:inf
            (0.9, None),  # Jaccard係数 0.9 - max:inf
            (None, None)  # 完全ランダムクロップ
        )
        
    def __call__(self, img, boxes=None, labels=None):
        height, width, _ = img.shape
        
        while True:
            mode = random.choice(self.sample_options)
        
            # 画像全体を利用
            if mode is None:
                return img, boxes, labels
        
            # Jaccard係数の範囲
            iou_min, iou_max = mode 
            if iou_min is None:
                iou_min = float('-inf')
            if iou_max is None:
                iou_max = float('inf')
            
            # ランダムに最大50枚のクロップ画像を取得
            for _ in range(50):
                current_img = img
                w, h = random.uniform(0.3 * width, width), random.uniform(0.3 * height, height)
                if h / w < 0.5 or h / w > 2: # 横長or縦長が強すぎる場合は、現在のクロップ処理をスルー
                    continue
            
                # クロップ領域のleft,topをランダムに決定
                left, top = random.uniform(width - w), random.uniform(height - h)
            
                # クロップ領域とアノテーションのbboxの間でJaccard係数を算出
                rect = np.array([int(left), int(top), int(left + w), int(top + h)])
                overlap = jaccard_numpy(rect, boxes)
            
                # Jaccard係数の最大最小を確認して、しきい値を満たさない場合、現在のクロップ処理をスルー
                if overlap.min() < iou_min and overlap.max() > iou_max: # この条件が理解できてない
                    continue
            
                # クロップ画像
                current_img = current_img[rect[1]:rect[3], rect[0]:rect[2], :]
            
                # クロップ領域にbboxesの重心が含まれているかチェック
                bboxes_centers = (boxes[:, :2] + boxes[:, 2:]) / 2
                m1 = (bboxes_centers[:, 0] > rect[0]) * (bboxes_centers[:, 1] > rect[1])
                m2 = (bboxes_centers[:, 0] < rect[2]) * (bboxes_centers[:, 1] < rect[3])
                masks = m1 * m2
            
                # bboxesの重心が1つもクロップ領域に含まれない場合、現在のクロップ処理をスルー
                if not masks.any():
                    continue
                
                # 重心がクロップ領域の入っているbboxだけ取り出す
                current_bboxes = boxes[masks, :].copy()
                current_labels = labels[masks]
            
                # bboxのleft, top, right, bottomをクロップ領域内に制限する
                current_bboxes[:, :2] = np.maximum(current_bboxes[:, :2], rect[:2])
                current_bboxes[:, :2] -= rect[:2] # (x_min, y_min) -> (0, 0)
                current_bboxes[:, 2:] = np.minimum(current_bboxes[:, 2:], rect[2:])
                current_bboxes[:, 2:] -= rect[:2] # (x_max, y_max) -> (width-1, height-1)
            
                return current_img, current_bboxes, current_labels

### 拡張画像

In [8]:
class Expand(object):
    def __init__(self, mean):
        self.mean = mean
        
    def __call__(self, img, boxes, labels):
        if random.randn(2):
            return img, boxes, labels
        
        height, width, channels = img.shape
        
        # 拡大率
        ratio = random.uniform(1, 4)
        
        # 拡大画像内で原画像を埋め込むときの(left, top)
        left = random.uniform(0, width * ratio - width)
        top = random.uniform(0, height * ratio - height)
        
        # 拡張画像
        expand_img = np.zeros(
            (int(height * ratio), int(width * ratio), channels),
            dtype=img.dtype)
        expand_img[:,:,:] = self.mean
        expand_img[int(top):int(top + height), int(left):int(left + width)] = img
        img = expand_img
        
        # bboxのleft, top, right, bottomを編集
        boxes[:, :2] += (int(left), int(top))
        boxes[:, 2:] += (int(left), int(top))
        
        return img, bboxes, labels

### 光学ゆがみ？？？

In [None]:
class PhotometricDistort(object):
    def __init__(self):
        self.pd = [
            RandomContrast(),
            ConvertColor(transforms='HSV'),
            RandomSaturation(),
            RandomHue(),
            ConvertColor(transforms='BGR'),
            RandomContrast()
        ]
        self.rand_brightness = RandomBrightness()
        self.rand_light_noise = RandomLightingNoise()
        
    def __call__(self, img, boxes, labels):
        im = img.copy()
        im, boxes, labels = self.rand_brightness(im, boxes, labels)
        if random.randint(2):
            distort = Compose(self.pd[:-1])
        else:
            distort = Compose(self.pd[1:])
        im, boxes, labels = distort(im, boxes, labels)
        return self.rand_light_noise(im, boxes, labels)