Skip to content
Permalink
Branch: master
Find file Copy path
Find file Copy path
1 contributor

Users who have contributed to this file

392 lines (348 sloc) 15.2 KB
#-------------------------------------------------------------------------------
# Author: Lukasz Janyst <lukasz@jany.st>
# Date: 18.09.2017
#-------------------------------------------------------------------------------
# This file is part of SSD-TensorFlow.
#
# SSD-TensorFlow is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# SSD-TensorFlow is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with SSD-Tensorflow. If not, see <http://www.gnu.org/licenses/>.
#-------------------------------------------------------------------------------
import cv2
import random
import numpy as np
from ssdutils import get_anchors_for_preset, get_preset_by_name, anchors2array
from ssdutils import box2array, compute_overlap, compute_location, anchors2array
from utils import Size, Sample, Point, Box, abs2prop, prop2abs
from math import sqrt
#-------------------------------------------------------------------------------
class Transform:
    """
    Common base of all the transforms; every keyword argument passed to the
    constructor becomes an instance attribute of the same name
    """
    def __init__(self, **kwargs):
        for name in kwargs:
            setattr(self, name, kwargs[name])
        # Assigned after the keyword arguments, so a keyword argument named
        # `initialized` cannot override it
        self.initialized = False
#-------------------------------------------------------------------------------
class ImageLoaderTransform(Transform):
    """
    Load an image from the file named by the ground truth sample
    """
    def __call__(self, data, label, gt):
        # The incoming data is ignored and replaced by whatever cv2.imread
        # returns for gt.filename; label and gt pass through untouched
        image = cv2.imread(gt.filename)
        return image, label, gt
#-------------------------------------------------------------------------------
def process_overlap(overlap, box, anchor, matches, num_classes, vec):
    """
    Record a box/anchor match in the label array unless the anchor already
    has a recorded match with an equal or higher score

    Both `matches` (anchor index -> score) and the row of `vec` selected by
    overlap.idx are modified in place; nothing is returned
    """
    idx = overlap.idx
    score = overlap.score

    #---------------------------------------------------------------------------
    # Keep the existing entry if it is at least as good as the new one
    #---------------------------------------------------------------------------
    if idx in matches and matches[idx] >= score:
        return

    #---------------------------------------------------------------------------
    # Overwrite the row: clear the class columns, mark the class of the box,
    # and store the location computed for the box relative to the anchor
    #---------------------------------------------------------------------------
    first_loc_col = num_classes+1
    matches[idx] = score
    vec[idx, 0:first_loc_col] = 0
    vec[idx, box.labelid] = 1
    vec[idx, first_loc_col:] = compute_location(box, anchor)
#-------------------------------------------------------------------------------
class LabelCreatorTransform(Transform):
    """
    Create a label array out of a ground truth sample
    Parameters: preset, num_classes
    """
    #---------------------------------------------------------------------------
    def initialize(self):
        """
        Compute the anchors of the preset and the dimensions of the label
        array; invoked by __call__ the first time the transform is used
        """
        anchors = get_anchors_for_preset(self.preset)
        img_size = Size(1000, 1000)

        self.anchors = anchors
        self.vheight = len(anchors)
        self.vwidth = self.num_classes+5 # background class + location offsets
        self.img_size = img_size
        self.anchors_arr = anchors2array(anchors, img_size)
        self.initialized = True

    #---------------------------------------------------------------------------
    def __call__(self, data, label, gt):
        """
        Return the data and the ground truth unchanged together with a freshly
        built (number of anchors) x (num_classes+5) float32 label array; the
        incoming label is ignored
        """
        if not self.initialized:
            self.initialize()

        num_classes = self.num_classes

        #-----------------------------------------------------------------------
        # Every anchor starts out as background with zeroed location columns
        # (x offset, y offset, log width scale, log height scale)
        #-----------------------------------------------------------------------
        vec = np.zeros((self.vheight, self.vwidth), dtype=np.float32)
        vec[:, num_classes] = 1
        vec[:, num_classes+1:num_classes+5] = 0

        #-----------------------------------------------------------------------
        # For every box compute the best match and all the matches above 0.5
        # Jaccard overlap
        #-----------------------------------------------------------------------
        overlaps = {
            box: compute_overlap(box2array(box, self.img_size),
                                 self.anchors_arr, 0.5)
            for box in gt.boxes
        }

        #-----------------------------------------------------------------------
        # First pass: all the good matches, conflicts go to the higher score
        #-----------------------------------------------------------------------
        matches = {}
        for box in gt.boxes:
            for overlap in overlaps[box].good:
                process_overlap(overlap, box, self.anchors[overlap.idx],
                                matches, num_classes, vec)

        #-----------------------------------------------------------------------
        # Second pass: the best match of every box. The score book-keeping is
        # reset, so a best match always replaces what the first pass wrote and
        # competes only against the best matches of the other boxes
        #-----------------------------------------------------------------------
        matches = {}
        for box in gt.boxes:
            overlap = overlaps[box].best
            if overlap:
                process_overlap(overlap, box, self.anchors[overlap.idx],
                                matches, num_classes, vec)

        return data, vec, gt
#-------------------------------------------------------------------------------
class ResizeTransform(Transform):
    """
    Scale the image to a fixed size
    Parameters: width, height, algorithms
    """
    def __call__(self, data, label, gt):
        # One of the configured interpolation algorithms is drawn per call
        interpolation = random.choice(self.algorithms)
        target = (self.width, self.height)
        scaled = cv2.resize(data, target, interpolation=interpolation)
        return scaled, label, gt
#-------------------------------------------------------------------------------
class RandomTransform(Transform):
    """
    Apply the wrapped transform with the given probability, otherwise pass
    the input through unchanged
    Parameters: prob, transform
    """
    def __call__(self, data, label, gt):
        draw = random.uniform(0, 1)
        if not draw < self.prob:
            return data, label, gt
        return self.transform(data, label, gt)
#-------------------------------------------------------------------------------
class ComposeTransform(Transform):
    """
    Chain transforms: the output of each one is the input of the next
    Parameters: transforms
    """
    def __call__(self, data, label, gt):
        result = (data, label, gt)
        for step in self.transforms:
            # Whatever a step returns is unpacked as the arguments of the
            # following step
            result = step(*result)
        return result
#-------------------------------------------------------------------------------
class TransformPickerTransform(Transform):
    """
    Apply one transform drawn at random from the list
    Parameters: transforms
    """
    def __call__(self, data, label, gt):
        candidates = self.transforms
        index = random.randint(0, len(candidates)-1)
        chosen = candidates[index]
        return chosen(data, label, gt)
#-------------------------------------------------------------------------------
class BrightnessTransform(Transform):
    """
    Shift the brightness by a random integer amount
    Parameters: delta
    """
    def __call__(self, data, label, gt):
        # Work in float32 so that the shift can leave the 0-255 range before
        # being clamped back into it
        pixels = data.astype(np.float32)
        shift = random.randint(-self.delta, self.delta)
        shifted = np.clip(pixels + shift, 0, 255)
        return shifted.astype(np.uint8), label, gt
#-------------------------------------------------------------------------------
class ContrastTransform(Transform):
    """
    Scale the pixel values by a random factor
    Parameters: lower, upper
    """
    def __call__(self, data, label, gt):
        # Work in float32 so that the scaled values can leave the 0-255 range
        # before being clamped back into it
        pixels = data.astype(np.float32)
        factor = random.uniform(self.lower, self.upper)
        scaled = np.clip(pixels * factor, 0, 255)
        return scaled.astype(np.uint8), label, gt
#-------------------------------------------------------------------------------
class HueTransform(Transform):
    """
    Shift the hue of the whole image by a random integer amount
    Parameters: delta
    """
    def __call__(self, data, label, gt):
        hsv = cv2.cvtColor(data, cv2.COLOR_BGR2HSV)
        hsv = hsv.astype(np.float32)
        delta = random.randint(-self.delta, self.delta)
        #-----------------------------------------------------------------------
        # Address the hue channel explicitly: hsv[0] would select the first
        # image row with all three of its channels instead. The hue of an 8-bit
        # OpenCV HSV image lies in [0, 180), so the shifted value is wrapped
        # with a modulo, which also handles shifts larger than one full turn
        #-----------------------------------------------------------------------
        hsv[:, :, 0] = (hsv[:, :, 0] + delta) % 180
        hsv = hsv.astype(np.uint8)
        data = cv2.cvtColor(hsv, cv2.COLOR_HSV2BGR)
        return data, label, gt
#-------------------------------------------------------------------------------
class SaturationTransform(Transform):
    """
    Scale the saturation of the whole image by a random factor
    Parameters: lower, upper
    """
    def __call__(self, data, label, gt):
        hsv = cv2.cvtColor(data, cv2.COLOR_BGR2HSV)
        hsv = hsv.astype(np.float32)
        factor = random.uniform(self.lower, self.upper)
        #-----------------------------------------------------------------------
        # Address the saturation channel explicitly: hsv[1] would select the
        # second image row with all three of its channels instead. The result
        # is clamped to the 0-255 range before the conversion back to uint8
        #-----------------------------------------------------------------------
        hsv[:, :, 1] = np.clip(hsv[:, :, 1] * factor, 0, 255)
        hsv = hsv.astype(np.uint8)
        data = cv2.cvtColor(hsv, cv2.COLOR_HSV2BGR)
        return data, label, gt
#-------------------------------------------------------------------------------
class ReorderChannelsTransform(Transform):
    """
    Randomly permute the three channels of the image
    """
    def __call__(self, data, label, gt):
        order = list(range(3))
        random.shuffle(order)
        permuted = data[:, :, order]
        return permuted, label, gt
#-------------------------------------------------------------------------------
def transform_box(box, orig_size, new_size, h_off, w_off):
    """
    Translate a box by the given offsets and express it relative to the new
    image size

    Returns the new Box (same label and labelid), or None when the center of
    the translated box falls outside of the new image
    """
    #---------------------------------------------------------------------------
    # Absolute coordinates in the original image, shifted by the offsets
    #---------------------------------------------------------------------------
    left, right, top, bottom = prop2abs(box.center, box.size, orig_size)
    left, right = left + w_off, right + w_off
    top, bottom = top + h_off, bottom + h_off

    #---------------------------------------------------------------------------
    # Drop the box if its center is not within the new image
    #---------------------------------------------------------------------------
    center_x = left + int((right - left)/2)
    center_y = top + int((bottom - top)/2)
    inside = 0 <= center_x < new_size.w and 0 <= center_y < new_size.h
    if not inside:
        return None

    center, size = abs2prop(left, right, top, bottom, new_size)
    return Box(box.label, box.labelid, center, size)
#-------------------------------------------------------------------------------
def transform_gt(gt, new_size, h_off, w_off):
    """
    Build a new Sample for an image of new_size: every box of gt is passed
    through transform_box and the boxes it rejects (None) are left out
    """
    moved = (transform_box(box, gt.imgsize, new_size, h_off, w_off)
             for box in gt.boxes)
    kept = [box for box in moved if box is not None]
    return Sample(gt.filename, kept, new_size)
#-------------------------------------------------------------------------------
class ExpandTransform(Transform):
    """
    Place the image at a random position on a larger canvas filled with the
    mean value
    Parameters: max_ratio, mean_value
    """
    def __call__(self, data, label, gt):
        #-----------------------------------------------------------------------
        # Draw the expansion ratio, then the position of the original image
        # on the canvas (vertical offset first, horizontal second)
        #-----------------------------------------------------------------------
        ratio = random.uniform(1, self.max_ratio)
        src = gt.imgsize
        dst = Size(int(src.w*ratio), int(src.h*ratio))
        top = random.randint(0, dst.h-src.h)
        left = random.randint(0, dst.w-src.w)

        #-----------------------------------------------------------------------
        # Paint the canvas with the mean value and paste the input image; the
        # canvas has the default floating point dtype of np.zeros
        #-----------------------------------------------------------------------
        canvas = np.zeros((dst.h, dst.w, 3))
        canvas[:, :] = np.array(self.mean_value)
        canvas[top:top+src.h, left:left+src.w, :] = data

        #-----------------------------------------------------------------------
        # Move the ground truth boxes along with the image
        #-----------------------------------------------------------------------
        expanded_gt = transform_gt(gt, dst, top, left)
        return canvas, label, expanded_gt
#-------------------------------------------------------------------------------
class SamplerTransform(Transform):
    """
    Crop a randomly sampled patch of the image according to given parameters
    Params: sample, max_trials, min_scale, max_scale, min_aspect_ratio,
            max_aspect_ratio, min_jaccard_overlap
    """
    def __call__(self, data, label, gt):
        """
        Return the cropped image with the adjusted ground truth, the input
        unchanged when `sample` is false, or None when no acceptable patch
        was found within `max_trials` attempts
        """
        #-----------------------------------------------------------------------
        # Sampling disabled: pass the input through
        #-----------------------------------------------------------------------
        if not self.sample:
            return data, label, gt

        source_boxes = anchors2array(gt.boxes, gt.imgsize)

        #-----------------------------------------------------------------------
        # Keep drawing patches until one satisfies the overlap constraint
        #-----------------------------------------------------------------------
        for _ in range(self.max_trials):
            scale = random.uniform(self.min_scale, self.max_scale)
            aspect_ratio = random.uniform(self.min_aspect_ratio,
                                          self.max_aspect_ratio)

            # make sure width and height will not be larger than 1
            aspect_ratio = min(max(aspect_ratio, scale**2), 1/(scale**2))
            root = sqrt(aspect_ratio)
            width = scale*root
            height = scale/root

            # the patch is placed so that it lies within the unit square
            cx = 0.5*width + random.uniform(0, 1-width)
            cy = 0.5*height + random.uniform(0, 1-height)

            patch = np.array(prop2abs(Point(cx, cy), Size(width, height),
                                      gt.imgsize))
            best = compute_overlap(patch, source_boxes, 0).best
            if best and best.score >= self.min_jaccard_overlap:
                break
        else:
            # every trial failed (or no trials were allowed)
            return None

        #-----------------------------------------------------------------------
        # Crop the patch and shift the ground truth so that the patch corner
        # becomes the origin
        #-----------------------------------------------------------------------
        xmin, xmax, ymin, ymax = patch
        new_size = Size(xmax-xmin, ymax-ymin)
        cropped = data[ymin:ymax, xmin:xmax]
        cropped_gt = transform_gt(gt, new_size, -ymin, -xmin)
        return cropped, label, cropped_gt
#-------------------------------------------------------------------------------
class SamplePickerTransform(Transform):
    """
    Run a bunch of sample transforms and return one of the produced samples
    Parameters: samplers
    """
    def __call__(self, data, label, gt):
        """
        Return a randomly chosen result among the samplers that produced one
        (a sampler signals failure by returning None); when none of them did,
        the input is returned unchanged
        """
        samples = []
        for sampler in self.samplers:
            sample = sampler(data, label, gt)
            if sample is not None:
                samples.append(sample)

        # random.choice raises IndexError on an empty sequence, so fall back
        # to the unmodified input instead of crashing when every sampler
        # failed or no samplers are configured
        if not samples:
            return data, label, gt
        return random.choice(samples)
#-------------------------------------------------------------------------------
class HorizontalFlipTransform(Transform):
    """
    Mirror the image and the ground truth boxes around the vertical axis
    """
    def __call__(self, data, label, gt):
        flipped = cv2.flip(data, 1)
        # Only the x coordinate of a box center changes; box centers are
        # mirrored as 1-x and sizes stay as they are
        mirrored = [
            Box(box.label, box.labelid,
                Point(1-box.center.x, box.center.y), box.size)
            for box in gt.boxes
        ]
        return flipped, label, Sample(gt.filename, mirrored, gt.imgsize)
You can’t perform that action at this time.