In [185]:
import json
import os
import cv2
import numpy as np
import matplotlib.pyplot as plt
from os.path import join
import imgaug as ia
import imgaug.augmenters as iaa
from imgaug.augmentables.bbs import BoundingBox, BoundingBoxesOnImage

def get_dict(json_file):
  """Parse the JSON label file at `json_file` and return the decoded object."""
  with open(json_file) as handle:
    return json.load(handle)

def gen_labelfile(file_out, d):
  """Write `d` to `file_out` as JSON and print where it was saved.

  Keys are sorted and a 2-space indent is used, so the same dict always
  produces the same file content.
  """
  serialised = json.dumps(d, sort_keys=True, indent=2)
  with open(file_out, 'w') as sink:
    sink.write(serialised)
  print(f'Save labels to: {file_out}')

def show_im_from_file(file_im):
  """Load the image at `file_im`, reduce it to grey levels and display it.

  The image is read with OpenCV, converted BGR -> grey -> BGR (so all three
  channels carry the same value) and shown in a 10x10 inch figure titled
  'original'.

  Raises:
    FileNotFoundError: if OpenCV cannot read `file_im`.
  """
  img = cv2.imread(file_im)
  # cv2.imread signals failure by returning None instead of raising, which
  # would otherwise surface as an opaque cvtColor error on the next line.
  if img is None:
    raise FileNotFoundError(f'Could not read image: {file_im}')
  grey = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
  implt = cv2.cvtColor(grey, cv2.COLOR_GRAY2BGR)
  plt.figure(figsize=(10,10))
  plt.imshow(implt)
  plt.title('original')
  plt.show()

def show_im(im, title):
  """Display `im` in a new 10x10 inch figure with `title` as its heading."""
  plt.figure(figsize=(10, 10))
  plt.imshow(im)
  plt.title(title)
  plt.show()

def show_2im(im1, im2, title1, title2):
  """Display two images side by side, each under its own title.

  Args:
    im1: image drawn in the left panel.
    im2: image drawn in the right panel.
    title1: heading of the left panel.
    title2: heading of the right panel.
  """
  fig = plt.figure(figsize=(15,15))
  # Both panels sit in the top row of a 2x2 grid, as before.
  ax_left = fig.add_subplot(2,2,1)
  ax_left.imshow(im1)
  ax_left.set_title(title1)
  ax_right = fig.add_subplot(2,2,2)
  ax_right.imshow(im2)
  ax_right.set_title(title2)
  # plt.show() matches the other display helpers; fig.show() warns on
  # non-GUI backends.
  plt.show()

def conv_grey(im):
  """Reduce a BGR image to grey levels and return it as a 3-channel BGR image."""
  single_channel = cv2.cvtColor(im, cv2.COLOR_BGR2GRAY)
  return cv2.cvtColor(single_channel, cv2.COLOR_GRAY2BGR)

def denoise(im):
  """Denoise a BGR image and suppress its darkest pixels.

  The image is converted to grey, filtered with non-local-means denoising,
  thresholded so that low values become 0, and converted back to a
  3-channel BGR image.
  """
  mono = cv2.cvtColor(im, cv2.COLOR_BGR2GRAY)
  # Positional arguments after dst: h=11, templateWindowSize=5, searchWindowSize=19.
  smoothed = cv2.fastNlMeansDenoising(mono, None, 11, 5, 19)
  # THRESH_TOZERO keeps values above 25 unchanged and sets the rest to 0.
  _, floored = cv2.threshold(smoothed, 25, 255, cv2.THRESH_TOZERO)
  return cv2.cvtColor(floored, cv2.COLOR_GRAY2BGR)

def cleaner(im, w_size=20, zero_fraction=0.25):
    """Black out regions of a BGR image that already contain many zeros.

    A window of up to `w_size` x `w_size` pixels is anchored at every pixel
    position of `im` (windows are truncated at the right and bottom edges).
    When more than `zero_fraction` of the values in that window, counted over
    all channels, are exactly 0, the same window is set to 0 in the output.
    Windows are always read from the unmodified input, so blacking out one
    window never influences the decision for another.

    Args:
        im: BGR image array.
        w_size: side length of the sliding window in pixels.
        zero_fraction: share of zero values above which a window is cleared.

    Returns:
        The cleaned image as a 3-channel BGR array with equal channels.

    Note:
        The window is evaluated once per pixel in a Python loop, so the
        run time grows with height * width * w_size**2.
    """
    im_new = cv2.cvtColor(im, cv2.COLOR_BGR2GRAY)
    n_rows, n_cols = im.shape[:2]
    for i in range(n_rows):
        for j in range(n_cols):
            window = im[i:i+w_size, j:j+w_size]
            # Direct zero count; equivalent to looking up 0 in np.unique counts.
            n_zeros = np.count_nonzero(window == 0)
            if n_zeros > window.size * zero_fraction:
                im_new[i:i+w_size, j:j+w_size] = 0
    im_dark = cv2.cvtColor(im_new, cv2.COLOR_GRAY2BGR)
    return im_dark

In [194]:
# Build the augmented training set: every labelled image in dir_data is
# converted to grey, denoised, augmented several times and written to dir_aug,
# together with a label file that describes the transformed bounding boxes.

# --- configuration ---
specials = ['hr']            # region labels that trigger the high multiplier
dir_data = 'data/train_noise'
dir_aug = 'data/train_aug'
name_via = 'via_labels.json'
file_via = join(dir_data, name_via)
file_outvia = join(dir_aug, name_via)
multi_default = 10           # augmentations per ordinary image
multi_high = 100             # augmentations per 'x_' image or image with a special label
seed_aug = 42                # fixed seed so a re-run reproduces the same augmentations

ia.seed(seed_aug)
os.makedirs(dir_aug, exist_ok=True)
dict_via = get_dict(file_via)
dict_aug = {}

# The augmentation pipeline does not depend on the image, so build it once.
seq = iaa.Sequential([
    iaa.Fliplr(0.5),
    iaa.Sometimes(0.5, iaa.GaussianBlur(sigma=(0, 0.1))),
    iaa.Multiply((0.98, 1.02), per_channel=0.5),
    iaa.Affine(
        rotate=(-15, 15),
        translate_percent={"x": (-0.05, 0.05), "y": (-0.025, 0.025)},
        shear=(-2, 2))
])

for n, key in enumerate(dict_via):
    via_file = dict_via[key]

    name_im = via_file['filename']
    file_im = join(dir_data, name_im)
    im_raw = cv2.imread(file_im)
    # cv2.imread returns None instead of raising when the file cannot be read.
    if im_raw is None:
        raise FileNotFoundError(f'Could not read image: {file_im}')
    im = conv_grey(im_raw)
    regions = via_file['regions']
    fatt = via_file['file_attributes']

    # Collect one bounding box and its region attributes per labelled region.
    bbs = []
    ratts = []
    high_multi = name_im.startswith('x_')
    for r in regions:
        satt = r['shape_attributes']
        ratt = r['region_attributes']
        ratts.append(ratt)
        l = ratt['shape']
        if l in specials:
            high_multi = True
        x = satt['x']
        y = satt['y']
        w = satt['width']
        h = satt['height']
        bbs.append(BoundingBox(x1=x, y1=y, x2=x+w, y2=y+h, label=l))
    bbs = BoundingBoxesOnImage(bbs, im.shape)

    im_denoise = denoise(im)
    multi = multi_high if high_multi else multi_default

    # splitext copes with filenames that contain more than one dot;
    # `ext` keeps its leading dot.
    stem, ext = os.path.splitext(name_im)

    for i_aug in range(multi):
        name_aug = f'{stem}_aug{i_aug:02}{ext}'
        file_aug = join(dir_aug, name_aug)
        im_aug, bbs_aug = seq(image=im_denoise, bounding_boxes=bbs)
        im_aug_grey = conv_grey(im_aug)

        regions_aug = []
        # Pair every augmented box with its own attributes before filtering,
        # so dropping a box can never shift the attributes of the others.
        # (remove_out_of_image() returns a new object, so calling it without
        # using the result filters nothing.)
        for bb, ratt in zip(bbs_aug.bounding_boxes, ratts):
            if bb.is_out_of_image(im_aug.shape):
                continue  # box was moved completely outside the image
            x1 = int(bb.x1)
            y1 = int(bb.y1)
            x2 = int(bb.x2)
            y2 = int(bb.y2)
            regions_aug.append({
                'shape_attributes': {
                    'x': x1,
                    'y': y1,
                    'height': y2 - y1,
                    'width': x2 - x1,
                    'name': 'rect',
                },
                'region_attributes': ratt
            })

        cv2.imwrite(file_aug, im_aug_grey)
        size_im_aug = os.path.getsize(file_aug)
        # Entries are keyed by file name followed by the file size in bytes.
        key_aug = name_aug + str(size_im_aug)
        dict_aug[key_aug] = {
            'file_attributes': fatt,
            'filename': name_aug,
            'size': size_im_aug,
            'regions': regions_aug
        }

    # Rewrite the complete label file after every source image so that an
    # interrupted run still leaves labels for everything produced so far.
    gen_labelfile(file_outvia, dict_aug)
    print(f'Processed: {n + 1} of {len(dict_via)}')



Save labels to: data/train_aug/via_labels.json
Processed: 1 of 191
Save labels to: data/train_aug/via_labels.json
Processed: 2 of 191
Save labels to: data/train_aug/via_labels.json
Processed: 3 of 191
Save labels to: data/train_aug/via_labels.json
Processed: 4 of 191
Save labels to: data/train_aug/via_labels.json
Processed: 5 of 191
Save labels to: data/train_aug/via_labels.json
Processed: 6 of 191
Save labels to: data/train_aug/via_labels.json
Processed: 7 of 191
Save labels to: data/train_aug/via_labels.json
Processed: 8 of 191
Save labels to: data/train_aug/via_labels.json
Processed: 9 of 191
Save labels to: data/train_aug/via_labels.json
Processed: 10 of 191
Save labels to: data/train_aug/via_labels.json
Processed: 11 of 191
Save labels to: data/train_aug/via_labels.json
Processed: 12 of 191
Save labels to: data/train_aug/via_labels.json
Processed: 13 of 191
Save labels to: data/train_aug/via_labels.json
Processed: 14 of 191
Save labels to: data/train_aug/via_labels.json
Processed:

In [None]:
# Draw each image with its bounding boxes overlaid in green.
# NOTE(review): `ims` and `bbss` are not defined in the cells visible here;
# presumably parallel lists of images and their BoundingBoxesOnImage - confirm
# they are created before this cell runs on a fresh kernel.
for idx, im in enumerate(ims):
    # Only the first entry is labelled as the source image.
    title = 'original' if idx == 0 else 'augmented'
    im_bbs = bbss[idx].draw_on_image(im, size=2, color=[0, 255, 0])
    show_im(im_bbs, title)