# Instructions

* Copy the provided **wire_set** folder into current directory
* Make sure the **wire_set** folder has the following tree:

        ├── val
        │   ├── images
        │   │   ├── xx.jpg
        ..................
        │   │   ├── xx.jpg
        │   │   └── xx.jpg
        │   └── masks
        │       ├── xx.jpg
        ..................
        │       ├── xx.jpg
        │       └── xx.jpg
        └── train
            ├── images
            │   ├── xx.jpg
            ...............
            │   └── xx.jpg
            └── masks
                ├── xx.jpg
                ...........
                └── xx.jpg


In [1]:
import os
# ---- generation settings -------------------------------------------------
image_size  = 256    # side length (px) every image / mask is resized to
batch_size  = 100    # augmented copies written per source image by create_data
data_num    = 1024   # number of samples stored in one .tfrecord file
DATA_IDEN   = 0      # running counter, used as file name of each saved sample

# ---- source folders (looked up under the current working directory) ------
eval_src    = os.path.join(os.getcwd(),'wire_set','val')
train_src   = os.path.join(os.getcwd(),'wire_set','train')

# Data Directory Creation

In [2]:
def create_dir(base_dir,ext_name):
    '''
        makes sure the directory base_dir/ext_name exists and returns its path

        args:
            base_dir : parent directory
            ext_name : name (or relative sub path) of the directory to create
        returns:
            the joined path; the directory exists when the call returns
        raises:
            FileExistsError : when the path exists but is not a directory
    '''
    new_dir=os.path.join(base_dir,ext_name)
    # makedirs(exist_ok=True) replaces the exists()+mkdir() pair: no
    # check-then-create race and missing parent directories are created too
    os.makedirs(new_dir,exist_ok=True)
    return new_dir

# root of everything this notebook generates
ds_dir=create_dir(os.getcwd(),'DataSet')

# augmented train split: DataSet/Train/{images,masks}
train_dir=create_dir(ds_dir,'Train')
train_img,train_mask=[create_dir(train_dir,_sub) for _sub in ('images','masks')]

# augmented eval split: DataSet/Eval/{images,masks}
eval_dir=create_dir(ds_dir,'Eval')
eval_img,eval_mask=[create_dir(eval_dir,_sub) for _sub in ('images','masks')]

# tfrecord output: DataSet/WireDTF/{Train,Eval}
tf_dir=create_dir(ds_dir,'WireDTF')
tf_train,tf_eval=[create_dir(tf_dir,_sub) for _sub in ('Train','Eval')]

# Augmentation Definition

In [3]:
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import cv2
from PIL import Image as imgop
import random
from sklearn.utils import shuffle
import progressbar
from glob import glob
import imageio

from albumentations import (Blur, Compose, HorizontalFlip, HueSaturationValue,
                            IAAEmboss, IAASharpen, JpegCompression, OneOf,
                            RandomBrightness, RandomBrightnessContrast,
                            RandomContrast, RandomCrop, RandomGamma,
                            RandomRotate90, RGBShift, ShiftScaleRotate,
                            Transpose, VerticalFlip, ElasticTransform, GridDistortion, OpticalDistortion)
 
import albumentations as albu
from albumentations import Resize

def aug():
    '''
        builds and returns the albumentations Compose pipeline (p=1) used for
        random augmentation: flips, 90 degree rotation, transpose, a small
        shift/scale, brightness/contrast, gamma, emboss, a rare blur and one
        distortion chosen by a OneOf block
    '''
    steps=[
        HorizontalFlip(p=0.5),
        VerticalFlip(p=0.5),
        RandomRotate90(p=0.5),
        Transpose(p=0.5),
        # rotate_limit=0: only shift and scale are active here
        ShiftScaleRotate(shift_limit=0.01, scale_limit=0.04, rotate_limit=0, p=0.25),
        RandomBrightnessContrast(p=0.5),
        RandomGamma(p=0.25),
        IAAEmboss(p=0.25),
        Blur(p=0.01, blur_limit = 3),
    ]
    # distortion group, the OneOf block itself is configured with p=0.8
    distortions=[
        ElasticTransform(p=0.5, alpha=120, sigma=120 * 0.05, alpha_affine=120 * 0.03),
        GridDistortion(p=0.5),
        OpticalDistortion(p=1, distort_limit=2, shift_limit=0.5),
    ]
    steps.append(OneOf(distortions, p=0.8))
    return Compose(steps, p = 1)


# Creating Augmented data--> (100X)

In [4]:

def read_image_mask(image_path, mask_path,image_size=256):
    '''
        reads an image / ground truth pair and resizes both to a square

        args:
            image_path : path of the image (opened with PIL)
            mask_path  : path of the mask (read as single channel with OpenCV)
            image_size : side length of both returned arrays
        returns:
            x : resized image as np.uint8
            y : mask binarised to 0/255 (Otsu) as np.uint8
        raises:
            FileNotFoundError : when OpenCV can not read the mask file
    '''
    # read and resize the image; the with block closes the file handle
    # as soon as the pixels have been copied into the array
    with imgop.open(image_path) as _img:
        x=np.array(_img.resize((image_size,image_size))).astype(np.uint8)
    # read ground truth (flag 0 --> grayscale)
    y=cv2.imread(mask_path,0)
    # cv2.imread signals failure by returning None instead of raising
    if y is None:
        raise FileNotFoundError('could not read mask: {}'.format(mask_path))
    y=cv2.resize(y,(image_size,image_size), interpolation = cv2.INTER_AREA)
    # Otsu's thresholding after Gaussian filtering
    y=cv2.GaussianBlur(y,(5,5),0)
    _,y=cv2.threshold(y,0,255,cv2.THRESH_BINARY+cv2.THRESH_OTSU)
    y=np.array(y).astype(np.uint8)
    return x,y

# data creation function
def create_data(srcp,imgp,maskp):
    '''
        writes batch_size randomly augmented copies of every image / mask pair

        args:
            srcp  : source folder holding the 'images' and 'masks' sub folders
            imgp  : folder the augmented images are saved to
            maskp : folder the augmented masks are saved to

        files are saved as '<DATA_IDEN>.png'; the global counter DATA_IDEN is
        increased after every saved pair
    '''
    global DATA_IDEN
    # the pipeline does not depend on the sample: build it once
    _aug=aug()
    for _path in progressbar.progressbar(glob(os.path.join(srcp,'images','*.*'))):
        # same file name inside <srcp>/masks; a plain str.replace would also
        # rewrite any other 'images' occurring in the path or the file name
        _mpath=os.path.join(srcp,'masks',os.path.basename(_path))
        # pass image_size explicitly so the reshape below always matches
        x_sample, y_sample=read_image_mask(_path,_mpath,image_size)
        # batch_size augmented samples per source image
        for _ in range(batch_size):
            augmented = _aug(image=x_sample, mask=y_sample)
            img=augmented['image'].astype(np.uint8)
            tgt=augmented['mask'].reshape(image_size,image_size).astype(np.uint8)
            fname='{}.png'.format(DATA_IDEN)
            imageio.imsave(os.path.join(imgp,fname),img)
            imageio.imsave(os.path.join(maskp,fname),tgt)
            DATA_IDEN+=1

# Manual Augmentation Functions (120X: 5 crops x 6 rotations x 4 flips)

In [5]:
# flip function
def FlipData(img,gt,fid):
    '''
    TAKES NUMPY ARRAY

    returns the (image, ground truth) pair flipped according to fid:
        0     --> copies of the inputs
        1     --> left right flip
        2     --> up down flip
        other --> up down flip followed by left right flip
    '''
    # original: nothing to flip, hand back array copies
    if fid==0:
        return np.array(img),np.array(gt)

    # PIL transpose operations to apply, in order
    if fid==1:
        ops=(imgop.FLIP_LEFT_RIGHT,)
    elif fid==2:
        ops=(imgop.FLIP_TOP_BOTTOM,)
    else:
        ops=(imgop.FLIP_TOP_BOTTOM,imgop.FLIP_LEFT_RIGHT)

    def _flip(arr):
        pil=imgop.fromarray(arr)
        for op in ops:
            pil=pil.transpose(op)
        return np.array(pil)

    x=_flip(img)
    y=_flip(gt)
    return x,y

# save transposed data
def saveTransposedData(img,gt,imgp,maskp,comb_flag=False):
    '''
    TAKES NUMPY ARRAY

    saves the four FlipData variants (fid 0..3) of an image / ground truth
    pair as '<DATA_IDEN>.png' in imgp and maskp; with comb_flag every variant
    is additionally rotated by one random angle from 0..90 that is shared by
    image and ground truth. DATA_IDEN grows by one per saved pair.
    '''
    global DATA_IDEN
    for flip_id in (0,1,2,3):
        flipped_img,flipped_gt=FlipData(img,gt,flip_id)
        if comb_flag:
            angle=random.randint(0,90)
            flipped_img,flipped_gt=[np.array(imgop.fromarray(_arr).rotate(angle))
                                    for _arr in (flipped_img,flipped_gt)]
        out_name='{}.png'.format(DATA_IDEN)
        for _folder,_arr in ((imgp,flipped_img),(maskp,flipped_gt)):
            imageio.imsave(os.path.join(_folder,out_name),_arr)
        DATA_IDEN+=1
        
# create rotated and cropped images
def createCropAug(srcp,imgp,maskp):
    '''
        saves cropped, rotated and flipped versions of every image / mask pair

        args:
            srcp  : source folder holding the 'images' and 'masks' sub folders
            imgp  : folder the generated images are saved to
            maskp : folder the generated masks are saved to

        per source image: the 4 quadrants (each resized back to image_size)
        plus the full frame, each rotated by 0,5,...,25 degrees, each stored
        with the 4 flips of saveTransposedData --> 5*6*4 samples
    '''
    half=image_size//2
    # PIL crop boxes (left, upper, right, lower) of the four quadrants
    crop_boxes=[(left,top,left+half,top+half) for left in (0,half) for top in (0,half)]
    target=(image_size,image_size)
    for img_path in progressbar.progressbar(glob(os.path.join(srcp,'images','*.*'))):
        # same file name inside <srcp>/masks; a plain str.replace would also
        # rewrite any other 'images' occurring in the path or the file name
        gt_path=os.path.join(srcp,'masks',os.path.basename(img_path))
        # pass image_size explicitly so the crop boxes match the loaded data
        x,y=read_image_mask(img_path,gt_path,image_size)
        IMG=imgop.fromarray(x)
        GT=imgop.fromarray(y)
        # quadrant crops first, the uncropped pair last
        # NOTE(review): resize() runs with PIL's default resampling filter for
        # the mask as well -- confirm that this is wanted for a binary mask
        views=[(IMG.crop(box).resize(target),GT.crop(box).resize(target)) for box in crop_boxes]
        views.append((IMG,GT))
        for _IMG,_GT in views:
            # Create Rotations
            for rot_angle in range(0,30,5):
                rot_img=_IMG.rotate(rot_angle)
                rot_gt =_GT.rotate(rot_angle)
                # save with flips
                saveTransposedData(np.array(rot_img),np.array(rot_gt),imgp,maskp)
      

# Create Combination of 4 images as new data

In [6]:
# memory efficient random combination generator
def random_combination(iterable, r):
    "Pick r distinct items of iterable at random, returned in their original order"
    items = tuple(iterable)
    # sample positions (not items) and order them, so the result is one of
    # the tuples itertools.combinations(iterable, r) would yield
    picked = random.sample(range(len(items)), r)
    picked.sort()
    return tuple(items[pos] for pos in picked)

# combination data creator
def createDatafromComb(comb,img_paths,_dpath):
    '''
        image collage from 4 unique images that works as a completely new image

        args:
            comb      : 4 indices into img_paths
            img_paths : image paths; each mask is looked up under the same
                        file name in the sibling 'masks' folder
            _dpath    : destination folder holding 'images' and 'masks'

        quadrant i of the collage is quadrant i of image comb[i]; the result is
        stored through saveTransposedData with comb_flag=True
    '''
    # crop boxes in PIL order: (left, upper, right, lower)
    _dim=image_size//2
    bbox=[(0   ,   0,      _dim,_dim),
          (0   ,_dim,      _dim,image_size),
          (_dim,   0,image_size,_dim),
          (_dim,_dim,image_size,image_size)]
    # holders
    X=np.zeros((image_size,image_size,3),dtype=np.uint8)
    Y=np.zeros((image_size,image_size),dtype=np.uint8)
    for i,box in enumerate(bbox):
        _ipath=img_paths[comb[i]]
        # same file name inside the sibling 'masks' folder; a plain str.replace
        # would also rewrite any other 'images' occurring in the path
        _mpath=os.path.join(os.path.dirname(os.path.dirname(_ipath)),'masks',os.path.basename(_ipath))
        # pass image_size explicitly so the data matches the holders
        x,y=read_image_mask(_ipath,_mpath,image_size)
        left,upper,right,lower=box
        # numpy indexes [row, column] == [y, x]: rows come from upper:lower and
        # columns from left:right, so a crop lands in the quadrant it was cut from
        # the data is already uint8 in 0..255 and is stored without rescaling
        X[upper:lower,left:right]=np.array(imgop.fromarray(x).crop(box))
        Y[upper:lower,left:right]=np.array(imgop.fromarray(y).crop(box))
    # saving
    saveTransposedData(X,Y,os.path.join(_dpath,'images'),os.path.join(_dpath,'masks'),comb_flag=True)

# comb data function wrapper
def createCombData(srcp,_dpath,mode):
    '''
        creates random 4 image collages until the hard coded target is reached

        args:
            srcp   : source folder holding the 'images' sub folder
            _dpath : destination folder handed to createDatafromComb
            mode   : 'train' or 'eval', selects the number of collages
        raises:
            ValueError : for an unknown mode or fewer than 4 source images
    '''
    # every collage is saved 4 times (flips), hence the division by 4
    # train=80*200=16000 ---> target =20480
    # eval =10*200=2000  ---> target =2048
    # NOTE(review): the constants assume 80 / 10 source images with 200
    # samples each already written -- confirm against the actual data
    _nb_by_mode={'train':(20480- 80*200)//4,
                 'eval' :(2048- 10*200)//4}
    # validate first: an unknown mode used to fail later with an unbound name
    if mode not in _nb_by_mode:
        raise ValueError("mode must be 'train' or 'eval', got {!r}".format(mode))
    nb_comb=_nb_by_mode[mode]
    img_paths=glob(os.path.join(srcp,'images','*.*'))
    if len(img_paths)<4:
        raise ValueError('need at least 4 images in {}, found {}'.format(
            os.path.join(srcp,'images'),len(img_paths)))
    random.shuffle(img_paths)
    vals=range(len(img_paths))
    for _ in progressbar.progressbar(range(nb_comb)):
        comb=random_combination(vals,4) #--> nC4
        createDatafromComb(comb,img_paths,_dpath)

# TFRecords For GCS

In [7]:
import tensorflow as tf 
tf.__version__
def _bytes_feature(value):
    '''wraps one bytes value in a tf.train.Feature (bytes_list)'''
    payload=tf.train.BytesList(value=[value])
    return tf.train.Feature(bytes_list=payload)
def _int64_feature(value):
    '''wraps one integer value in a tf.train.Feature (int64_list)'''
    payload=tf.train.Int64List(value=[value])
    return tf.train.Feature(int64_list=payload)
def _float_feature(value):
    '''wraps one float value in a tf.train.Feature (float_list)'''
    payload=tf.train.FloatList(value=[value])
    return tf.train.Feature(float_list=payload)


def to_tfrecord(image_paths,save_dir,r_num):
    '''
        writes the given image / mask pairs into <save_dir>/<r_num>.tfrecord

        args:
            image_paths : paths of the images; each mask is read from the
                          sibling 'masks' folder under the same file name
            save_dir    : folder the record file is written to
            r_num       : record number, used as file name

        every example stores the raw file bytes under 'image' and 'target'
    '''
    tfrecord_name='{}.tfrecord'.format(r_num)
    tfrecord_path=os.path.join(save_dir,tfrecord_name)
    print(tfrecord_path) 
    with tf.io.TFRecordWriter(tfrecord_path) as writer:    
        for image_path in progressbar.progressbar(image_paths):
            # same file name inside the sibling 'masks' folder; a plain
            # str.replace would also rewrite any other 'images' in the path
            target_path=os.path.join(os.path.dirname(os.path.dirname(image_path)),
                                     'masks',
                                     os.path.basename(image_path))
            with open(image_path,'rb') as fid:
                image_png_bytes=fid.read()
            with open(target_path,'rb') as fid:
                target_png_bytes=fid.read()
            data ={ 'image':_bytes_feature(image_png_bytes),
                    'target':_bytes_feature(target_png_bytes)
            }
            features=tf.train.Features(feature=data)
            example= tf.train.Example(features=features)
            serialized=example.SerializeToString()
            writer.write(serialized)


def genTFRecords(data_path,mode_dir):
    '''
        shuffles the files of <data_path>/images and writes them to mode_dir
        in records of exactly data_num samples (0.tfrecord, 1.tfrecord, ...);
        a last chunk with fewer than data_num files is not written
    '''
    data_dir=os.path.join(data_path,'images')
    candidates=(os.path.join(data_dir,_name) for _name in os.listdir(data_dir))
    all_paths=[_p for _p in candidates if os.path.isfile(_p)]

    random.shuffle(all_paths)
    for start in range(0,len(all_paths),data_num):
        chunk=all_paths[start:start+data_num]
        random.shuffle(chunk)
        # only full records are kept
        if len(chunk)!=data_num:
            continue
        to_tfrecord(chunk,mode_dir,start//data_num)




# Create TFRecords to upload in GCS bucket --> COLAB TPU (SUPER FAST) 
# ALSO FOR GPU TRAINING (TIME CONSUMING)
* BUCKET: **tfalldata**
* Folder: **WireDTF**

# Create  Data 

In [8]:
# both splits run the same four stages: random augmentation, crop/rotate/flip
# augmentation, 4 image collages and finally tfrecord generation.
# DATA_IDEN is not reset in between, so the eval file names keep counting
# from where the train split stopped.
for _src,_img_dir,_mask_dir,_split_dir,_tf_out,_mode in (
        (train_src,train_img,train_mask,train_dir,tf_train,'train'),
        (eval_src ,eval_img ,eval_mask ,eval_dir ,tf_eval ,'eval')):
    create_data(_src,_img_dir,_mask_dir)
    createCropAug(_src,_img_dir,_mask_dir)
    createCombData(_src,_split_dir,_mode)
    genTFRecords(_split_dir,_tf_out)

100% (80 of 80) |########################| Elapsed Time: 0:12:11 Time:  0:12:11
100% (80 of 80) |########################| Elapsed Time: 0:18:02 Time:  0:18:02
100% (1120 of 1120) |####################| Elapsed Time: 0:11:59 Time:  0:11:59


/media/ansary/DriveData/UPWORK/WireDetection/pyWireDetect/DataSet/WireDTF/Train/0.tfrecord


100% (1024 of 1024) |####################| Elapsed Time: 0:00:54 Time:  0:00:54
N/A% (0 of 1024) |                       | Elapsed Time: 0:00:00 ETA:  --:--:--

/media/ansary/DriveData/UPWORK/WireDetection/pyWireDetect/DataSet/WireDTF/Train/1.tfrecord


100% (1024 of 1024) |####################| Elapsed Time: 0:00:50 Time:  0:00:50
N/A% (0 of 1024) |                       | Elapsed Time: 0:00:00 ETA:  --:--:--

/media/ansary/DriveData/UPWORK/WireDetection/pyWireDetect/DataSet/WireDTF/Train/2.tfrecord


100% (1024 of 1024) |####################| Elapsed Time: 0:00:38 Time:  0:00:38
N/A% (0 of 1024) |                       | Elapsed Time: 0:00:00 ETA:  --:--:--

/media/ansary/DriveData/UPWORK/WireDetection/pyWireDetect/DataSet/WireDTF/Train/3.tfrecord


100% (1024 of 1024) |####################| Elapsed Time: 0:00:35 Time:  0:00:35
  0% (1 of 1024) |                       | Elapsed Time: 0:00:00 ETA:   0:01:46

/media/ansary/DriveData/UPWORK/WireDetection/pyWireDetect/DataSet/WireDTF/Train/4.tfrecord


100% (1024 of 1024) |####################| Elapsed Time: 0:00:40 Time:  0:00:40
  0% (3 of 1024) |                       | Elapsed Time: 0:00:00 ETA:   0:00:37

/media/ansary/DriveData/UPWORK/WireDetection/pyWireDetect/DataSet/WireDTF/Train/5.tfrecord


100% (1024 of 1024) |####################| Elapsed Time: 0:00:34 Time:  0:00:34
N/A% (0 of 1024) |                       | Elapsed Time: 0:00:00 ETA:  --:--:--

/media/ansary/DriveData/UPWORK/WireDetection/pyWireDetect/DataSet/WireDTF/Train/6.tfrecord


100% (1024 of 1024) |####################| Elapsed Time: 0:00:34 Time:  0:00:34
N/A% (0 of 1024) |                       | Elapsed Time: 0:00:00 ETA:  --:--:--

/media/ansary/DriveData/UPWORK/WireDetection/pyWireDetect/DataSet/WireDTF/Train/7.tfrecord


100% (1024 of 1024) |####################| Elapsed Time: 0:01:12 Time:  0:01:12
  0% (7 of 1024) |                       | Elapsed Time: 0:00:00 ETA:   0:00:14

/media/ansary/DriveData/UPWORK/WireDetection/pyWireDetect/DataSet/WireDTF/Train/8.tfrecord


100% (1024 of 1024) |####################| Elapsed Time: 0:00:38 Time:  0:00:38
  0% (6 of 1024) |                       | Elapsed Time: 0:00:00 ETA:   0:00:17

/media/ansary/DriveData/UPWORK/WireDetection/pyWireDetect/DataSet/WireDTF/Train/9.tfrecord


100% (1024 of 1024) |####################| Elapsed Time: 0:00:31 Time:  0:00:31
  0% (6 of 1024) |                       | Elapsed Time: 0:00:00 ETA:   0:00:17

/media/ansary/DriveData/UPWORK/WireDetection/pyWireDetect/DataSet/WireDTF/Train/10.tfrecord


100% (1024 of 1024) |####################| Elapsed Time: 0:00:36 Time:  0:00:36
  0% (6 of 1024) |                       | Elapsed Time: 0:00:00 ETA:   0:00:17

/media/ansary/DriveData/UPWORK/WireDetection/pyWireDetect/DataSet/WireDTF/Train/11.tfrecord


100% (1024 of 1024) |####################| Elapsed Time: 0:00:49 Time:  0:00:49
  1% (13 of 1024) |                      | Elapsed Time: 0:00:00 ETA:   0:00:12

/media/ansary/DriveData/UPWORK/WireDetection/pyWireDetect/DataSet/WireDTF/Train/12.tfrecord


100% (1024 of 1024) |####################| Elapsed Time: 0:00:36 Time:  0:00:36
  0% (1 of 1024) |                       | Elapsed Time: 0:00:00 ETA:   0:01:51

/media/ansary/DriveData/UPWORK/WireDetection/pyWireDetect/DataSet/WireDTF/Train/13.tfrecord


100% (1024 of 1024) |####################| Elapsed Time: 0:00:34 Time:  0:00:34
N/A% (0 of 1024) |                       | Elapsed Time: 0:00:00 ETA:  --:--:--

/media/ansary/DriveData/UPWORK/WireDetection/pyWireDetect/DataSet/WireDTF/Train/14.tfrecord


100% (1024 of 1024) |####################| Elapsed Time: 0:00:37 Time:  0:00:37
N/A% (0 of 1024) |                       | Elapsed Time: 0:00:00 ETA:  --:--:--

/media/ansary/DriveData/UPWORK/WireDetection/pyWireDetect/DataSet/WireDTF/Train/15.tfrecord


100% (1024 of 1024) |####################| Elapsed Time: 0:00:35 Time:  0:00:35
N/A% (0 of 1024) |                       | Elapsed Time: 0:00:00 ETA:  --:--:--

/media/ansary/DriveData/UPWORK/WireDetection/pyWireDetect/DataSet/WireDTF/Train/16.tfrecord


100% (1024 of 1024) |####################| Elapsed Time: 0:00:45 Time:  0:00:45
  0% (7 of 1024) |                       | Elapsed Time: 0:00:00 ETA:   0:00:16

/media/ansary/DriveData/UPWORK/WireDetection/pyWireDetect/DataSet/WireDTF/Train/17.tfrecord


100% (1024 of 1024) |####################| Elapsed Time: 0:01:05 Time:  0:01:05
  0% (4 of 1024) |                       | Elapsed Time: 0:00:00 ETA:   0:00:26

/media/ansary/DriveData/UPWORK/WireDetection/pyWireDetect/DataSet/WireDTF/Train/18.tfrecord


100% (1024 of 1024) |####################| Elapsed Time: 0:00:52 Time:  0:00:52
N/A% (0 of 1024) |                       | Elapsed Time: 0:00:00 ETA:  --:--:--

/media/ansary/DriveData/UPWORK/WireDetection/pyWireDetect/DataSet/WireDTF/Train/19.tfrecord


100% (1024 of 1024) |####################| Elapsed Time: 0:00:32 Time:  0:00:32
  1% (13 of 1024) |                      | Elapsed Time: 0:00:00 ETA:   0:00:14

/media/ansary/DriveData/UPWORK/WireDetection/pyWireDetect/DataSet/WireDTF/Train/20.tfrecord


100% (1024 of 1024) |####################| Elapsed Time: 0:00:37 Time:  0:00:37
100% (10 of 10) |########################| Elapsed Time: 0:01:52 Time:  0:01:52
100% (10 of 10) |########################| Elapsed Time: 0:03:00 Time:  0:03:00
100% (12 of 12) |########################| Elapsed Time: 0:00:06 Time:  0:00:06


/media/ansary/DriveData/UPWORK/WireDetection/pyWireDetect/DataSet/WireDTF/Eval/0.tfrecord


100% (1024 of 1024) |####################| Elapsed Time: 0:00:00 Time:  0:00:00
 30% (312 of 1024) |######               | Elapsed Time: 0:00:00 ETA:   0:00:00

/media/ansary/DriveData/UPWORK/WireDetection/pyWireDetect/DataSet/WireDTF/Eval/1.tfrecord


100% (1024 of 1024) |####################| Elapsed Time: 0:00:02 Time:  0:00:02
