# DataSets
* **MICC_F2000**
* **MICC_F220**
* **MICC_F600**
* **CoMoFoD**

In [1]:
# Root folders of the raw datasets. Each one is handed to the dataset class of
# the same name as its first constructor argument.
# MICC_F2000
MICC_F2000_PATH='/media/ansary/DriveData/RESEARCH/CMFD/DATA/MICC-F2000/'
# MICC_F220
MICC_F220_PATH='/media/ansary/DriveData/RESEARCH/CMFD/DATA/MICC-F220/'
# MICC_F600
# NOTE(review): this folder name uses an underscore while the other MICC
# folders use a hyphen -- confirm it matches the directory on disk.
MICC_F600_PATH='/media/ansary/DriveData/RESEARCH/CMFD/DATA/MICC_F600/'
# CoMoFoD
CoMoFoD_PATH='/media/ansary/DriveData/RESEARCH/CMFD/DATA/CoMoFoD/'
# Common Params
# SAVE_PATH is the output root: the prepared data lives under SAVE_PATH/DataSets.
SAVE_PATH='/media/ansary/DriveData/RESEARCH/CMFD/DATA/'
# Passed to every dataset class as its third constructor argument
# (presumably the side length of the square output images -- TODO confirm).
IMG_DIM=256


# Image And Target Creation

In [2]:
import os
from dataset import MICC_F2000,MICC_F220,MICC_F600,CoMoFoD
# datasets
# DS and PATHS are kept in the same order so that zip() pairs every dataset
# class with the root folder of its raw data.
DS=[CoMoFoD,MICC_F600,MICC_F220,MICC_F2000]
PATHS=[CoMoFoD_PATH,MICC_F600_PATH,MICC_F220_PATH,MICC_F2000_PATH]

# Instantiate each dataset with (raw data path, output root, image dimension)
# and run its preparation step.
# presumably prepare() writes <SAVE_PATH>/DataSets/<name>/images and .../targets,
# which is where merge() later reads from -- verify against the dataset module.
for dpath,obj in zip(PATHS,DS):
    ds=obj(dpath,SAVE_PATH,IMG_DIM)
    ds.prepare()

# Folder that holds the per-dataset outputs and the merged Train/Test folders.
DS_DIR=os.path.join(SAVE_PATH,'DataSets')

[32mInitializing:CoMoFoD[0m


HBox(children=(FloatProgress(value=0.0, max=200.0), HTML(value='')))


[32mInitializing:MICC_F600[0m


HBox(children=(FloatProgress(value=0.0, max=160.0), HTML(value='')))


[32mInitializing:MICC_F220[0m


HBox(children=(FloatProgress(value=0.0, max=110.0), HTML(value='')))


[32mInitializing:MICC_F2000[0m


HBox(children=(FloatProgress(value=0.0, max=1298.0), HTML(value='')))




# Test Train Data

In [3]:
import shutil
import cv2
from tqdm.notebook import tqdm
from glob import glob
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

# Dataset folder names (under DS_DIR) whose samples make up the Test split,
# and the folder the merged Test split is stored in.
Test_Data_Idens=['MICC_F220']
Test_dir=os.path.join(DS_DIR,'Test')

# Dataset folder names (under DS_DIR) whose samples make up the Train split,
# and the folder the merged Train split is stored in.
Train_Data_Idens=['CoMoFoD','MICC_F600','MICC_F2000']
Train_dir=os.path.join(DS_DIR,'Train')

def create_dir(_path):
    """Create the directory ``_path`` if it does not already exist.

    Missing parent directories are created as well, and an existing directory
    is left untouched, so the call is safe to repeat.

    Args:
        _path: Path of the directory to create.

    Raises:
        FileExistsError: If ``_path`` exists but is not a directory.
    """
    # makedirs(exist_ok=True) removes the check-then-create race of
    # os.path.exists + os.mkdir and also handles nested paths.
    os.makedirs(_path,exist_ok=True)

def merge(Data_Idens,dest_dir,mode):
    """Copy the images and targets of several datasets into one numbered folder.

    For every identifier in ``Data_Idens`` the files matching
    ``DS_DIR/<iden>/images/*.*`` are copied to ``dest_dir/images/<n>.png`` and
    the same-named file from ``DS_DIR/<iden>/targets`` is copied to
    ``dest_dir/targets/<n>.png``. ``n`` is a counter that starts at 0 and runs
    across all listed datasets.

    Args:
        Data_Idens: Iterable of dataset folder names located under ``DS_DIR``.
        dest_dir: Folder that receives the ``images`` and ``targets`` sub-folders.
        mode: Label of the split; only used in the progress message.

    Raises:
        FileNotFoundError: If an image has no same-named file in ``targets``.
    """
    # The destination comes from dest_dir; previously the argument was ignored
    # and the folder was rebuilt from DS_DIR and mode.
    img_dir=os.path.join(dest_dir,'images')
    tgt_dir=os.path.join(dest_dir,'targets')

    create_dir(dest_dir)
    create_dir(img_dir)
    create_dir(tgt_dir)

    data_count=0

    for data_iden in Data_Idens:

        print('Merging Data:',data_iden,'  for:',mode)

        src_tgt_dir=os.path.join(DS_DIR,data_iden,'targets')
        # sorted() makes the numbering reproducible; glob order is arbitrary.
        for img_path in tqdm(sorted(glob(os.path.join(DS_DIR,data_iden,'images','*.*')))):
            # Build the mask path from the file name only. Replacing 'images'
            # in the whole path would also rewrite any other path component
            # that happens to contain that word.
            msk_path=os.path.join(src_tgt_dir,os.path.basename(img_path))
            # img
            # NOTE(review): files are copied unchanged and given a .png name
            # whatever their original extension -- presumably the prepared
            # files are already PNG; confirm.
            shutil.copy(img_path,os.path.join(img_dir,"{}.png".format(data_count)))
            # tgt
            shutil.copy(msk_path,os.path.join(tgt_dir,"{}.png".format(data_count)))
            data_count+=1


# Build the merged Test and Train folders from the per-dataset outputs.
merge(Test_Data_Idens,Test_dir,'Test')
merge(Train_Data_Idens,Train_dir,'Train')            


        

Merging Data: MICC_F220   for: Test


HBox(children=(FloatProgress(value=0.0, max=110.0), HTML(value='')))


Merging Data: CoMoFoD   for: Train


HBox(children=(FloatProgress(value=0.0, max=200.0), HTML(value='')))


Merging Data: MICC_F600   for: Train


HBox(children=(FloatProgress(value=0.0, max=160.0), HTML(value='')))


Merging Data: MICC_F2000   for: Train


HBox(children=(FloatProgress(value=0.0, max=672.0), HTML(value='')))




# Augmentation Wrappers

In [4]:
from albumentations import (Blur, Compose, HorizontalFlip, HueSaturationValue,
                            IAAEmboss, IAASharpen, JpegCompression, OneOf,
                            RandomBrightness, RandomBrightnessContrast,
                            RandomContrast, RandomCrop, RandomGamma,IAAAdditiveGaussianNoise,
                            Rotate, RGBShift, ShiftScaleRotate,RandomGridShuffle,
                            Transpose, VerticalFlip, ElasticTransform, GridDistortion, OpticalDistortion)
 
def aug():
    """Build the augmentation pipeline used for every image/mask pair.

    Rotate is always applied; every other transform is applied with
    probability 0.5. The pipeline itself runs with probability 1.

    NOTE(review): the IAA* transforms rely on the imgaug backend and are not
    available in newer albumentations releases -- confirm the installed version.

    Returns:
        An albumentations ``Compose`` object.
    """
    spatial_steps=[
        HorizontalFlip(p=0.5),
        VerticalFlip(p=0.5),
        Rotate(always_apply=True, p=1),
        Transpose(p=0.5),
    ]
    pixel_steps=[
        RandomBrightnessContrast(p=0.5),
        IAAAdditiveGaussianNoise(p=0.5),
        RandomGamma(p=0.5),
        IAAEmboss(p=0.5),
        RGBShift(p=0.5),
        Blur(blur_limit=3, p=0.5),
    ]
    # Order matters: spatial transforms first, then the pixel-level ones.
    return Compose(spatial_steps+pixel_steps, p=1)

# Augmentation Data Functions

In [5]:
import imageio 
# Number of samples wanted for the Eval and Train splits.
NB_EVAL=1024
NB_TRAIN=10240
TOTAL_DATA=NB_EVAL+NB_TRAIN
# Number of images currently present in the merged Train folder.
DATA_COUNT=len(glob(os.path.join(Train_dir,'images','*.*')))
if DATA_COUNT==0:
    # Fail with a clear message instead of the ZeroDivisionError below.
    raise RuntimeError('No training images found in {}'.format(os.path.join(Train_dir,'images')))
# Samples still missing to reach TOTAL_DATA (negative if there are already enough).
NB_NEEDED=TOTAL_DATA-DATA_COUNT
# Augmented copies generated per existing image. The +1 rounds up, so the
# total may exceed TOTAL_DATA.
GEN_FACTOR=(NB_NEEDED//DATA_COUNT)+1

def aug_data(x_sample,y_sample):
    """Run one random augmentation on an image and its mask.

    Args:
        x_sample: Image array, passed to the pipeline as ``image``.
        y_sample: Mask array, passed to the pipeline as ``mask``.

    Returns:
        Tuple ``(img, tgt)`` holding the augmented image and mask, both cast
        to ``np.uint8``.
    """
    # A fresh pipeline is built on every call, then applied to the pair.
    result=aug()(image=x_sample, mask=y_sample)
    return result['image'].astype(np.uint8),result['mask'].astype(np.uint8)


print('Augmenting Training Data')

# glob() returns a complete list before the loop starts, so the augmented
# files written below are not picked up as new inputs.
for img_path in tqdm(glob(os.path.join(Train_dir,'images','*.*'))):
    # Build the mask path from the file name only. Replacing 'images' in the
    # whole path would also rewrite any other path component containing it.
    msk_path=os.path.join(Train_dir,'targets',os.path.basename(img_path))
    # samples
    x_sample=imageio.imread(img_path)
    y_sample=imageio.imread(msk_path)
    for _ in range(GEN_FACTOR):
        img,tgt=aug_data(x_sample,y_sample)
        # DATA_COUNT is the next free index; image and target share the name.
        imageio.imsave(os.path.join(Train_dir,'images',"{}.png".format(DATA_COUNT)),img)
        imageio.imsave(os.path.join(Train_dir,'targets',"{}.png".format(DATA_COUNT)),tgt)
        DATA_COUNT+=1
   
        

Augmenting Training Data


HBox(children=(FloatProgress(value=0.0, max=1032.0), HTML(value='')))




# TFRecords For GCS

In [6]:
import tensorflow as tf 
tf.__version__
import random
# Output folders for the TFRecord files: one root (CMFDTF) with a sub-folder
# per split.
tf_dir=os.path.join(DS_DIR,'CMFDTF')
tf_train=os.path.join(tf_dir,'Train')
tf_eval=os.path.join(tf_dir,'Eval')
tf_test=os.path.join(tf_dir,'Test')
# The root must be created before its sub-folders.
create_dir(tf_dir)
create_dir(tf_train)
create_dir(tf_eval)
create_dir(tf_test)



def _bytes_feature(value):
    """Wrap a single bytes value in a ``tf.train.Feature`` (bytes_list)."""
    wrapped=tf.train.BytesList(value=[value])
    return tf.train.Feature(bytes_list=wrapped)
def _int64_feature(value):
    """Wrap a single integer value in a ``tf.train.Feature`` (int64_list)."""
    wrapped=tf.train.Int64List(value=[value])
    return tf.train.Feature(int64_list=wrapped)
def _float_feature(value):
    """Wrap a single float value in a ``tf.train.Feature`` (float_list)."""
    wrapped=tf.train.FloatList(value=[value])
    return tf.train.Feature(float_list=wrapped)

# Number of examples written to each .tfrecord file. genTFRecords reads this
# module-level name when it is called, so rebinding it changes the record size.
data_num    = 1024

def to_tfrecord(image_paths,save_dir,r_num):
    """Write one TFRecord file holding the given image/target pairs.

    The file is saved as ``<save_dir>/<r_num>.tfrecord`` and its path is
    printed. Each example stores the raw file bytes of the image under the
    key ``image`` and those of its mask under the key ``target``.

    Args:
        image_paths: Paths of the image files. The target path is obtained by
            replacing every 'images' in the path string with 'targets'.
        save_dir: Folder the record file is written to.
        r_num: Number used as the record file name.
    """
    def _read_bytes(file_path):
        # Return the complete binary content of a file.
        with open(file_path,'rb') as handle:
            return handle.read()

    record_path=os.path.join(save_dir,'{}.tfrecord'.format(r_num))
    print(record_path)
    with tf.io.TFRecordWriter(record_path) as record_writer:
        for img_file in tqdm(image_paths):
            tgt_file=str(img_file).replace('images','targets')
            feature_map={
                'image':_bytes_feature(_read_bytes(img_file)),
                'target':_bytes_feature(_read_bytes(tgt_file)),
            }
            example=tf.train.Example(features=tf.train.Features(feature=feature_map))
            record_writer.write(example.SerializeToString())


def genTFRecords(_paths,mode_dir,records_per_file=None):
    """Shuffle the paths and write them out as fixed-size TFRecord files.

    ``_paths`` is shuffled in place, cut into consecutive chunks and every
    full chunk is shuffled again and written with ``to_tfrecord`` as
    ``<mode_dir>/<chunk index>.tfrecord``. A trailing chunk smaller than the
    chunk size is not written.

    Args:
        _paths: List of image paths; its order is changed by this call.
        mode_dir: Folder that receives the record files.
        records_per_file: Number of examples per record file. When ``None``
            (the default) the module-level ``data_num`` is used, read at
            call time.

    Raises:
        ValueError: If the chunk size is not a positive integer.
    """
    # Read the global here rather than as a default argument, so rebinding
    # data_num between calls keeps working.
    chunk_size=data_num if records_per_file is None else records_per_file
    if chunk_size<=0:
        raise ValueError('records_per_file must be a positive integer')
    random.shuffle(_paths)
    # Integer division counts full chunks only; the remainder is skipped.
    for r_num in range(len(_paths)//chunk_size):
        image_paths=_paths[r_num*chunk_size:(r_num+1)*chunk_size]
        random.shuffle(image_paths)
        to_tfrecord(image_paths,mode_dir,r_num)

# Test Data Addition

In [7]:
# Every image in the Train folder (merged originals and augmented copies),
# in random order.
data_paths=list(tqdm(glob(os.path.join(Train_dir,'images','*.*'))))
random.shuffle(data_paths)
# The first NB_EVAL paths form the Eval split, the next NB_TRAIN the Train split.
eval_paths=data_paths[:NB_EVAL]
train_paths=data_paths[NB_EVAL:NB_EVAL+NB_TRAIN]
# Number of samples the Test folder should hold.
test_num=128
# test data addition
test_count=len(glob(os.path.join(Test_dir,'images','*.*')))
# Leftover samples not assigned to Eval or Train. They are moved into the Test
# folder until it holds test_num samples.
# NOTE(review): these leftovers can be augmented copies of images that stay in
# Train/Eval, so Test is not fully independent -- confirm this is intended.
test_paths=data_paths[NB_EVAL+NB_TRAIN:]
for img_path in tqdm(test_paths):
    # Check before moving. The previous equality test ran after the move and
    # never fired when Test already held test_num or more samples, so every
    # leftover was moved.
    if test_count>=test_num:
        break
    # Build the mask path from the file name only, so other path components
    # containing 'images' are not rewritten.
    msk_path=os.path.join(Train_dir,'targets',os.path.basename(img_path))
    # image
    shutil.move(img_path,os.path.join(Test_dir,'images','{}.png'.format(test_count)))
    #target
    shutil.move(msk_path,os.path.join(Test_dir,'targets','{}.png'.format(test_count)))
    test_count+=1

# Re-read the Test folder so test_paths lists the final files there.
test_paths=glob(os.path.join(Test_dir,'images','*.*'))



HBox(children=(FloatProgress(value=0.0, max=11352.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=88.0), HTML(value='')))




In [8]:
# Train and Eval records are written with the current value of data_num.
genTFRecords(train_paths,tf_train)
genTFRecords(eval_paths,tf_eval)
# genTFRecords takes its chunk size from the module-level data_num, so it is
# rebound to test_num for the Test records.
# data_num keeps this value afterwards: re-running this cell without first
# re-running the cell that defines data_num would write Train/Eval records
# of test_num examples each.
data_num=test_num
genTFRecords(test_paths,tf_test)

/media/ansary/DriveData/RESEARCH/CMFD/DATA/DataSets/CMFDTF/Train/0.tfrecord


HBox(children=(FloatProgress(value=0.0, max=1024.0), HTML(value='')))


/media/ansary/DriveData/RESEARCH/CMFD/DATA/DataSets/CMFDTF/Train/1.tfrecord


HBox(children=(FloatProgress(value=0.0, max=1024.0), HTML(value='')))


/media/ansary/DriveData/RESEARCH/CMFD/DATA/DataSets/CMFDTF/Train/2.tfrecord


HBox(children=(FloatProgress(value=0.0, max=1024.0), HTML(value='')))


/media/ansary/DriveData/RESEARCH/CMFD/DATA/DataSets/CMFDTF/Train/3.tfrecord


HBox(children=(FloatProgress(value=0.0, max=1024.0), HTML(value='')))


/media/ansary/DriveData/RESEARCH/CMFD/DATA/DataSets/CMFDTF/Train/4.tfrecord


HBox(children=(FloatProgress(value=0.0, max=1024.0), HTML(value='')))


/media/ansary/DriveData/RESEARCH/CMFD/DATA/DataSets/CMFDTF/Train/5.tfrecord


HBox(children=(FloatProgress(value=0.0, max=1024.0), HTML(value='')))


/media/ansary/DriveData/RESEARCH/CMFD/DATA/DataSets/CMFDTF/Train/6.tfrecord


HBox(children=(FloatProgress(value=0.0, max=1024.0), HTML(value='')))


/media/ansary/DriveData/RESEARCH/CMFD/DATA/DataSets/CMFDTF/Train/7.tfrecord


HBox(children=(FloatProgress(value=0.0, max=1024.0), HTML(value='')))


/media/ansary/DriveData/RESEARCH/CMFD/DATA/DataSets/CMFDTF/Train/8.tfrecord


HBox(children=(FloatProgress(value=0.0, max=1024.0), HTML(value='')))


/media/ansary/DriveData/RESEARCH/CMFD/DATA/DataSets/CMFDTF/Train/9.tfrecord


HBox(children=(FloatProgress(value=0.0, max=1024.0), HTML(value='')))


/media/ansary/DriveData/RESEARCH/CMFD/DATA/DataSets/CMFDTF/Eval/0.tfrecord


HBox(children=(FloatProgress(value=0.0, max=1024.0), HTML(value='')))


/media/ansary/DriveData/RESEARCH/CMFD/DATA/DataSets/CMFDTF/Test/0.tfrecord


HBox(children=(FloatProgress(value=0.0, max=128.0), HTML(value='')))


