In [1]:
import os
import sys
import random

import numpy as np
import cv2

# Choose the output image size here (side length, in pixels, of the square images
# produced by DataGen). Delete the compressed dataset first if it has already been
# generated: this notebook will not overwrite an existing compressed dataset.
image_size = 256

In [2]:
# Data generator: reads image/mask pairs from a dataset and exports resized, padded copies of them.

class DataGen():
    """Load image/mask pairs and export them resized onto a square canvas.

    Every image is scaled so that its longer side equals ``image_size`` and is
    then padded with black borders up to ``image_size`` x ``image_size``.
    The mask belonging to an image is looked up in ``path2`` under the name
    ``<image name without extension>_segmentation.png``.

    Parameters
    ----------
    ids : list
        Image file names (relative to ``path``).
    path : str
        Directory containing the input images.
    path2 : str
        Directory containing the ground-truth masks.
    batch_size : int
        Stored on the instance; not used by the methods of this class.
    image_size : int
        Side length, in pixels, of the square output.
    """

    def __init__(self, ids, path, path2, batch_size=8, image_size=128):
        self.ids = ids
        self.path = path
        self.path2 = path2
        self.batch_size = batch_size
        self.image_size = image_size

    @staticmethod
    def _mask_name(id_name):
        """Return the mask file name that corresponds to image ``id_name``."""
        # splitext instead of [:-4] so extensions of any length (.jpeg, .tiff)
        # map to the right mask name; identical for 3-character extensions.
        return os.path.splitext(id_name)[0] + "_segmentation.png"

    @staticmethod
    def _split_padding(delta):
        """Split ``delta`` pixels of padding into (before, after) amounts."""
        before = delta // 2
        return before, delta - before

    @staticmethod
    def _to_uint8(normalised):
        """Map an array in the [0, 1] range back to 8-bit pixel values."""
        return np.clip(np.rint(normalised * 255), 0, 255).astype(np.uint8)

    def _letterbox(self, img):
        """Resize ``img`` to fit ``image_size`` and pad it to a square with black."""
        h, w = img.shape[:2]
        scale_f = self.image_size / max(h, w)
        # max(1, ...) keeps extreme aspect ratios from rounding a side down to 0,
        # which cv2.resize rejects.
        new_w = max(1, int(round(w * scale_f)))
        new_h = max(1, int(round(h * scale_f)))
        # NOTE(review): the default (bilinear) interpolation is kept for masks as
        # well, so mask edges can contain intermediate gray values. Consider
        # cv2.INTER_NEAREST for masks if strictly binary output is required.
        img = cv2.resize(img, (new_w, new_h))
        top, bottom = self._split_padding(self.image_size - new_h)
        left, right = self._split_padding(self.image_size - new_w)
        return cv2.copyMakeBorder(img, top, bottom, left, right,
                                  cv2.BORDER_CONSTANT, value=[0, 0, 0])

    def __load__(self, id_name):
        """Return the normalised ``(image, mask)`` pair for ``id_name``.

        Both arrays are letterboxed to ``image_size`` and divided by 255.0.
        The mask additionally receives a trailing axis of length 1.

        Raises
        ------
        OSError
            If the image or its mask cannot be read (missing or unreadable file).
        """
        image_path = os.path.join(self.path, id_name)
        mask_path = os.path.join(self.path2, self._mask_name(id_name))

        # cv2.imread signals failure by returning None instead of raising.
        image = cv2.imread(image_path)
        if image is None:
            raise OSError("could not read image: " + image_path)

        # Flag -1 reads the file unchanged (no colour conversion).
        # NOTE(review): presumably the masks are single-channel PNGs - confirm.
        mask = cv2.imread(mask_path, -1)
        if mask is None:
            raise OSError("could not read mask: " + mask_path)

        image = self._letterbox(image)
        mask = np.expand_dims(self._letterbox(mask), axis=-1)

        ## Normalizing
        return image / 255.0, mask / 255.0

    def __saveitem__(self, id_name, location, datatype):
        """Load ``id_name`` and write its resized image and mask to disk.

        Files are written under ``dataset/<location>/Segmentazione/<datatype>/``
        into the ``Input`` and ``GroundTruth`` sub-folders, which must already
        exist.

        Raises
        ------
        OSError
            If a source file cannot be read or an output file cannot be written.
        """
        _img, _mask = self.__load__(id_name)
        base = os.path.join('dataset', location, 'Segmentazione', datatype)
        outputs = (
            (os.path.join(base, 'Input', id_name), _img),
            (os.path.join(base, 'GroundTruth', self._mask_name(id_name)), _mask),
        )
        for out_path, pixels in outputs:
            # cv2.imwrite reports failure through its return value only.
            if not cv2.imwrite(out_path, self._to_uint8(pixels)):
                raise OSError("could not write " + out_path)


In [3]:
# Select dataset
sel_dat = "Dataset_Nei"

train_path = "dataset/" + sel_dat + "/Segmentazione/ISIC2018_Task1-2_Training_Input"
gt_path = "dataset/" + sel_dat + "/Segmentazione/ISIC2018_Task1_Training_GroundTruth"

# Seed for the train/validation/test shuffle, so the split is reproducible.
split_seed = 42


def _list_files(directory):
    """Return the sorted names of the files located directly in `directory`."""
    if not os.path.isdir(directory):
        raise FileNotFoundError("dataset directory not found: " + directory)
    # Sorted because the directory listing order depends on the file system.
    return sorted(next(os.walk(directory))[2])


# Training Ids
img_ids = _list_files(train_path)
gt_ids = _list_files(gt_path)
# A dedicated Random instance keeps the global random state untouched.
random.Random(split_seed).shuffle(img_ids)

img_data_size = len(img_ids)
#img_data_size = 100 #Total images to be used size. Comment to use the whole dataset
img_ids = img_ids[:img_data_size]

## Validation / Test Data Size
val_data_size = 24*10
test_data_size = 12*10

# Layout of the shuffled list: [ validation | test | training ]
valid_ids = img_ids[:val_data_size]
test_ids = img_ids[val_data_size:val_data_size + test_data_size]
train_ids = img_ids[val_data_size + test_data_size:]
train_data_size = len(train_ids)

if not train_ids:
    print("WARNING: no images left for training after the validation/test split")

print("# of training IDs: " + str(len(train_ids)))
print("# of validation IDs: " + str(len(valid_ids)))
print("# of test IDs: " + str(len(test_ids)))
print("Total # of IDs: " + str(len(img_ids)))

gen_train = DataGen(train_ids, train_path, gt_path, batch_size=1, image_size=image_size)
gen_valid = DataGen(valid_ids, train_path, gt_path, batch_size=1, image_size=image_size)
gen_test = DataGen(test_ids, train_path, gt_path, batch_size=1, image_size=image_size)

# of training IDs: 2234
# of validation IDs: 240
# of test IDs: 120
Total # of IDs: 2594


In [4]:
# Create the directory tree and export the compressed dataset, but only when the
# target folder does not exist yet (an existing dataset is never overwritten).
# NOTE: if an export is interrupted, delete the folder before re-running,
# otherwise the partial dataset is kept as is.

directory = "dataset/Dataset_compressed_" + str(image_size)

if not os.path.exists(directory):
    for split_name in ("Training", "Validation", "Test"):
        for sub_folder in ("Input", "GroundTruth"):
            os.makedirs(os.path.join(directory, "Segmentazione", split_name, sub_folder))

    # Saves the dataset in the /Dataset_compressed_[size] folder. Size depending on image_size
    location = "Dataset_compressed_" + str(image_size)
    available_masks = set(gt_ids)  # set: constant-time membership tests

    exports = (
        (gen_train, train_ids, "Training"),
        (gen_valid, valid_ids, "Validation"),
        (gen_test, test_ids, "Test"),
    )
    for generator, ids, datatype in exports:
        for id_name in ids:
            # Mask naming convention: <image name minus 4-character suffix>_segmentation.png
            if id_name[:-4] + "_segmentation.png" in available_masks:
                generator.__saveitem__(id_name, location, datatype)
            else:
                # Images without a ground-truth mask are skipped, but reported.
                print("could not save " + id_name)
