### Resample subimage(s): pick the crop location at random

In [None]:
from PIL import Image
import numpy as np
import os

def _enlarged_span(lo, hi, limit, scale, min_size=480):
    """Enlarge the 1-D interval [lo, hi) and keep it inside [0, limit].

    Args:
        lo (int): start of the original interval.
        hi (int): end (exclusive) of the original interval.
        limit (int): image extent along this axis.
        scale (float): enlargement factor applied when the interval is
            longer than min_size.
        min_size (int): target length for intervals not longer than this.

    Returns:
        tuple: (start, stop, new_size). new_size is the requested length;
        stop - start may be smaller when the image itself is shorter than
        new_size along this axis.
    """
    old_size = hi - lo
    if old_size <= min_size:
        new_size = min_size
    else:
        new_size = min(int(scale * old_size), limit)

    # Centre the enlarged interval on the original one ...
    start = max(lo - int((new_size - old_size) / 2.0), 0)
    stop = start + new_size
    # ... and slide it back inside the image if it overshoots the far edge.
    if stop > limit:
        start = max(start - (stop - limit), 0)
        stop = limit
    return start, stop, new_size


def crop_resample(image_file, mask_file, outdir, outfile,
                  num_resample=1, min_size=480):
    """Crop the masked region from an image/mask pair and resample random patches.

    The bounding box of the non-zero pixels of the mask is enlarged by a
    random factor drawn uniformly from [1.1, 1.3). A side not longer than
    min_size is set to min_size instead. The enlarged box is shifted so
    that it stays inside the image, and is then cropped from both the image
    and the mask. The two crops are saved into outdir under the original
    base names.

    Optionally, num_resample additional patches of the same size are cropped
    at uniformly random positions and saved with a "_NN" suffix. Resampling
    is skipped when the patch is as large as the image along either axis, or
    when the mask bounding box covers more than 60% of the image area.

    One "<image name> <mask name>" line is written to outfile for every
    saved pair, and a progress line is printed to stdout.

    Args:
        image_file (str): file path to the input image.
        mask_file (str): file path to the binary mask image.
        outdir (str): directory the cropped images are saved into.
        outfile: writable text file object receiving the saved name pairs.
        num_resample (int): number of random patches to resample; no
            resampling if it is 0.
        min_size (int): minimum requested side length of a crop, in pixels.

    Returns:
        int: number of resampled patch pairs that were saved.

    Raises:
        FileNotFoundError: if image_file or mask_file is not an existing file.
        ValueError: if the mask contains no non-zero pixel.
    """
    for path in (image_file, mask_file):
        if not os.path.isfile(path):
            raise FileNotFoundError(path)

    image_name = os.path.basename(image_file)
    mask_name = os.path.basename(mask_file)
    image_stem, image_ext = os.path.splitext(image_name)
    mask_stem, mask_ext = os.path.splitext(mask_name)

    # Context managers make sure both image files are closed again.
    with Image.open(image_file) as img, Image.open(mask_file) as mask:
        # (upperleft_x, upperleft_y, bottomright_x, bottomright_y)
        bbox = mask.getbbox()
        if bbox is None:
            raise ValueError('mask has no non-zero pixels: ' + mask_file)

        # random enlargement factor, shared by both axes
        scale_low, scale_high = 1.1, 1.3
        crop_enlarge_size = ((scale_high - scale_low)
                             * np.random.random_sample() + scale_low)

        # crop so that both dimensions are at least min_size pixels
        # (whenever the image is large enough)
        upperleft_y, bottomright_y, new_height = _enlarged_span(
            bbox[1], bbox[3], img.height, crop_enlarge_size, min_size)
        upperleft_x, bottomright_x, new_width = _enlarged_span(
            bbox[0], bbox[2], img.width, crop_enlarge_size, min_size)
        enlarged_bbox = (upperleft_x, upperleft_y,
                         bottomright_x, bottomright_y)

        # Report crops that ended up smaller than requested (image too small).
        crop_width = bottomright_x - upperleft_x
        crop_height = bottomright_y - upperleft_y
        if crop_width < min_size or crop_height < min_size:
            print(image_file + " " + str(crop_width) + " " + str(crop_height))

        # crop the enlarged area from the image and the mask
        img.crop(enlarged_bbox).save(os.path.join(outdir, image_name))
        mask.crop(enlarged_bbox).save(os.path.join(outdir, mask_name))

        print('training: ' + image_name + " " + mask_name)
        print(image_name + " " + mask_name, file=outfile)

        # No room for a different random window, or the masked region
        # dominates the image: nothing worthwhile to resample.
        old_width = bbox[2] - bbox[0]
        old_height = bbox[3] - bbox[1]
        bbox_fraction = (old_width * old_height) / (img.width * img.height)
        if (new_width >= img.width or new_height >= img.height
                or bbox_fraction > 0.6):
            return 0

        resample_cnt = 0
        for i in range(num_resample):
            # random top-left corner such that the patch fits in the image
            rand_loc_y = int(np.random.randint(
                low=0, high=img.height - new_height, size=1)[0])
            rand_loc_x = int(np.random.randint(
                low=0, high=img.width - new_width, size=1)[0])
            rand_bbox = (rand_loc_x, rand_loc_y,
                         rand_loc_x + new_width, rand_loc_y + new_height)

            suffix = '_{:02d}'.format(i + 1)
            resampled_image_name = image_stem + suffix + image_ext
            resampled_mask_name = mask_stem + suffix + mask_ext

            img.crop(rand_bbox).save(
                os.path.join(outdir, resampled_image_name))
            mask.crop(rand_bbox).save(
                os.path.join(outdir, resampled_mask_name))

            print('training: ' + resampled_image_name + " "
                  + resampled_mask_name)
            print(resampled_image_name + " " + resampled_mask_name,
                  file=outfile)
            resample_cnt += 1

    return resample_cnt
    

### Resample subimage(s): keep the candidate with the most background pixels among 100 random crops

In [1]:
from PIL import Image
import numpy as np
import os

def _enlarged_span(lo, hi, limit, scale, min_size=480):
    """Enlarge the 1-D interval [lo, hi) and keep it inside [0, limit].

    Args:
        lo (int): start of the original interval.
        hi (int): end (exclusive) of the original interval.
        limit (int): image extent along this axis.
        scale (float): enlargement factor applied when the interval is
            longer than min_size.
        min_size (int): target length for intervals not longer than this.

    Returns:
        tuple: (start, stop, new_size). new_size is the requested length;
        stop - start may be smaller when the image itself is shorter than
        new_size along this axis.
    """
    old_size = hi - lo
    if old_size <= min_size:
        new_size = min_size
    else:
        new_size = min(int(scale * old_size), limit)

    # Centre the enlarged interval on the original one ...
    start = max(lo - int((new_size - old_size) / 2.0), 0)
    stop = start + new_size
    # ... and slide it back inside the image if it overshoots the far edge.
    if stop > limit:
        start = max(start - (stop - limit), 0)
        stop = limit
    return start, stop, new_size


def crop_resample(image_file, mask_file, outdir, outfile,
                  num_resample=1, min_size=480, num_trials=100):
    """Crop the masked region from an image/mask pair and resample background patches.

    The bounding box of the non-zero pixels of the mask is enlarged by a
    random factor drawn uniformly from [1.1, 1.3). A side not longer than
    min_size is set to min_size instead. The enlarged box is shifted so
    that it stays inside the image, and is then cropped from both the image
    and the mask. The two crops are saved into outdir under the original
    base names.

    Optionally, num_resample additional patches of the same size are saved
    with a "_NN" suffix. For each of them up to num_trials random windows
    are drawn and the one whose mask crop has the fewest non-zero values
    (i.e. the most background) is kept. If no window has fewer non-zero
    values than the window area, the enlarged bounding box itself is used.
    Resampling is skipped when the patch is as large as the image along
    either axis, or when the mask bounding box covers more than 60% of the
    image area.

    One "<image name> <mask name>" line is written to outfile for every
    saved pair, and a progress line is printed to stdout.

    Args:
        image_file (str): file path to the input image.
        mask_file (str): file path to the binary mask image.
        outdir (str): directory the cropped images are saved into.
        outfile: writable text file object receiving the saved name pairs.
        num_resample (int): number of patches to resample; no resampling
            if it is 0.
        min_size (int): minimum requested side length of a crop, in pixels.
        num_trials (int): number of random candidate windows per resample.

    Returns:
        int: number of resampled patch pairs that were saved.

    Raises:
        FileNotFoundError: if image_file or mask_file is not an existing file.
        ValueError: if the mask contains no non-zero pixel.
    """
    for path in (image_file, mask_file):
        if not os.path.isfile(path):
            raise FileNotFoundError(path)

    image_name = os.path.basename(image_file)
    mask_name = os.path.basename(mask_file)
    image_stem, image_ext = os.path.splitext(image_name)
    mask_stem, mask_ext = os.path.splitext(mask_name)

    # Context managers make sure both image files are closed again.
    with Image.open(image_file) as img, Image.open(mask_file) as mask:
        # (upperleft_x, upperleft_y, bottomright_x, bottomright_y)
        bbox = mask.getbbox()
        if bbox is None:
            raise ValueError('mask has no non-zero pixels: ' + mask_file)

        # random enlargement factor, shared by both axes
        scale_low, scale_high = 1.1, 1.3
        crop_enlarge_size = ((scale_high - scale_low)
                             * np.random.random_sample() + scale_low)

        # crop so that both dimensions are at least min_size pixels
        # (whenever the image is large enough)
        upperleft_y, bottomright_y, new_height = _enlarged_span(
            bbox[1], bbox[3], img.height, crop_enlarge_size, min_size)
        upperleft_x, bottomright_x, new_width = _enlarged_span(
            bbox[0], bbox[2], img.width, crop_enlarge_size, min_size)
        enlarged_bbox = (upperleft_x, upperleft_y,
                         bottomright_x, bottomright_y)

        # Report crops that ended up smaller than requested (image too small).
        crop_width = bottomright_x - upperleft_x
        crop_height = bottomright_y - upperleft_y
        if crop_width < min_size or crop_height < min_size:
            print(image_file + " " + str(crop_width) + " " + str(crop_height))

        # crop the enlarged area from the image and the mask
        img.crop(enlarged_bbox).save(os.path.join(outdir, image_name))
        mask.crop(enlarged_bbox).save(os.path.join(outdir, mask_name))

        print('training: ' + image_name + " " + mask_name)
        print(image_name + " " + mask_name, file=outfile)

        # No room for a different random window, or the masked region
        # dominates the image: nothing worthwhile to resample.
        old_width = bbox[2] - bbox[0]
        old_height = bbox[3] - bbox[1]
        bbox_fraction = (old_width * old_height) / (img.width * img.height)
        if (new_width >= img.width or new_height >= img.height
                or bbox_fraction > 0.6):
            return 0

        resample_cnt = 0
        for i in range(num_resample):
            # Start from the enlarged box; a candidate replaces it only if
            # its mask crop has fewer non-zero values than the best so far.
            best_bbox = enlarged_bbox
            fewest_white = new_height * new_width

            for _ in range(num_trials):
                rand_loc_y = int(np.random.randint(
                    low=0, high=img.height - new_height, size=1)[0])
                rand_loc_x = int(np.random.randint(
                    low=0, high=img.width - new_width, size=1)[0])
                candidate = (rand_loc_x, rand_loc_y,
                             rand_loc_x + new_width, rand_loc_y + new_height)

                num_white_pix = np.count_nonzero(
                    np.asarray(mask.crop(candidate)))
                if num_white_pix < fewest_white:
                    fewest_white = num_white_pix
                    best_bbox = candidate
                    if fewest_white == 0:
                        # pure background: no later candidate can beat it
                        break

            suffix = '_{:02d}'.format(i + 1)
            resampled_image_name = image_stem + suffix + image_ext
            resampled_mask_name = mask_stem + suffix + mask_ext

            img.crop(best_bbox).save(
                os.path.join(outdir, resampled_image_name))
            mask.crop(best_bbox).save(
                os.path.join(outdir, resampled_mask_name))

            print('training: ' + resampled_image_name + " "
                  + resampled_mask_name)
            print(resampled_image_name + " " + resampled_mask_name,
                  file=outfile)
            resample_cnt += 1

    return resample_cnt
    

In [2]:
import os
from shutil import copy
import errno

### ##################################################################### ###
### This cell crops (and resamples) the training samples only.            ###
### A random 10% of the training set is held out as the validation set    ###
### and copied without any cropping.                                      ###
### Training and validation samples are put into training_out_dir_path.   ###
### ##################################################################### ###

base_dir_path = os.path.normpath('C:/Users/kirta/Documents/research/data')

training_data_dirname = 'ISBI2016_ISIC_Part1_Training_Data'
training_data_dir_path = os.path.join(base_dir_path, training_data_dirname)

training_gt_dirname = 'ISBI2016_ISIC_Part1_Training_GroundTruth'
training_gt_dir_path = os.path.join(base_dir_path, training_gt_dirname)

training_out_dirname = 'ISBI2016_ISIC_Part1_Training_Data_v8_processed'
training_out_dir_path = os.path.join(base_dir_path, training_out_dirname)

# create the output directory unless it already exists
os.makedirs(training_out_dir_path, exist_ok=True)

# Collect image and mask file names (recursively) and sort both lists so
# that the i-th image lines up with the i-th mask.
train_val_image_filenames = sorted(
    name
    for _, _, files in os.walk(training_data_dir_path)
    for name in files)

train_val_mask_filenames = sorted(
    name
    for _, _, files in os.walk(training_gt_dir_path)
    for name in files)

# zip() would silently drop the surplus entries and mis-pair the rest.
if len(train_val_image_filenames) != len(train_val_mask_filenames):
    raise ValueError(
        'number of images ({}) and masks ({}) differ'.format(
            len(train_val_image_filenames), len(train_val_mask_filenames)))

merged = list(zip(train_val_image_filenames, train_val_mask_filenames))
np.random.shuffle(merged)

# both cropping and resampling for training samples
train_list_filename = 'isbi2016_isic_part1_train_v8.list'
train_list_filepath = os.path.join(base_dir_path, train_list_filename)

# no cropping or resizing for validation samples
val_list_filename = 'isbi2016_isic_part1_val_v8.list'
val_list_filepath = os.path.join(base_dir_path, val_list_filename)

num_val = int(0.1*len(train_val_image_filenames))
resample_cnt = 0

# The with-statement closes both list files even if a sample fails.
with open(train_list_filepath, 'w') as train_list_file, \
        open(val_list_filepath, 'w') as validation_list_file:

    for cnt, (image_file, mask_file) in enumerate(merged):

        if cnt < num_val:
            # the first num_val shuffled pairs form the validation set
            print('validation: ' + image_file + " " + mask_file)
            print(image_file + " " + mask_file, file=validation_list_file)
            copy(os.path.join(training_data_dir_path, image_file),
                 training_out_dir_path)
            copy(os.path.join(training_gt_dir_path, mask_file),
                 training_out_dir_path)
        else:
            resample_cnt += crop_resample(
                os.path.join(training_data_dir_path, image_file),
                os.path.join(training_gt_dir_path, mask_file),
                training_out_dir_path, train_list_file)
            print('resample count: ' + str(resample_cnt))
    

validation: ISIC_0000423.jpg ISIC_0000423_Segmentation.png
validation: ISIC_0002093.jpg ISIC_0002093_Segmentation.png
validation: ISIC_0009877.jpg ISIC_0009877_Segmentation.png
validation: ISIC_0010225.jpg ISIC_0010225_Segmentation.png
validation: ISIC_0001188.jpg ISIC_0001188_Segmentation.png
validation: ISIC_0000385.jpg ISIC_0000385_Segmentation.png
validation: ISIC_0009960.jpg ISIC_0009960_Segmentation.png
validation: ISIC_0003174.jpg ISIC_0003174_Segmentation.png
validation: ISIC_0000301.jpg ISIC_0000301_Segmentation.png
validation: ISIC_0010380.jpg ISIC_0010380_Segmentation.png
validation: ISIC_0000281.jpg ISIC_0000281_Segmentation.png
validation: ISIC_0010358.jpg ISIC_0010358_Segmentation.png
validation: ISIC_0000031.jpg ISIC_0000031_Segmentation.png
validation: ISIC_0010558.jpg ISIC_0010558_Segmentation.png
validation: ISIC_0011380.jpg ISIC_0011380_Segmentation.png
validation: ISIC_0006800.jpg ISIC_0006800_Segmentation.png
validation: ISIC_0010191.jpg ISIC_0010191_Segmentation.p

training: ISIC_0011296.jpg ISIC_0011296_Segmentation.png
training: ISIC_0011296_01.jpg ISIC_0011296_Segmentation_01.png
resample count: 18
training: ISIC_0000363.jpg ISIC_0000363_Segmentation.png
training: ISIC_0000363_01.jpg ISIC_0000363_Segmentation_01.png
resample count: 19
training: ISIC_0000109.jpg ISIC_0000109_Segmentation.png
resample count: 19
training: ISIC_0000412.jpg ISIC_0000412_Segmentation.png
resample count: 19
training: ISIC_0000075.jpg ISIC_0000075_Segmentation.png
resample count: 19
training: ISIC_0010554.jpg ISIC_0010554_Segmentation.png
training: ISIC_0010554_01.jpg ISIC_0010554_Segmentation_01.png
resample count: 20
training: ISIC_0000171.jpg ISIC_0000171_Segmentation.png
resample count: 20
training: ISIC_0000444.jpg ISIC_0000444_Segmentation.png
training: ISIC_0000444_01.jpg ISIC_0000444_Segmentation_01.png
resample count: 21
training: ISIC_0010066.jpg ISIC_0010066_Segmentation.png
training: ISIC_0010066_01.jpg ISIC_0010066_Segmentation_01.png
resample count: 22
t

training: ISIC_0000314_01.jpg ISIC_0000314_Segmentation_01.png
resample count: 70
training: ISIC_0011353.jpg ISIC_0011353_Segmentation.png
training: ISIC_0011353_01.jpg ISIC_0011353_Segmentation_01.png
resample count: 71
training: ISIC_0000269.jpg ISIC_0000269_Segmentation.png
training: ISIC_0000269_01.jpg ISIC_0000269_Segmentation_01.png
resample count: 72
training: ISIC_0000500.jpg ISIC_0000500_Segmentation.png
training: ISIC_0000500_01.jpg ISIC_0000500_Segmentation_01.png
resample count: 73
training: ISIC_0010442.jpg ISIC_0010442_Segmentation.png
training: ISIC_0010442_01.jpg ISIC_0010442_Segmentation_01.png
resample count: 74
training: ISIC_0000189.jpg ISIC_0000189_Segmentation.png
resample count: 74
training: ISIC_0009188.jpg ISIC_0009188_Segmentation.png
training: ISIC_0009188_01.jpg ISIC_0009188_Segmentation_01.png
resample count: 75
training: ISIC_0008294.jpg ISIC_0008294_Segmentation.png
training: ISIC_0008294_01.jpg ISIC_0008294_Segmentation_01.png
resample count: 76
training

training: ISIC_0000260.jpg ISIC_0000260_Segmentation.png
training: ISIC_0000260_01.jpg ISIC_0000260_Segmentation_01.png
resample count: 114
training: ISIC_0001163.jpg ISIC_0001163_Segmentation.png
training: ISIC_0001163_01.jpg ISIC_0001163_Segmentation_01.png
resample count: 115
training: ISIC_0000006.jpg ISIC_0000006_Segmentation.png
training: ISIC_0000006_01.jpg ISIC_0000006_Segmentation_01.png
resample count: 116
training: ISIC_0010057.jpg ISIC_0010057_Segmentation.png
resample count: 116
training: ISIC_0000522.jpg ISIC_0000522_Segmentation.png
resample count: 116
training: ISIC_0010480.jpg ISIC_0010480_Segmentation.png
training: ISIC_0010480_01.jpg ISIC_0010480_Segmentation_01.png
resample count: 117
training: ISIC_0000137.jpg ISIC_0000137_Segmentation.png
training: ISIC_0000137_01.jpg ISIC_0000137_Segmentation_01.png
resample count: 118
training: ISIC_0010320.jpg ISIC_0010320_Segmentation.png
resample count: 118
training: ISIC_0010233.jpg ISIC_0010233_Segmentation.png
training: IS

training: ISIC_0000050_01.jpg ISIC_0000050_Segmentation_01.png
resample count: 163
training: ISIC_0010496.jpg ISIC_0010496_Segmentation.png
resample count: 163
training: ISIC_0000493.jpg ISIC_0000493_Segmentation.png
resample count: 163
training: ISIC_0000229.jpg ISIC_0000229_Segmentation.png
resample count: 163
training: ISIC_0010595.jpg ISIC_0010595_Segmentation.png
resample count: 163
training: ISIC_0000521.jpg ISIC_0000521_Segmentation.png
resample count: 163
training: ISIC_0011120.jpg ISIC_0011120_Segmentation.png
resample count: 163
training: ISIC_0011366.jpg ISIC_0011366_Segmentation.png
training: ISIC_0011366_01.jpg ISIC_0011366_Segmentation_01.png
resample count: 164
training: ISIC_0000528.jpg ISIC_0000528_Segmentation.png
training: ISIC_0000528_01.jpg ISIC_0000528_Segmentation_01.png
resample count: 165
training: ISIC_0002374.jpg ISIC_0002374_Segmentation.png
training: ISIC_0002374_01.jpg ISIC_0002374_Segmentation_01.png
resample count: 166
training: ISIC_0000369.jpg ISIC_000

training: ISIC_0001102.jpg ISIC_0001102_Segmentation.png
resample count: 207
training: ISIC_0009915.jpg ISIC_0009915_Segmentation.png
training: ISIC_0009915_01.jpg ISIC_0009915_Segmentation_01.png
resample count: 208
training: ISIC_0011079.jpg ISIC_0011079_Segmentation.png
resample count: 208
training: ISIC_0009929.jpg ISIC_0009929_Segmentation.png
training: ISIC_0009929_01.jpg ISIC_0009929_Segmentation_01.png
resample count: 209
training: ISIC_0010577.jpg ISIC_0010577_Segmentation.png
training: ISIC_0010577_01.jpg ISIC_0010577_Segmentation_01.png
resample count: 210
training: ISIC_0001254.jpg ISIC_0001254_Segmentation.png
training: ISIC_0001254_01.jpg ISIC_0001254_Segmentation_01.png
resample count: 211
training: ISIC_0010264.jpg ISIC_0010264_Segmentation.png
resample count: 211
training: ISIC_0002476.jpg ISIC_0002476_Segmentation.png
training: ISIC_0002476_01.jpg ISIC_0002476_Segmentation_01.png
resample count: 212
training: ISIC_0000016.jpg ISIC_0000016_Segmentation.png
training: IS

training: ISIC_0000108.jpg ISIC_0000108_Segmentation.png
training: ISIC_0000108_01.jpg ISIC_0000108_Segmentation_01.png
resample count: 259
training: ISIC_0010267.jpg ISIC_0010267_Segmentation.png
resample count: 259
training: ISIC_0004985.jpg ISIC_0004985_Segmentation.png
training: ISIC_0004985_01.jpg ISIC_0004985_Segmentation_01.png
resample count: 260
training: ISIC_0005666.jpg ISIC_0005666_Segmentation.png
training: ISIC_0005666_01.jpg ISIC_0005666_Segmentation_01.png
resample count: 261
training: ISIC_0000173.jpg ISIC_0000173_Segmentation.png
training: ISIC_0000173_01.jpg ISIC_0000173_Segmentation_01.png
resample count: 262
training: ISIC_0000235.jpg ISIC_0000235_Segmentation.png
resample count: 262
training: ISIC_0000900.jpg ISIC_0000900_Segmentation.png
training: ISIC_0000900_01.jpg ISIC_0000900_Segmentation_01.png
resample count: 263
training: ISIC_0000333.jpg ISIC_0000333_Segmentation.png
training: ISIC_0000333_01.jpg ISIC_0000333_Segmentation_01.png
resample count: 264
traini

training: ISIC_0010022.jpg ISIC_0010022_Segmentation.png
resample count: 304
training: ISIC_0000413.jpg ISIC_0000413_Segmentation.png
resample count: 304
training: ISIC_0000152.jpg ISIC_0000152_Segmentation.png
training: ISIC_0000152_01.jpg ISIC_0000152_Segmentation_01.png
resample count: 305
training: ISIC_0011207.jpg ISIC_0011207_Segmentation.png
training: ISIC_0011207_01.jpg ISIC_0011207_Segmentation_01.png
resample count: 306
training: ISIC_0010005.jpg ISIC_0010005_Segmentation.png
resample count: 306
training: ISIC_0000349.jpg ISIC_0000349_Segmentation.png
training: ISIC_0000349_01.jpg ISIC_0000349_Segmentation_01.png
resample count: 307
training: ISIC_0008145.jpg ISIC_0008145_Segmentation.png
training: ISIC_0008145_01.jpg ISIC_0008145_Segmentation_01.png
resample count: 308
training: ISIC_0000237.jpg ISIC_0000237_Segmentation.png
training: ISIC_0000237_01.jpg ISIC_0000237_Segmentation_01.png
resample count: 309
training: ISIC_0000067.jpg ISIC_0000067_Segmentation.png
training: IS

training: ISIC_0011220_01.jpg ISIC_0011220_Segmentation_01.png
resample count: 356
training: ISIC_0011354.jpg ISIC_0011354_Segmentation.png
training: ISIC_0011354_01.jpg ISIC_0011354_Segmentation_01.png
resample count: 357
training: ISIC_0000259.jpg ISIC_0000259_Segmentation.png
resample count: 357
training: ISIC_0000140.jpg ISIC_0000140_Segmentation.png
training: ISIC_0000140_01.jpg ISIC_0000140_Segmentation_01.png
resample count: 358
training: ISIC_0011334.jpg ISIC_0011334_Segmentation.png
resample count: 358
training: ISIC_0008541.jpg ISIC_0008541_Segmentation.png
training: ISIC_0008541_01.jpg ISIC_0008541_Segmentation_01.png
resample count: 359
training: ISIC_0011144.jpg ISIC_0011144_Segmentation.png
training: ISIC_0011144_01.jpg ISIC_0011144_Segmentation_01.png
resample count: 360
training: ISIC_0010571.jpg ISIC_0010571_Segmentation.png
training: ISIC_0010571_01.jpg ISIC_0010571_Segmentation_01.png
resample count: 361
training: ISIC_0006940.jpg ISIC_0006940_Segmentation.png
traini

training: ISIC_0000391.jpg ISIC_0000391_Segmentation.png
resample count: 407
training: ISIC_0000046.jpg ISIC_0000046_Segmentation.png
training: ISIC_0000046_01.jpg ISIC_0000046_Segmentation_01.png
resample count: 408
training: ISIC_0005564.jpg ISIC_0005564_Segmentation.png
training: ISIC_0005564_01.jpg ISIC_0005564_Segmentation_01.png
resample count: 409
training: ISIC_0010455.jpg ISIC_0010455_Segmentation.png
resample count: 409
training: ISIC_0000065.jpg ISIC_0000065_Segmentation.png
training: ISIC_0000065_01.jpg ISIC_0000065_Segmentation_01.png
resample count: 410
training: ISIC_0010186.jpg ISIC_0010186_Segmentation.png
resample count: 410
training: ISIC_0001267.jpg ISIC_0001267_Segmentation.png
training: ISIC_0001267_01.jpg ISIC_0001267_Segmentation_01.png
resample count: 411
training: ISIC_0000486.jpg ISIC_0000486_Segmentation.png
training: ISIC_0000486_01.jpg ISIC_0000486_Segmentation_01.png
resample count: 412
training: ISIC_0010176.jpg ISIC_0010176_Segmentation.png
training: IS

training: ISIC_0000214_01.jpg ISIC_0000214_Segmentation_01.png
resample count: 458
training: ISIC_0000360.jpg ISIC_0000360_Segmentation.png
training: ISIC_0000360_01.jpg ISIC_0000360_Segmentation_01.png
resample count: 459
training: ISIC_0010060.jpg ISIC_0010060_Segmentation.png
training: ISIC_0010060_01.jpg ISIC_0010060_Segmentation_01.png
resample count: 460
training: ISIC_0011097.jpg ISIC_0011097_Segmentation.png
training: ISIC_0011097_01.jpg ISIC_0011097_Segmentation_01.png
resample count: 461
training: ISIC_0000054.jpg ISIC_0000054_Segmentation.png
training: ISIC_0000054_01.jpg ISIC_0000054_Segmentation_01.png
resample count: 462
training: ISIC_0010356.jpg ISIC_0010356_Segmentation.png
training: ISIC_0010356_01.jpg ISIC_0010356_Segmentation_01.png
resample count: 463
training: ISIC_0000116.jpg ISIC_0000116_Segmentation.png
training: ISIC_0000116_01.jpg ISIC_0000116_Segmentation_01.png
resample count: 464
training: ISIC_0010252.jpg ISIC_0010252_Segmentation.png
resample count: 464


training: ISIC_0000496.jpg ISIC_0000496_Segmentation.png
resample count: 509
training: ISIC_0011382.jpg ISIC_0011382_Segmentation.png
training: ISIC_0011382_01.jpg ISIC_0011382_Segmentation_01.png
resample count: 510
training: ISIC_0010104.jpg ISIC_0010104_Segmentation.png
training: ISIC_0010104_01.jpg ISIC_0010104_Segmentation_01.png
resample count: 511
training: ISIC_0010382.jpg ISIC_0010382_Segmentation.png
resample count: 511
training: ISIC_0000498.jpg ISIC_0000498_Segmentation.png
training: ISIC_0000498_01.jpg ISIC_0000498_Segmentation_01.png
resample count: 512
training: ISIC_0010025.jpg ISIC_0010025_Segmentation.png
training: ISIC_0010025_01.jpg ISIC_0010025_Segmentation_01.png
resample count: 513
training: ISIC_0010054.jpg ISIC_0010054_Segmentation.png
training: ISIC_0010054_01.jpg ISIC_0010054_Segmentation_01.png
resample count: 514
training: ISIC_0007788.jpg ISIC_0007788_Segmentation.png
training: ISIC_0007788_01.jpg ISIC_0007788_Segmentation_01.png
resample count: 515
traini

training: ISIC_0010029.jpg ISIC_0010029_Segmentation.png
resample count: 558
training: ISIC_0010372.jpg ISIC_0010372_Segmentation.png
training: ISIC_0010372_01.jpg ISIC_0010372_Segmentation_01.png
resample count: 559
training: ISIC_0000255.jpg ISIC_0000255_Segmentation.png
training: ISIC_0000255_01.jpg ISIC_0000255_Segmentation_01.png
resample count: 560
training: ISIC_0000367.jpg ISIC_0000367_Segmentation.png
training: ISIC_0000367_01.jpg ISIC_0000367_Segmentation_01.png
resample count: 561
training: ISIC_0010465.jpg ISIC_0010465_Segmentation.png
training: ISIC_0010465_01.jpg ISIC_0010465_Segmentation_01.png
resample count: 562
training: ISIC_0011200.jpg ISIC_0011200_Segmentation.png
training: ISIC_0011200_01.jpg ISIC_0011200_Segmentation_01.png
resample count: 563
training: ISIC_0009932.jpg ISIC_0009932_Segmentation.png
resample count: 563
training: ISIC_0000236.jpg ISIC_0000236_Segmentation.png
resample count: 563
training: ISIC_0010189.jpg ISIC_0010189_Segmentation.png
resample cou

In [None]:
# Write the list of test image/mask name pairs; the test samples themselves
# are neither cropped nor resized.
test_data_dirname = 'ISBI2016_ISIC_Part1_Test_Data'
test_data_dir_path = os.path.join(base_dir_path, test_data_dirname)

test_gt_dirname = 'ISBI2016_ISIC_Part1_Test_GroundTruth'
test_gt_dir_path = os.path.join(base_dir_path, test_gt_dirname)

# Collect image and mask file names (recursively) and sort both lists so
# that the i-th image lines up with the i-th mask.
test_image_filenames = sorted(
    name
    for _, _, files in os.walk(test_data_dir_path)
    for name in files)

test_mask_filenames = sorted(
    name
    for _, _, files in os.walk(test_gt_dir_path)
    for name in files)

# zip() would silently drop the surplus entries and mis-pair the rest.
if len(test_image_filenames) != len(test_mask_filenames):
    raise ValueError(
        'number of test images ({}) and masks ({}) differ'.format(
            len(test_image_filenames), len(test_mask_filenames)))

# no cropping or resizing for testing samples
test_list_filename = 'isbi2016_isic_part1_test_v8.list'
test_list_filepath = os.path.join(base_dir_path, test_list_filename)

merged = list(zip(test_image_filenames, test_mask_filenames))

# The with-statement closes the list file even if writing fails.
with open(test_list_filepath, 'w') as test_list_file:
    for image_file, mask_file in merged:
        print('test: ' + image_file + " " + mask_file)
        print(image_file + " " + mask_file, file=test_list_file)

In [5]:
from PIL import Image
import numpy as np
import os

# Sanity check: report every file in the processed training directory whose
# width or height is below 480 pixels.
base_dir_path = os.path.normpath('C:/Users/kirta/Documents/research/data')

training_out_dirname = 'ISBI2016_ISIC_Part1_Training_Data_v8_processed'
training_out_dir_path = os.path.join(base_dir_path, training_out_dirname)


for root, _, files in os.walk(training_out_dir_path):
    for name in files:
        file_path = os.path.join(root, name)
        # Open as a context manager so the file handle is released right
        # away instead of piling up while the whole directory is scanned.
        with Image.open(file_path) as img:
            if img.width < 480 or img.height < 480:
                print(file_path + " width " + str(img.width) + " height " + str(img.height))
            