In [6]:
%matplotlib inline
import matplotlib.pyplot as plt
from PIL import Image
import numpy as np
import os

def _enlarge_span(start, stop, factor, limit):
    """Enlarge the half-open pixel span ``[start, stop)`` by ``factor``.

    The extra length is split evenly between both sides and the result is
    clipped to ``[0, limit]``. When the start is clipped at 0 the span keeps
    its full enlarged length (it is shifted, not shrunk); an end that
    overshoots ``limit`` is cut off.

    Returns:
        tuple: ``(new_start, new_stop, new_length)`` where ``new_length`` is
        the unclipped enlarged length ``int(factor * (stop - start))``.
    """
    old_length = stop - start
    new_length = int(factor * old_length)
    new_start = max(0, start - (new_length - old_length) // 2)
    new_stop = min(limit, new_start + new_length)
    return new_start, new_stop, new_length


def crop_resample(image_file, mask_file, outdir, 
                  crop_enlarge_width=1.3, crop_enlarge_height=1.1, 
                  num_resample=1):
    """Crops the masked region from an image and, optionally, random patches.

    Given a 2D image and a mask image, finds the bounding box of the
    non-zero mask pixels, enlarges it by the given factors (clipped to the
    image borders) and crops that box from both the image and the mask.

    Optionally crops ``num_resample`` additional patches of the enlarged
    size at random top-left positions, again from both the image and the
    mask. Positions are drawn from numpy's global random state, so seed it
    (``np.random.seed``) beforehand for reproducible patches. No random
    patch is produced when the enlarged box already covers the whole image.

    All patches are written to ``outdir`` under the input file names; random
    patches get a ``_01``, ``_02``, ... suffix before the extension.

    Args:
        image_file (str): file path to the input image.
        mask_file (str): file path to the mask image. It is used with the
            pixel coordinates of ``image_file``, so both are presumably
            expected to have the same size (not checked here).
        outdir (str): directory the patches are saved to; created if missing.
        crop_enlarge_width (float): enlarging factor (> 1) for the width of the cropped region.
        crop_enlarge_height (float): enlarging factor (> 1) for the height of the cropped region.
        num_resample (int): number of random patches to crop; 0 disables resampling.

    Returns:
        None: the resulting patches are saved into ``outdir``.

    Raises:
        AssertionError: if an input file is missing or an enlarging factor is not a number > 1.
        ValueError: if the mask contains no non-zero pixel.
    """
    assert os.path.isfile(image_file), "image file not found: {}".format(image_file)
    assert os.path.isfile(mask_file), "mask file not found: {}".format(mask_file)
    assert isinstance(crop_enlarge_width, (int, float))
    assert isinstance(crop_enlarge_height, (int, float))
    assert crop_enlarge_width > 1.0 and crop_enlarge_height > 1.0

    image_filename, image_file_extension = os.path.splitext(os.path.basename(image_file))
    mask_filename, mask_file_extension = os.path.splitext(os.path.basename(mask_file))

    # Context managers make sure the underlying file handles are released,
    # which matters when this function is called for hundreds of files.
    with Image.open(image_file) as img, Image.open(mask_file) as mask:
        # getbbox() returns (left, upper, right, lower) with right/lower
        # EXCLUSIVE, or None when the mask has no non-zero pixel.
        bbox = mask.getbbox()
        if bbox is None:
            raise ValueError("mask has no non-zero pixels: {}".format(mask_file))
        left, upper, right, lower = bbox

        # enlarge the bounding box with respect to the given enlarging params
        upperleft_y, bottomright_y, new_height = _enlarge_span(
            upper, lower, crop_enlarge_height, img.height)
        upperleft_x, bottomright_x, new_width = _enlarge_span(
            left, right, crop_enlarge_width, img.width)

        # crop() uses the same half-open convention as getbbox().
        enlarged_bbox = (upperleft_x, upperleft_y, bottomright_x, bottomright_y)

        os.makedirs(outdir, exist_ok=True)

        # crop the enlarged area from the image and the mask
        img.crop(enlarged_bbox).save(
            os.path.join(outdir, image_filename + image_file_extension))
        mask.crop(enlarged_bbox).save(
            os.path.join(outdir, mask_filename + mask_file_extension))

        # Largest top-left offsets that keep a full-size patch inside the image.
        max_loc_y = img.height - new_height
        max_loc_x = img.width - new_width
        if max_loc_y <= 0 and max_loc_x <= 0:
            # The patch fills the whole image: there is nothing to resample.
            return

        for i in range(num_resample):
            # randint's upper bound is exclusive, hence the +1.
            rand_loc_y = int(np.random.randint(low=0, high=max_loc_y + 1)) if max_loc_y > 0 else 0
            rand_loc_x = int(np.random.randint(low=0, high=max_loc_x + 1)) if max_loc_x > 0 else 0
            # Clip so a patch wider/taller than the image is not padded by crop().
            rand_bbox = (rand_loc_x, rand_loc_y,
                         min(img.width, rand_loc_x + new_width),
                         min(img.height, rand_loc_y + new_height))
            suffix = '_{:02d}'.format(i+1)
            img.crop(rand_bbox).save(
                os.path.join(outdir, image_filename + suffix + image_file_extension))
            mask.crop(rand_bbox).save(
                os.path.join(outdir, mask_filename + suffix + mask_file_extension))
    

In [7]:
# Input folders of the ISBI 2016 ISIC Part 1 training set and the folder
# that receives the cropped patches.
IMAGE_DIR = "/home/kadir/data/ISBI2016_ISIC_Part1_Training_Data/"
MASK_DIR = "/home/kadir/data/ISBI2016_ISIC_Part1_Training_GroundTruth/"
OUTPUT_DIR = '/home/kadir/data/ISBI2016_ISIC_Part1_Training_Data_processed'


def list_files_sorted(top):
    """Return the sorted paths of every file found below ``top`` (recursive)."""
    return sorted(os.path.join(root, name)
                  for root, _, files in os.walk(top)
                  for name in files)


image_filenames = list_files_sorted(IMAGE_DIR)
mask_filenames = list_files_sorted(MASK_DIR)

# Images and masks are paired purely by sort order, so make sure the two
# listings really line up before anything is written: zip() would otherwise
# silently drop or mispair files.
if len(image_filenames) != len(mask_filenames):
    raise ValueError("found {} images but {} masks".format(
        len(image_filenames), len(mask_filenames)))

for image_file, mask_file in zip(image_filenames, mask_filenames):
    # A mask is named after its image, e.g. ISIC_0000000.jpg ->
    # ISIC_0000000_Segmentation.png.
    image_id = os.path.splitext(os.path.basename(image_file))[0]
    if not os.path.basename(mask_file).startswith(image_id):
        raise ValueError("mask {} does not belong to image {}".format(
            mask_file, image_file))

if image_filenames:
    # Only create the output folder when there is something to write.
    os.makedirs(OUTPUT_DIR, exist_ok=True)

for image_file, mask_file in zip(image_filenames, mask_filenames):
    print(image_file, mask_file)
    crop_resample(image_file, mask_file, OUTPUT_DIR)
    

/home/kadir/data/ISBI2016_ISIC_Part1_Training_Data/ISIC_0000000.jpg /home/kadir/data/ISBI2016_ISIC_Part1_Training_GroundTruth/ISIC_0000000_Segmentation.png
/home/kadir/data/ISBI2016_ISIC_Part1_Training_Data/ISIC_0000001.jpg /home/kadir/data/ISBI2016_ISIC_Part1_Training_GroundTruth/ISIC_0000001_Segmentation.png
/home/kadir/data/ISBI2016_ISIC_Part1_Training_Data/ISIC_0000002.jpg /home/kadir/data/ISBI2016_ISIC_Part1_Training_GroundTruth/ISIC_0000002_Segmentation.png
/home/kadir/data/ISBI2016_ISIC_Part1_Training_Data/ISIC_0000004.jpg /home/kadir/data/ISBI2016_ISIC_Part1_Training_GroundTruth/ISIC_0000004_Segmentation.png
/home/kadir/data/ISBI2016_ISIC_Part1_Training_Data/ISIC_0000006.jpg /home/kadir/data/ISBI2016_ISIC_Part1_Training_GroundTruth/ISIC_0000006_Segmentation.png
/home/kadir/data/ISBI2016_ISIC_Part1_Training_Data/ISIC_0000007.jpg /home/kadir/data/ISBI2016_ISIC_Part1_Training_GroundTruth/ISIC_0000007_Segmentation.png
/home/kadir/data/ISBI2016_ISIC_Part1_Training_Data/ISIC_0000008.

/home/kadir/data/ISBI2016_ISIC_Part1_Training_Data/ISIC_0000081.jpg /home/kadir/data/ISBI2016_ISIC_Part1_Training_GroundTruth/ISIC_0000081_Segmentation.png
/home/kadir/data/ISBI2016_ISIC_Part1_Training_Data/ISIC_0000082.jpg /home/kadir/data/ISBI2016_ISIC_Part1_Training_GroundTruth/ISIC_0000082_Segmentation.png
/home/kadir/data/ISBI2016_ISIC_Part1_Training_Data/ISIC_0000085.jpg /home/kadir/data/ISBI2016_ISIC_Part1_Training_GroundTruth/ISIC_0000085_Segmentation.png
/home/kadir/data/ISBI2016_ISIC_Part1_Training_Data/ISIC_0000086.jpg /home/kadir/data/ISBI2016_ISIC_Part1_Training_GroundTruth/ISIC_0000086_Segmentation.png
/home/kadir/data/ISBI2016_ISIC_Part1_Training_Data/ISIC_0000087.jpg /home/kadir/data/ISBI2016_ISIC_Part1_Training_GroundTruth/ISIC_0000087_Segmentation.png
/home/kadir/data/ISBI2016_ISIC_Part1_Training_Data/ISIC_0000089.jpg /home/kadir/data/ISBI2016_ISIC_Part1_Training_GroundTruth/ISIC_0000089_Segmentation.png
/home/kadir/data/ISBI2016_ISIC_Part1_Training_Data/ISIC_0000091.

/home/kadir/data/ISBI2016_ISIC_Part1_Training_Data/ISIC_0000157.jpg /home/kadir/data/ISBI2016_ISIC_Part1_Training_GroundTruth/ISIC_0000157_Segmentation.png
/home/kadir/data/ISBI2016_ISIC_Part1_Training_Data/ISIC_0000159.jpg /home/kadir/data/ISBI2016_ISIC_Part1_Training_GroundTruth/ISIC_0000159_Segmentation.png
/home/kadir/data/ISBI2016_ISIC_Part1_Training_Data/ISIC_0000162.jpg /home/kadir/data/ISBI2016_ISIC_Part1_Training_GroundTruth/ISIC_0000162_Segmentation.png
/home/kadir/data/ISBI2016_ISIC_Part1_Training_Data/ISIC_0000163.jpg /home/kadir/data/ISBI2016_ISIC_Part1_Training_GroundTruth/ISIC_0000163_Segmentation.png
/home/kadir/data/ISBI2016_ISIC_Part1_Training_Data/ISIC_0000166.jpg /home/kadir/data/ISBI2016_ISIC_Part1_Training_GroundTruth/ISIC_0000166_Segmentation.png
/home/kadir/data/ISBI2016_ISIC_Part1_Training_Data/ISIC_0000167.jpg /home/kadir/data/ISBI2016_ISIC_Part1_Training_GroundTruth/ISIC_0000167_Segmentation.png
/home/kadir/data/ISBI2016_ISIC_Part1_Training_Data/ISIC_0000170.

/home/kadir/data/ISBI2016_ISIC_Part1_Training_Data/ISIC_0000243.jpg /home/kadir/data/ISBI2016_ISIC_Part1_Training_GroundTruth/ISIC_0000243_Segmentation.png
/home/kadir/data/ISBI2016_ISIC_Part1_Training_Data/ISIC_0000244.jpg /home/kadir/data/ISBI2016_ISIC_Part1_Training_GroundTruth/ISIC_0000244_Segmentation.png
/home/kadir/data/ISBI2016_ISIC_Part1_Training_Data/ISIC_0000245.jpg /home/kadir/data/ISBI2016_ISIC_Part1_Training_GroundTruth/ISIC_0000245_Segmentation.png
/home/kadir/data/ISBI2016_ISIC_Part1_Training_Data/ISIC_0000247.jpg /home/kadir/data/ISBI2016_ISIC_Part1_Training_GroundTruth/ISIC_0000247_Segmentation.png
/home/kadir/data/ISBI2016_ISIC_Part1_Training_Data/ISIC_0000249.jpg /home/kadir/data/ISBI2016_ISIC_Part1_Training_GroundTruth/ISIC_0000249_Segmentation.png
/home/kadir/data/ISBI2016_ISIC_Part1_Training_Data/ISIC_0000250.jpg /home/kadir/data/ISBI2016_ISIC_Part1_Training_GroundTruth/ISIC_0000250_Segmentation.png
/home/kadir/data/ISBI2016_ISIC_Part1_Training_Data/ISIC_0000251.

/home/kadir/data/ISBI2016_ISIC_Part1_Training_Data/ISIC_0000336.jpg /home/kadir/data/ISBI2016_ISIC_Part1_Training_GroundTruth/ISIC_0000336_Segmentation.png
/home/kadir/data/ISBI2016_ISIC_Part1_Training_Data/ISIC_0000337.jpg /home/kadir/data/ISBI2016_ISIC_Part1_Training_GroundTruth/ISIC_0000337_Segmentation.png
/home/kadir/data/ISBI2016_ISIC_Part1_Training_Data/ISIC_0000338.jpg /home/kadir/data/ISBI2016_ISIC_Part1_Training_GroundTruth/ISIC_0000338_Segmentation.png
/home/kadir/data/ISBI2016_ISIC_Part1_Training_Data/ISIC_0000339.jpg /home/kadir/data/ISBI2016_ISIC_Part1_Training_GroundTruth/ISIC_0000339_Segmentation.png
/home/kadir/data/ISBI2016_ISIC_Part1_Training_Data/ISIC_0000341.jpg /home/kadir/data/ISBI2016_ISIC_Part1_Training_GroundTruth/ISIC_0000341_Segmentation.png
/home/kadir/data/ISBI2016_ISIC_Part1_Training_Data/ISIC_0000342.jpg /home/kadir/data/ISBI2016_ISIC_Part1_Training_GroundTruth/ISIC_0000342_Segmentation.png
/home/kadir/data/ISBI2016_ISIC_Part1_Training_Data/ISIC_0000344.

/home/kadir/data/ISBI2016_ISIC_Part1_Training_Data/ISIC_0000421.jpg /home/kadir/data/ISBI2016_ISIC_Part1_Training_GroundTruth/ISIC_0000421_Segmentation.png
/home/kadir/data/ISBI2016_ISIC_Part1_Training_Data/ISIC_0000423.jpg /home/kadir/data/ISBI2016_ISIC_Part1_Training_GroundTruth/ISIC_0000423_Segmentation.png
/home/kadir/data/ISBI2016_ISIC_Part1_Training_Data/ISIC_0000425.jpg /home/kadir/data/ISBI2016_ISIC_Part1_Training_GroundTruth/ISIC_0000425_Segmentation.png
/home/kadir/data/ISBI2016_ISIC_Part1_Training_Data/ISIC_0000426.jpg /home/kadir/data/ISBI2016_ISIC_Part1_Training_GroundTruth/ISIC_0000426_Segmentation.png
/home/kadir/data/ISBI2016_ISIC_Part1_Training_Data/ISIC_0000427.jpg /home/kadir/data/ISBI2016_ISIC_Part1_Training_GroundTruth/ISIC_0000427_Segmentation.png
/home/kadir/data/ISBI2016_ISIC_Part1_Training_Data/ISIC_0000431.jpg /home/kadir/data/ISBI2016_ISIC_Part1_Training_GroundTruth/ISIC_0000431_Segmentation.png
/home/kadir/data/ISBI2016_ISIC_Part1_Training_Data/ISIC_0000434.

/home/kadir/data/ISBI2016_ISIC_Part1_Training_Data/ISIC_0000511.jpg /home/kadir/data/ISBI2016_ISIC_Part1_Training_GroundTruth/ISIC_0000511_Segmentation.png
/home/kadir/data/ISBI2016_ISIC_Part1_Training_Data/ISIC_0000513.jpg /home/kadir/data/ISBI2016_ISIC_Part1_Training_GroundTruth/ISIC_0000513_Segmentation.png
/home/kadir/data/ISBI2016_ISIC_Part1_Training_Data/ISIC_0000514.jpg /home/kadir/data/ISBI2016_ISIC_Part1_Training_GroundTruth/ISIC_0000514_Segmentation.png
/home/kadir/data/ISBI2016_ISIC_Part1_Training_Data/ISIC_0000516.jpg /home/kadir/data/ISBI2016_ISIC_Part1_Training_GroundTruth/ISIC_0000516_Segmentation.png
/home/kadir/data/ISBI2016_ISIC_Part1_Training_Data/ISIC_0000517.jpg /home/kadir/data/ISBI2016_ISIC_Part1_Training_GroundTruth/ISIC_0000517_Segmentation.png
/home/kadir/data/ISBI2016_ISIC_Part1_Training_Data/ISIC_0000519.jpg /home/kadir/data/ISBI2016_ISIC_Part1_Training_GroundTruth/ISIC_0000519_Segmentation.png
/home/kadir/data/ISBI2016_ISIC_Part1_Training_Data/ISIC_0000520.

/home/kadir/data/ISBI2016_ISIC_Part1_Training_Data/ISIC_0001247.jpg /home/kadir/data/ISBI2016_ISIC_Part1_Training_GroundTruth/ISIC_0001247_Segmentation.png
/home/kadir/data/ISBI2016_ISIC_Part1_Training_Data/ISIC_0001254.jpg /home/kadir/data/ISBI2016_ISIC_Part1_Training_GroundTruth/ISIC_0001254_Segmentation.png
/home/kadir/data/ISBI2016_ISIC_Part1_Training_Data/ISIC_0001262.jpg /home/kadir/data/ISBI2016_ISIC_Part1_Training_GroundTruth/ISIC_0001262_Segmentation.png
/home/kadir/data/ISBI2016_ISIC_Part1_Training_Data/ISIC_0001267.jpg /home/kadir/data/ISBI2016_ISIC_Part1_Training_GroundTruth/ISIC_0001267_Segmentation.png
/home/kadir/data/ISBI2016_ISIC_Part1_Training_Data/ISIC_0001275.jpg /home/kadir/data/ISBI2016_ISIC_Part1_Training_GroundTruth/ISIC_0001275_Segmentation.png
/home/kadir/data/ISBI2016_ISIC_Part1_Training_Data/ISIC_0001286.jpg /home/kadir/data/ISBI2016_ISIC_Part1_Training_GroundTruth/ISIC_0001286_Segmentation.png
/home/kadir/data/ISBI2016_ISIC_Part1_Training_Data/ISIC_0001292.

/home/kadir/data/ISBI2016_ISIC_Part1_Training_Data/ISIC_0005247.jpg /home/kadir/data/ISBI2016_ISIC_Part1_Training_GroundTruth/ISIC_0005247_Segmentation.png
/home/kadir/data/ISBI2016_ISIC_Part1_Training_Data/ISIC_0005548.jpg /home/kadir/data/ISBI2016_ISIC_Part1_Training_GroundTruth/ISIC_0005548_Segmentation.png
/home/kadir/data/ISBI2016_ISIC_Part1_Training_Data/ISIC_0005555.jpg /home/kadir/data/ISBI2016_ISIC_Part1_Training_GroundTruth/ISIC_0005555_Segmentation.png
/home/kadir/data/ISBI2016_ISIC_Part1_Training_Data/ISIC_0005564.jpg /home/kadir/data/ISBI2016_ISIC_Part1_Training_GroundTruth/ISIC_0005564_Segmentation.png
/home/kadir/data/ISBI2016_ISIC_Part1_Training_Data/ISIC_0005620.jpg /home/kadir/data/ISBI2016_ISIC_Part1_Training_GroundTruth/ISIC_0005620_Segmentation.png
/home/kadir/data/ISBI2016_ISIC_Part1_Training_Data/ISIC_0005639.jpg /home/kadir/data/ISBI2016_ISIC_Part1_Training_GroundTruth/ISIC_0005639_Segmentation.png
/home/kadir/data/ISBI2016_ISIC_Part1_Training_Data/ISIC_0005666.

/home/kadir/data/ISBI2016_ISIC_Part1_Training_Data/ISIC_0009599.jpg /home/kadir/data/ISBI2016_ISIC_Part1_Training_GroundTruth/ISIC_0009599_Segmentation.png
/home/kadir/data/ISBI2016_ISIC_Part1_Training_Data/ISIC_0009758.jpg /home/kadir/data/ISBI2016_ISIC_Part1_Training_GroundTruth/ISIC_0009758_Segmentation.png
/home/kadir/data/ISBI2016_ISIC_Part1_Training_Data/ISIC_0009800.jpg /home/kadir/data/ISBI2016_ISIC_Part1_Training_GroundTruth/ISIC_0009800_Segmentation.png
/home/kadir/data/ISBI2016_ISIC_Part1_Training_Data/ISIC_0009860.jpg /home/kadir/data/ISBI2016_ISIC_Part1_Training_GroundTruth/ISIC_0009860_Segmentation.png
/home/kadir/data/ISBI2016_ISIC_Part1_Training_Data/ISIC_0009868.jpg /home/kadir/data/ISBI2016_ISIC_Part1_Training_GroundTruth/ISIC_0009868_Segmentation.png
/home/kadir/data/ISBI2016_ISIC_Part1_Training_Data/ISIC_0009870.jpg /home/kadir/data/ISBI2016_ISIC_Part1_Training_GroundTruth/ISIC_0009870_Segmentation.png
/home/kadir/data/ISBI2016_ISIC_Part1_Training_Data/ISIC_0009871.

/home/kadir/data/ISBI2016_ISIC_Part1_Training_Data/ISIC_0009968.jpg /home/kadir/data/ISBI2016_ISIC_Part1_Training_GroundTruth/ISIC_0009968_Segmentation.png
/home/kadir/data/ISBI2016_ISIC_Part1_Training_Data/ISIC_0009969.jpg /home/kadir/data/ISBI2016_ISIC_Part1_Training_GroundTruth/ISIC_0009969_Segmentation.png
/home/kadir/data/ISBI2016_ISIC_Part1_Training_Data/ISIC_0009971.jpg /home/kadir/data/ISBI2016_ISIC_Part1_Training_GroundTruth/ISIC_0009971_Segmentation.png
/home/kadir/data/ISBI2016_ISIC_Part1_Training_Data/ISIC_0009972.jpg /home/kadir/data/ISBI2016_ISIC_Part1_Training_GroundTruth/ISIC_0009972_Segmentation.png
/home/kadir/data/ISBI2016_ISIC_Part1_Training_Data/ISIC_0009973.jpg /home/kadir/data/ISBI2016_ISIC_Part1_Training_GroundTruth/ISIC_0009973_Segmentation.png
/home/kadir/data/ISBI2016_ISIC_Part1_Training_Data/ISIC_0009974.jpg /home/kadir/data/ISBI2016_ISIC_Part1_Training_GroundTruth/ISIC_0009974_Segmentation.png
/home/kadir/data/ISBI2016_ISIC_Part1_Training_Data/ISIC_0009975.

/home/kadir/data/ISBI2016_ISIC_Part1_Training_Data/ISIC_0010070.jpg /home/kadir/data/ISBI2016_ISIC_Part1_Training_GroundTruth/ISIC_0010070_Segmentation.png
/home/kadir/data/ISBI2016_ISIC_Part1_Training_Data/ISIC_0010071.jpg /home/kadir/data/ISBI2016_ISIC_Part1_Training_GroundTruth/ISIC_0010071_Segmentation.png
/home/kadir/data/ISBI2016_ISIC_Part1_Training_Data/ISIC_0010074.jpg /home/kadir/data/ISBI2016_ISIC_Part1_Training_GroundTruth/ISIC_0010074_Segmentation.png
/home/kadir/data/ISBI2016_ISIC_Part1_Training_Data/ISIC_0010075.jpg /home/kadir/data/ISBI2016_ISIC_Part1_Training_GroundTruth/ISIC_0010075_Segmentation.png
/home/kadir/data/ISBI2016_ISIC_Part1_Training_Data/ISIC_0010078.jpg /home/kadir/data/ISBI2016_ISIC_Part1_Training_GroundTruth/ISIC_0010078_Segmentation.png
/home/kadir/data/ISBI2016_ISIC_Part1_Training_Data/ISIC_0010079.jpg /home/kadir/data/ISBI2016_ISIC_Part1_Training_GroundTruth/ISIC_0010079_Segmentation.png
/home/kadir/data/ISBI2016_ISIC_Part1_Training_Data/ISIC_0010080.

/home/kadir/data/ISBI2016_ISIC_Part1_Training_Data/ISIC_0010239.jpg /home/kadir/data/ISBI2016_ISIC_Part1_Training_GroundTruth/ISIC_0010239_Segmentation.png
/home/kadir/data/ISBI2016_ISIC_Part1_Training_Data/ISIC_0010240.jpg /home/kadir/data/ISBI2016_ISIC_Part1_Training_GroundTruth/ISIC_0010240_Segmentation.png
/home/kadir/data/ISBI2016_ISIC_Part1_Training_Data/ISIC_0010241.jpg /home/kadir/data/ISBI2016_ISIC_Part1_Training_GroundTruth/ISIC_0010241_Segmentation.png
/home/kadir/data/ISBI2016_ISIC_Part1_Training_Data/ISIC_0010242.jpg /home/kadir/data/ISBI2016_ISIC_Part1_Training_GroundTruth/ISIC_0010242_Segmentation.png
/home/kadir/data/ISBI2016_ISIC_Part1_Training_Data/ISIC_0010244.jpg /home/kadir/data/ISBI2016_ISIC_Part1_Training_GroundTruth/ISIC_0010244_Segmentation.png
/home/kadir/data/ISBI2016_ISIC_Part1_Training_Data/ISIC_0010246.jpg /home/kadir/data/ISBI2016_ISIC_Part1_Training_GroundTruth/ISIC_0010246_Segmentation.png
/home/kadir/data/ISBI2016_ISIC_Part1_Training_Data/ISIC_0010247.

/home/kadir/data/ISBI2016_ISIC_Part1_Training_Data/ISIC_0010380.jpg /home/kadir/data/ISBI2016_ISIC_Part1_Training_GroundTruth/ISIC_0010380_Segmentation.png
/home/kadir/data/ISBI2016_ISIC_Part1_Training_Data/ISIC_0010382.jpg /home/kadir/data/ISBI2016_ISIC_Part1_Training_GroundTruth/ISIC_0010382_Segmentation.png
/home/kadir/data/ISBI2016_ISIC_Part1_Training_Data/ISIC_0010435.jpg /home/kadir/data/ISBI2016_ISIC_Part1_Training_GroundTruth/ISIC_0010435_Segmentation.png
/home/kadir/data/ISBI2016_ISIC_Part1_Training_Data/ISIC_0010436.jpg /home/kadir/data/ISBI2016_ISIC_Part1_Training_GroundTruth/ISIC_0010436_Segmentation.png
/home/kadir/data/ISBI2016_ISIC_Part1_Training_Data/ISIC_0010438.jpg /home/kadir/data/ISBI2016_ISIC_Part1_Training_GroundTruth/ISIC_0010438_Segmentation.png
/home/kadir/data/ISBI2016_ISIC_Part1_Training_Data/ISIC_0010439.jpg /home/kadir/data/ISBI2016_ISIC_Part1_Training_GroundTruth/ISIC_0010439_Segmentation.png
/home/kadir/data/ISBI2016_ISIC_Part1_Training_Data/ISIC_0010440.

/home/kadir/data/ISBI2016_ISIC_Part1_Training_Data/ISIC_0010575.jpg /home/kadir/data/ISBI2016_ISIC_Part1_Training_GroundTruth/ISIC_0010575_Segmentation.png
/home/kadir/data/ISBI2016_ISIC_Part1_Training_Data/ISIC_0010576.jpg /home/kadir/data/ISBI2016_ISIC_Part1_Training_GroundTruth/ISIC_0010576_Segmentation.png
/home/kadir/data/ISBI2016_ISIC_Part1_Training_Data/ISIC_0010577.jpg /home/kadir/data/ISBI2016_ISIC_Part1_Training_GroundTruth/ISIC_0010577_Segmentation.png
/home/kadir/data/ISBI2016_ISIC_Part1_Training_Data/ISIC_0010581.jpg /home/kadir/data/ISBI2016_ISIC_Part1_Training_GroundTruth/ISIC_0010581_Segmentation.png
/home/kadir/data/ISBI2016_ISIC_Part1_Training_Data/ISIC_0010585.jpg /home/kadir/data/ISBI2016_ISIC_Part1_Training_GroundTruth/ISIC_0010585_Segmentation.png
/home/kadir/data/ISBI2016_ISIC_Part1_Training_Data/ISIC_0010586.jpg /home/kadir/data/ISBI2016_ISIC_Part1_Training_GroundTruth/ISIC_0010586_Segmentation.png
/home/kadir/data/ISBI2016_ISIC_Part1_Training_Data/ISIC_0010589.

/home/kadir/data/ISBI2016_ISIC_Part1_Training_Data/ISIC_0011131.jpg /home/kadir/data/ISBI2016_ISIC_Part1_Training_GroundTruth/ISIC_0011131_Segmentation.png
/home/kadir/data/ISBI2016_ISIC_Part1_Training_Data/ISIC_0011135.jpg /home/kadir/data/ISBI2016_ISIC_Part1_Training_GroundTruth/ISIC_0011135_Segmentation.png
/home/kadir/data/ISBI2016_ISIC_Part1_Training_Data/ISIC_0011136.jpg /home/kadir/data/ISBI2016_ISIC_Part1_Training_GroundTruth/ISIC_0011136_Segmentation.png
/home/kadir/data/ISBI2016_ISIC_Part1_Training_Data/ISIC_0011137.jpg /home/kadir/data/ISBI2016_ISIC_Part1_Training_GroundTruth/ISIC_0011137_Segmentation.png
/home/kadir/data/ISBI2016_ISIC_Part1_Training_Data/ISIC_0011139.jpg /home/kadir/data/ISBI2016_ISIC_Part1_Training_GroundTruth/ISIC_0011139_Segmentation.png
/home/kadir/data/ISBI2016_ISIC_Part1_Training_Data/ISIC_0011140.jpg /home/kadir/data/ISBI2016_ISIC_Part1_Training_GroundTruth/ISIC_0011140_Segmentation.png
/home/kadir/data/ISBI2016_ISIC_Part1_Training_Data/ISIC_0011141.

/home/kadir/data/ISBI2016_ISIC_Part1_Training_Data/ISIC_0011323.jpg /home/kadir/data/ISBI2016_ISIC_Part1_Training_GroundTruth/ISIC_0011323_Segmentation.png
/home/kadir/data/ISBI2016_ISIC_Part1_Training_Data/ISIC_0011324.jpg /home/kadir/data/ISBI2016_ISIC_Part1_Training_GroundTruth/ISIC_0011324_Segmentation.png
/home/kadir/data/ISBI2016_ISIC_Part1_Training_Data/ISIC_0011326.jpg /home/kadir/data/ISBI2016_ISIC_Part1_Training_GroundTruth/ISIC_0011326_Segmentation.png
/home/kadir/data/ISBI2016_ISIC_Part1_Training_Data/ISIC_0011327.jpg /home/kadir/data/ISBI2016_ISIC_Part1_Training_GroundTruth/ISIC_0011327_Segmentation.png
/home/kadir/data/ISBI2016_ISIC_Part1_Training_Data/ISIC_0011328.jpg /home/kadir/data/ISBI2016_ISIC_Part1_Training_GroundTruth/ISIC_0011328_Segmentation.png
/home/kadir/data/ISBI2016_ISIC_Part1_Training_Data/ISIC_0011329.jpg /home/kadir/data/ISBI2016_ISIC_Part1_Training_GroundTruth/ISIC_0011329_Segmentation.png
/home/kadir/data/ISBI2016_ISIC_Part1_Training_Data/ISIC_0011330.

In [9]:
# Folder with the cropped patches, the two list files to write, and the
# parameters of the train/validation split.
PATCH_DIR = "/home/kadir/data/ISBI2016_ISIC_Part1_Training_Data_processed"
TRAIN_LIST = '/home/kadir/data/isbi2016_isic_part1_train.list'
VAL_LIST = '/home/kadir/data/isbi2016_isic_part1_val.list'
VAL_FRACTION = 0.1
SPLIT_SEED = 0  # fixed seed so the split is identical on every run

# The patches are written flat into PATCH_DIR, so a plain listing is enough.
# It also fails loudly when the folder is missing instead of silently reusing
# whatever `image_filenames` / `mask_filenames` an earlier cell left behind.
patch_names = os.listdir(PATCH_DIR)
image_filenames = sorted(name for name in patch_names if name.endswith(".jpg"))
mask_filenames = sorted(name for name in patch_names if name.endswith(".png"))

# Images and masks are paired by sort order; a count mismatch would make
# zip() silently drop or mispair entries.
if len(image_filenames) != len(mask_filenames):
    raise ValueError("found {} image patches but {} mask patches".format(
        len(image_filenames), len(mask_filenames)))

merged = list(zip(image_filenames, mask_filenames))
np.random.RandomState(SPLIT_SEED).shuffle(merged)

# NOTE(review): the split is made per patch, so a crop and the random patch
# of the same source image (e.g. ISIC_x.jpg / ISIC_x_01.jpg) can end up in
# different splits -- confirm this is acceptable for validation.
num_val = int(VAL_FRACTION*len(merged))

with open(TRAIN_LIST, 'w') as train_file, open(VAL_LIST, 'w') as validation_file:
    for cnt, (image_file, mask_file) in enumerate(merged, start=1):
        line = image_file + " " + mask_file
        print(line)
        # The first `num_val` shuffled pairs form the validation set.
        target_file = train_file if cnt > num_val else validation_file
        print(line, file=target_file)

ISIC_0010364_01.jpg ISIC_0010364_Segmentation_01.png
ISIC_0000460_01.jpg ISIC_0000460_Segmentation_01.png
ISIC_0009981_01.jpg ISIC_0009981_Segmentation_01.png
ISIC_0010067.jpg ISIC_0010067_Segmentation.png
ISIC_0010053.jpg ISIC_0010053_Segmentation.png
ISIC_0000403_01.jpg ISIC_0000403_Segmentation_01.png
ISIC_0010480_01.jpg ISIC_0010480_Segmentation_01.png
ISIC_0000018.jpg ISIC_0000018_Segmentation.png
ISIC_0010365.jpg ISIC_0010365_Segmentation.png
ISIC_0010572.jpg ISIC_0010572_Segmentation.png
ISIC_0008396.jpg ISIC_0008396_Segmentation.png
ISIC_0000385.jpg ISIC_0000385_Segmentation.png
ISIC_0000067_01.jpg ISIC_0000067_Segmentation_01.png
ISIC_0000050.jpg ISIC_0000050_Segmentation.png
ISIC_0000264_01.jpg ISIC_0000264_Segmentation_01.png
ISIC_0000485.jpg ISIC_0000485_Segmentation.png
ISIC_0000360_01.jpg ISIC_0000360_Segmentation_01.png
ISIC_0000361_01.jpg ISIC_0000361_Segmentation_01.png
ISIC_0009800_01.jpg ISIC_0009800_Segmentation_01.png
ISIC_0000173.jpg ISIC_0000173_Segmentation.png
