In [0]:
%matplotlib inline
import matplotlib.pyplot as plt
from PIL import Image
import numpy as np
import os

def _enlarge_span(start, stop, scale, limit):
    """Grow the half-open pixel interval [start, stop) by `scale`.

    The enlarged interval is centred on the original one where possible and
    clamped to [0, limit].

    Returns:
        tuple: (new_start, new_stop, new_length). `new_stop` is exclusive and
        `new_length` is the enlarged length before clamping.
    """
    old_length = stop - start
    new_length = int(scale * old_length)
    new_start = max(0, start - (new_length - old_length) // 2)
    new_stop = min(limit, new_start + new_length)
    return new_start, new_stop, new_length


def _resize_and_save(img_patch, mask_patch, size, img_path, mask_path):
    """Optionally resize an image/mask patch pair, then write both to disk.

    `size` is a (width, height) tuple, or None to keep the patches as cropped.
    """
    if size is not None:
        img_patch = img_patch.resize(size, Image.LANCZOS)
        # Nearest-neighbour keeps the mask binary; an interpolating filter
        # such as LANCZOS would introduce intermediate grey values.
        mask_patch = mask_patch.resize(size, Image.NEAREST)
    img_patch.save(img_path)
    mask_patch.save(mask_path)


def crop_resize_resample(image_file, mask_file,
                         crop_enlarge_width=1.3, crop_enlarge_height=1.1,
                         resize_width=480, resize_height=480,
                         num_resample=1,
                         output_dir='ISBI2016_ISIC_Part1_Training_Data_processed/'):
    """Crops a patch from a given image, and resamples random patches (optional).

    Given a 2D image and binary mask image, crops the bounding box
    area surrounding the non-zero pixels in the mask,
    both from the image and the mask.

    Enlarges the crop allowing a margin both for the width and the height.
    Enlarged width will be width*crop_enlarge_width,
    enlarged height will be height*crop_enlarge_height. The enlarged box
    is clamped to the image borders.

    Resizes the cropped patch to a given size. Does not resize if any one
    of the resize parameters is None.

    Resamples patches of the enlarged size located on a randomly selected
    top-left pixel, both from the image and the mask (optional). Does not
    resample if num_resample=0. A draw that lands on the top-left corner
    (0, 0) is skipped, so fewer than num_resample patches may be written.

    Args:
        image_file (str): file path to the input image.
        mask_file (str): file path to the binary mask image.
        crop_enlarge_width (float): enlarging parameter for the width of the cropped region.
        crop_enlarge_height (float): enlarging parameter for the height of the cropped region.
        resize_width (int or None): resize width parameter of the cropped region.
        resize_height (int or None): resize height parameter of the cropped region.
        num_resample (int): number of random resample patches from the image and the mask.
        output_dir (str): directory the patches are written to; created if missing.

    Returns:
        None: Saves resulting patches into `output_dir`. The main crop keeps
        the input file names; random patches get a `_NN` suffix.

    Raises:
        AssertionError: if an argument fails validation.
        ValueError: if the mask has no non-zero pixel (no bounding box).
    """
    assert os.path.isfile(image_file)
    assert os.path.isfile(mask_file)
    assert isinstance(crop_enlarge_width, (int, float))
    assert isinstance(crop_enlarge_height, (int, float))
    assert crop_enlarge_width > 1.0 and crop_enlarge_height > 1.0
    if resize_width is None or resize_height is None:
        # Documented contract: a None on either side disables resizing.
        target_size = None
    else:
        assert isinstance(resize_width, int)
        assert isinstance(resize_height, int)
        assert 1 < resize_width < 1000
        assert 1 < resize_height < 1000
        target_size = (resize_width, resize_height)

    os.makedirs(output_dir, exist_ok=True)
    image_filename, image_file_extension = os.path.splitext(os.path.basename(image_file))
    mask_filename, mask_file_extension = os.path.splitext(os.path.basename(mask_file))

    # Context managers release the underlying file handles even on error.
    with Image.open(image_file) as img, Image.open(mask_file) as mask:
        # (left, upper, right, lower); right/lower are exclusive, exactly
        # the convention Image.crop() expects.
        bbox = mask.getbbox()
        if bbox is None:
            raise ValueError(
                'mask {!r} contains no non-zero pixels'.format(mask_file))

        left, right, new_width = _enlarge_span(
            bbox[0], bbox[2], crop_enlarge_width, img.width)
        upper, lower, new_height = _enlarge_span(
            bbox[1], bbox[3], crop_enlarge_height, img.height)
        enlarged_bbox = (left, upper, right, lower)

        _resize_and_save(
            img.crop(enlarged_bbox), mask.crop(enlarged_bbox), target_size,
            os.path.join(output_dir, image_filename + image_file_extension),
            os.path.join(output_dir, mask_filename + mask_file_extension))

        # Random patches never extend past the image borders.
        patch_width = min(new_width, img.width)
        patch_height = min(new_height, img.height)
        for i in range(num_resample):
            # randint's `high` is exclusive, hence the +1 so the patch may
            # touch the bottom/right border.
            rand_loc_y = 0 if patch_height >= img.height else int(
                np.random.randint(low=0, high=img.height - patch_height + 1))
            rand_loc_x = 0 if patch_width >= img.width else int(
                np.random.randint(low=0, high=img.width - patch_width + 1))
            if rand_loc_x == 0 and rand_loc_y == 0:
                continue
            rand_bbox = (rand_loc_x, rand_loc_y,
                         rand_loc_x + patch_width, rand_loc_y + patch_height)
            suffix = '_{:02d}'.format(i + 1)
            _resize_and_save(
                img.crop(rand_bbox), mask.crop(rand_bbox), target_size,
                os.path.join(output_dir, image_filename + suffix + image_file_extension),
                os.path.join(output_dir, mask_filename + suffix + mask_file_extension))
    

In [0]:
def _list_files(top):
    """Return the sorted paths of every file found anywhere under `top`."""
    return sorted(os.path.join(root, name)
                  for root, _, files in os.walk(top)
                  for name in files)


image_filenames = _list_files("ISBI2016_ISIC_Part1_Training_Data/")
mask_filenames = _list_files("ISBI2016_ISIC_Part1_Training_GroundTruth/")

# Images and masks are paired purely by sorted position, so a count mismatch
# (a stray or missing file) would silently pair the wrong files together.
assert len(image_filenames) == len(mask_filenames), (
    "found {} images but {} masks".format(len(image_filenames), len(mask_filenames)))

# The processed patches are saved into this folder; make sure it exists so a
# fresh run does not fail on the first save.
os.makedirs('ISBI2016_ISIC_Part1_Training_Data_processed/', exist_ok=True)

for image_file, mask_file in zip(image_filenames, mask_filenames):
    print(image_file, mask_file)
    crop_resize_resample(image_file, mask_file)
    

In [0]:
# Install the PyDrive wrapper & import libraries.
# This only needs to be done once per notebook.
!pip install -U -q PyDrive
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
from google.colab import auth
from oauth2client.client import GoogleCredentials

# Authenticate and create the PyDrive client.
# This only needs to be done once per notebook.
auth.authenticate_user()
gauth = GoogleAuth()
gauth.credentials = GoogleCredentials.get_application_default()
drive = GoogleDrive(gauth)

# Download data from ISBI2016_part1 segmentation task
#
# A file ID looks like: laggVyWshwcyP6kEI-y_W3P8D26sz
folder_id = '1eyrJkGD6Zb0AMgKX8CS8pOBqd3y5gi4v'
file_list = drive.ListFile({'q': "'{}' in parents and trashed=false".format(folder_id)}).GetList()
# Count from 1 so the progress message reads (1/N) ... (N/N).
for i, file1 in enumerate(sorted(file_list, key=lambda x: x['title']), start=1):
    print('Downloading {} from GDrive ({}/{})'.format(file1['title'], i, len(file_list)))
    # Each file is saved into the current working directory under its Drive title.
    file1.GetContentFile(file1['title'])

In [0]:
import numpy as np

# Collect image (.jpg) / mask (.png) pairs directory by directory.
# Paths are built from `root` so files in subdirectories resolve correctly,
# and results accumulate across the whole walk instead of being overwritten
# by the last directory visited.
image_filenames = []
mask_filenames = []
for root, _, files in os.walk("."):
    dir_images = sorted(os.path.abspath(os.path.join(root, name))
                        for name in files if name.endswith(".jpg"))
    dir_masks = sorted(os.path.abspath(os.path.join(root, name))
                       for name in files if name.endswith(".png"))
    if not dir_images or not dir_masks:
        # Nothing to pair in a directory holding only one kind of file.
        continue
    # Pairing is by sorted position, so the counts must agree.
    assert len(dir_images) == len(dir_masks), (
        "{}: {} images but {} masks".format(root, len(dir_images), len(dir_masks)))
    image_filenames.extend(dir_images)
    mask_filenames.extend(dir_masks)

merged = list(zip(image_filenames, mask_filenames))
np.random.shuffle(merged)

# The first 10% of the shuffled pairs go to validation, the rest to training.
num_val = int(0.1*len(merged))

with open('isbi2016_isic_part1_train.list', 'w') as train_file, \
        open('isbi2016_isic_part1_val.list', 'w') as validation_file:
    for cnt, (image_file, mask_file) in enumerate(merged, start=1):
        print(image_file + " " + mask_file)
        if cnt > num_val:
            print(image_file + " " + mask_file, file=train_file)
        else:
            print(image_file + " " + mask_file, file=validation_file)

In [0]:
# upload the content to data folder in drkadir account.
tgt_folder_id = '1Biqy1Y-pTnAkLFqL1aHARngZ5bGa7VFd'

# Push the train list first, then the validation list, into the target folder.
for list_name in ('isbi2016_isic_part1_train.list', 'isbi2016_isic_part1_val.list'):
    file_metadata = {
        'title': list_name,
        'mimeType': 'text/csv',
        "parents": [{"kind": "drive#fileLink", "id": tgt_folder_id}],
    }
    uploaded = drive.CreateFile(file_metadata)
    uploaded.SetContentFile(list_name)
    uploaded.Upload()
    print('Uploaded file with ID {}'.format(uploaded.get('id')))

In [0]:
# Print the contents of the validation list file to the cell output.
!cat isbi2016_isic_part1_val.list