In [4]:
import os
from PIL import Image
from multiprocessing import cpu_count
from concurrent.futures import ProcessPoolExecutor, as_completed

In [5]:
def scan_files(directory, prefix=None, postfix=None):
    """Recursively collect the paths of files found under ``directory``.

    Args:
        directory: Root directory handed to ``os.walk``.
        prefix: If given (and non-empty), keep only files whose name starts
            with it.
        postfix: If given (and non-empty), keep only files whose name ends
            with it (e.g. ``".jpg"``).

    Returns:
        A list of paths (``os.path.join(root, file_name)``) in ``os.walk``
        order. When both ``prefix`` and ``postfix`` are supplied a file must
        satisfy both; with neither, every file is returned.
    """
    files_list = []
    for root, _sub_dirs, files in os.walk(directory):
        for file_name in files:
            # Apply each filter independently so that supplying both narrows
            # the result instead of the prefix being silently ignored.
            if prefix and not file_name.startswith(prefix):
                continue
            if postfix and not file_name.endswith(postfix):
                continue
            files_list.append(os.path.join(root, file_name))
    return files_list

def half_image(image_name, save_dir, depth):
    """Save a copy of ``image_name`` at half its width and height.

    Args:
        image_name: Path of the source image.
        save_dir: Root directory for the output.
        depth: Number of trailing directory components of ``image_name`` to
            reproduce under ``save_dir``. The path is split from the right
            on ``os.sep`` at most ``depth + 1`` times and everything after
            the first piece is re-joined onto ``save_dir``.
    """
    tokens = image_name.rsplit(os.sep, depth + 1)
    image_name_ = os.path.join(save_dir, *tokens[1:])

    # Create the destination folder on demand; exist_ok keeps this safe when
    # several worker processes target the same folder at once.
    out_dir = os.path.dirname(image_name_)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    with Image.open(image_name) as image:
        w, h = image.size
        # Clamp to 1 so a 1-pixel-wide/tall source does not request a
        # zero-sized resize.
        image.resize((max(1, w // 2), max(1, h // 2))).save(image_name_)

def half_and_pad_image(image_name, save_dir, depth, size):
    """Halve an image, then centre it on a ``size`` x ``size`` canvas and save.

    The image is resized to half its width and height. A square crop box of
    side ``size`` is then centred on the resized image: where the image is
    larger than ``size`` it is cropped, where it is smaller the box extends
    past the image edges (negative / out-of-range coordinates), which pads
    the result.

    Args:
        image_name: Path of the source image.
        save_dir: Root directory for the output.
        depth: Number of trailing directory components of ``image_name`` to
            reproduce under ``save_dir`` (the path is split from the right
            on ``os.sep`` at most ``depth + 1`` times and everything after
            the first piece is re-joined onto ``save_dir``).
        size: Side length, in pixels, of the square output image.
    """
    tokens = image_name.rsplit(os.sep, depth + 1)
    image_name_ = os.path.join(save_dir, *tokens[1:])

    # Create the destination folder on demand; exist_ok keeps this safe when
    # several worker processes target the same folder at once.
    out_dir = os.path.dirname(image_name_)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    with Image.open(image_name) as image:
        w, h = image.size
        # Clamp to 1 so a 1-pixel-wide/tall source does not request a
        # zero-sized resize.
        img = image.resize((max(1, w // 2), max(1, h // 2)))

        # Top-left corner of the centred box; negative when the halved image
        # is smaller than `size` along that axis.
        left = -((size - img.size[0]) // 2)
        top = -((size - img.size[1]) // 2)
        img_croped = img.crop((left, top, left + size, top + size))
        img_croped.save(image_name_)
        

def batch_half_image(image_names, save_dir, depth, size):
    """Apply ``half_and_pad_image`` to every path in ``image_names``, in order.

    ``save_dir``, ``depth`` and ``size`` are forwarded unchanged for each
    image. Nothing is returned.
    """
    for path in image_names:
        half_and_pad_image(path, save_dir, depth, size)

In [7]:
def process(cells_dir, cells_dir_half, depth=1, size=299, batch_size=1000):
    """Halve and pad every ``.jpg`` under ``cells_dir`` using a process pool.

    The file list from ``scan_files(cells_dir, postfix=".jpg")`` is cut into
    chunks of ``batch_size`` paths; each chunk is submitted to a
    ``ProcessPoolExecutor`` as one ``batch_half_image`` call. Progress is
    printed as each chunk finishes.

    Args:
        cells_dir: Directory that is scanned recursively for ``.jpg`` files.
        cells_dir_half: Root directory passed to the workers as the save
            location.
        depth: Forwarded to ``batch_half_image``.
        size: Forwarded to ``batch_half_image``.
        batch_size: Number of image paths handed to a worker per job.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be >= 1")

    image_names = scan_files(cells_dir, postfix=".jpg")
    print("# images", len(image_names))

    # Leave a few cores free, but never request fewer than one worker:
    # ProcessPoolExecutor rejects max_workers <= 0.
    max_workers = max(1, cpu_count() - 4)

    failed_jobs = 0
    # The context manager shuts the pool down even if something below raises.
    with ProcessPoolExecutor(max_workers=max_workers) as executor:
        tasks = [
            executor.submit(
                batch_half_image,
                image_names[start : start + batch_size],
                cells_dir_half,
                depth,
                size,
            )
            for start in range(0, len(image_names), batch_size)
        ]

        job_count = len(tasks)
        for future in as_completed(tasks):
            job_count -= 1
            # Inspect the outcome so a batch that died in a worker is
            # reported instead of being counted as done.
            error = future.exception()
            if error is None:
                print("One Job Done, Remaining Job Count: %s" % (job_count))
            else:
                failed_jobs += 1
                print("One Job FAILED (%r), Remaining Job Count: %s" % (error, job_count))

    if failed_jobs:
        print("%s of %s jobs failed" % (failed_jobs, len(tasks)))

In [8]:
cells_dir = "/home/hdd0/Data/Batch5_Xception/Batch5 Train DATA"
cells_dir_half299 = "/home/hdd0/Data/Batch5_Xception/batch5_half299"

# Mirror the first-level sub-directories of the source tree in the output
# tree. exist_ok makes the cell safe to re-run; plain files are skipped.
for sub_dir in os.listdir(cells_dir):
    if os.path.isdir(os.path.join(cells_dir, sub_dir)):
        os.makedirs(os.path.join(cells_dir_half299, sub_dir), exist_ok=True)

# Use the output path defined in this cell (the previous `cells_dir_half`
# name was not defined anywhere visible and only worked via stale kernel state).
process(cells_dir, cells_dir_half299)

# images 132301
One Job Done, Remaining Job Count: 132
One Job Done, Remaining Job Count: 131
One Job Done, Remaining Job Count: 130
One Job Done, Remaining Job Count: 129
One Job Done, Remaining Job Count: 128
One Job Done, Remaining Job Count: 127
One Job Done, Remaining Job Count: 126
One Job Done, Remaining Job Count: 125
One Job Done, Remaining Job Count: 124
One Job Done, Remaining Job Count: 123
One Job Done, Remaining Job Count: 122
One Job Done, Remaining Job Count: 121
One Job Done, Remaining Job Count: 120
One Job Done, Remaining Job Count: 119
One Job Done, Remaining Job Count: 118
One Job Done, Remaining Job Count: 117
One Job Done, Remaining Job Count: 116
One Job Done, Remaining Job Count: 115
One Job Done, Remaining Job Count: 114
One Job Done, Remaining Job Count: 113
One Job Done, Remaining Job Count: 112
One Job Done, Remaining Job Count: 111
One Job Done, Remaining Job Count: 110
One Job Done, Remaining Job Count: 109
One Job Done, Remaining Job Count: 108
One Job D