In [1]:
import os
import cv2
from multiprocessing import cpu_count
from concurrent.futures import ProcessPoolExecutor, as_completed

In [2]:
def scan_files(directory, prefix=None, postfix=None):
    """Recursively collect the paths of files located under ``directory``.

    Args:
        directory: Root directory that is walked with ``os.walk``.
        prefix: If truthy, keep only files whose base name starts with it.
        postfix: If truthy, keep only files whose base name ends with it
            (for example an extension such as ``".bmp"``).

    Returns:
        A list of ``os.path.join(root, name)`` strings, one per matching
        file, in the order ``os.walk`` yields them. With no filter every
        file is returned.

    When both ``prefix`` and ``postfix`` are supplied a file has to satisfy
    both of them (previously ``prefix`` was silently ignored in that case).
    """
    files_list = []
    for root, _sub_dirs, files in os.walk(directory):
        for file_name in files:
            # Each filter is optional; a falsy value disables it.
            if prefix and not file_name.startswith(prefix):
                continue
            if postfix and not file_name.endswith(postfix):
                continue
            files_list.append(os.path.join(root, file_name))
    return files_list

In [3]:
def resize(img_name, size, save_path, label="NORMAL"):
    """Resize one image to ``size`` x ``size`` and save it under a label folder.

    The result is written to ``<save_path>/<label>/<basename of img_name>``.

    Args:
        img_name: Path of the image to read.
        size: Edge length in pixels; the output is ``(size, size)``, so the
            original aspect ratio is not preserved.
        save_path: Root output directory.
        label: Sub-directory of ``save_path`` that receives the image.

    Raises:
        IOError: If the image cannot be read or the result cannot be written.
    """
    img = cv2.imread(img_name)
    if img is None:
        # cv2.imread reports failure by returning None instead of raising.
        raise IOError("Failed to read image: %s" % img_name)
    img = cv2.resize(img, (size, size))

    target_dir = os.path.join(save_path, label)
    # exist_ok keeps this safe when several worker processes race to create it.
    os.makedirs(target_dir, exist_ok=True)
    img_name_new = os.path.join(target_dir, os.path.basename(img_name))
    # cv2.imwrite returns False on failure; do not lose output silently.
    if not cv2.imwrite(img_name_new, img):
        raise IOError("Failed to write image: %s" % img_name_new)
    
def batch_process(img_names, size, save_path, label="NORMAL"):
    """Resize every image of a batch sequentially.

    Args:
        img_names: Iterable of image paths.
        size: Target edge length forwarded to ``resize``.
        save_path: Root output directory forwarded to ``resize``.
        label: Output sub-directory forwarded to ``resize``; defaults to
            ``"NORMAL"``, the same default ``resize`` uses.

    Any exception raised by ``resize`` propagates and stops the batch.
    """
    for img_name in img_names:
        resize(img_name, size, save_path, label)
        
def main(data_path, save_path, size=299, max_workers=4, batch_size=10000):
    """Resize every ``.bmp`` file under ``data_path`` using a process pool.

    Args:
        data_path: Directory scanned recursively for ``.bmp`` files.
        save_path: Root output directory handed to ``batch_process``.
        size: Target edge length in pixels.
        max_workers: Number of worker processes in the pool.
        batch_size: Number of files handed to a worker per submitted job.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
        RuntimeError: If at least one batch raised an exception. All batches
            are still given the chance to finish before this is raised.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be >= 1, got %r" % (batch_size,))

    files = scan_files(data_path, postfix=".bmp")
    print("# files:", len(files))
    if not files:
        # Nothing to do; avoid spinning up a process pool for no work.
        return

    failures = []
    # The context manager shuts the pool down even if something goes wrong.
    with ProcessPoolExecutor(max_workers=max_workers) as executor:
        # Map each future to the index of the first file of its batch so
        # that failures can be traced back to the files involved.
        tasks = {}
        for start in range(0, len(files), batch_size):
            batch = files[start : start + batch_size]
            tasks[executor.submit(batch_process, batch, size, save_path)] = start

        job_count = len(tasks)
        for future in as_completed(tasks):
            job_count -= 1
            # A worker exception is stored on the future; without this check
            # a crashed batch would be reported as done.
            error = future.exception()
            if error is None:
                print("One Job Done, Remaining Job Count: %s" % (job_count))
            else:
                failures.append((tasks[future], error))
                print(
                    "One Job FAILED (batch starting at file #%s): %r, Remaining Job Count: %s"
                    % (tasks[future], error, job_count)
                )

    if failures:
        raise RuntimeError(
            "%d of %d batches failed; first error: %r"
            % (len(failures), len(tasks), failures[0][1])
        )

In [4]:
# Source directory: main() walks it recursively and picks up every ".bmp" file.
data_path = "/home/TMP10T/Develop/liyu/batch6.4-608-to-299/selected_neg"
# Output root: resized copies go to <save_path>/NORMAL/, since resize() is
# called with its default label.
save_path = "/home/TMP10T/Develop/liyu/batch6.4-608-to-299/original-hls09-rotated/train"

# Run with main()'s default target size (299 x 299).
main(data_path, save_path)

# files: 80604
One Job Done, Remaining Job Count: 8
One Job Done, Remaining Job Count: 7
One Job Done, Remaining Job Count: 6
One Job Done, Remaining Job Count: 5
One Job Done, Remaining Job Count: 4
One Job Done, Remaining Job Count: 3
One Job Done, Remaining Job Count: 2
One Job Done, Remaining Job Count: 1
One Job Done, Remaining Job Count: 0
