In [1]:
import os
from PIL import Image
from time import time, sleep
from concurrent.futures import ProcessPoolExecutor
from tqdm.notebook import tqdm

In [2]:
# Pixel dimensions as (width, height): expected source size and target size.
input_size = (75, 75)
output_size = (20, 20)

# Source directory, and a destination directory named after the target size.
INPUT_PATH = './images_75x75/'
OUTPUT_PATH = './images_{}x{}/'.format(output_size[0], output_size[1])

# Parallelism settings: worker process count and files handled per batch.
max_workers = 8
chunksize = 100

In [3]:
# Every entry of the input directory is a candidate for resizing.
files_names = os.listdir(INPUT_PATH)

# Create the output directory on first run; on later runs it already exists.
try:
    os.mkdir(OUTPUT_PATH)
except FileExistsError:
    print('Directory already exists')
output_already_exists = os.listdir(OUTPUT_PATH)

# A set makes each membership test O(1); checking against the list directly is
# quadratic in the number of files.
already_resized = set(output_already_exists)
filtered_input_files = [name for name in files_names if name not in already_resized]

# Build the path lists from the filtered names only, so the files that get
# processed are exactly the ones reported below and existing outputs are not
# regenerated when the notebook is re-run.
input_files = [os.path.join(INPUT_PATH, name) for name in filtered_input_files]
output_files = [os.path.join(OUTPUT_PATH, name) for name in filtered_input_files]

print(f'Found {len(files_names)} files in input path')
print(f'Found {len(output_already_exists)} files in output path')
print(f'\n{len(filtered_input_files)} files in input path are not in output path.\n{len(filtered_input_files)} files will be resized.')

Found 119272 files in input path
Found 0 files in output path

119272 files in input path are not in output path.
119272 files will be resized.


In [4]:
def resize_image(input_file, output_file):
    """Resize one image file to ``output_size`` and write it to ``output_file``.

    Reads the module-level ``input_size`` (only to warn about unexpected source
    dimensions) and ``output_size`` (the target size). Failures are reported
    with ``print`` and signalled through the return code instead of raising.

    Args:
        input_file: Path of the image to read.
        output_file: Path where the resized image is saved.

    Returns:
        1 on success, -1 if the file could not be opened, -2 if resizing
        failed, -3 if saving failed.
    """
    # `except Exception` (not a bare `except`) so KeyboardInterrupt and
    # SystemExit still propagate and the job can be interrupted.
    try:
        im = Image.open(input_file)
    except Exception:
        print(f'Error opening {input_file}')
        return -1

    # The context manager closes the source image (and its file handle) as soon
    # as the resized copy has been produced.
    with im:
        if im.size != input_size:
            print(f'Warning! File {input_file} is sized {im.size}')
        try:
            imr = im.resize(output_size, resample=Image.BILINEAR)
        except Exception:
            print(f'Error resizing {input_file}')
            return -2

    try:
        imr.save(output_file)
    except Exception:
        print(f'Error saving {output_file}')
        return -3
    return 1


def split_in_chunks(input_files, output_files, chunksize):
    """Cut two equally long sequences into aligned consecutive chunks.

    Args:
        input_files: Sequence of input items.
        output_files: Sequence of output items, same length as ``input_files``.
        chunksize: Maximum number of items per chunk.

    Returns:
        A tuple ``(input_files_chunks, output_files_chunks)`` of two lists;
        the chunk at a given position in each list covers the same index range.
        The last chunk may be shorter than ``chunksize``.
    """
    assert len(input_files) == len(output_files)
    # Start offset of every chunk; slicing past the end clamps automatically,
    # so the final chunk simply holds whatever items remain.
    starts = range(0, len(input_files), chunksize)
    chunked_inputs = [input_files[start:start + chunksize] for start in starts]
    chunked_outputs = [output_files[start:start + chunksize] for start in starts]
    return (chunked_inputs, chunked_outputs)

In [5]:
input_files_chunks, output_files_chunks = split_in_chunks(input_files, output_files, chunksize)

# One list of return codes per chunk, in chunk order.
results = []
# Create the worker pool once and reuse it for every chunk; starting and
# joining a fresh set of processes per chunk only adds overhead.
with ProcessPoolExecutor(max_workers=max_workers) as pool:
    for inp_f, out_f in tqdm(zip(input_files_chunks, output_files_chunks), total=len(input_files_chunks)):
        # list() drains the map iterator, so the progress bar advances only
        # after the whole chunk has been processed.
        results.append(list(pool.map(resize_image, inp_f, out_f)))

HBox(children=(IntProgress(value=0, max=1193), HTML(value='')))


