In [1]:
from PIL import Image, ImageOps
import os
import numpy as np

# Image Processing
For all the images in the given data folder, it does the following:
- removes alpha channel, if given
- pads the image so that is has squared shape
- resizes image
- if the image is grayscale, it adds a third channel
- overwrites the processed image

In [2]:
data = '../../../data/fashion_models/dresses/'

In [3]:
def pad_image(img):

    width, height = img.size
    
    max_size = max(width, height)
    
    pad_height = max_size - height
    pad_width = max_size - width
        
    padding = (pad_width // 2, 
               pad_height // 2, 
               pad_width - (pad_width // 2), 
               pad_height - (pad_height // 2))
        
    padded_img = ImageOps.expand(img, padding, fill=(255,255,255))
    return padded_img

In [4]:
def remove_alpha(img):
    
    if img.mode == 'RGBA':

        img.load()  # required for png.split()
        image_jpeg = Image.new("RGB", img.size, (255, 255, 255))
        image_jpeg.paste(img, mask=img.split()[3])  # 3 is the alpha channel
        img = image_jpeg
        
    return img

In [5]:
def resize_image(img, size):
    return img.resize(size, Image.ANTIALIAS)

In [6]:
for dirpath, dirnames, filenames in os.walk(data):
    
    images = [f for f in filenames if f.endswith(".jpg")]
    print('Directory: {}'.format(dirpath, len(images)))
    
    for idx, filename in enumerate(images):
        if idx % 1000 == 0:
            print('processed images: ', idx)
        
        try:
        
            img_path = os.path.join(dirpath, filename)
            img = Image.open(img_path)
            
            if img.size == (256, 256):
                continue
            else:
                img = remove_alpha(img)
                img = pad_image(img)
                img = resize_image(img, size=[256,256])

                img.save(img_path)
            
        except Exception as e:
            print(filename, e)

Directory: ../../../data/fashion_models/dresses/
processed images:  0
processed images:  1000
processed images:  2000
processed images:  3000
processed images:  4000
processed images:  5000
processed images:  6000
processed images:  7000
processed images:  8000
processed images:  9000
VIL1881001000002@4.jpg function takes exactly 1 argument (3 given)
processed images:  10000
H4421C0MP-Q11@2.jpg function takes exactly 1 argument (3 given)
processed images:  11000
processed images:  12000
processed images:  13000
processed images:  14000
processed images:  15000
processed images:  16000
D5721C00J-Q11@1.jpg function takes exactly 1 argument (3 given)
processed images:  17000
processed images:  18000
D5721C00J-Q11@0.jpg function takes exactly 1 argument (3 given)
processed images:  19000
M84280-001-XS@3.jpg function takes exactly 1 argument (3 given)
processed images:  20000
processed images:  21000
processed images:  22000
processed images:  23000
processed images:  24000
processed images

In [7]:
for dirpath, dirnames, filenames in os.walk(data):
    
    images = [f for f in filenames if f.endswith(".jpg")]
    print('Directory: {}'.format(dirpath, len(images)))
    
    for idx, filename in enumerate(images):
        if idx % 1000 == 0:
            print('processed images: ', idx)
            
        try:
        
            img_path = os.path.join(dirpath, filename)
            img = Image.open(img_path)
            data = np.asarray(img)
            
            if data.shape != (256, 256, 3):
                print(filename, data.shape)
                data_RGB = np.repeat(data[:, :, np.newaxis], 3, axis=2)
                print(data_RGB.shape)
                img_RGB = Image.fromarray(data_RGB)
                img_RGB.save(img_path)
                
            
        except Exception as e:
            print(filename, e)

Directory: ../../../data/fashion_models/dresses/
processed images:  0
processed images:  1000
processed images:  2000
processed images:  3000
processed images:  4000
processed images:  5000
processed images:  6000
processed images:  7000
processed images:  8000
processed images:  9000
VIL1881001000002@4.jpg (341, 256)
(341, 256, 3)
processed images:  10000
H4421C0MP-Q11@2.jpg (369, 256)
(369, 256, 3)
processed images:  11000
processed images:  12000
processed images:  13000
processed images:  14000
processed images:  15000
processed images:  16000
D5721C00J-Q11@1.jpg (369, 256)
(369, 256, 3)
processed images:  17000
processed images:  18000
D5721C00J-Q11@0.jpg (369, 256)
(369, 256, 3)
processed images:  19000
M84280-001-XS@3.jpg (469, 256)
(469, 256, 3)
processed images:  20000
processed images:  21000
processed images:  22000
processed images:  23000
processed images:  24000
processed images:  25000
processed images:  26000
processed images:  27000
processed images:  28000
40530412060