In [1]:
from PIL import Image, ImageOps
import os
import numpy as np

# Image Processing
For all the images in the given data folder, it does the following:
- removes alpha channel, if given
- pads the image with white so that it has a square shape
- resizes the image to 256x256 pixels
- if the image is grayscale, it adds a third channel
- overwrites the original file with the processed image

In [11]:
# Root folder of the image dataset. The cells below walk it recursively with
# os.walk and rewrite the ".jpg" files they find in place.
data = '../../../data/fashion/dresses'

In [3]:
def pad_image(img, fill="white"):
    """Pad an image symmetrically so that it becomes square.

    The shorter side is extended to the length of the longer side. The extra
    pixels are split between the two opposite edges; when the amount is odd,
    the additional pixel goes to the right/bottom edge.

    Args:
        img: PIL image to pad.
        fill: Border colour passed on to ``ImageOps.expand``. The default is
            the colour *name* ``"white"`` rather than an RGB tuple: a
            3-tuple is rejected for single-band images (e.g. grayscale
            mode ``"L"``), whereas a colour name is translated by Pillow
            into a value suitable for the image's own mode.

    Returns:
        A new PIL image whose width equals its height.
    """
    width, height = img.size
    max_size = max(width, height)

    pad_width = max_size - width
    pad_height = max_size - height

    # ImageOps.expand expects the border as (left, top, right, bottom).
    padding = (pad_width // 2,
               pad_height // 2,
               pad_width - pad_width // 2,
               pad_height - pad_height // 2)

    return ImageOps.expand(img, padding, fill=fill)

In [4]:
def remove_alpha(img):
    """Flatten a transparent image onto a white background.

    Images without transparency are returned unchanged. Besides ``RGBA``,
    this also covers ``LA`` images and palette (``P``) images that carry a
    ``transparency`` entry in ``img.info``; these are converted to ``RGBA``
    first so that all cases share the same compositing step.

    Args:
        img: PIL image to process.

    Returns:
        The original image if it has no alpha information, otherwise a new
        ``RGB`` image of the same size composited over white.
    """
    has_alpha = (img.mode in ('RGBA', 'LA')
                 or (img.mode == 'P' and 'transparency' in img.info))
    if not has_alpha:
        return img

    # convert() also loads the pixel data, which split() below requires.
    rgba = img if img.mode == 'RGBA' else img.convert('RGBA')
    rgba.load()

    flattened = Image.new("RGB", rgba.size, (255, 255, 255))
    # The last band of an RGBA image is the alpha channel; using it as the
    # paste mask lets the white background show through transparent pixels.
    flattened.paste(rgba, mask=rgba.split()[3])
    return flattened

In [5]:
def resize_image(img, size):
    """Resize an image to ``size`` using Lanczos resampling.

    Args:
        img: PIL image to resize.
        size: Target ``(width, height)`` in pixels; any two-element
            sequence (tuple or list) is accepted.

    Returns:
        A new, resized PIL image.
    """
    # Image.ANTIALIAS was only an alias of Image.LANCZOS and has been removed
    # in Pillow 10, so the filter is referenced by its canonical name.
    return img.resize(tuple(size), Image.LANCZOS)

In [12]:
# Size (width, height) in pixels of the square images written by this cell.
# Files that already have this size are left untouched.
TARGET_SIZE = (256, 256)

for dirpath, dirnames, filenames in os.walk(data):

    images = [f for f in filenames if f.endswith(".jpg")]
    # Report the number of images as well; the count was previously passed
    # to format() but silently dropped because the placeholder was missing.
    print('Directory: {} ({} images)'.format(dirpath, len(images)))

    for idx, filename in enumerate(images):
        if idx % 1000 == 0:
            print('processed images: ', idx)

        img_path = os.path.join(dirpath, filename)

        try:
            # The context manager closes the source file even when the image
            # is skipped or one of the processing steps raises.
            with Image.open(img_path) as img:
                if img.size == TARGET_SIZE:
                    continue

                processed = remove_alpha(img)
                processed = pad_image(processed)
                processed = resize_image(processed, size=TARGET_SIZE)

            # Overwrite the original only after the source file is closed.
            processed.save(img_path)

        except Exception as e:
            # Best effort: report the failing file and carry on.
            print(filename, e)

Directory: ../../../data/fashion/dresses
processed images:  0
processed images:  1000
processed images:  2000
processed images:  3000
processed images:  4000
processed images:  5000
processed images:  6000
processed images:  7000
processed images:  8000
processed images:  9000
processed images:  10000
processed images:  11000
processed images:  12000


In [9]:
# Convert single-channel (grayscale) images to three channels by repeating
# the channel, and overwrite the file with the result.
for dirpath, dirnames, filenames in os.walk(data):

    images = [f for f in filenames if f.endswith(".jpg")]
    # Report the number of images as well; the count was previously passed
    # to format() but silently dropped because the placeholder was missing.
    print('Directory: {} ({} images)'.format(dirpath, len(images)))

    for idx, filename in enumerate(images):
        if idx % 1000 == 0:
            print('processed images: ', idx)

        img_path = os.path.join(dirpath, filename)

        try:
            # Use a dedicated name for the pixel array: re-using `data` would
            # overwrite the dataset path and break any later re-run of the
            # cells that walk it.
            with Image.open(img_path) as img:
                pixels = np.asarray(img)

            if pixels.ndim == 2:
                # Grayscale: (height, width) -> (height, width, 3).
                print(filename, pixels.shape)
                pixels_rgb = np.repeat(pixels[:, :, np.newaxis], 3, axis=2)
                print(pixels_rgb.shape)
                Image.fromarray(pixels_rgb).save(img_path)
            elif pixels.shape != (256, 256, 3):
                # Not grayscale, but not the expected final shape either;
                # report it instead of attempting a channel repeat.
                print(filename, 'unexpected shape', pixels.shape)

        except Exception as e:
            # Best effort: report the failing file and carry on.
            print(filename, e)

Directory: ../../../data/fashion/dresses
processed images:  0
8aa61dd0a0b32721b388c913fc79631d.jpg (506, 400)
(506, 400, 3)
processed images:  1000
3c6ca61c52aed8557b66e1fe6f70a105.jpg (575, 400)
(575, 400, 3)
4b885dadd4b449d04b5629c011225074.jpg (575, 400)
(575, 400, 3)
processed images:  2000
processed images:  3000
processed images:  4000
e38e4172a0e1e410be71ab9c2e079556.jpg (533, 400)
(533, 400, 3)
ff2d22b07f83d589650eff9b69091cd2.jpg (576, 400)
(576, 400, 3)
processed images:  5000
145f0278f87ffd12019c39296ecece3c.jpg (533, 399)
(533, 399, 3)
processed images:  6000
ce0a5a72b14ad802af917593770978fb.jpg (575, 400)
(575, 400, 3)
ab3bc8f476d1f6c803ce6b7fb8a5a903.jpg (575, 400)
(575, 400, 3)
processed images:  7000
42e425ec2940e0f1fd07add30e06b04c.jpg (575, 400)
(575, 400, 3)
a491de30a7688408d1b2e98a744b92e8.jpg (533, 400)
(533, 400, 3)
processed images:  8000
processed images:  9000
processed images:  10000
processed images:  11000
b02f0fd3a59c2ce75e1491cca464ff30.jpg (575, 400)
(575