In [1]:
from PIL import Image, ImageOps
import os
import numpy as np

# Image Processing
For all the images in the given data folder, it does the following:
- removes alpha channel, if given
- pads the image so that is has squared shape
- resizes image
- if the image is grayscale, it adds a third channel
- overwrites the processed image

In [7]:
data = '../../../data/aboutyou_models/'

In [8]:
def pad_image(img):

    width, height = img.size
    
    max_size = max(width, height)
    
    pad_height = max_size - height
    pad_width = max_size - width
        
    padding = (pad_width // 2, 
               pad_height // 2, 
               pad_width - (pad_width // 2), 
               pad_height - (pad_height // 2))
        
    padded_img = ImageOps.expand(img, padding, fill=(255,255,255))
    return padded_img

In [9]:
def remove_alpha(img):
    
    if img.mode == 'RGBA':

        img.load()  # required for png.split()
        image_jpeg = Image.new("RGB", img.size, (255, 255, 255))
        image_jpeg.paste(img, mask=img.split()[3])  # 3 is the alpha channel
        img = image_jpeg
        
    return img

In [10]:
def resize_image(img, size):
    return img.resize(size, Image.ANTIALIAS)

In [11]:
for dirpath, dirnames, filenames in os.walk(data):
    
    images = [f for f in filenames if f.endswith(".jpg")]
    print('Directory: {}'.format(dirpath, len(images)))
    
    for idx, filename in enumerate(images):
        if idx % 1000 == 0:
            print('processed images: ', idx)
        
        try:
        
            img_path = os.path.join(dirpath, filename)
            img = Image.open(img_path)
            
            if img.size == (256, 256):
                continue
            else:
                img = remove_alpha(img)
                img = pad_image(img)
                img = resize_image(img, size=[256,256])

                img.save(img_path)
            
        except Exception as e:
            print(filename, e)

Directory: ../../../data/aboutyou_models/
Directory: ../../../data/aboutyou_models/kleider
processed images:  0
AAG0251001000001@1.jpg cannot identify image file '../../../data/aboutyou_models/kleider/AAG0251001000001@1.jpg'
ASB0075001001000@1.jpg cannot identify image file '../../../data/aboutyou_models/kleider/ASB0075001001000@1.jpg'
5627646_489931134@3.jpg cannot identify image file '../../../data/aboutyou_models/kleider/5627646_489931134@3.jpg'
5627646_489931134@2.jpg cannot identify image file '../../../data/aboutyou_models/kleider/5627646_489931134@2.jpg'
70613-270-XL@4.jpg function takes exactly 1 argument (3 given)
605382106@4.jpg function takes exactly 1 argument (3 given)
5021893_482618536@3.jpg cannot identify image file '../../../data/aboutyou_models/kleider/5021893_482618536@3.jpg'
692400345@2.jpg function takes exactly 1 argument (3 given)
ASB0075001001000@2.jpg cannot identify image file '../../../data/aboutyou_models/kleider/ASB0075001001000@2.jpg'
4251250717361@6.jpg c

KeyboardInterrupt: 

In [None]:
for dirpath, dirnames, filenames in os.walk(data):
    
    images = [f for f in filenames if f.endswith(".jpg")]
    print('Directory: {}'.format(dirpath, len(images)))
    
    for idx, filename in enumerate(images):
        if idx % 1000 == 0:
            print('processed images: ', idx)
            
        try:
        
            img_path = os.path.join(dirpath, filename)
            img = Image.open(img_path)
            data = np.asarray(img)
            
            if data.shape != (256, 256, 3):
                print(filename, data.shape)
                data_RGB = np.repeat(data[:, :, np.newaxis], 3, axis=2)
                print(data_RGB.shape)
                img_RGB = Image.fromarray(data_RGB)
                img_RGB.save(img_path)
                
            
        except Exception as e:
            print(filename, e)