# Import

In [1]:
import os
import random
import shutil
from functools import partial
from pathlib import Path

import numpy as np
import pandas as pd
import torchvision.transforms.functional as F
from joblib import Parallel, delayed
from PIL import Image
from tqdm.notebook import tqdm

# Config

In [2]:
# Root data directory. Set the DEOLDIFY_DATA_DIR environment variable to point
# the notebook at another location; without it the original path is used.
dir_data = Path(os.environ.get('DEOLDIFY_DATA_DIR', '/home/blaberj/justinblaber/deoldify/data'))
# Folder holding the images that every cell below reads and rewrites.
dir_imgs = dir_data/'imgs'

# Utility

In [3]:
def parallel(f, it, n_jobs=6):
    """Call `f` on every item of `it` using `n_jobs` joblib workers.

    `it` is wrapped in a tqdm progress bar, so the bar advances as items are
    pulled from the iterable. The values returned by `f` are discarded and
    this function returns nothing.
    """
    runner = Parallel(n_jobs=n_jobs)
    runner(delayed(f)(item) for item in tqdm(it))

# Clean

Delete images which are corrupt

In [4]:
def _validate_image(file_img):
    try:
        Image.open(file_img)
    except:
        print(f'{file_img} is corrupt; removing...')
        file_img.unlink()        

In [5]:
# Validate every path found recursively under dir_imgs.
parallel(f=_validate_image, it=dir_imgs.rglob('*'))

HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))




# Make sure all images are color

In [6]:
def _process_img(f):
    img = np.array(Image.open(f))
    if len(img.shape) == 2 or \
       np.array_equal(img[:,:,0], img[:,:,1]) or \
       np.array_equal(img[:,:,0], img[:,:,2]) or \
       np.array_equal(img[:,:,1], img[:,:,2]):
        print(f)
        f.unlink()

In [7]:
# Run the color check on every path found recursively under dir_imgs.
# NOTE: because the search is recursive it also reaches the colr/ and gray/
# subfolders once they have been created further down; run this cell before
# that step.
parallel(f=_process_img, it=dir_imgs.rglob('*'))

HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))




# Pre-resize and crop images

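Resize while maintaining the aspect ratio, center-crop to a square of the target size, then save over the original file (the split into separate folders happens in the next section).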

In [8]:
def _img_pipeline(file_img, sz):
    img = Image.open(file_img)
    img = F.resize(img, sz)
    img = F.center_crop(img, sz)
    img.save(file_img)

In [9]:
def _parallel_img_pipeline(dir_imgs, sz):
    """Run `_img_pipeline` with size `sz` on every path found recursively under `dir_imgs`."""
    resize_and_crop = partial(_img_pipeline, sz=sz)
    all_paths = dir_imgs.rglob('*')
    parallel(resize_and_crop, all_paths)

In [11]:
# Resize and center-crop every image under dir_imgs to size 224; the files
# are overwritten in place.
_parallel_img_pipeline(dir_imgs=dir_imgs, sz=224)

HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))




# Make grayscale images

In [4]:
# Collect the .jpg files sitting directly in dir_imgs (non-recursive).
files_img = [*dir_imgs.glob('*.jpg')]

In [5]:
# Destination folders: color originals and their grayscale copies.
dir_colr = dir_imgs.joinpath('colr')
dir_gray = dir_imgs.joinpath('gray')

In [10]:
# Create both output folders; exist_ok=True means an already-existing folder
# is not an error, so this cell can be re-run.
dir_colr.mkdir(exist_ok=True)
dir_gray.mkdir(exist_ok=True)

In [9]:
# Move every collected .jpg into colr/; as_posix() hands shutil.move the
# source as a plain string path.
parallel(f=lambda file_img: shutil.move(file_img.as_posix(), dir_colr), it=files_img)

HBox(children=(FloatProgress(value=0.0, max=119907.0), HTML(value='')))




In [18]:
# For each .jpg in colr/, write an 'L'-mode (single-channel) copy with the
# same file name into gray/.
parallel(
    f=lambda file_img: Image.open(file_img).convert('L').save(dir_gray/file_img.name),
    it=dir_colr.glob('*.jpg'),
)

HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))


