## Data loading and saving

In [11]:
from psd_tools import PSDImage
from PIL import Image
from dotenv import load_dotenv

import sys
sys.path.append('../src')

import numpy as np
import io
import glob
import utils.constant as constant
import os

%matplotlib inline

### Set input and output directories

In [3]:
# Pull INPUT_DIR / IMAGE_DIR / MASK_DIR from a local .env file (if present)
# into the process environment.
load_dotenv()

# NOTE: despite its name, INPUT_DIR is passed straight to glob.glob, so it is
# used as a glob *pattern* for the Photoshop files, not as a plain directory.
input_dir = os.getenv('INPUT_DIR')
im_dir = os.getenv('IMAGE_DIR')   # output directory for image arrays
gt_dir = os.getenv('MASK_DIR')    # output directory for ground-truth mask arrays

# Fail early with a readable message; glob.glob(None) would otherwise raise an
# opaque TypeError.
if not input_dir:
    raise RuntimeError(
        "INPUT_DIR is not set - define it in the environment or in a .env file"
    )

# recursive=True only changes matching when the pattern contains '**'.
# Sorting makes the processing order (and the progress output) reproducible,
# since glob returns paths in arbitrary, OS-dependent order.
filenames = sorted(glob.glob(input_dir, recursive=True))
num_images = len(filenames)

### Read Photoshop files and save as NumPy arrays

In [None]:
# Convert every Photoshop file into a pair of .npy files: the ORIGINAL layer
# as the image array and the MASK layer's alpha channel, padded to the image
# size, as the ground-truth array. Files lacking a usable image or mask are
# skipped.

# Create output directories (exist_ok replaces the separate existence check)
os.makedirs(im_dir, exist_ok=True)
os.makedirs(gt_dir, exist_ok=True)

num_images = len(filenames)
# enumerate keeps the progress counter correct even when a file is skipped
for i, filename in enumerate(filenames):
    im = None        # pixel array of the ORIGINAL layer
    mask = None      # alpha channel / 255 of the last readable MASK layer
    mask_top = 0     # layer.offset[1] of that MASK layer
    gt = None        # mask padded to the image's height and width

    # Print status
    if i % 5 == 0:
        print(i, '/', num_images, 'images processed')

    # Open photoshop image; skip files that cannot be parsed.
    # `except Exception` (not a bare except) so Ctrl-C still interrupts the run.
    try:
        psd = PSDImage.open(filename)
    except Exception as err:
        print(i, 'skipping unreadable file', filename, '-', err)
        continue

    # Output name: base name up to its first '.', independent of the path separator
    im_name = os.path.basename(filename).split('.')[0] + '.npy'

    # Collect the layers of interest first. The ground truth is assembled after
    # the loop so that it always uses THIS file's image size, whatever order the
    # layers are stored in.
    for layer in psd:

        # Convert image layer to a NumPy array
        if layer.name == constant.ORIGINAL:
            im = np.array(layer.topil())

        # Convert mask layer to a NumPy array
        elif layer.name == constant.MASK:
            try:
                # Select only last channel and normalize
                mask = np.array(layer.topil())[:, :, 3] / 255.
            except Exception:
                # Layer without pixels or without a 4th channel: ignore it
                continue
            mask_top = layer.offset[1]

    # Need both a valid image (at least 2-D) and a mask to build a training pair
    if im is None or im.ndim < 2 or mask is None:
        continue

    # Construct mask that matches image dimensions - input mask is clipped
    # vertically and needs padding. It is assumed to span the full image width.
    height, width = im.shape[:2]
    mask_height, mask_width = mask.shape
    fits = (
        mask_top >= 0                            # negative offsets would wrap around when slicing
        and mask_top + mask_height <= height
        and mask_width == width
    )
    if not fits:
        print(i, 'non-matching dimensions')
        continue

    # Adjust mask location by # of offset pixels in y direction
    gt = np.zeros((height, width))
    gt[mask_top:mask_top + mask_height, :] = mask

    # Serialize image and mask as NumPy arrays under the same file name
    np.save(os.path.join(im_dir, im_name), im)
    np.save(os.path.join(gt_dir, im_name), gt)

### Read Photoshop files and save as compressed JPEGs - for classification

In [None]:
import time

# Export the ORIGINAL layer of every Photoshop file as a downsized JPEG and
# report the per-file processing time.

jpg_dir = os.getenv('JPG_DIR')

# Upper bound (in pixels) for both width and height of the saved JPEG
MAX_JPG_SIDE = 4000

# Create output directory (exist_ok replaces the separate existence check)
os.makedirs(jpg_dir, exist_ok=True)

num_images = len(filenames)
# enumerate keeps the progress counter correct even when a file is skipped
for i, filename in enumerate(filenames):
    start_time = time.time()

    # Print status
    if i % 5 == 0:
        print(i, '/', num_images, 'images processed')

    # Open photoshop image; skip files that cannot be parsed.
    # `except Exception` (not a bare except) so Ctrl-C still interrupts the run.
    try:
        psd = PSDImage.open(filename)
    except Exception as err:
        print(i, 'skipping unreadable file', filename, '-', err)
        continue

    # Output name: base name up to its first '.', independent of the path separator
    im_name = os.path.basename(filename).split('.')[0] + '.jpg'

    # Iterate through individual layers
    for layer in psd:

        # Convert image layer to a PIL Image
        if layer.name == constant.ORIGINAL:
            im = layer.topil()
            if im is None:
                # NOTE(review): topil() is expected to return None for layers
                # without pixels - confirm against the psd-tools docs
                continue

            # JPEG cannot store an alpha channel; drop it before saving
            if im.mode not in ('RGB', 'L', 'CMYK'):
                im = im.convert('RGB')

            # thumbnail() resizes IN PLACE, returns None and expects a
            # (width, height) tuple; it preserves the aspect ratio and never
            # enlarges the image.
            im.thumbnail((MAX_JPG_SIDE, MAX_JPG_SIDE))
            im.save(os.path.join(jpg_dir, im_name), format='JPEG')

    print("--- %s seconds ---" % (time.time() - start_time))


0 / 498 images processed
--- 4.378073692321777 seconds ---
--- 12.12387490272522 seconds ---
--- 7.599055528640747 seconds ---
--- 5.7538042068481445 seconds ---
--- 10.769662380218506 seconds ---
5 / 498 images processed
--- 10.764729976654053 seconds ---
--- 10.206562042236328 seconds ---
--- 8.65841269493103 seconds ---
--- 10.089845180511475 seconds ---
--- 9.410696268081665 seconds ---
10 / 498 images processed
--- 4.273073196411133 seconds ---
--- 4.223832130432129 seconds ---
--- 4.082217216491699 seconds ---
--- 4.368122339248657 seconds ---
--- 4.108163595199585 seconds ---
15 / 498 images processed
--- 4.156564235687256 seconds ---
--- 4.138370990753174 seconds ---
--- 4.081676721572876 seconds ---
--- 3.910816192626953 seconds ---
--- 4.083730697631836 seconds ---
20 / 498 images processed
--- 4.148123741149902 seconds ---
--- 3.975846290588379 seconds ---
--- 4.16506028175354 seconds ---
--- 4.3236308097839355 seconds ---
--- 4.154120683670044 seconds ---
25 / 498 images pr

--- 3.9759466648101807 seconds ---
--- 3.958531618118286 seconds ---
210 / 498 images processed
--- 4.268280982971191 seconds ---
--- 4.095633506774902 seconds ---
--- 4.058404922485352 seconds ---
--- 4.119487524032593 seconds ---
--- 3.9696779251098633 seconds ---
215 / 498 images processed
--- 3.9558000564575195 seconds ---
--- 4.102509498596191 seconds ---
--- 4.002611398696899 seconds ---
--- 3.9102187156677246 seconds ---
--- 3.9850449562072754 seconds ---
220 / 498 images processed
--- 4.005164861679077 seconds ---
--- 3.9496419429779053 seconds ---
--- 3.9638733863830566 seconds ---
--- 4.0265562534332275 seconds ---
--- 4.067943334579468 seconds ---
225 / 498 images processed
--- 4.147687673568726 seconds ---
--- 4.042124509811401 seconds ---
--- 4.011808156967163 seconds ---
--- 4.039259433746338 seconds ---
--- 4.1917643547058105 seconds ---
230 / 498 images processed
--- 4.092013359069824 seconds ---
--- 4.0440754890441895 seconds ---
--- 4.177356004714966 seconds ---
--- 3

Invalid signature (b'\x00\x00\x00\x00')


--- 3.4009640216827393 seconds ---
--- 4.990818500518799 seconds ---
--- 4.6767964363098145 seconds ---
--- 4.750550031661987 seconds ---
360 / 498 images processed
--- 4.738431453704834 seconds ---
--- 4.797611474990845 seconds ---
--- 5.693897247314453 seconds ---
--- 4.974083423614502 seconds ---
--- 4.952880144119263 seconds ---
365 / 498 images processed
--- 4.990387439727783 seconds ---
--- 4.758399248123169 seconds ---
--- 4.826606035232544 seconds ---
--- 6.734043598175049 seconds ---
--- 4.903717279434204 seconds ---
370 / 498 images processed
--- 4.586228609085083 seconds ---
--- 5.054441928863525 seconds ---
--- 5.153325080871582 seconds ---
--- 6.302938461303711 seconds ---
--- 4.871473073959351 seconds ---
375 / 498 images processed
--- 3.9786665439605713 seconds ---
--- 4.647852182388306 seconds ---
--- 1.1217341423034668 seconds ---
--- 4.705688953399658 seconds ---
--- 4.751446962356567 seconds ---
380 / 498 images processed
--- 4.8450047969818115 seconds ---
--- 4.7026