# Pre-process images from an unsplash.com web page

As unsplash uses continuous scrolling, this code expects a locally saved HTML page containing the required images.

Dependencies:

* Python 3.x
* OpenCV v3.4


In [2]:
import os
import cv2
import numpy as np

* Image directories, etc.

In [3]:
!pwd

/Users/markstrefford/Development/Timelaps/Mask_RCNN/samples/building


In [22]:
orig_images_dir = '../../datasets/building/orig/train'
train_images_dir = '../../datasets/building/center-aligned/train'

* Image resizing

Start with a default size, I may change this later (also note that upscaling to a higher resolution will come later!!)

In [30]:
width = 1024
height = 1024
crop_pc = 0.8

In [31]:
def resize_image(image, height=height, width=width):
    resized = cv2.resize(image, (width, height), interpolation=cv2.INTER_AREA)
    # print('Resized {} to size {}'.format(image, resized.shape))
    return resized

In [32]:
def crop(image, crop_pc):
    cropped_images = []
    h, w, _ = image.shape
    h_crop, w_crop = int(h*crop_pc), int(w*crop_pc)
    center_y, center_x = int((h - h_crop) / 2), int((w - w_crop) / 2)
    mid_crop_y, mid_crop_x = int(center_y / 2), int(center_x / 2)
    # print('h={}, w={}\nh_crop={}, w_crop={}\ncenter_y={}, center_x={}\nmid_crop_y={}, mid_crop_x={}'.
    #     format(h, w, h_crop, w_crop, center_y, center_x, mid_crop_y, mid_crop_x))
    crops = [
        [0, h_crop, 0, w_crop],
        [h - h_crop, h, w - w_crop, w],
        [0, h_crop, w - w_crop, w],
        [h - h_crop, h, 0, w_crop],
        [center_y, center_y+h_crop, center_x, center_x+w_crop],
        [mid_crop_y, mid_crop_y + h_crop,       mid_crop_x, mid_crop_x + w_crop],    # Center left top
        [center_y + mid_crop_y, h - mid_crop_y, mid_crop_x, mid_crop_x + w_crop],    # Center left bottom
        [mid_crop_y, mid_crop_y + h_crop,       center_x + mid_crop_x, w - mid_crop_x],  # Center right top
        [center_y + mid_crop_y, h - mid_crop_y, center_x + mid_crop_x, w - mid_crop_x],  # Center right bottom 
        [0, h, 0, w]
    ]
    for crop in crops:
        # print('Cropping image (h,w)={}/{}, crop=[{}:{}, {}:{}]'.format(h,w,crop[0], crop[1], crop[2], crop[3]))
        cropped = image[crop[0]:crop[1], crop[2]:crop[3]]
        # print('Cropped image size = {}'.format(cropped.shape))
        resized = resize_image(cropped, height, width)
        cropped_images.append(resized)
        flipped = cv2.flip(resized, 1)
        cropped_images.append(flipped)
    return cropped_images
        

* Get a list of images and resize

In [33]:
os.listdir('../../datasets/building/orig/train')

['ph-b-9jNgv1ZyF_I-unsplash.jpg',
 'adrian-metasboc-g17iT6lhXhg-unsplash-copy.jpg',
 'adrien-olichon-hWGBHKm2ROA-unsplash copy 2.jpg',
 'andre-benz-kai3Tl9XPJY-unsplash copy.jpg',
 'cody-isern-1jKnA-T5Uw0-unsplash.jpg',
 'benjamin-massello-ZtaC_NsZhZo-unsplash.jpg',
 'alexandar-todov-yFIl--YMu2U-unsplash.jpg',
 'ross-sneddon-wJO-C6KWy_A-unsplash.jpg',
 'ian-dooley-GywsuABA3Is-unsplash.jpg',
 'alessio-furlan-GITzhvQiQnk-unsplash copy.jpg',
 'adrien-olichon-B3qarctPsDA-unsplash copy.jpg',
 'alex-motoc-c9XKM5QMEpE-unsplash.jpg',
 'abigail-lynn-uqxEcNHBUQU-unsplash.jpg',
 'adrien-olichon-B3qarctPsDA-unsplash.jpg',
 'olenka-kotyk-9TUkYXQKXec-unsplash.jpg',
 'banter-snaps-D_BSiY4yx80-unsplash copy.jpg',
 'gregory-dalleau-KT4dOfvtZSg-unsplash.jpg',
 'roman-lopez-kloWTIWdzy0-unsplash.jpg',
 'todd-quackenbush-USrZRcRS2Lw-unsplash.jpg',
 'dmitry-zilberstein-AKpZKydR25s-unsplash copy.jpg',
 'dan-gold-4qsW2Wq7nvI-unsplash.jpg',
 'farhan-azam-APgCPuWt1Y8-unsplash.jpg',
 'qin-bennie-E12sxEitjRI-unsp

In [34]:

image_files = os.listdir(orig_images_dir)

In [38]:
for file in image_files:
    print('Loading image {}'.format(file))
    if file != ".DS_Store":
        image = cv2.imread(os.path.join(orig_images_dir, file))
        # cropped = crop(image, crop_pc)
    #     for i, c in enumerate(cropped):
    #         out_file = os.path.join(train_images_dir, '{}_{}'.format(i, file))
    #         # print(out_file)                    
    #         cv2.imwrite(out_file, c)
        resized = resize_image(image, height, width)
        out_file = os.path.join(train_images_dir, file)
        cv2.imwrite(out_file, resized)
    

Loading image ph-b-9jNgv1ZyF_I-unsplash.jpg
Loading image adrian-metasboc-g17iT6lhXhg-unsplash-copy.jpg
Loading image adrien-olichon-hWGBHKm2ROA-unsplash copy 2.jpg
Loading image andre-benz-kai3Tl9XPJY-unsplash copy.jpg
Loading image cody-isern-1jKnA-T5Uw0-unsplash.jpg
Loading image benjamin-massello-ZtaC_NsZhZo-unsplash.jpg
Loading image alexandar-todov-yFIl--YMu2U-unsplash.jpg
Loading image ross-sneddon-wJO-C6KWy_A-unsplash.jpg
Loading image ian-dooley-GywsuABA3Is-unsplash.jpg
Loading image alessio-furlan-GITzhvQiQnk-unsplash copy.jpg
Loading image adrien-olichon-B3qarctPsDA-unsplash copy.jpg
Loading image alex-motoc-c9XKM5QMEpE-unsplash.jpg
Loading image abigail-lynn-uqxEcNHBUQU-unsplash.jpg
Loading image adrien-olichon-B3qarctPsDA-unsplash.jpg
Loading image olenka-kotyk-9TUkYXQKXec-unsplash.jpg
Loading image banter-snaps-D_BSiY4yx80-unsplash copy.jpg
Loading image gregory-dalleau-KT4dOfvtZSg-unsplash.jpg
Loading image roman-lopez-kloWTIWdzy0-unsplash.jpg
Loading image todd-quackenb

Loading image benjamin-suter-qXmVkooeuaQ-unsplash.jpg
Loading image dawid-zawila-FIKFv7lg_os-unsplash.jpg
Loading image eva-m-0jUUpxwhDkk-unsplash.jpg
Loading image brayden-law-NbQlbyXF5U0-unsplash.jpg
Loading image roman-kraft-bW-x0hJM6G4-unsplash.jpg
Loading image sergio-junior-ERLYHAkJj40-unsplash.jpg
Loading image benjamin-massello-ZtaC_NsZhZo-unsplash copy 2.jpg
Loading image orcun-ilbeyli-ocIZ01FAH8s-unsplash.jpg
Loading image markus-spiske-EECEsd1GzIg-unsplash.jpg
Loading image rawkkim-jQteagM9KEo-unsplash.jpg
Loading image joshua-chua-PryZwos8rCI-unsplash.jpg
Loading image adi-constantin-C8Z5DvtWQMw-unsplash-copy.jpg
Loading image free-to-use-sounds-0ctXaz0bnXU-unsplash.jpg
Loading image ann-fossa-w_B5NL5mkZs-unsplash.jpg
Loading image pontus-wellgraf-jc-nit5zzLE-unsplash.jpg
Loading image thitiphum-koonjantuek-p_O64eSRH64-unsplash.jpg
Loading image esther-driehaus-He-y7YR9OII-unsplash.jpg
Loading image heidi-kaden-20wbZZgbRP8-unsplash.jpg
Loading image jared-erondu-6Fq2U47SKtE

Loading image helena-lopes-LKmhv-yzmfE-unsplash.jpg
Loading image annie-spratt-TONsFaqY3FE-unsplash.jpg
Loading image javier-martinez-jc3aagEG3xk-unsplash.jpg
Loading image discovering-film-o9vrlWpwFis-unsplash.jpg
Loading image nachelle-nocom-Rv77p0__6SY-unsplash.jpg
Loading image filip-mroz-023T4jyCRqA-unsplash.jpg
Loading image ryan-kwok-KhFOw9qy_0E-unsplash.jpg
Loading image ross-gilmore-b_dMkXFOoEk-unsplash.jpg
Loading image rose-elena-qC2dLkUPJcE-unsplash.jpg
Loading image steven-skerritt-SS8fnlmFENI-unsplash.jpg
Loading image peter-boccia-EZYGziix_Pc-unsplash.jpg
Loading image matthew-henry-nOhUx3tiaQQ-unsplash.jpg
Loading image cody-isern-1jKnA-T5Uw0-unsplash copy.jpg
Loading image bogdana-uncu-hL1-zYZAfxM-unsplash copy.jpg
Loading image dawid-zawila-FIKFv7lg_os-unsplash copy.jpg
Loading image nico-benedickt-T6y2QE9IIfI-unsplash.jpg
Loading image adrien-olichon-W5HfE3FH5gM-unsplash.jpg
Loading image wellesley-yan-xUVeEIsMiQw-unsplash.jpg
Loading image chuttersnap-JH0wCegJsrQ-un

# Augment dataset

Mirror images to give 2x dataset

In [15]:
image_files = os.listdir(base_images_dir)

In [16]:
for file in image_files:
    img = cv2.imread(os.path.join(base_images_dir, file))
    img_flip = cv2.flip(img, 1)
    flipped_file = 'flipped_{}'.format(file)
    print('Writing flipped image to {}'.format(flipped_file))
    cv2.imwrite(os.path.join(base_images_dir, flipped_file), img_flip)

Writing flipped image to flipped_joshua-harvey-478746-unsplash copy.jpg
Writing flipped image to flipped_anders-jilden-24221-unsplash.jpg
Writing flipped image to flipped_aleks-dahlberg-270040-unsplash.jpg
Writing flipped image to flipped_lightscape-737133-unsplash.jpg
Writing flipped image to flipped_matthias-tillen-1443873-unsplash.jpg
Writing flipped image to flipped_khalil-benihoud-1067393-unsplash.jpg
Writing flipped image to flipped_micael-widell-520896-unsplash.jpg
Writing flipped image to flipped_simon-migaj-631016-unsplash.jpg
Writing flipped image to flipped_alexander-milo-428649-unsplash.jpg
Writing flipped image to flipped_alexander-milo-440023-unsplash.jpg
Writing flipped image to flipped_johannes-groll-253834-unsplash.jpg
Writing flipped image to flipped_jonatan-pie-620981-unsplash copy 2.jpg
Writing flipped image to flipped_dan-russon-511491-unsplash.jpg
Writing flipped image to flipped_matthias-tillen-1443866-unsplash.jpg
Writing flipped image to flipped_jonatan-pie-224

Writing flipped image to flipped_naveen-raj-dhanapal-1077470-unsplash.jpg
Writing flipped image to flipped_oldskool-photography-144777-unsplash.jpg
Writing flipped image to flipped_lightscape-737149-unsplash copy 2.jpg
Writing flipped image to flipped_david-becker-670925-unsplash.jpg
Writing flipped image to flipped_pascal-debrunner-585771-unsplash.jpg
Writing flipped image to flipped_scott-rock-49527-unsplash copy.jpg
Writing flipped image to flipped_kerensa-pickett-282272-unsplash.jpg
Writing flipped image to flipped_mark-autumns-1123858-unsplash.jpg
Writing flipped image to flipped_lorenzo-castagnone-404821-unsplash copy.jpg
Writing flipped image to flipped_chris-ried-664989-unsplash.jpg
Writing flipped image to flipped_oldskool-photography-183943-unsplash.jpg
Writing flipped image to flipped_cameron-worsley-223741-unsplash.jpg
Writing flipped image to flipped_matthias-tillen-1443894-unsplash.jpg
Writing flipped image to flipped_yang-xi-1193303-unsplash.jpg
Writing flipped image to 

Writing flipped image to flipped_josiah-ingels-1307365-unsplash copy 2.jpg
Writing flipped image to flipped_landon-arnold-674959-unsplash.jpg
Writing flipped image to flipped_jonathan-wheeler-737456-unsplash.jpg
Writing flipped image to flipped_kevin-lofthouse-1191108-unsplash.jpg
Writing flipped image to flipped_joshua-harvey-478746-unsplash.jpg
Writing flipped image to flipped_jonatan-pie-400904-unsplash.jpg
Writing flipped image to flipped_christoffer-engstrom-576870-unsplash.jpg
Writing flipped image to flipped_lai-man-nung-1226389-unsplash.jpg
Writing flipped image to flipped_kevin-healy-538808-unsplash.jpg
Writing flipped image to flipped_emily-hon-650064-unsplash.jpg
Writing flipped image to flipped_simon-sun-669781-unsplash copy.jpg
Writing flipped image to flipped_vincent-guth-136868-unsplash copy.jpg
Writing flipped image to flipped_jack-cain-347422-unsplash.jpg
Writing flipped image to flipped_jonatan-pie-1196629-unsplash.jpg
Writing flipped image to flipped_josiah-ingels-13