In [1]:
import os
import numpy as np
import cv2 
import matplotlib.pyplot as plt
import matplotlib.image as img
import re

In [6]:
# Provenance folder is always ./downloads if image_scraper.py is used
provenance_path = r"./downloads"
destination_path = r"./preprocessedImages/"
new_size = 256

In [3]:
def load_images_paths(path):
    '''Load the path to each image from path folder in an array'''
    images_array = [os.path.join(path,file) for file in os.listdir(path)]
    return images_array

def display_image(image):
    '''Display one image'''
    image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) # cv2 is BGR and pyplot RGB so need to convert to display
    plt.imshow(image_rgb)
    plt.xticks([]), plt.yticks([])
    plt.show()
    
def display_image_RGB(image):
    '''Display one image that's already in RGB'''
    plt.imshow(image)
    plt.xticks([]), plt.yticks([])
    plt.show()
    print(image.shape)
    
def load_image(path):
    '''Load one image using its path'''
    image = cv2.imread(path)
    return image

def resize_image(image, size):
    '''Resize 1:1 image to size by size'''
    dimensions = (size,size)
    image_resized = cv2.resize(image,dimensions)
    return image_resized

def crop_image(image):
    '''Crop image to 1:1 format'''
    dimensions = image.shape
    if dimensions[0] > dimensions[1]:
        l = (dimensions[0]-dimensions[1])//2
        cropped_image = image[l:dimensions[1]+l,:,:].copy()
    else:
        l = (dimensions[1]-dimensions[0])//2
        cropped_image = image[:,l:dimensions[0]+l,:].copy()
    return cropped_image
    
def preprocess_and_save_images(prov_folder, dest_folder):
    '''Preprocess and save all images from prov_folder and save them in dest_folder'''
    # First load all the images' paths
    images_paths = [file for file in os.listdir(prov_folder)]
    # And create the destination folder if it does not exist
    if not os.path.exists(dest_folder):
        os.makedirs(dest_folder)
    # Then loop through each one of them
    for image_path in images_paths:
        try:
            # Load the image
            temp_image = load_image(os.path.join(prov_folder, image_path))
            # Crop it
            temp_image = crop_image(temp_image)
            # Resize it
            temp_image = resize_image(temp_image, new_size)
            # And save it in the destination folder as png
            file_name = re.search(r"(.*)(?=\.)", image_path)
            new_path = os.path.join(dest_folder,file_name.group(0) + '.png')
            cv2.imwrite(new_path,temp_image)
        except:
            print("Image " + file_name.group(0) + " could not be loaded/processed")
    print("Saved all preprocessed images to desired folder")
        

In [7]:
# Chose all subfolders from provenance_path that you want to preprocess images from
subfolders = ['morbier']
for root, dirs, files in os.walk(provenance_path):
    for name in dirs:
        subfolders.append(name)
        
for subfolder in subfolders:
    preprocess_and_save_images(os.path.join(provenance_path, subfolder), os.path.join(destination_path, subfolder))

Saved all preprocessed images to desired folder
Saved all preprocessed images to desired folder
