In [1]:
import requests

In [2]:
import numpy as np

In [5]:
from PIL import Image

In [3]:
import cv2

In [4]:
import os

In [6]:
import matplotlib.pyplot as plt

In [7]:
# Project folder, relative to the directory the notebook kernel was started in.
PROJECT_DIR = os.path.join('Documents', 'Projects', 'ImageRec')

# The path is relative, so running this cell a second time in the same kernel
# would look for Documents/Projects/ImageRec *inside* the project folder and
# fail. Only change directory when we are not already there.
if not os.getcwd().endswith(os.sep + PROJECT_DIR):
    os.chdir(PROJECT_DIR)

In [68]:
def img_save(query_term, offset):
    '''
    Search Bing Images for ``query_term`` and save every result to disk.

    Images are written to a folder named after the query inside the current
    working directory (created if it does not exist yet) as
    ``<query_term>_<offset>_<num>.png``, where ``num`` is the position of the
    image in the result page. The ``.png`` extension is used whatever format
    the server actually returned.

    The API key is read from the ``bing_search_api_key`` environment variable.

    Parameters
    ----------
    query_term : search text; also used as folder name and file-name prefix
    offset : sent as the ``offset`` query parameter and embedded in file names

    Raises
    ------
    requests.RequestException
        If the search request itself fails (network problem or HTTP error
        status). Failures on individual images are printed and skipped.
    '''

    API_KEY = os.getenv('bing_search_api_key')
    URL = "https://api.cognitive.microsoft.com/bing/v5.0/images/search"
    # The two halves must be separated by a space to form a valid User-Agent
    # ("... like Gecko) Chrome/..."); plain concatenation glued them together.
    USER_AGENT = ('Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) '
                  'Chrome/80.0.3987.87 Safari/537.36')
    TIMEOUT = 30

    # One folder per query term, created on demand
    query_name = str(query_term)
    path = os.path.join(os.getcwd(), query_name)
    os.makedirs(path, exist_ok=True)

    headers = {"Ocp-Apim-Subscription-Key": API_KEY}
    params = {"q": query_term,
              "count": 150,
              "offset": offset}

    # Search Bing for images; fail loudly on an API error (bad key, quota, ...)
    # instead of hitting a KeyError on the error payload further down
    search = requests.get(URL, headers=headers, params=params, timeout=TIMEOUT)
    search.raise_for_status()
    results = search.json()

    # Save all of the resulting images from this page
    for num, value in enumerate(results.get('value', [])):

        # The download itself is inside the try block so that one dead host or
        # timeout only skips that image rather than aborting the whole page
        try:
            image = requests.get(value["contentUrl"], timeout=TIMEOUT,
                                 headers={'User-Agent': USER_AGENT})
            image.raise_for_status()
        except requests.HTTPError:
            print(str(num) + ' NOT FOUND')
            continue
        except requests.RequestException as err:
            print(str(num) + ' FAILED: ' + str(err))
            continue

        file_name = '{}_{}_{}.png'.format(query_name, offset, num)
        with open(os.path.join(path, file_name), 'wb') as file:
            file.write(image.content)

In [10]:
def img_format(folder, size=100):
    '''
    Read every image in ``folder``, pad it to a square, resize it and save it
    under the same file name in ``folder + '_formatted'``.

    Each image is converted to 3-channel RGB first, so greyscale, palette,
    CMYK and RGBA inputs all produce the same output layout (any alpha channel
    is dropped). The short side is zero-padded (black) evenly on both ends
    before resizing, so the aspect ratio of the content is preserved.

    Entries that cannot be opened as an image (corrupt downloads,
    sub-directories, ...) are reported with ``print`` and skipped.

    Parameters
    ----------
    folder : path of the folder holding the raw images
    size : edge length in pixels of the square output image (default 100)
    '''

    out_dir = folder + '_formatted'
    # Create the destination up front so the first save cannot fail on a missing folder
    os.makedirs(out_dir, exist_ok=True)

    # iterate through each file in the specified folder
    for file in os.listdir(folder):

        # Read in the image; the context manager closes the file handle that
        # Image.open would otherwise keep open
        try:
            with Image.open(os.path.join(folder, file)) as raw:
                # Force a (height, width, 3) array: 2-D greyscale/palette arrays
                # would not match the three-axis padding spec below
                img = np.array(raw.convert('RGB'))
        except OSError:
            print(file + ' SKIPPED')
            continue

        # Identify which side needs to be padded and by how much, to make the image square
        height, width = img.shape[:2]
        short = 0 if height <= width else 1
        gap = abs(width - height)

        # Split the padding across both ends of the short axis; an odd gap puts
        # the extra pixel at the start (ceil before, floor after)
        pad_width = [(0, 0), (0, 0), (0, 0)]
        pad_width[short] = ((gap + 1) // 2, gap // 2)

        img = np.pad(img, pad_width=pad_width, mode='constant')

        # Resize square image to size x size
        img = cv2.resize(img, (size, size))

        # TODO: We may not need this
        # Save image to second folder
        Image.fromarray(img).save(os.path.join(out_dir, file))