In [2]:
import numpy as np

In [1]:
import requests
import os

In [3]:
import cv2
from PIL import Image

In [7]:
# Switch the working directory to the project folder. The path is relative,
# so this only succeeds when the kernel's current directory is the parent of
# 'Documents'; re-running this cell after it has already succeeded will fail
# unless a nested 'Documents/Projects/ImageRec' also exists.
os.chdir('Documents/Projects/ImageRec')

In [6]:
import matplotlib.pyplot as plt

In [68]:
def img_save(query_term, offset):
    '''
    Search Bing Images for a query and save the downloadable results to disk.

    One page of up to 150 results, starting at `offset`, is requested from the
    Bing Image Search v5.0 endpoint using the key held in the
    `bing_search_api_key` environment variable. Each result's `contentUrl` is
    downloaded and its raw bytes are written to
    `<cwd>/<query_term>/<query_term>_<offset>_<num>.png`, where `num` is the
    result's position on the page. The `.png` extension is applied regardless
    of the real image format.

    The output folder is created if it does not already exist.

    Parameters
    ----------
    query_term : str
        Search phrase; also used as the folder name and file-name prefix.
    offset : int
        Number of results to skip before the first one returned.

    Raises
    ------
    requests.RequestException
        If the search request itself fails or returns an error status.
        Failures on individual images are reported and skipped instead.
    '''

    API_KEY = os.getenv('bing_search_api_key')
    URL = "https://api.cognitive.microsoft.com/bing/v5.0/images/search"
    # Adjacent literals are joined by the parser; note the space before
    # 'Chrome' so the product tokens stay separated.
    USER_AGENT = ('Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) '
                  'Chrome/80.0.3987.87 Safari/537.36')
    TIMEOUT = 30

    # Images go into a folder named after the query term, inside the cwd
    folder = os.path.join(os.getcwd(), str(query_term))
    os.makedirs(folder, exist_ok=True)

    headers = {"Ocp-Apim-Subscription-Key": API_KEY}
    params = {"q": query_term,
              "count": 150,
              "offset": offset}

    # Search Bing for images; fail loudly if the API rejects the request
    # rather than hitting a KeyError on the error payload further down.
    search = requests.get(URL, headers=headers, params=params, timeout=TIMEOUT)
    search.raise_for_status()
    results = search.json()

    # Save each image on the page. `num` advances even when a download fails,
    # so file names always reflect the result's position on the page.
    for num, value in enumerate(results.get('value', [])):

        # The download itself is inside the try: a timeout or connection error
        # on one URL must not abort the rest of the page.
        try:
            image = requests.get(value["contentUrl"], timeout=TIMEOUT,
                                 headers={'User-Agent': USER_AGENT})
            image.raise_for_status()
        except requests.RequestException:
            print(str(num) + ' NOT FOUND')
            continue

        file_name = str(query_term) + '_' + str(offset) + '_' + str(num) + '.png'
        with open(os.path.join(folder, file_name), 'wb') as file:
            file.write(image.content)

In [298]:
def img_format(folder):
    '''
    Load the colour images in `folder` into one array of 100x100 RGB images.

    Each file is opened with PIL and converted to a numpy array. Images that
    decode to a 2-D array (e.g. greyscale) are discarded, 4-channel images are
    converted with cv2's RGBA->RGB conversion, the shorter spatial side is
    zero-padded (split as evenly as possible between both ends) to make the
    image square, and the result is resized to 100x100.

    Entries that cannot be opened as an image are reported and skipped, as are
    3-D arrays whose channel count is neither 3 nor 4.

    Parameters
    ----------
    folder : str
        Directory to read. Its name is also used as the class label.

    Returns
    -------
    x_data : numpy.ndarray
        Array of shape (N, 100, 100, 3), or an empty 1-D array when no image
        was kept.
    Y_data : numpy.ndarray
        1-D array holding `folder` once per kept image (empty when N == 0).
    '''

    images = []
    labels = []

    # Iterate through each entry in the specified folder
    for file in os.listdir(folder):

        # Read the image; the context manager releases the file handle, and
        # converting inside it forces the pixel data to load before closing.
        try:
            with Image.open(os.path.join(folder, file)) as handle:
                img = np.array(handle)
        except OSError:
            # Covers non-image files, truncated downloads and subdirectories
            print(file + ' SKIPPED')
            continue

        # Discard greyscale (2-D) images, and any channel layout that cannot
        # be turned into 3-channel RGB below
        if img.ndim != 3 or img.shape[2] not in (3, 4):
            continue

        # If the image is 4 channel (RGBA), convert to 3 channel (RGB)
        if img.shape[2] == 4:
            img = cv2.cvtColor(img, cv2.COLOR_RGBA2RGB)

        # Pad the shorter of height/width so the image becomes square; an odd
        # difference puts the extra row/column at the leading edge
        short = np.argmin(img.shape[:2])
        gap = abs(img.shape[1] - img.shape[0])
        width = [[0, 0], [0, 0], [0, 0]]
        width[short] = [int(np.ceil(gap / 2)), int(np.floor(gap / 2))]
        img = np.pad(img, pad_width=width)

        # Resize square image to 100x100
        images.append(cv2.resize(img, (100, 100)))

        # The class label will be same as the name of the folder
        labels.append(folder)

    # Stack once at the end instead of re-copying the full array per image
    x_data = np.stack(images, axis=0) if images else np.array([])
    Y_data = np.array(labels)

    return x_data, Y_data

In [300]:
# Format every image in the 'cat' folder (relative to the working directory);
# Y_data receives the folder name 'cat' as the label for each kept image.
x_data, Y_data = img_format('cat')

cat_0_0.png
(1, 100, 100, 3)
cat_0_1.png
(2, 100, 100, 3)
cat_0_10.png
(3, 100, 100, 3)
cat_0_100.png
(4, 100, 100, 3)
cat_0_101.png
(5, 100, 100, 3)
cat_0_102.png
(6, 100, 100, 3)
cat_0_103.png
(7, 100, 100, 3)
cat_0_104.png
(8, 100, 100, 3)
cat_0_105.png
(9, 100, 100, 3)
cat_0_106.png
(10, 100, 100, 3)
cat_0_107.png
(11, 100, 100, 3)
cat_0_108.png
(12, 100, 100, 3)
cat_0_109.png
(13, 100, 100, 3)
cat_0_11.png
(14, 100, 100, 3)
cat_0_111.png
(15, 100, 100, 3)
cat_0_112.png
(16, 100, 100, 3)
cat_0_113.png
(17, 100, 100, 3)
cat_0_114.png
(18, 100, 100, 3)
cat_0_116.png
(19, 100, 100, 3)
cat_0_117.png
(20, 100, 100, 3)
cat_0_118.png
(21, 100, 100, 3)
cat_0_119.png
(22, 100, 100, 3)
cat_0_12.png
(23, 100, 100, 3)
cat_0_120.png
(24, 100, 100, 3)
cat_0_121.png
(25, 100, 100, 3)
cat_0_122.png
(26, 100, 100, 3)
cat_0_123.png
(27, 100, 100, 3)
cat_0_124.png
(28, 100, 100, 3)
cat_0_125.png
(29, 100, 100, 3)
cat_0_126.png
(30, 100, 100, 3)
cat_0_127.png
(31, 100, 100, 3)
cat_0_128.png
(32, 100, 