### Download images using GoogleImagesSearch API

- Install google_images_search using the following command in the terminal:
<br> __pip install google_images_search__ <br><br>


- Visit https://console.developers.google.com and cretae a project. <br>
- Visit https://console.developers.google.com/apis/library/customsearch.googleapis.com and enable "Custom Search API" for your project.<br>
- Visit https://console.developers.google.com/apis/credentials and generate API key credentials for your project.<br>
- Visit https://cse.google.com/cse/all and in the web form where you create/edit your custom search engine enable "Image search" option and for "Sites to search" option select "Search the entire web but emphasize included sites".<br><br>

After setting up your Google developers account and project you should have been provided with developers API key and project CX.

In [9]:
## Import required packages
from google_images_search import GoogleImagesSearch
import os

## Define the list of keywords you want to download images for
keyword_list = ['data mapping', 'data pipeline']

for keyword in keyword_list:
    print('at keyword: ', keyword)
    
    # you can provide API key and CX using arguments,
    # or you can set environment variables: GCS_DEVELOPER_KEY, GCS_CX
    gis = GoogleImagesSearch(os.environ['AIzaSyDos2A5eM8PYBtvaPKjMXteP-nimHnkiME'], os.environ['82b7ab8bf905d409c'])

    # define search params:
    _search_params = {
        'q': keyword, ## Keywords
        'num': 20,
        'safe': 'high',
        'fileType': 'jpg|gif|png',
        'imgSize': 'HUGE',
        'rights': 'cc_publicdomain|cc_attribute|cc_sharealike|cc_noncommercial|cc_nonderived'
    }
     
    #print(_search_params)
    path = '/Users/rmakhija/Documents/images' ### Change the path to where you want your images to be downloaded

    # Check whether the specified path exists or not
    isExist = os.path.exists(path)

    if not isExist:

      # Create a new directory because it does not exist 
        os.makedirs(path)
        print("The new directory is created!")


    # Check whether the specified path with the keyword folder exists or not
    isExist = os.path.exists(path + '/' + _search_params['q'])

    if not isExist:
        # Create a new directory because it does not exist 
        os.makedirs(path + '/' + _search_params['q'])

    # this will search, download and resize:
    try:
        gis.search(search_params=_search_params, path_to_dir= path + '/' + _search_params['q'])
    except:
        continue

at keyword:  data mapping
at keyword:  data pipeline


### Downloading images using SERP API

- Install requests using the following command in the terminal:
<br> __pip install requests__ <br><br>
- Install serpapi using the following command in the terminal:
<br> __pip install serpapi__ <br><br>

- Go to https://serpapi.com/dashboard and set upo your user account and credentials. You will have your SerpAPI key which you can use as an argument or store it as an environment variable.

In [None]:
## Import required packages
import requests
import urllib.request
import os
from serpapi import GoogleSearch

## Define the list of keywords you want to download images for
keyword_list = ['workflow', 'data mapping']


## Looping over the keywords
for keyword in keyword_list:
    print('at keyword: ', keyword)
    
    params = {
      "q": keyword,  ## query (one keyword at a time)
      "tbm": "isch",  ## image search engine
      "ijn": "0",  ##Page number to get (must be greater than or equal to 0)
      "api_key": os.getenv("SERPAPI_KEY")  ## Store your SERP API key as an environment variable
    }

    search = GoogleSearch(params)
    results = search.get_dict()
    images_results = results['images_results']

    path = '/Users/rmakhija/Documents/images' ### Change the path to where you want your images to be downloaded

    # Check whether the specified path exists or not
    isExist = os.path.exists(path)

    if not isExist:

      # Create a new directory because it does not exist 
        os.makedirs(path)
        print("The new directory is created!")


    # Check whether the specified path with the keyword folder exists or not
    isExist = os.path.exists(path + '/' + params['q'])

    if not isExist:
        # Create a new directory because it does not exist 
        os.makedirs(path + '/' + params['q'])

    ## Download all images at specifies page in the specified path
    for image in images_results:
        try:
            urllib.request.urlretrieve(image['original'], path + '/' + params['q'] + '/' + params['q'] + '_' + str(image['position']) + '.png')
        except:
            continue

### Download images using simple-image-download by PyPI

In [None]:
### https://pypi.org/project/simple-image-download/

## Creates a folder named download in the current working directory

from simple_image_download import simple_image_download as simp 
response = simp.simple_image_download
lst=['Data mapping']
for rep in lst:
    response().download(rep, 40)

### Download images using selenium and chromedriver

- Install Selenium using the following command in the terminal:
<br> __pip install selenium__ <br><br>
- Install Webdriver manager using the following command in the terminal:
<br> __pip install webdriver_manager__ <br>


In [19]:

## Import required packages
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.common.keys import Keys
import time 

## Define the list of keywords you want to download images for
keyword_list = ['workflow', 'data mapping']

for keyword in keyword_list:
    print('at keyword:', keyword)

    ## Set up the driver
    driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()))
    driver.get('https://www.google.ca/imghp?hl=en&tab=ri&authuser=0&ogbl')

    box = driver.find_element_by_xpath('//*[@id="sbtc"]/div/div[2]/input')
    box.send_keys(keyword) ## keyword from the keyword_list
    box.send_keys(Keys.ENTER)

    #Will keep scrolling down the webpage until it cannot scroll no more
    last_height = driver.execute_script('return document.body.scrollHeight')
    while True:
        driver.execute_script('window.scrollTo(0,document.body.scrollHeight)')
        time.sleep(2)
        new_height = driver.execute_script('return document.body.scrollHeight')
        try:
            driver.find_element_by_xpath('//*[@id="islmp"]/div/div/div/div/div[5]/input').click()
            time.sleep(2)
        except:
            pass
        if new_height == last_height:
            break
        last_height = new_height

    for i in range(1, 40):
        try:
            driver.find_element_by_xpath('//*[@id="islrg"]/div[1]/div[1]/a[1]/div[1]/img')
        except:
            pass
    path = '/Users/rmakhija/Documents/images' ### Change the path to where you want your images to be downloaded

    # Check whether the specified path exists or not
    isExist = os.path.exists(path)

    if not isExist:

      # Create a new directory because it does not exist 
        os.makedirs(path)
        print("The new directory is created!")


    # Check whether the specified path with the keyword folder exists or not
    isExist = os.path.exists(path + '/' + keyword)

    if not isExist:
        # Create a new directory because it does not exist 
        os.makedirs(path + '/' + keyword)


    for i in range(1, 40):
        try:
            driver.find_element_by_xpath('//*[@id="islrg"]/div[1]/div['+str(i)+']/a[1]/div[1]/img').screenshot(
                path + '/' + keyword + '/' + keyword +  '_'  + str(i) + '.png')
        except:
            pass



Current google-chrome version is 99.0.4844
Get LATEST chromedriver version for 99.0.4844 google-chrome


at keyword: workflow


Driver [/Users/rmakhija/.wdm/drivers/chromedriver/mac64/99.0.4844.51/chromedriver] found in cache


Current google-chrome version is 99.0.4844
Get LATEST chromedriver version for 99.0.4844 google-chrome
Driver [/Users/rmakhija/.wdm/drivers/chromedriver/mac64/99.0.4844.51/chromedriver] found in cache


at keyword: data mapping
