In [0]:
import argparse
import json
import itertools
import logging
import re
import os
from os import path
import uuid
import sys
from urllib.request import urlopen, Request
from bs4 import BeautifulSoup

In [0]:
REQUEST_HEADER = {
    'User-Agent': "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/43.0.2357.134 Safari/537.36"}
def get_query_url(query):
    # https://www.google.com/search?q=banana&source=lnms&tbm=isch
    return "https://www.google.co.in/search?q=%s&source=lnms&tbm=isch" % query

def get_soup(url, request_header):
    response = urlopen(Request(url, headers=request_header))
    return BeautifulSoup(response, 'html.parser')
def extract_images_from_response(soup):
    image_elements = soup.find_all("div", {"class": "rg_meta"})
    metadata_dicts = (json.loads(e.text) for e in image_elements)
    /print(metadata_dicts)
    link_type_records = ((d["ou"], d["ity"]) for d in metadata_dicts)
    return link_type_records

def extract_images(query, num_images):
  url = get_query_url(query)
  soup = get_soup(url,REQUEST_HEADER)
  link_type_records = extract_images_from_response(soup)
  return itertools.islice(link_type_records, num_images)


In [0]:
def configure_logging():
    logger = logging.getLogger()
    logger.setLevel(logging.DEBUG)
    handler = logging.StreamHandler()
    handler.setFormatter(
        logging.Formatter('[%(asctime)s %(levelname)s %(module)s]: %(message)s'))
    logger.addHandler(handler)
    Filehandler = logging.FileHandler("G:\log.txt") #Path to your LOG FILE.
    Filehandler.setFormatter(
        logging.Formatter('[%(asctime)s %(levelname)s %(module)s]: %(message)s'))
    logger.addHandler(Filehandler)
    
    return logger

logger = configure_logging()

def get_raw_image(url):
    req = Request(url, headers=REQUEST_HEADER)
    resp = urlopen(req)
    return resp.read()

def save_image(raw_image, image_type, save_directory):
    extension = image_type if image_type else 'jpg'
    file_name = uuid.uuid4().hex + "." + extension
    save_path = os.path.join(save_directory, file_name)
    with open(save_path, 'wb') as image_file:
        image_file.write(raw_image)

def download_images_to_dir(images, save_directory, num_images):
    for i, (url, image_type) in enumerate(images):
        try:
            logger.info("Making request (%d/%d): %s", i, num_images, url)
            raw_image = get_raw_image(url)
            save_image(raw_image, image_type, save_directory)
        except Exception as e:
            logger.exception(e)

In [0]:
def run(query, download_dir, num_images):
    query = '+'.join(query.split())
    print("Extracting image links")
    images = extract_images(query, num_images)
    print("Downloading images")
    print(images)
    download_images_to_dir(images, download_dir, num_images)
    print("Finished")

In [46]:
def main():
      searchText = 'banana' # Default search text
      numImages = 10  # Default search download images count
      if 'google.colab' in sys.modules:    
        from google.colab import drive
        drive.mount('/gdrive')
        download_dir = '/gdrive/My Drive/greatlakes/Capstone/POC/download/' + searchText
      else:
        download_dir = 'c:/greatlakes/Capstone/POC/download/' + searchText
      if not path.exists(download_dir):
        os.makedirs(download_dir)
      run(searchText, download_dir, numImages)

if __name__ == '__main__':
  main()

Drive already mounted at /gdrive; to attempt to forcibly remount, call drive.mount("/gdrive", force_remount=True).
Extracting image links


[2019-12-27 08:58:48,384 DEBUG charsetgroupprober]: utf-8  confidence = 0.99
[2019-12-27 08:58:48,384 DEBUG charsetgroupprober]: utf-8  confidence = 0.99
[2019-12-27 08:58:48,384 DEBUG charsetgroupprober]: utf-8  confidence = 0.99
[2019-12-27 08:58:48,389 DEBUG charsetgroupprober]: SHIFT_JIS Japanese confidence = 0.01
[2019-12-27 08:58:48,389 DEBUG charsetgroupprober]: SHIFT_JIS Japanese confidence = 0.01
[2019-12-27 08:58:48,389 DEBUG charsetgroupprober]: SHIFT_JIS Japanese confidence = 0.01
[2019-12-27 08:58:48,396 DEBUG charsetgroupprober]: EUC-JP Japanese confidence = 0.01
[2019-12-27 08:58:48,396 DEBUG charsetgroupprober]: EUC-JP Japanese confidence = 0.01
[2019-12-27 08:58:48,396 DEBUG charsetgroupprober]: EUC-JP Japanese confidence = 0.01
[2019-12-27 08:58:48,403 DEBUG charsetgroupprober]: GB2312 Chinese confidence = 0.01
[2019-12-27 08:58:48,403 DEBUG charsetgroupprober]: GB2312 Chinese confidence = 0.01
[2019-12-27 08:58:48,403 DEBUG charsetgroupprober]: GB2312 Chinese confide

<generator object extract_images_from_response.<locals>.<genexpr> at 0x7fa23b7b8410>
Downloading images
<itertools.islice object at 0x7fa2395320e8>


[2019-12-27 08:58:49,014 INFO <ipython-input-41-937eff8ee61c>]: Making request (2/10): https://thumbs-prod.si-cdn.com/_oO5E4sOE9Ep-qk_kuJ945_-qo4=/800x600/filters:no_upscale()/https://public-media.si-cdn.com/filer/d5/24/d5243019-e0fc-4b3c-8cdb-48e22f38bff2/istock-183380744.jpg
[2019-12-27 08:58:49,014 INFO <ipython-input-41-937eff8ee61c>]: Making request (2/10): https://thumbs-prod.si-cdn.com/_oO5E4sOE9Ep-qk_kuJ945_-qo4=/800x600/filters:no_upscale()/https://public-media.si-cdn.com/filer/d5/24/d5243019-e0fc-4b3c-8cdb-48e22f38bff2/istock-183380744.jpg
[2019-12-27 08:58:49,014 INFO <ipython-input-41-937eff8ee61c>]: Making request (2/10): https://thumbs-prod.si-cdn.com/_oO5E4sOE9Ep-qk_kuJ945_-qo4=/800x600/filters:no_upscale()/https://public-media.si-cdn.com/filer/d5/24/d5243019-e0fc-4b3c-8cdb-48e22f38bff2/istock-183380744.jpg
[2019-12-27 08:58:49,125 INFO <ipython-input-41-937eff8ee61c>]: Making request (3/10): https://i5.walmartimages.com/asr/209bb8a0-30ab-46be-b38d-58c2feb93e4a_1.1a15fb5

Finished
