In [3]:
__author__ = "Sibashis Chatterjee (sibashis1992@gmail.com)"

In [None]:
from googleapiclient.discovery import build
import pprint
import requests

from my_secrets import *

IMAGES_TO_DOWNLOAD = 1000

# Usage example: https://github.com/googleapis/google-api-python-client/blob/main/samples/customsearch/main.py#L39
# API Doc: https://developers.google.com/custom-search/v1/reference/rest/v1/cse/list

# Google Developer console: https://console.cloud.google.com/apis/dashboard?project=iisc-cee-project
# Programable Search console: https://programmablesearchengine.google.com/controlpanel/all
service = build("customsearch", "v1", developerKey = search_api_key)

# totalResults = 0;
# startIndex = 0
# Once failed at index 201, retrying
totalResults = 200
startIndex = 200

# loop till <IMAGES_TO_DOWNLOAD> images are downloaded
while totalResults < IMAGES_TO_DOWNLOAD:
    
    res = (
        service.cse()
        .list(
            q = "covid+19+infected+lungs+x+ray",
            hl = "en-US",
            cx = programable_search_engine_id,
            searchType = "image",
            start = startIndex
        )
        .execute()
    )
    
    for item in res["items"]:
        
        pprint.pprint(item["fileFormat"] + " from " + item["link"])
        
        if item["image"]:
            
            totalResults = totalResults + 1
            
            fileExtension = item["fileFormat"].split("/")[1]
            fileName = "scrapped_images/image_" + str(totalResults) + ".jpeg"
            print("Writing " + fileName + " of type " + item["fileFormat"] + " from " + item["link"])
            
            img_data = requests.get(item["link"]).content
            with open(fileName, "wb") as handler:
                handler.write(img_data)
    
    # If there is a next page, use the start index in next iteration of the loop, break out of the loop oitherwise
    if (res and res["queries"] and res["queries"]["nextPage"]):
        startIndex = res["queries"]["nextPage"][0]["startIndex"]
    else:
        break


Sample output

```
'image/jpeg from http://www.itnonline.com/sites/default/files/Chest.jpeg'
Writing /content/scrapped_images/image_1.jpeg of type image/jpeg from http://www.itnonline.com/sites/default/files/Chest.jpeg
('image/jpeg from '
 'https://www.itnonline.com/sites/default/files/styles/content_feed_large/public/Covid_ches_Xray_0.jpg?itok=gixxojWy')
Writing /content/scrapped_images/image_2.jpeg of type image/jpeg from https://www.itnonline.com/sites/default/files/styles/content_feed_large/public/Covid_ches_Xray_0.jpg?itok=gixxojWy
('image/png from '
 'https://media.springernature.com/lw685/springer-static/image/art%3A10.1186%2Fs43055-020-00296-x/MediaObjects/43055_2020_296_Fig1_HTML.png')
Writing /content/scrapped_images/image_3.jpeg of type image/png from https://media.springernature.com/lw685/springer-static/image/art%3A10.1186%2Fs43055-020-00296-x/MediaObjects/43055_2020_296_Fig1_HTML.png
```

Started giving error at startIndex >= 200
```
HttpError: <HttpError 400 when requesting https://customsearch.googleapis.com/customsearch/v1?q=covid%2B19%2Binfected%2Blungs%2Bx%2Bray&hl=en-US&cx=XXXXXXXXXXXXXX&searchType=image&start=200&key=YYYYYYYYYYYYYYYYYYY&alt=json returned "Request contains an invalid argument.". Details: "[{'message': 'Request contains an invalid argument.', 'domain': 'global', 'reason': 'badRequest'}]">
```