In [None]:
import os

# Directory to scan and the (lower-case) file extensions that count as images.
image_folder = '/content/images'
image_extensions = ('.jpg', '.jpeg', '.png', '.gif')

# Compare against the lower-cased name so that files such as "PHOTO.JPG" or
# "scan.PNG" are counted too; a case-sensitive check silently misses them.
image_count = sum(
    1
    for filename in os.listdir(image_folder)
    if filename.lower().endswith(image_extensions)
)

print(f"There are {image_count} images in the '{image_folder}' folder.")

There are 19618 images in the '/content/images' folder.


1. Import Libraries: Imports requests for HTTP requests, os for file operations, and time for delays.
2. Set API Access Key and URL: Defines the Unsplash API access key and the search endpoint URL.
3. Set Headers and Parameters: Sets the authorization header with the access key and search parameters, including the query term and number of results per page.
4. Create Directory for Images: Checks if the directory images3 exists and creates it if it doesn't.
5. Initialize Pagination: Sets the starting page number for the search results.
6. Main Loop to Fetch and Download Images:
    1. Updates the page parameter for the current page number.
    2. Sends a GET request to the Unsplash API.
    3. Checks the response status code:
        - If not 200, prints an error message.
        - If 429 (rate limit exceeded), waits for the number of seconds given by the Retry-After response header (10 seconds if the header is absent) and then retries the same page.
        - Breaks the loop for other errors.
    4. Converts the response to JSON.
    5. If no results are found, prints a message and breaks the loop.
    6. Iterates over the results to download images:
        - Gets the image URL.
        - Downloads the image content.
        - Constructs the filename.
        - Checks if the file already exists and skips if it does.
        - Writes the image content to a file.
        - Prints a success message.
    7. Moves to the next page.
    8. Waits for 1 second before the next request.
7. Completion Message: Prints a message indicating that all images have been downloaded.

In [None]:
import requests
import os
import time

# Download every Unsplash search result for a query, page by page, into a
# local folder. Files that already exist on disk are skipped, so the cell can
# be re-run to resume an interrupted download.

# NOTE(security): the fallback key below is committed in plain text. Prefer
# setting the UNSPLASH_ACCESS_KEY environment variable, and rotate/remove the
# embedded key once no caller depends on it.
access_key = os.environ.get(
    "UNSPLASH_ACCESS_KEY",
    "D0dsLVpUw5idCZRk9TbfNYZWJJVHIDlDIz2sCcR4CW8",
)
url = "https://api.unsplash.com/search/photos"
headers = {
    "Authorization": f"Client-ID {access_key}"
}
params = {
    "query": "Spectacles face",
    "per_page": 30
}

output_dir = 'images3'
# Seconds to wait on any single HTTP request; without a timeout a stalled
# connection would block the loop forever.
request_timeout = 30
# Fallback wait (seconds) when a 429 response has no usable Retry-After header.
default_retry_after = 10

# exist_ok avoids the check-then-create race of a separate os.path.exists().
os.makedirs(output_dir, exist_ok=True)

start_page = 185
page_num = start_page

# Outcome tracking so the final message reflects what actually happened.
aborted = False
failed_downloads = 0

while True:
    params["page"] = page_num

    try:
        response = requests.get(
            url, headers=headers, params=params, timeout=request_timeout
        )
    except requests.RequestException as e:
        print(f"Failed to fetch page {page_num}: {e}")
        aborted = True
        break

    if response.status_code != 200:
        print(f"Failed to fetch page {page_num}. Status code: {response.status_code}")

        if response.status_code == 429:
            # Retry-After may be missing or not a plain integer (e.g. an HTTP
            # date); fall back to the default instead of crashing.
            try:
                retry_after = int(
                    response.headers.get('Retry-After', default_retry_after)
                )
            except ValueError:
                retry_after = default_retry_after
            print(f"Rate limit exceeded. Retrying after {retry_after} seconds...")
            time.sleep(retry_after)
            continue  # retry the same page

        aborted = True
        break

    data = response.json()
    results = data.get('results')

    if not results:
        print("No more images found.")
        break

    for i, photo in enumerate(results):
        filename = os.path.join(output_dir, f'image_page_{page_num}_num_{i}.jpg')

        # Check before downloading so a skipped image costs no request.
        if os.path.exists(filename):
            print(f"Image {filename} already exists. Skipping...")
            continue

        try:
            img_url = photo['urls']['regular']
            img_response = requests.get(img_url, timeout=request_timeout)
            # Do not save an HTTP error page under a .jpg name.
            img_response.raise_for_status()
            with open(filename, 'wb') as handler:
                handler.write(img_response.content)
        except (requests.RequestException, OSError, KeyError, TypeError) as e:
            failed_downloads += 1
            print(f"Failed to download image {i + 1} from page {page_num}: {e}")
        else:
            print(f"Downloaded image {i + 1} from page {page_num}")

    page_num += 1

    # Small pause between search requests to stay friendly to the API.
    time.sleep(1)

if aborted:
    print(f"Stopped early at page {page_num}; not all images were downloaded.")
elif failed_downloads:
    print(f"Finished, but {failed_downloads} image(s) failed to download.")
else:
    print('All images downloaded successfully!')