In [1]:
import json
import os
import threading
from concurrent.futures import ThreadPoolExecutor

import requests
from PIL import Image

In [2]:
# Imgur API client id. Prefer supplying it through the IMGUR_CLIENT_ID
# environment variable so the credential does not have to live in the notebook.
# NOTE(review): the literal fallback keeps existing runs working; consider
# rotating it and removing it from version control.
client_id = os.environ.get("IMGUR_CLIENT_ID", "33db2bad316ddcc")

In [3]:
def create_download_dir():
    """
    creates the download directory for images (if it is missing) and
    returns its path.

    The directory is the relative path 'images', resolved against the
    current working directory. Calling this repeatedly is safe.
    """
    dir_images = 'images'

    # exist_ok avoids the check-then-create race of exists() + mkdir():
    # another thread/process may create the directory in between.
    os.makedirs(dir_images, exist_ok=True)

    return dir_images

In [4]:
def download_image_from_url(url, directory, timeout=30):
    """
    download image and save into given directory.

    url:       address of the image; the file is named after the last
               segment of the URL *path* (query string and fragment are
               ignored so they do not end up in the filename).
    directory: existing directory the file is written into.
    timeout:   seconds to wait for the server before giving up; passed to
               requests.get so a stalled connection cannot block forever.

    Nothing is written when the server answers with a status other than 200.
    Network errors raised by requests propagate to the caller.
    """
    from urllib.parse import urlsplit

    # The whole body is read via .content below, so streaming is not requested.
    response = requests.get(url, timeout=timeout)
    if response.status_code == 200:
        filename = os.path.basename(urlsplit(url).path)
        filepath = os.path.join(directory, filename)
        with open(filepath, 'wb') as f:
            f.write(response.content)

In [5]:
def build_link_list(client_id, num_of_images, timeout=30, max_stalled_pages=10):
    """
    builds a list of image links.

    Pages of the Imgur random gallery are requested one after another and
    the links of PNG/JPEG entries are collected (without duplicates, in the
    order they are first seen) until at least `num_of_images` links are
    known.

    client_id:         Imgur API client id sent in the Authorization header.
    num_of_images:     stop once this many links have been collected. Whole
                       pages are appended, so the result may be longer.
    timeout:           seconds to wait for each API response.
    max_stalled_pages: give up after this many consecutive pages that did
                       not contribute a single new link.

    Returns the list of links gathered so far; collection also stops early
    on HTTP 429, on any other non-200 status, or when a request/parse error
    occurs (the error is printed, not raised).
    """
    allowed_types = ('image/png', 'image/jpeg')
    page = 1
    stalled_pages = 0
    url_list = []
    seen_links = set()  # O(1) duplicate check, also within a single page

    try:
        while len(url_list) < num_of_images:
            response = requests.get(
                f'https://api.imgur.com/3/gallery/random/random/{page}',
                headers={'Authorization': f'Client-ID {client_id}'},
                timeout=timeout,
            )

            if response.status_code == 429:
                print('too many requests, enough, or you can choose to put time.sleep() in here...')
                break
            if response.status_code != 200:
                break

            count_before = len(url_list)
            for item in json.loads(response.content)['data']:
                link = item.get('link')
                if link and item.get('type') in allowed_types and link not in seen_links:
                    seen_links.add(link)
                    url_list.append(link)
            page += 1

            # Stop when the API keeps returning nothing new; without this
            # guard the loop would never end once the gallery is exhausted.
            if len(url_list) == count_before:
                stalled_pages += 1
                if stalled_pages >= max_stalled_pages:
                    break
            else:
                stalled_pages = 0

    except Exception as exc:
        # Best effort: keep what was collected, but show what went wrong.
        # `Exception` (not a bare except) lets KeyboardInterrupt through.
        print('api limit reached!', repr(exc))

    return url_list

In [6]:
def create_thumbnail(size, path):
    """
    create resized version of the image path given, with the same name
    extended with _thumbnail.

    size: (width, height) bound handed to Image.thumbnail.
    path: path of the source image; 'dir/name.ext' is written to
          'dir/name_thumbnail.ext'.

    Returns the path of the thumbnail, or None when the image could not be
    processed. Failures are reported on stdout instead of being raised, so
    one bad file does not abort a batch.
    """
    try:
        # create path for thumbnail
        filename, extension = os.path.splitext(path)
        new_filename = f'{filename}_thumbnail{extension}'

        # the context manager closes the source file even if resizing fails
        with Image.open(path) as image:
            image.thumbnail(size)
            image.convert('RGB').save(new_filename)
    except Exception as exc:
        print(f'image error: {path}: {exc!r}')
        return None

    return new_filename

In [7]:
# Directory the downloaded images are stored in (created if it is missing).
IMAGES_DIR = create_download_dir()

# Number of image links to collect.
# max requests can be done per day is 12500
NUM_OF_IMAGES = 1_000

In [8]:
%%time

# Baseline measurement: collect the links first, then fetch them one at a
# time on the main thread.
image_links = build_link_list(client_id, NUM_OF_IMAGES)

# NOTE: `image_link` is a notebook-level name; after the loop it stays bound
# to the last link that was downloaded.
for image_link in image_links:
    download_image_from_url(image_link, IMAGES_DIR)

too many requests, enough, or you can choose to put time.sleep() in here...
CPU times: user 33.2 s, sys: 4.07 s, total: 37.3 s
Wall time: 5min 3s


In [9]:
%%time

if __name__ == "__main__":
    image_links = build_link_list(client_id, NUM_OF_IMAGES)
    #Create Thread
    t1 = threading.Thread(target = download_image_from_url,args =(image_link, IMAGES_DIR) )
    
    t2 = threading.Thread(target = download_image_from_url,args =(image_link, IMAGES_DIR) )
    
    t1.start()
    t2.start()
    
    t1.join()
    t2.join()
    
    print("Done")

too many requests, enough, or you can choose to put time.sleep() in here...
Done
CPU times: user 4.1 s, sys: 304 ms, total: 4.41 s
Wall time: 16 s


In [22]:
%%time

image_path_list = os.listdir('images')

for image_path in image_path_list:
    create_thumbnail((128, 128), os.path.join('images', image_path))

CPU times: user 9.88 s, sys: 1.24 s, total: 11.1 s
Wall time: 11.2 s


In [29]:
%%time

import multiprocessing
from multiprocessing import Pool
from itertools import product

p = multiprocessing.Pool(processes=5)

image_path_list = os.listdir('images')

for image_path in image_path_list:
    
    p.starmap(create_thumbnail, product((128, 128), os.path.join('images', image_path)))
    


CPU times: user 3.46 s, sys: 700 ms, total: 4.16 s
Wall time: 4.84 s


In [36]:
from prettytable import PrettyTable

# (description, wall time) pairs, hard-coded from the `Wall time` lines
# printed by the %%time cells of this notebook.
# NOTE(review): these are not recomputed; re-measure and update them by hand
# whenever one of the timed cells changes.
timing_rows = (
    ("Download images by John", "5min 3s"),
    ("Download images - MultiThreading", "16 s"),
    ("Resize images by John", "11.2 s"),
    ("Resize images - MultiProcessing", "4.84 s"),
)

x = PrettyTable()
x.field_names = ["Description", "Time Taken"]
for description, time_taken in timing_rows:
    x.add_row([description, time_taken])

print(x)

+----------------------------------+------------+
|           Description            | Time Taken |
+----------------------------------+------------+
|     Download images by John      |  5min 3s   |
| Download images - MultiThreading |    16 s    |
|      Resize images by John       |   11.2 s   |
| Resize images - MultiProcessing  |   4.84 s   |
+----------------------------------+------------+
