In [19]:
from flickrapi import FlickrAPI  # https://pypi.python.org/pypi/flickrapi
import urllib
import os
from random import randint
import time
from json import load

In [23]:
# load configuration from .env file
with open('.env') as f:
    config = load(f)

assert len(config.get('API_KEY', '')) == 32, 'API_KEY must be present in a file .env'
assert len(config.get('API_SECRET', '')) == 16, 'API_SECRET must be present in a file .env'

IMG_FOLDER = 'images'
print(f"key={config.get('API_KEY', '')}, sec={config.get('API_SECRET', '')}, outputfolder={IMG_FOLDER}")

key=010d425e39b6ff085b3381ed1407e585, sec=8fb09d682a4868fd, outputfolder=images


In [24]:
# Copyright 2014-2017 Bert Carremans
# Author: Bert Carremans <bertcarremans.be>
#
# License: BSD 3 clause
def download_flickr_photos(keywords, size='medium', max_nb_img=-1):
    """
    Downloads images based on keyword search on the Flickr website
    
    Parameters
    ----------
    keywords : string, list of strings
        Keyword to search for or a list of keywords should be given.
    size : one of the following strings 'thumbnail', 'square', 'medium', default: 'original'.
        Size of the image to download. In this function we only provide
        four options. More options are explained at 
        http://librdf.org/flickcurl/api/flickcurl-searching-search-extras.html
    max_nb_img : int, default: -1
        Maximum number of images per keyword to download. If given a value of -1, all images
        will be downloaded
    
    Returns
    ------
    Images found based on the keyword are saved in a separate subfolder.
    
    Notes
    -----
    This function uses the Python package flickrapi and its walk method. 
    FlickrAPI.walk has same parameters as FlickrAPI.search
    http://www.flickr.com/services/api/flickr.photos.search.html
    
    To use the Flickr API a set of API keys needs to be created on 
    https://www.flickr.com/services/api/misc.api_keys.html
    """
    if not (isinstance(keywords, str) or isinstance(keywords, list)):
        raise AttributeError('keywords must be a string or a list of strings')
        
    if not (size in ['thumbnail', 'square', 'medium', 'original']):
        raise AttributeError('size must be "thumbnail", "square", "medium" or "original"')
                             
    if not (max_nb_img == -1 or (max_nb_img > 0 and isinstance(max_nb_img, int))):
        raise AttributeError('max_nb_img must be an integer greater than zero or equal to -1')
    
    flickr = FlickrAPI(config.get('API_KEY'), config.get('API_SECRET'))
    
    if isinstance(keywords, str):
        keywords_list = []
        keywords_list.append(keywords)
    else:
        keywords_list = keywords
        
    if size == 'thumbnail':
        size_url = 'url_t'
    elif size == 'square':
        size_url = 'url_q'
    elif size == 'medium':
        size_url = 'url_c'
    elif size == 'original':
        size_url = 'url_o'
    
    for keyword in keywords_list:
        count = 0
                             
        #print('Downloading images for', keyword)

        results_folder = os.path.join(IMG_FOLDER, keyword.replace(" ", "_"))
        if not os.path.exists(results_folder):
            os.makedirs(results_folder)

        photos = flickr.walk(
                     text=keyword,
                     extras=size_url,
                     license='1,2,4,5',
                     per_page=50)
        
        urls = []
        for photo in photos:
            t = randint(1, 3)
            time.sleep(t)
            count += 1
            if max_nb_img != -1:
                if count > max_nb_img:
                    print('Reached maximum number of images to download')
                    break
            try:
                url=photo.get(size_url)
                urls.append(url)
                
                urllib.request.urlretrieve(url,  os.path.join(results_folder,  str(count) + ".jpg"))
                print('Downloading image #' + str(count) + ' from url ' + url)
            except Exception as e:
                print(e, 'Download failure')
                             
        print("Total images downloaded:", str(count - 1))

In [10]:
query = ['people', 'person', 'man', 'women']
download_flickr_photos(query, size='square', max_nb_img=2)

Downloading image #1 from url https://live.staticflickr.com/65535/50941954361_7e5a84368c_q.jpg
Downloading image #2 from url https://live.staticflickr.com/65535/50941231148_f29225e5ed_q.jpg
Reached maximum number of images to download
Total images downloaded: 2
Downloading image #1 from url https://live.staticflickr.com/65535/50942185102_f2c3cea1e5_q.jpg
Downloading image #2 from url https://live.staticflickr.com/65535/50941801347_93bf5b83d8_q.jpg
Reached maximum number of images to download
Total images downloaded: 2
Downloading image #1 from url https://live.staticflickr.com/65535/50941823246_7e101af63d_q.jpg
Downloading image #2 from url https://live.staticflickr.com/65535/50941895922_e6dacc8413_q.jpg
Reached maximum number of images to download
Total images downloaded: 2
Downloading image #1 from url https://live.staticflickr.com/65535/50940998708_d77c864ae6_q.jpg
Downloading image #2 from url https://live.staticflickr.com/65535/50941801947_93bac6010d_q.jpg
Reached maximum number o