Download images locally to start training

In [1]:
# Import cell
import requests
import os
import shutil


In [2]:
# params
# The Artsy XAPP token is a credential, so it is read from the environment
# (variable API_XAPP_TOKEN) instead of being pasted into the notebook, where it
# could be committed by accident. Falls back to an empty string when unset.
API_XAPP_TOKEN = os.environ.get('API_XAPP_TOKEN', '')

In [3]:
# Fetch a single page of artworks from the Artsy API.
# The request asks for 'size' = 500 artworks in one call; the commented-out
# 'offset' entry can be re-enabled to request a different slice.

url = 'https://api.artsy.net/api/artworks'
params = {'size':'500',
          #'offset':'201',
          'xapp_token':API_XAPP_TOKEN}

# The timeout keeps the cell from hanging forever on a stalled connection.
# raise_for_status() surfaces HTTP errors (e.g. a missing or rejected token)
# here, rather than as a confusing KeyError on '_embedded' further down.
api_response = requests.get(url, params=params, timeout=30)
api_response.raise_for_status()
response = api_response.json()

# get list of artworks only
artworks = response['_embedded']['artworks']

In [4]:
# Flatten every raw artwork record into a dict with only the fields we use.
artworks_ = []
for artwork in artworks:
    dimensions_cm = artwork['dimensions']['cm']
    artwork_dict = {
        'artwork_id': artwork['id'],
        'title': artwork['title'],
        'category': artwork['category'],
        'medium': artwork['medium'],
        'date': artwork['date'],
        'height_cm': dimensions_cm['height'],
        'width_cm': dimensions_cm['width'],
        'depth_cm': dimensions_cm['depth'],
        'diameter_cm': dimensions_cm['diameter'],
        'collecting_institution': artwork['collecting_institution'],
    }
    # Not every artwork has images. Only the "field is missing" failures are
    # caught (KeyError for an absent key, TypeError for a null container), so
    # unrelated problems such as a keyboard interrupt are no longer swallowed.
    # The three image fields are set together: either all present or all None.
    try:
        image_link = artwork['_links']['image']
        image_fields = {
            'image_versions': artwork['image_versions'],
            'image_url': image_link['href'],
            'image_url_is_template': image_link['templated'],
        }
    except (KeyError, TypeError):
        image_fields = {
            'image_versions': None,
            'image_url': None,
            'image_url_is_template': None,
        }
    artwork_dict.update(image_fields)
    artworks_.append(artwork_dict)

# Expand the templated image URL into one concrete URL per image version.
# Artworks without a templated URL get an empty dict.
for artwork in artworks_:
    image_urls = {}
    if artwork['image_url_is_template']:
        # `or []` guards against a null image_versions next to a templated URL
        for version in artwork['image_versions'] or []:
            image_urls[version] = artwork['image_url'].replace("{image_version}",version)
    artwork['image_url_all'] = image_urls

In [11]:
# One boolean per artwork, in the same order as artworks_: True when the
# artwork lists a 'medium' image version, False when it does not or when it
# has no image versions at all.
has_medium_list = [
    ('medium' in entry['image_versions']) if entry['image_versions'] else False
    for entry in artworks_
]
            

In [14]:
# Display every artwork whose entry in has_medium_list is False.
[
    artworks_[position]
    for position in range(len(artworks_))
    if not has_medium_list[position]
]

[{'artwork_id': '5035a0ea33ed360002000705',
  'title': 'The Flood of Noah (Genesis 7:11-24)',
  'category': 'Sculpture',
  'medium': 'Ink and pigment on parchment',
  'date': 'ca. 1250',
  'height_cm': 13.2,
  'width_cm': 9.5,
  'depth_cm': None,
  'diameter_cm': None,
  'collecting_institution': '',
  'image_versions': ['large',
   'large_rectangle',
   'larger',
   'medium_rectangle',
   'normalized',
   'square',
   'tall'],
  'image_url': 'https://d32dm0rphc51dk.cloudfront.net/77uzE-BG0-tidzgXtnX8dQ/{image_version}.jpg',
  'image_url_is_template': True,
  'image_url_all': {'large': 'https://d32dm0rphc51dk.cloudfront.net/77uzE-BG0-tidzgXtnX8dQ/large.jpg',
   'large_rectangle': 'https://d32dm0rphc51dk.cloudfront.net/77uzE-BG0-tidzgXtnX8dQ/large_rectangle.jpg',
   'larger': 'https://d32dm0rphc51dk.cloudfront.net/77uzE-BG0-tidzgXtnX8dQ/larger.jpg',
   'medium_rectangle': 'https://d32dm0rphc51dk.cloudfront.net/77uzE-BG0-tidzgXtnX8dQ/medium_rectangle.jpg',
   'normalized': 'https://d32dm

In [3]:
# Make sure ./data exists and is empty: an existing folder is removed first,
# then the folder is (re)created either way.
path = './data'
already_present = os.path.exists(path)
if already_present:
    shutil.rmtree(path)
os.mkdir(path)
if already_present:
    print("Folder deleted and recreated")
else:
    print("Folder %s created!" % path)
  

Folder deleted and recreated


In [11]:
# Download the 'medium' version of every artwork image into the data/ folder.
# no_without_medium counts the artworks that have no 'medium' image URL.
no_without_medium = 0
for artwork in artworks_:
    url = artwork['image_url_all'].get('medium')
    if url is None:
        # The counter previously incremented here was named no_without_large,
        # which was never defined and raised NameError on the first miss.
        no_without_medium += 1
        continue

    # A '/' in the title would be read as a directory separator and make
    # open() fail, so it is replaced before building the file name.
    safe_title = str(artwork['title']).replace('/', '_')
    file_name = f"data/{safe_title}.jpg"

    try:
        # Fetch the image bytes; raise_for_status() stops an HTTP error page
        # from being saved as if it were a .jpg.
        image_response = requests.get(url, timeout=30)
        image_response.raise_for_status()
        data = image_response.content

        # `with` closes the file even if the write fails part-way.
        with open(file_name, 'wb') as f:
            f.write(data)
    except (requests.RequestException, OSError) as err:
        # Best effort: report the failure and carry on with the next artwork.
        print(f"Could not download {url}: {err}")

In [24]:
# When finished - delete data folder before pushing to git