Initial Exploration of Artsy.net API 

Downloading images and uploading to a bucket

In [27]:
# Import cell
import requests
import os
from google.cloud import storage



In [42]:
# global params

API_XAPP_TOKEN = os.environ.get("API_XAPP_TOKEN")
GOOGLE_APPLICATION_CREDENTIALS = os.environ.get("GOOGLE_APPLICATION_CREDENTIALS")
GCP_PROJECT = os.environ.get("GCP_PROJECT")
BUCKET_NAME = os.environ.get("BUCKET_NAME")


In [48]:
# Get 1000 artworks

url = 'https://api.artsy.net/api/artworks'
params = {'size':'1000',
          'xapp_token':API_XAPP_TOKEN}
response = requests.get(url,params=params).json()

# get list of artworks only
artworks = response['_embedded']['artworks']


In [49]:

# iterate through artworks and get relevant info
artworks_ = []
for artwork in artworks:
    artwork_dict = {}
    artwork_dict['artwork_id'] = artwork['id']
    artwork_dict['title'] = artwork['title']
    artwork_dict['category'] = artwork['category']
    artwork_dict['medium'] = artwork['medium']
    artwork_dict['date'] = artwork['date']
    artwork_dict['height_cm'] = artwork['dimensions']['cm']['height']
    artwork_dict['width_cm'] = artwork['dimensions']['cm']['width']
    artwork_dict['depth_cm'] = artwork['dimensions']['cm']['depth']
    artwork_dict['diameter_cm'] = artwork['dimensions']['cm']['diameter']
    artwork_dict['collecting_institution'] = artwork['collecting_institution']
    # not every artwork has images so use try except
    try:
        artwork_dict['image_versions'] = artwork['image_versions']
        artwork_dict['image_url'] = artwork['_links']['image']['href']
        artwork_dict['image_url_is_template'] = artwork['_links']['image']['templated']
    except:
        artwork_dict['image_versions'] = None
        artwork_dict['image_url'] = None
        artwork_dict['image_url_is_template'] = None
    artworks_.append(artwork_dict)
    
# iterate through artworks and get list of image urls
for artwork in artworks_:
    image_urls = {}
    if artwork['image_url_is_template']:
        for version in artwork['image_versions']:
            image_urls[version] = artwork['image_url'].replace("{image_version}",version)
    artwork['image_url_all'] = image_urls


In [50]:
# loop through images, download to local file, upload to cloud, delete original file
for artwork in artworks_:
    url = artwork['image_url_all']['medium']
    file_name = f"{artwork['title']}_medium.jpg"
    # This statement requests the resource at
    # the given link, extracts its contents
    # and saves it in a variable
    data = requests.get(url).content
    
    # Opening a new file named img with extension .jpg
    # This file would store the data of the image file
    f = open(file_name,'wb')
    
    # Storing the image data inside the data variable to the file
    f.write(data)
    f.close()
    print('file saved locally')
    # Now upload the image to the cloud
    # instantiate storage client
    storage_client = storage.Client.from_service_account_json(GOOGLE_APPLICATION_CREDENTIALS, project=GCP_PROJECT)
    
    # get bucket name
    # TODO this needs to come from .env file - not sure why it isn't atm
    bucket = storage_client.get_bucket('practice_molpl')

    # create blob
    blob = bucket.blob('images/{}.jpg'.format(f"{artwork['title']}_medium"))

    # set content type
    blob.content_type = 'image/jpeg'

    # upload file
    with open(file_name, 'rb') as f:

        blob.upload_from_file(f)

    print('file uploaded to cloud')
    
    #delete file
    os.remove(file_name)
    
    
  

file saved locally
file uploaded to cloud
file saved locally
file uploaded to cloud
file saved locally
file uploaded to cloud
file saved locally
file uploaded to cloud
file saved locally
file uploaded to cloud
file saved locally
file uploaded to cloud
file saved locally
file uploaded to cloud
file saved locally
file uploaded to cloud
file saved locally
file uploaded to cloud
file saved locally
file uploaded to cloud
file saved locally
file uploaded to cloud
file saved locally
file uploaded to cloud
file saved locally
file uploaded to cloud
file saved locally
file uploaded to cloud
file saved locally
file uploaded to cloud
file saved locally
file uploaded to cloud
file saved locally
file uploaded to cloud
file saved locally
file uploaded to cloud
file saved locally
file uploaded to cloud
file saved locally
file uploaded to cloud
file saved locally
file uploaded to cloud
file saved locally
file uploaded to cloud
file saved locally
file uploaded to cloud
file saved locally
file uploaded t

KeyError: 'medium'

In [41]:


storage_client = storage.Client.from_service_account_json(GOOGLE_APPLICATION_CREDENTIALS, project=GCP_PROJECT)

bucket = storage_client.get_bucket('practice_molpl')

path = '/home/mollyppl/code/molpl/artsy-fartsci/notebooks/Der Kuss (The Kiss)_medium.jpg'

filename = '%s%s' % ('',path)

blob = bucket.blob('images/{}.jpg'.format('image_test'))

blob.content_type = 'image/jpeg'

with open(path, 'rb') as f:

    blob.upload_from_file(f)

print('Image Uploaded : ')

Image Uploaded : 
