In [2]:
import csv
import json
import os
from typing import Optional

import requests

In [6]:
# Export the API key in the same terminal that launches Jupyter, e.g.:
#     export API_KEY='XXXXXXXX'
# (no spaces around '=' -- the shell rejects `export API_KEY = ...`).
# A missing variable raises KeyError here, before any request is made.
API_KEY = os.environ['API_KEY']
# Language segment interpolated into the API URLs built by the fetch helpers.
LANGUAGE = 'en'

# Confirm the key was picked up without echoing the secret itself into the
# saved notebook output.
print(f'API_KEY loaded ({len(API_KEY)} characters)')

API_KEY: [REDACTED]


In [55]:
def fetch_collection_page(language: str, api_key: str, object_type: str, imgonly: bool, ps: int, p: int,
                          timeout: float = 30.0):
    """Request one page of the collection search endpoint.

    The query string is passed through ``params`` so that every value is
    URL-encoded by ``requests`` instead of being pasted into the URL verbatim.

    Args:
        language: Language segment of the API path (e.g. ``'en'``).
        api_key: API key, sent as the ``key`` query parameter.
        object_type: Sent as the ``type`` query parameter (e.g. ``'painting'``).
        imgonly: Sent as the ``imgonly`` query parameter.
        ps: Sent as the ``ps`` query parameter (callers pass the page size).
        p: Sent as the ``p`` query parameter (callers pass the page number).
        timeout: Seconds to wait for the server before giving up; without a
            timeout a stalled connection would block the notebook forever.

    Returns:
        The ``requests.Response`` exactly as received. The status code is not
        checked and the body is not parsed here.

    Raises:
        requests.RequestException: On connection problems or timeout.
    """
    return requests.get(
        f'https://www.rijksmuseum.nl/api/{language}/collection',
        params={
            'key': api_key,
            'type': object_type,
            'imgonly': imgonly,
            'ps': ps,
            'p': p,
        },
        timeout=timeout,
    )

def save_collection_page(collection_page: dict, output_dir: str) -> None:
    """Write every art object of a collection page to its own JSON file.

    Each entry of ``collection_page['artObjects']`` is dumped to
    ``<output_dir>/<objectNumber>.json``; an existing file with the same name
    is overwritten.

    Args:
        collection_page: Parsed page payload containing an ``'artObjects'``
            list whose items each carry an ``'objectNumber'`` key.
        output_dir: Directory that receives the files. It is created
            (including parents) when it does not exist yet.

    Raises:
        KeyError: If ``'artObjects'`` or an item's ``'objectNumber'`` is missing.
        OSError: If the directory or a file cannot be written.
    """
    # Create the target directory up front so a fresh checkout does not fail
    # with FileNotFoundError on the first write.
    os.makedirs(output_dir, exist_ok=True)

    print('Saving objects...')
    for art_object in collection_page['artObjects']:
        object_id = art_object['objectNumber']
        target_path = os.path.join(output_dir, f'{object_id}.json')
        # Explicit encoding keeps the files identical across platforms.
        with open(target_path, 'w', encoding='utf-8') as f:
            json.dump(art_object, f)

In [56]:
# Crawl settings for the collection listing.
RESULTS_PER_PAGE = 100
IMAGE_ONLY = True
COLLECTION_OUTPUT_DIR = './data/collection/'

current_page = 1
more_data_exists = True
total_objects = 0

# Walk the listing page by page until a page comes back without objects.
while more_data_exists:
    print(f'Fetching page: {current_page}...')
    response = fetch_collection_page(LANGUAGE, API_KEY, 'painting', IMAGE_ONLY, RESULTS_PER_PAGE, current_page)
    # Fail loudly on an HTTP error instead of a confusing JSON decode error
    # further down.
    response.raise_for_status()
    page = response.json()
    # A payload without an 'artObjects' list is treated like an empty page.
    len_objects = len(page.get('artObjects') or [])

    if len_objects == 0:
        print('No objects retrieved, breaking...')
        more_data_exists = False
        break

    print(f'Retrieved objects: {len_objects}')

    save_collection_page(page, COLLECTION_OUTPUT_DIR)
    current_page += 1
    total_objects += len_objects

    print(f'Current total: {total_objects}')

    print()

Fetching page: 1...
Retrieved objects: 100
Saving objects...
Current total: 100

Fetching page: 2...
Retrieved objects: 100
Saving objects...
Current total: 200

Fetching page: 3...
Retrieved objects: 100
Saving objects...
Current total: 300

Fetching page: 4...
Retrieved objects: 100
Saving objects...
Current total: 400

Fetching page: 5...
Retrieved objects: 100
Saving objects...
Current total: 500

Fetching page: 6...
Retrieved objects: 100
Saving objects...
Current total: 600

Fetching page: 7...
Retrieved objects: 100
Saving objects...
Current total: 700

Fetching page: 8...
Retrieved objects: 100
Saving objects...
Current total: 800

Fetching page: 9...
Retrieved objects: 100
Saving objects...
Current total: 900

Fetching page: 10...
Retrieved objects: 100
Saving objects...
Current total: 1000

Fetching page: 11...
Retrieved objects: 100
Saving objects...
Current total: 1100

Fetching page: 12...
Retrieved objects: 100
Saving objects...
Current total: 1200

Fetching page: 13...
R

In [57]:
# Recover the painting ids from the file names in the collection output
# directory (one `<id>.json` file per object). Only regular files ending in
# `.json` count, so stray files such as `.DS_Store` are not mistaken for ids.
painting_ids = {
    os.path.splitext(file_name)[0]
    for file_name in os.listdir(COLLECTION_OUTPUT_DIR)
    if file_name.endswith('.json') and os.path.isfile(os.path.join(COLLECTION_OUTPUT_DIR, file_name))
}

In [None]:
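# Disabled alternative: build painting_ids from the CSV file at CSV_PATH instead of
# the crawled JSON files, keeping rows whose type is 'schilderij' and that have an image.
# painting_ids = set()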

# CSV_PATH = './202001-rma-csv-collection.csv'
# PAINTING_TYPE = 'schilderij'
# with open(CSV_PATH, 'r', encoding='utf-8-sig') as csvFile:
#     reader = csv.DictReader(csvFile)
#     for artObject in reader:
#         objectType = artObject['objectType[1]']
#         objectImage = artObject['objectImage']
        
#         if objectType == PAINTING_TYPE and objectImage != '':
#             objectId = artObject['objectInventoryNumber']
#             painting_ids.add(objectId)

In [58]:
def fetch_single(painting_id: str, language: str, api_key: str, timeout: float = 30.0) -> Optional[dict]:
    """Fetch the detail record of one collection object.

    This is a best-effort helper: any failure is reported on stdout and turned
    into ``None`` so a long download loop can carry on with the next id.

    Args:
        painting_id: Object id appended to the collection URL path.
        language: Language segment of the API path (e.g. ``'en'``).
        api_key: API key, sent as the ``key`` query parameter.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The parsed JSON body, or ``None`` when the request failed, the server
        answered with an HTTP error status, or the body was not valid JSON.
    """
    response = None

    try:
        response = requests.get(
            f'https://www.rijksmuseum.nl/api/{language}/collection/{painting_id}',
            params={'key': api_key},
            timeout=timeout,
        )
        # Treat 4xx/5xx answers as failures so an error payload is never
        # returned (and later saved) as if it were object data.
        response.raise_for_status()
        return response.json()
    except Exception as e:
        print('exception')
        # `response` is still None when the request itself failed (e.g.
        # connection error or timeout), so there is no body to show.
        if response is not None:
            print(response.text)
        print(e)
        return None

In [59]:
DETAILED_OUTPUT_DIR = './data/detailed/'

# Make sure the target directory exists before listing it or writing to it.
os.makedirs(DETAILED_OUTPUT_DIR, exist_ok=True)

# Ids that already have a detail file from an earlier (possibly interrupted)
# run; only regular `.json` files count.
already_collected = {
    os.path.splitext(file_name)[0]
    for file_name in os.listdir(DETAILED_OUTPUT_DIR)
    if file_name.endswith('.json') and os.path.isfile(os.path.join(DETAILED_OUTPUT_DIR, file_name))
}

# Only fetch what is still missing, so the cell can be re-run to resume.
to_be_collected = painting_ids.difference(already_collected)
print(f'To be collected: {len(to_be_collected)}')

for painting_id in to_be_collected:
    print(f'Processing: {painting_id}...')
    data = fetch_single(painting_id, LANGUAGE, API_KEY)

    if data:
        print(f'Saving: {painting_id}...')
        # Write into the directory scanned above so that a re-run recognises
        # the file as already collected.
        with open(os.path.join(DETAILED_OUTPUT_DIR, f'{painting_id}.json'), 'w', encoding='utf-8') as f:
            json.dump(data, f)
    else:
        print(f'No data fetched: {painting_id}...')

To be collected: 4316
Processing: SK-A-1526...
exception

Expecting value: line 1 column 1 (char 0)
No data fetched: SK-A-1526...
Processing: SK-A-1425...
exception

Expecting value: line 1 column 1 (char 0)
No data fetched: SK-A-1425...
Processing: SK-A-4495...
exception

Expecting value: line 1 column 1 (char 0)
No data fetched: SK-A-4495...
Processing: SK-A-725...
exception

Expecting value: line 1 column 1 (char 0)
No data fetched: SK-A-725...
Processing: SK-A-303...
exception

Expecting value: line 1 column 1 (char 0)
No data fetched: SK-A-303...
Processing: SK-C-448...
exception

Expecting value: line 1 column 1 (char 0)
No data fetched: SK-C-448...
Processing: SK-A-2153...
exception

Expecting value: line 1 column 1 (char 0)
No data fetched: SK-A-2153...
Processing: SK-C-296...
exception

Expecting value: line 1 column 1 (char 0)
No data fetched: SK-C-296...
Processing: SK-A-3478...
exception

Expecting value: line 1 column 1 (char 0)
No data fetched: SK-A-3478...
Processing: SK

exception

Expecting value: line 1 column 1 (char 0)
No data fetched: SK-A-406...
Processing: SK-A-4715...
exception

Expecting value: line 1 column 1 (char 0)
No data fetched: SK-A-4715...
Processing: SK-A-4294...
exception

Expecting value: line 1 column 1 (char 0)
No data fetched: SK-A-4294...
Processing: SK-A-2034...
exception

Expecting value: line 1 column 1 (char 0)
No data fetched: SK-A-2034...
Processing: SK-A-98...
exception

Expecting value: line 1 column 1 (char 0)
No data fetched: SK-A-98...
Processing: SK-A-1954...
exception

Expecting value: line 1 column 1 (char 0)
No data fetched: SK-A-1954...
Processing: SK-A-4619...
exception

Expecting value: line 1 column 1 (char 0)
No data fetched: SK-A-4619...
Processing: SK-A-3836...
exception

Expecting value: line 1 column 1 (char 0)
No data fetched: SK-A-3836...
Processing: SK-A-2183...
exception

Expecting value: line 1 column 1 (char 0)
No data fetched: SK-A-2183...
Processing: SK-A-1863...
exception

Expecting value: line 

KeyboardInterrupt: 