In [80]:
import json
import os
import time

import pandas as pd
import requests
from dotenv import load_dotenv

# Read EUROPEANA_API_KEY from the environment (load_dotenv() also picks up a local .env file).
load_dotenv()
api_key = os.getenv("EUROPEANA_API_KEY")

# Define the URL for the API request
base_url = "https://api.europeana.eu/record/v2/search.json"

# Seconds to wait on a single request before giving up. Without a timeout,
# requests.get() can block indefinitely on a stalled connection.
REQUEST_TIMEOUT = 30

# Create a directory to store results in, based on timestamp
directory = f'downloads/{int(time.time())}/'
os.makedirs(directory, exist_ok=True)

# Set initial parameters
params = {
    'wskey': api_key,         # Your API key
    'query': 'pl_wgs84_pos_lat:(*) AND Världskulturmuseet',  # Search query
    'media': 'true',               # Filter for records with media
    'profile': 'standard',          # Profile to get standard details
    'rows': 100,  # Number of items per page (adjust as needed)
    'cursor': '*'  # Start cursor-based pagination
}

all_items = []        # every item dict returned by the API, across all pages
next_cursor = '*'     # '*' requests the first page; later values come from 'nextCursor'
fetched_records = 0

while True:
    print(f"Current cursor: {next_cursor}")

    # next_cursor is always non-empty here: it starts as '*' and the loop exits
    # as soon as the API stops returning one. Pass it as-is; requests builds the
    # query string from params.
    params['cursor'] = next_cursor

    # Make the request to the API. A network-level failure is reported and ends
    # the loop the same way an HTTP error does, so pages fetched so far are kept
    # in all_items instead of the cell dying with a traceback.
    try:
        response = requests.get(base_url, params=params, timeout=REQUEST_TIMEOUT)
    except requests.RequestException as exc:
        print(f"Request failed: {exc}")
        break

    if response.status_code != 200:
        print(f"Request failed with status code {response.status_code}")
        print("Response:", response.text)  # Print response text for more details
        break

    data = response.json()  # Parse the JSON response into a Python object
    items = data.get('items', [])
    total_results = data.get('totalResults', 0)

    # An empty page means there is nothing more to collect.
    if not items:
        break

    all_items.extend(items)  # Add items to the list
    fetched_records += len(items)

    # Print progress
    print(f"Fetched records: {fetched_records}/{total_results}")

    # Check for the next cursor; its absence marks the last page.
    next_cursor = data.get('nextCursor', None)
    if not next_cursor:
        break

    # Rate limiting: be considerate to the server
    time.sleep(1)

print(f"Total items retrieved: {len(all_items)}")

# Optionally save items or process them as needed
# For example, you could save metadata to a CSV file or download images


Current cursor: *
Fetched records: 100/85985
Current cursor: AoREQXfF4P2N/wI8LzkxNjA4L1NNVktfVktNX29iamVrdF8yMDA4OQ==
Fetched records: 200/85985
Current cursor: AoREQXfF4P2N/wI8LzkxNjA4L1NNVktfVktNX29iamVrdF8yNjgyMA==
Fetched records: 300/85985
Current cursor: AoREQXfF4P2N/wI8LzkxNjA4L1NNVktfVktNX29iamVrdF80ODI0Nw==
Fetched records: 400/85985
Current cursor: AoREQXfF4P2N/wI8LzkxNjA4L1NNVktfVktNX29iamVrdF85NjU5Mw==
Fetched records: 500/85985
Current cursor: AoREQXGAxsL+/gI/Ay85MTYwOS9TTVZLX1ZLTV9mb3RvZ3JhZmlfMTUwMDUwNjg=
Fetched records: 600/85985
Current cursor: AoREQXGAxsL+/gI/Ay85MTYwOS9TTVZLX1ZLTV9mb3RvZ3JhZmlfMTUwMTI1Mjg=
Fetched records: 700/85985
Current cursor: AoREQXGAxsL+/gI/Ay85MTYwOS9TTVZLX1ZLTV9mb3RvZ3JhZmlfMTUzNDYzNDg=
Fetched records: 800/85985
Current cursor: AoREQXGAxsL+/gI/Ai85MTYwOS9TTVZLX1ZLTV9mb3RvZ3JhZmlfMjgzMjQyNw==
Fetched records: 900/85985
Current cursor: AoRCQXfF4P2N/wI8LzkxNjA4L1NNVktfVktNX29iamVrdF8xMDAwNA==
Fetched records: 1000/85985
Current cursor: AoRCQX

In [81]:
# Persist the downloaded items in two formats inside this run's download directory.
# Relies on `json` and `pd` (pandas) from the notebook's import cell, and on
# `directory` / `all_items` produced by the download cell.

# Save the items to a JSON file.
# ensure_ascii=False writes non-ASCII characters as-is instead of \u escapes.
json_file_path = os.path.join(directory, 'europeana_data.json')
with open(json_file_path, 'w', encoding='utf-8') as json_file:
    json.dump(all_items, json_file, ensure_ascii=False, indent=4)
print(f"Data saved to {json_file_path}")

# Convert the list of item dicts to a pandas DataFrame (one row per item)
df = pd.DataFrame(all_items)

# Save the DataFrame to a CSV file; the encoding is spelled out to match the JSON file.
csv_file_path = os.path.join(directory, 'europeana_data.csv')
df.to_csv(csv_file_path, index=False, encoding='utf-8')
print(f"Data saved to {csv_file_path}")

Data saved to downloads/1725630920/europeana_data.json
Data saved to downloads/1725630920/europeana_data.csv


## Get Number of Items per Country

In [3]:
import pandas as pd
# Reload a previously exported CSV. The path is pinned to one specific run's
# timestamped directory (downloads/1725630920/), not to the `directory` variable
# of the download cell, so this cell can run without repeating the download.
# NOTE(review): low_memory=False is presumably here to avoid mixed-dtype
# inference warnings on this file — confirm.
df = pd.read_csv("downloads/1725630920/europeana_data.csv", low_memory=False)

In [4]:
# List the column names of the loaded export to see which fields are available.
print(df.columns)

Index(['completeness', 'country', 'dataProvider', 'dcDescription',
       'dcDescriptionLangAware', 'dcLanguage', 'dcLanguageLangAware',
       'dcTitleLangAware', 'edmConcept', 'edmConceptLabel',
       'edmConceptPrefLabelLangAware', 'edmDatasetName', 'edmIsShownAt',
       'edmIsShownBy', 'edmPlaceAltLabel', 'edmPlaceAltLabelLangAware',
       'edmPlaceLabel', 'edmPlaceLabelLangAware', 'edmPlaceLatitude',
       'edmPlaceLongitude', 'edmPreview', 'europeanaCollectionName',
       'europeanaCompleteness', 'guid', 'id', 'index', 'language', 'link',
       'previewNoDistribute', 'provider', 'rights', 'score', 'timestamp',
       'timestamp_created', 'timestamp_created_epoch', 'timestamp_update',
       'timestamp_update_epoch', 'title', 'type', 'ugc', 'edmTimespanLabel',
       'edmTimespanLabelLangAware', 'dcCreator', 'dcCreatorLangAware', 'year'],
      dtype='object')


In [19]:
# Number of distinct values in the place-label column; dropna=False makes a
# missing value count as one distinct entry.
print(df['edmPlaceLabel'].nunique(dropna=False))

191


In [22]:
# Tally how many rows carry each place-label value, then write the tally
# (with a header row) to country_counts.csv in the working directory.
place_labels = df['edmPlaceLabel']
country_counts = place_labels.value_counts()
country_counts.to_csv(path_or_buf='country_counts.csv', header=True)