## A Notebook to get Information from existing IIIF-Manifests, select canvases, and arrange them into a new manifest


NOTE: For now, this only works with source manifests that follow version 2 of the IIIF Presentation API; the manifest it produces follows version 3.

In [1]:
import requests
import json

In [2]:
# function to create a new manifest
def create_manifest(manifest_id, label, metadata, requiredStatement, provider):
    """Build the skeleton of a IIIF Presentation API 3 manifest without canvases.

    Args:
        manifest_id: URI stored as the manifest's "id".
        label: Value stored as the manifest's "label".
        metadata: Value stored as the manifest's "metadata".
        requiredStatement: Label/value pair stored as "requiredStatement".
        provider: Value stored as the manifest's "provider".

    Returns:
        A new dict describing the manifest; its "items" list is empty so that
        canvases can be attached afterwards.
    """
    return {
        "@context": "http://iiif.io/api/presentation/3/context.json",
        "id": manifest_id,
        "type": "Manifest",
        "label": label,
        # A label/value statement belongs under "requiredStatement";
        # "summary" is a different property (a plain language map).
        "requiredStatement": requiredStatement,
        "metadata": metadata,
        "provider": provider,
        "items": [],  # No canvases yet
    }

In [5]:
# Provide basic metadata for the new manifest.
# In IIIF Presentation API 3 every "label" and "value" is a language map of
# the form {"<language code or 'none'>": ["<string>", ...]}.
base_url = "https://raw.githubusercontent.com/r0man-ist/iiif/refs/heads/main/"
filename = "test2.json"
manifest_id = base_url + filename  # required

label = {"de": ["Rekonstruktion Yd7829"]}  # required
metadata = [
    {
        "label": {"de": ["Autor"]},
        "value": {"none": ["Hensel, Fanny", "Hensel, Wilhelm"]},
    },
    {
        "label": {"de": ["Titel"]},
        "value": {"de": ["Sammelband Yd7829"]},
    },
    {
        "label": {"de": ["Signatur"]},
        "value": {"none": ["Yd7829"]},
    },
    {
        "label": {"de": ["Sprache"]},
        "value": {"none": ["ger"]},
    },
    {
        "label": {"de": ["Lizenz"]},
        "value": {"en": ["Public Domain Mark 1.0"]},
    },
]
# No trailing comma after the closing brace: a trailing comma would turn this
# dict into a 1-tuple, which is written to JSON as an array.
requiredStatement = {
    "label": {"de": ["Zu diesem Objekt"]},
    "value": {"de": ["Bei diesem virtuellen Objekt handelt es sich um eine Rekonstruktion."]},
}
provider = [
    {
        "id": "https://lab.sbb.berlin",
        "type": "Agent",
        "label": {"en": ["Stabi Lab"]},
        "homepage": [
            {
                "id": "https://lab.sbb.berlin",
                "type": "Text",
                "label": {"de": ["Stabi Lab Homepage"]},
                "format": "text/html",
            }
        ],
    }
]

new_manifest = create_manifest(manifest_id, label, metadata, requiredStatement, provider)

In [6]:
# function to get manifests from a list of urls
def fetch_manifests(manifest_urls, timeout=30):
    """Download and JSON-decode every manifest in ``manifest_urls``.

    Args:
        manifest_urls: Iterable of manifest URLs; they are fetched one after
            another in iteration order.
        timeout: Seconds passed to ``requests.get`` as its ``timeout``.
            Without it a single unresponsive server blocks the call
            indefinitely.

    Returns:
        A list with one decoded JSON document per URL, in the same order.

    Raises:
        requests.HTTPError: via ``raise_for_status()`` when a response has an
            error status code.
    """
    manifests = []
    for url in manifest_urls:
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()  # Fail on the first bad response
        manifests.append(response.json())
    return manifests

In [None]:
# Source manifests to download; the list order is the order they are requested in.
manifest_urls = [
    "https://collections.library.yale.edu/manifests/17384131",
    "https://collections.library.yale.edu/manifests/17384177",
    "https://raw.githubusercontent.com/r0man-ist/iiif/refs/heads/main/manifests/bsb00062258v3.manifest.json",
    "https://collections.library.yale.edu/manifests/17384255",
    "https://raw.githubusercontent.com/r0man-ist/iiif/refs/heads/main/manifests/835033880v3.manifest.json",
]
manifests = fetch_manifests(manifest_urls)



[{'@context': ['http://iiif.io/api/search/1/context.json', 'http://iiif.io/api/extension/navplace/context.json', 'http://iiif.io/api/presentation/3/context.json'], 'id': 'https://collections.library.yale.edu/manifests/17384131', 'type': 'Manifest', 'label': {'none': ['Ein new Lied vom Türckenn : inn dem Thon, Vom Künig vonn Franckreich, was wöl wir aber heben an [et]c.']}, 'homepage': [{'id': 'https://collections.library.yale.edu/catalog/17384131', 'type': 'Text', 'format': 'text/html', 'label': {'en': ['Yale Digital Collections page']}}], 'requiredStatement': {'label': {'en': ['Provider']}, 'value': {'en': ['Yale University Library']}}, 'rendering': [{'id': 'https://collections.library.yale.edu/pdfs/17384131.pdf', 'type': 'Text', 'format': 'application/pdf', 'label': {'en': ['Download as PDF']}}], 'seeAlso': [{'id': 'https://collections.library.yale.edu/catalog/oai?verb=GetRecord&metadataPrefix=oai_mods&identifier=oai:collections.library.yale.edu:17384131', 'type': 'Dataset', 'form

In [7]:
# Collect every v2 canvas ('@type' == 'sc:Canvas') from the 'sequences' of
# each fetched manifest, keeping manifest, sequence and canvas order.
canvases = []
for manifest in manifests:
    for sequence in manifest.get('sequences', []):
        canvases.extend(
            entry
            for entry in sequence.get('canvases', [])
            if entry.get('@type') == 'sc:Canvas'
        )

# Start with all collected canvases in the new manifest.
new_manifest["items"] = canvases

[{'@id': 'https://content.staatsbibliothek-berlin.de/dc/1880377578-0001/canvas', '@type': 'sc:Canvas', 'label': '1r [1]', 'height': 900, 'width': 600, 'images': [{'@id': 'https://content.staatsbibliothek-berlin.de/dc/1880377578-0001/annotation', '@type': 'oa:Annotation', 'motivation': '', 'resource': {'@id': 'https://content.staatsbibliothek-berlin.de/dc/1880377578-0001/full/full/0/default.jpg', '@type': 'dctypes:Image', 'format': '', 'service': {'@context': 'http://iiif.io/api/image/2/context.json', '@id': 'https://content.staatsbibliothek-berlin.de/dc/1880377578-0001', 'profile': 'http://iiif.io/api/image/2/level1.json'}}, 'on': 'https://content.staatsbibliothek-berlin.de/dc/1880377578-0001/canvas'}]}, {'@id': 'https://content.staatsbibliothek-berlin.de/dc/1880377578-0002/canvas', '@type': 'sc:Canvas', 'label': '1v [2]', 'height': 900, 'width': 600, 'images': [{'@id': 'https://content.staatsbibliothek-berlin.de/dc/1880377578-0002/annotation', '@type': 'oa:Annotation', 'motivation': '

In [8]:
# OPTIONAL
# "@id" values of the canvases that must NOT appear in the new manifest
to_exclude_canvas_list = [
    "https://content.staatsbibliothek-berlin.de/dc/1880377578-0235/canvas",
    "https://content.staatsbibliothek-berlin.de/dc/1880377578-0236/canvas",
    "https://content.staatsbibliothek-berlin.de/dc/1878144588-0159/canvas",
    "https://content.staatsbibliothek-berlin.de/dc/1880377578-0237/canvas",
    "https://content.staatsbibliothek-berlin.de/dc/1880377578-0238/canvas",
]


In [9]:
# Keep every collected canvas whose "@id" is not on the exclusion list.
# The order of `canvases` is preserved.
excluded_ids = set(to_exclude_canvas_list)  # set: constant-time membership test
matching_canvases = [
    canvas for canvas in canvases if canvas["@id"] not in excluded_ids
]

# Lookup of the canvases that were kept, keyed by canvas ID
canvas_ids = {canvas["@id"]: canvas for canvas in matching_canvases}

new_manifest["items"] = matching_canvases

In [10]:
# Rename the top-level canvas keys to comply with v3 of the IIIF Presentation
# API: "@id" becomes "id", and "@type": "sc:Canvas" becomes "type": "Canvas"
# (capitalised). Copies are built instead of editing the dicts in place, so the
# dicts in `canvases` keep their v2 keys and earlier cells can be re-run.
# NOTE(review): nested v2 structures inside a canvas (for example "images" or a
# plain-string "label") are not converted here.
converted_items = []
for item in new_manifest["items"]:
    if item.get("@type") != "sc:Canvas":
        # Not a v2 canvas (or already converted): keep it unchanged
        converted_items.append(item)
        continue
    v3_canvas = {
        key: value for key, value in item.items() if key not in ("@id", "@type")
    }
    if "@id" in item:
        v3_canvas["id"] = item["@id"]
    v3_canvas["type"] = "Canvas"
    converted_items.append(v3_canvas)

new_manifest["items"] = converted_items

In [11]:

# Write the manifest to `filename`, the same name that manifest_id was built
# from, so the file on disk and the manifest's "id" cannot drift apart.
with open(filename, "w", encoding="utf-8") as f:
    json.dump(new_manifest, f, ensure_ascii=False, indent=4)