## A Notebook to get Information from existing IIIF-Manifests, select canvases, and arrange them into a new manifest


NOTE: This will only work with existing manifests that follow version 3 of the IIIF Presentation API.
For v2 resources, first create a v3 manifest using https://github.com/IIIF/prezi-2-to-3

In [14]:
import copy
import json
import os

import requests

In [2]:
# function to create a new manifest
def create_manifest(manifest_id=None, label=None, metadata=None, provider=None, seeAlso=None):
    """Build the skeleton of a IIIF Presentation API 3 manifest without canvases.

    Args:
        manifest_id: URI of the new manifest (required).
        label: Language map for the manifest label, e.g. ``{"de": ["..."]}``
            (required).
        metadata: Optional list of label/value entries.
        provider: Optional list of provider (agent) descriptions.
        seeAlso: Optional list of related resources.

    Any argument left as ``None`` falls back to the module-level variable of
    the same name, if one exists. Earlier versions of this function ignored
    their arguments and read those variables directly, so existing calls keep
    producing the same manifest.

    Returns:
        dict: The manifest with an empty ``"items"`` list. Optional fields
        that are still ``None`` after the fallback are left out.

    Raises:
        ValueError: If ``manifest_id`` or ``label`` cannot be resolved.
    """
    supplied = {
        "manifest_id": manifest_id,
        "label": label,
        "metadata": metadata,
        "provider": provider,
        "seeAlso": seeAlso,
    }
    module_scope = globals()
    fields = {
        name: value if value is not None else module_scope.get(name)
        for name, value in supplied.items()
    }

    missing = [name for name in ("manifest_id", "label") if fields[name] is None]
    if missing:
        raise ValueError("missing required manifest field(s): " + ", ".join(missing))

    new_manifest = {
        "@context": "http://iiif.io/api/presentation/3/context.json",
        "id": fields["manifest_id"],
        "type": "Manifest",
        "label": fields["label"],
    }
    # Keep the original key order: metadata, provider, seeAlso, then items.
    for key in ("metadata", "provider", "seeAlso"):
        if fields[key] is not None:
            new_manifest[key] = fields[key]
    new_manifest["items"] = []  # No canvases yet

    return new_manifest

In [3]:
# Provide basic Metadata for the new manifest
# base_url + filename form the manifest id; both are also used later to build
# the new canvas URLs.
base_url = "https://raw.githubusercontent.com/r0man-ist/iiif/refs/heads/main/FannyWilhelm/"
filename = "briefsequenz.json"
manifest_id = base_url + filename  ## required
label = {"de": ["Hensel, Wilhelm/Fanny Hensel: Briefsequenz aus der Verlobungszeit 1829"]}  ## required

# Descriptive label/value pairs, all given in German ("de").
metadata = [
    {
        "label": {"de": ["Autor"]},
        "value": {"de": ["Hensel, Fanny", "Hensel, Wilhelm"]}
    },
    {
        "label": {"de": ["Titel"]},
        "value": {"de": ["Briefsequenz aus der Verlobungszeit von Fanny und Wilhelm Hensel, 1829"]}
    },
    {
        "label": {"de": ["Signaturen"]},
        "value": {"de": ["MA Depos. 3,5", "MA Depos. 3,6"]}
    },
    {
        "label": {"de": ["Sprache"]},
        "value": {"de": ["ger"]}
    },
    {
        "label": {"de": ["Lizenz"]},
        "value": {"de": ["Public Domain Mark 1.0"]}
    },
    {
        "label": {"de": ["Anmerkung"]},
        "value": {"de": ["Bei diesem virtuellen Objekt handelt es sich um eine Briefsequenz aus dem Briefwechsel zwischen Fanny Mendelssohn-Bartholdy und Wilhelm Hensel. Der gesamte erhaltene Briefwechsel ist in den digitalisierten Sammlungen der Staatsbibliothek zu Berlin in drei Objekten enthalten (siehe unter seeAlso). Dieses Objekt fasst Briefe zusammen, die offensichtlich direkt aufeinander folgen. Die Transkriptionen und die Zuordnung zu Briefsequenzen sind im Seminarprojekt 'Fanny Loves Wilhelm' in Zusammenarbeit von Staatsbibliothek zu Berlin und UdK entstanden."]}
    },
]

# Links to the three source objects the letters were taken from.
# NOTE(review): "Sammmlungen" (three m) looks like a typo for "Sammlungen" —
# confirm before changing the published labels.
# NOTE(review): "Digital object" is not one of the resource types listed for
# seeAlso in the IIIF Presentation 3 spec (e.g. "Dataset", "Text") — confirm.
seeAlso = [
    {
        "id": "https://resolver.staatsbibliothek-berlin.de/SBB00035B5E00000000",
        "type": "Digital object",
        "label": {"de": ["Digitalisierte Sammmlungen"]}
    },
    {
        "id": "https://resolver.staatsbibliothek-berlin.de/SBB00035B6800000000",
        "type": "Digital object",
        "label": {"de": ["Digitalisierte Sammmlungen"]}
    },
    {
        "id": "https://resolver.staatsbibliothek-berlin.de/SBB00035DA100000000",
        "type": "Digital object",
        "label": {"de": ["Digitalisierte Sammmlungen"]}
    },
]

provider = [
    {
        "id": "https://lab.sbb.berlin",
        "type": "Agent",
        "label": {"en": ["Stabi Lab"]},
        "homepage": [
            {
                "id": "https://lab.sbb.berlin",
                "type": "Text",
                "label": {"de": ["Stabi Lab Homepage"]},
                "format": "text/html"
            }
        ]
    },
]

# seeAlso is passed explicitly as well, so the manifest does not depend on
# create_manifest reading it from the notebook's global namespace.
new_manifest = create_manifest(manifest_id, label, metadata, provider, seeAlso)

In [4]:
# function to get manifests from a list of urls
def fetch_manifests(manifest_urls, timeout=30):
    """Download and parse every manifest in ``manifest_urls``.

    Args:
        manifest_urls: Iterable of URLs that each return a JSON document.
        timeout: Seconds to wait for each request. Without a timeout a
            stalled server would block the notebook indefinitely.

    Returns:
        list: The parsed JSON documents, in the same order as the URLs.

    Raises:
        requests.HTTPError: If a response has an error status code.
        requests.Timeout: If a server does not answer within ``timeout``.
    """
    manifests = []
    for url in manifest_urls:
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()  # Raises an HTTPError for bad responses
        manifests.append(response.json())  # Parse the JSON response

    return manifests

In [5]:
# Source manifests to download; canvases are later picked from these.
manifest_urls = [
    "https://raw.githubusercontent.com/r0man-ist/iiif/refs/heads/main/FannyWilhelm/manifests/1880377578v3.manifest.json",
    "https://raw.githubusercontent.com/r0man-ist/iiif/refs/heads/main/FannyWilhelm/manifests/1878144588v3.manifest.json",
    "https://raw.githubusercontent.com/r0man-ist/iiif/refs/heads/main/FannyWilhelm/manifests/1878121294v3.manifest.json",
]
manifests = fetch_manifests(manifest_urls)
# Uncomment to inspect the downloaded manifests:
# print(manifests)



In [6]:
# Flatten the top-level items of all fetched manifests into one list,
# keeping only entries whose type is 'Canvas' (manifest order is preserved).
canvases = [
    item
    for source_manifest in manifests
    for item in source_manifest.get('items', [])
    if item.get('type') == 'Canvas'
]

# Uncomment to inspect the extracted canvases:
# print(canvases)

In [7]:
# IDs of the canvases that make up the new manifest.
# The order of this list is the order of the canvases in the result.
canvas_list = [
    "https://content.staatsbibliothek-berlin.de/dc/1880377578-0235/canvas",
    "https://content.staatsbibliothek-berlin.de/dc/1880377578-0236/canvas",
    "https://content.staatsbibliothek-berlin.de/dc/1878144588-0159/canvas",
    "https://content.staatsbibliothek-berlin.de/dc/1878144588-0160/canvas",
    "https://content.staatsbibliothek-berlin.de/dc/1880377578-0237/canvas",
    "https://content.staatsbibliothek-berlin.de/dc/1880377578-0238/canvas",
]


In [8]:
# Create a dictionary for quick lookup using the canvas IDs
canvas_ids = {canvas["id"]: canvas for canvas in canvases}

matching_canvases = []
missing_canvas_ids = []

# Iterate over the canvas_list to maintain the specified order
for canvas_id in canvas_list:
    if canvas_id in canvas_ids:
        # Deep-copy the canvas: later cells rewrite ids, labels and sizes in
        # place. Working on copies keeps the fetched canvases untouched, so
        # this cell can be re-run, and a canvas listed twice yields two
        # independent entries.
        matching_canvases.append(copy.deepcopy(canvas_ids[canvas_id]))
    else:
        missing_canvas_ids.append(canvas_id)

# An unknown ID (e.g. a typo) would otherwise silently shorten the manifest.
if missing_canvas_ids:
    print("WARNING: canvas IDs not found in the fetched manifests:")
    for canvas_id in missing_canvas_ids:
        print("  " + canvas_id)

new_manifest["items"] = matching_canvases

In [9]:
# OPTIONAL Get correct image sizes for all canvases

# function to get image sizes for a list of canvases

def get_image_sizes(canvases, timeout=30):
    """Look up the pixel dimensions of the first image on each canvas.

    For every canvas the first service of the first annotation body
    (``items[0].items[0].body.service[0]``) is read and its URI is requested.
    The JSON answer must contain ``height`` and ``width``.

    Args:
        canvases: Iterable of canvas dicts with the structure described above.
        timeout: Seconds to wait for each request, so a stalled image server
            cannot block the notebook indefinitely.

    Returns:
        list: One ``(height, width)`` tuple per canvas, in input order.

    Raises:
        KeyError: If a service has neither an ``"@id"`` nor an ``"id"``, or
            the response lacks ``height``/``width``.
        requests.HTTPError: If a response has an error status code.
    """
    image_sizes = []
    for canvas in canvases:
        # get the image service URL
        service = canvas["items"][0]["items"][0]["body"]["service"][0]
        # Image API 2 services carry "@id"; Image API 3 services carry "id".
        image_service = service.get("@id") or service.get("id")
        if not image_service:
            raise KeyError("image service has neither '@id' nor 'id'")
        # The service URI itself is requested (not "<uri>/info.json"); this
        # relies on the server answering with the image information document.
        response = requests.get(image_service, timeout=timeout)
        response.raise_for_status()  # Raises an HTTPError for bad responses
        image_info = response.json()
        # get height and width of the image
        height = image_info["height"]
        width = image_info["width"]
        image_sizes.append((height, width))
        # print the image sizes
        print(f"Height: {height}, Width: {width}")
    return image_sizes
    
# Query the image services once for all selected canvases
image_sizes = get_image_sizes(matching_canvases)

# Write each (height, width) pair back onto the canvas it was fetched for
for selected_canvas, size in zip(matching_canvases, image_sizes):
    selected_canvas.update(height=size[0], width=size[1])

new_manifest["items"] = matching_canvases

Height: 1540, Width: 1267
Height: 1540, Width: 1267
Height: 1764, Width: 1596
Height: 1764, Width: 1597
Height: 1256, Width: 823
Height: 1256, Width: 823


In [10]:
# Give every selected canvas, and its annotation pages, a URL below the new
# manifest: <base_url><filename without extension>/<position>/canvas
canvas_url_prefix = base_url + filename.split(".")[0] + "/"

for position, canvas in enumerate(matching_canvases, start=1):
    # provide new URLs for the canvases
    new_canvas_id = canvas_url_prefix + str(position) + "/" + "canvas"
    canvas["id"] = new_canvas_id

    for annotation_page in canvas["items"]:
        # provide new URLs for the annotation pages
        annotation_page["id"] = new_canvas_id + "/annotation-page"
        # point the first annotation of the page at the renamed canvas
        annotation_page["items"][0]["target"] = new_canvas_id

new_manifest["items"] = matching_canvases

In [11]:
# Replace the first German label of each canvas with its 1-based position
for position, canvas in enumerate(matching_canvases, start=1):
    canvas["label"]["de"][0] = str(position)

new_manifest["items"] = matching_canvases

# todo
Perform the XSLT transformation directly in the notebook and pass XSLT parameters to the function to provide URLs for canvases and annotation pages.

In [12]:
# add annotation to include full text transcription
# annotations for individual pages can be produced from alto files via https://glenrobson.github.io/iiif_stuff/alto2annotations/alto2annosv3.xsl
# look for files in the subfolder "fulltext" with ".json" ending
fulltext_folder = "fulltext"

# The loop variable must not be called `filename`: that name holds the
# manifest file name set in the metadata cell and is read again when the
# canvas URLs are built. Overwriting it here would break a re-run of that cell.
# sorted() makes the result independent of the directory listing order.
for fulltext_file in sorted(os.listdir(fulltext_folder)):
    if not fulltext_file.endswith(".json"):
        continue
    with open(os.path.join(fulltext_folder, fulltext_file), encoding="utf-8") as f:
        annotation_list = json.load(f)
    annotation_url = annotation_list["id"]
    # The part of the file name before the first "_" is compared with the
    # canvas label to find the corresponding canvas.
    page_label = fulltext_file.split("_")[0]
    for canvas in matching_canvases:
        if canvas["label"]["de"][0] == page_label:
            canvas["annotations"] = [{"id": annotation_url, "type": "AnnotationPage"}]
new_manifest["items"] = matching_canvases

In [13]:

# Save the finished manifest as pretty-printed UTF-8 JSON; ensure_ascii=False
# keeps non-ASCII characters readable instead of escaping them.
with open("briefsequenz.json", "w", encoding="utf-8") as manifest_file:
    json.dump(new_manifest, manifest_file, ensure_ascii=False, indent=4)