In [66]:
import pandas as pd
import json
import os
from tqdm import tqdm

In [67]:
# Tab-separated image table; the canvas builders filter it by its "アイテムID" column.
images = pd.read_table("tjhm_docs_20200130/images.txt")

In [68]:
# Tab-separated item table; the canvas builders use it to map a "文書ID" to its "アイテムID" rows.
image_metadata = pd.read_table("tjhm_docs_20200130/documents1.txt")

In [69]:
# Tab-separated document table; one manifest is generated per row.
document_metadata = pd.read_table("tjhm_docs_20200130/documents2.txt")

In [70]:
# Base URL used to build the "@id" of every generated manifest and of the collection.
origin = "https://nakamura196.github.io/toji_iiif/iiif"

In [71]:
def createCanvases_(row_d, images, image_metadata, prefix, canvas_width=1, canvas_height=1):
    """Build one IIIF canvas per item of a document, using the item's first image.

    Args:
        row_d: Mapping-like document row; only ``row_d["文書ID"]`` is read.
        images: DataFrame with ``アイテムID``, ``image_s`` and ``image_l``
            columns. ``image_s`` becomes the thumbnail ``@id`` and ``image_l``
            the painted resource ``@id``.
        image_metadata: DataFrame with ``文書ID`` and ``アイテムID`` columns;
            every other non-null column of a row is copied into the canvas
            ``metadata`` list.
        prefix: URL prefix used to build canvas and annotation ``@id`` values.
        canvas_width: Width written to both the canvas and its image resource.
        canvas_height: Height written to both the canvas and its image resource.
            NOTE(review): the defaults of 1 look like placeholders for unknown
            pixel sizes -- confirm.

    Returns:
        A list of canvas dicts, in the row order of ``image_metadata``.
    """
    d_id = row_d["文書ID"]
    canvases = []

    for _, row_m in image_metadata[image_metadata["文書ID"] == d_id].iterrows():
        # Every populated column except the document id becomes a label/value pair.
        metadata = [
            {"label": c, "value": col}
            for c, col in row_m.items()
            if c != "文書ID" and pd.notna(col)
        ]

        metadata_id = row_m["アイテムID"]

        matching_images = images[images["アイテムID"] == metadata_id]
        if matching_images.empty:
            # No image row for this item: emit no canvas instead of raising
            # IndexError from ``.iloc[0]``.
            continue
        image = matching_images.iloc[0]

        canvas_id = f"{prefix}/canvas/{metadata_id}"

        canvases.append({
            "@id": canvas_id,
            "@type": "sc:Canvas",
            "label": metadata_id,
            "metadata": metadata,
            "height": canvas_height,
            "width": canvas_width,
            "thumbnail": {
                "@id": image["image_s"],
                "@type": "dctypes:Image",
                "format": "image/jpeg",
            },
            "images": [
                {
                    "@id": f"{prefix}/annotation/{metadata_id}",
                    "@type": "oa:Annotation",
                    "motivation": "sc:painting",
                    "resource": {
                        "@id": image["image_l"],
                        "@type": "dctypes:Image",
                        "format": "image/jpeg",
                        # Height/width were swapped here in the original.
                        "height": canvas_height,
                        "width": canvas_width,
                    },
                    "on": canvas_id,
                }
            ],
        })

    return canvases

def createCanvases(row_d, images, image_metadata, prefix, canvas_width=1, canvas_height=1):
    """Build one IIIF canvas per image file belonging to a document.

    The document's items are looked up in ``image_metadata`` by ``文書ID``;
    each row of ``images`` matching one of those items yields a canvas whose
    identifier is ``"<アイテムID>-<file_index>"``.

    Args:
        row_d: Mapping-like document row; only ``row_d["文書ID"]`` is read.
        images: DataFrame with ``アイテムID``, ``file_index``, ``image_s`` and
            ``image_l`` columns. ``image_s`` becomes the thumbnail ``@id`` and
            ``image_l`` the painted resource ``@id``.
        image_metadata: DataFrame with ``文書ID`` and ``アイテムID`` columns.
        prefix: URL prefix used to build canvas and annotation ``@id`` values.
        canvas_width: Width written to both the canvas and its image resource.
        canvas_height: Height written to both the canvas and its image resource.
            NOTE(review): the defaults of 1 look like placeholders for unknown
            pixel sizes -- confirm.

    Returns:
        A list of canvas dicts, ordered by item row and then by image row.
        Items without any image row contribute no canvas.
    """
    d_id = row_d["文書ID"]
    canvases = []

    for _, row_m in image_metadata[image_metadata["文書ID"] == d_id].iterrows():
        item_id = row_m["アイテムID"]

        for _, image in images[images["アイテムID"] == item_id].iterrows():
            image_id = f"{item_id}-{image['file_index']}"
            canvas_id = f"{prefix}/canvas/{image_id}"

            canvases.append({
                "@id": canvas_id,
                "@type": "sc:Canvas",
                "label": f"[{image_id}]",
                "height": canvas_height,
                "width": canvas_width,
                "thumbnail": {
                    "@id": image["image_s"],
                    "@type": "dctypes:Image",
                    "format": "image/jpeg",
                },
                "images": [
                    {
                        "@id": f"{prefix}/annotation/{image_id}",
                        "@type": "oa:Annotation",
                        "motivation": "sc:painting",
                        "resource": {
                            "@id": image["image_l"],
                            "@type": "dctypes:Image",
                            "format": "image/jpeg",
                            # Height/width were swapped here in the original.
                            "height": canvas_height,
                            "width": canvas_width,
                        },
                        "on": canvas_id,
                    }
                ],
            })

    return canvases


def createManifest(row_d):
    """Write the IIIF Presentation 2 manifest for one document and return it.

    The manifest is saved to ``docs/iiif/2/<文書ID>/manifest.json`` (parent
    directories are created as needed). Its canvases come from
    ``createCanvases`` using the module-level ``images`` and
    ``image_metadata`` tables.

    Args:
        row_d: One row of ``document_metadata`` (a pandas Series). ``文書ID``
            and ``タイトル`` are read explicitly; every other non-null column
            becomes a label/value entry in the manifest ``metadata``.

    Returns:
        The manifest dict that was written to disk.
    """
    d_id = row_d["文書ID"]

    prefix = f"{origin}/2/{d_id}"
    opath = f"docs/iiif/2/{d_id}/manifest.json"
    os.makedirs(os.path.dirname(opath), exist_ok=True)

    # Read the metadata from the row that was passed in. The original looked
    # up ``document_metadata.loc[d]`` through a stale module-level ``d`` and
    # so attached the last document's metadata to every manifest.
    metadata = [
        {"label": c, "value": col}
        for c, col in row_d.items()
        if c != "文書ID" and pd.notna(col)
    ]

    canvases = createCanvases(row_d, images, image_metadata, prefix)

    manifest = {
        "@context": "http://iiif.io/api/presentation/2/context.json",
        "@id": f"{prefix}/manifest.json",
        "@type": "sc:Manifest",
        "label": row_d["タイトル"],
        "metadata": metadata,
        "sequences": [
            {
                # Presentation API 2 uses the JSON-LD keys "@id" / "@type".
                "@id": f"{prefix}/sequence/normal",
                "@type": "sc:Sequence",
                "label": "Current Page Order",
                "canvases": canvases
            }
        ],
        "license": "https://creativecommons.org/licenses/by/2.1/jp/",
        "attribution": "京都府立京都学・歴彩館 東寺百合文書WEB",
        "related": f"https://hyakugo.pref.kyoto.lg.jp/contents/detail.php?id={d_id}",
        # Was misspelled "widthin", which is not an IIIF property.
        "within": "https://hyakugo.pref.kyoto.lg.jp/"
    }

    # ensure_ascii=False writes raw Japanese text, so pin the file encoding
    # rather than relying on the platform default.
    with open(opath, "w", encoding="utf-8") as f:
        json.dump(manifest, f, ensure_ascii=False)

    return manifest

# Generate one manifest per document, then a collection that lists them all.
manifests = []

# NOTE(review): `rows` is kept as a module-level name in case other cells read it.
rows = []

# Iterate the table once so that `d` is the index of the document currently
# being processed. The original filled `rows` in a first loop, which left `d`
# at the last index while manifests were built, so any lookup of
# `document_metadata.loc[d]` during processing saw the last document.
# `total` is passed because `iterrows()` has no length for tqdm to read.
for d, row_d in tqdm(document_metadata.iterrows(), total=len(document_metadata)):

    rows.append(row_d)

    manifest = createManifest(row_d)

    # The collection only needs a short reference to each manifest.
    manifests.append({
        "@id": manifest["@id"],
        "@type": manifest["@type"],
        "label": manifest["label"]
    })

collection = {
    "@id": f"{origin}/collection.json",
    "@type": "sc:Collection",
    "label": "東寺百合文書WEB",
    "manifests": manifests
}

# Create the output directory here too, so the write works with zero documents.
os.makedirs("docs/iiif", exist_ok=True)

# ensure_ascii=False writes raw Japanese text, so pin the file encoding.
with open("docs/iiif/collection.json", "w", encoding="utf-8") as f:
    json.dump(collection, f, ensure_ascii=False)
    

100%|██████████| 46257/46257 [01:57<00:00, 395.33it/s]
