In [5]:
from iiif_prezi3 import Manifest, AnnotationPage, Annotation, ResourceItem, config
import os
from PIL import Image
from moviepy.editor import VideoFileClip
import iiif_prezi3
from PIL import Image
import json
from glob import glob
from tqdm import tqdm

In [6]:
class VideoManifest:
    """Build, update and save the IIIF Presentation 3 manifest of one item.

    Local file locations are derived from ``dirname`` and ``item_id`` below the
    relative directory ``../../docs``; published URLs are derived from
    ``base_url`` and ``item_id``.

    Typical use is ``VideoManifest(item_id, metadata=...).create()`` to build a
    manifest from scratch, or ``VideoManifest(item_id, ...).update()`` to
    refresh a manifest that was saved earlier.
    """

    # Media types of the supported accompanying-image formats, keyed by the
    # lower-case file extension passed as ``image_format``.
    _IMAGE_MEDIA_TYPES = {
        "jpg": "image/jpeg",
        "jpeg": "image/jpeg",
        "png": "image/png",
        "gif": "image/gif",
        "webp": "image/webp",
        "tif": "image/tiff",
        "tiff": "image/tiff",
    }

    def __init__(self, item_id, metadata=None, dirname="demo", image_format="jpg", base_url="https://nakamura196.github.io/ramp_data/demo", verbose=False):
        """Store the settings and derive every path and URL for ``item_id``.

        Args:
            item_id: Identifier used in directory names, file names and URLs.
            metadata: Optional dict. ``"label"`` holds the manifest label;
                every other key maps to an iterable of values (see
                ``create_metadata``). Defaults to a fresh empty dict.
            dirname: Directory below ``../../docs`` that holds the item folders.
            image_format: File extension of the accompanying summary image.
            base_url: URL prefix under which the item folder is published.
            verbose: When true, progress messages are printed.
        """
        self.base_url = base_url
        self.item_id = item_id
        self.dirname = dirname
        # A literal ``{}`` default would be one dict shared by every instance
        # created without metadata, so the default is created per instance.
        self.metadata = {} if metadata is None else metadata
        self.verbose = verbose
        self.image_format = image_format
        self.setup_paths(dirname, item_id)
        self.setup_links()
        self.setup_defaults()

    def setup_paths(self, dirname, item_id):
        """Derive the local file paths and the public URLs of the item."""
        base_path = f"../../docs/{dirname}/{item_id}"

        self.prefix = f"{self.base_url}/{item_id}"
        self.mp4_path = f"{base_path}/{item_id}.mp4"
        self.mp4_url = f"{self.prefix}/{item_id}.mp4"
        self.vtt_url = f"{self.prefix}/{item_id}.vtt"
        self.vtt_en_url = f"{self.prefix}/{item_id}_en.vtt"
        self.vtt_en_path = f"{base_path}/{item_id}_en.vtt"
        self.summary_path = f"{base_path}/{item_id}_summary.txt"
        self.image_path = f"{base_path}/{item_id}_summary_image.{self.image_format}"
        self.output_path = f"{base_path}/manifest.json"

    def setup_links(self):
        """Set the attribution text, rights URL and provider homepage URL."""
        self.attribution = "国立国会図書館 National Diet Library, JAPAN"
        self.rights = "https://www.ndl.go.jp/jp/use/reproduction/index.html"
        self.homepage = "https://rekion.dl.ndl.go.jp/"

    def setup_defaults(self):
        """Initialise the attributes that ``create`` / ``update`` fill in later."""
        self.canvas = None
        self.manifest = None
        # Annotation page holding the subtitle annotations; set by
        # ``add_vtt_ja`` or ``update`` and read by ``add_vtt_en``.
        self.vtt_anno_page = None
        # Set by ``create`` from ``get_video_duration``.
        self.duration = None

    def _image_media_type(self):
        """Return the media type matching ``self.image_format``.

        Unknown extensions fall back to ``image/<extension>``.
        """
        extension = str(self.image_format).lower().lstrip(".")
        return self._IMAGE_MEDIA_TYPES.get(extension, f"image/{extension}")

    def get_video_duration(self):
        """Return the ``duration`` of the clip at ``self.mp4_path``.

        Returns 0 when opening the file raises ``IOError``.
        """
        try:
            with VideoFileClip(self.mp4_path) as video:
                return video.duration
        except IOError:
            if self.verbose:
                print("Video file not found.")
            return 0

    def create_metadata(self):
        """Convert ``self.metadata`` into a list of ``KeyValueString`` entries.

        The ``"label"`` key is skipped because ``create_manifest`` uses it as
        the manifest label. One entry is produced per value of every other key.
        """
        return [
            iiif_prezi3.KeyValueString(label=key, value=value)
            for key, values in self.metadata.items()
            if key != "label"
            for value in values
        ]

    def add_accompanying_image(self):
        """Attach an accompanying canvas to ``self.canvas`` if the image exists."""
        if self.verbose:
            print("Adding accompanying image")
            print(self.image_path)
        if os.path.exists(self.image_path):
            self.canvas.accompanyingCanvas = self.create_accompanying_canvas()

    def create_accompanying_canvas(self):
        """Build the accompanying canvas, sized like the summary image."""
        # Only the dimensions are needed, so close the file right away.
        with Image.open(self.image_path) as im:
            w, h = im.size
        accompanying_canvas = iiif_prezi3.Canvas(id=f"{self.prefix}/canvas/accompanying")
        anno_page, _anno = self.create_image_annotation(w, h)
        accompanying_canvas.set_hwd(height=h, width=w)
        accompanying_canvas.add_item(anno_page)
        return accompanying_canvas

    def create_image_annotation(self, width, height):
        """Build the painting annotation page for the accompanying image.

        Args:
            width: Image width, stored on the image resource.
            height: Image height, stored on the image resource.

        Returns:
            Tuple ``(annotation_page, annotation)``.
        """
        canvas_id = f"{self.prefix}/canvas/accompanying"
        anno_page = iiif_prezi3.AnnotationPage(id=f"{canvas_id}/annotation/page")
        anno = iiif_prezi3.Annotation(
            id=f"{canvas_id}/annotation/image",
            motivation="painting",
            body=iiif_prezi3.ResourceItem(
                id=f"{self.prefix}/{self.item_id}_summary_image.{self.image_format}",
                type="Image",
                # Follow the configured image format instead of always
                # declaring JPEG (e.g. "webp" -> "image/webp").
                format=self._image_media_type(),
                height=height,
                width=width
            ),
            # A painting annotation targets the canvas it paints onto, not the
            # annotation page that contains it.
            target=canvas_id
        )
        anno_page.add_item(anno)
        return anno_page, anno

    def create_manifest(self):
        """Create the manifest (label, metadata, rights, provider, homepage).

        The result is stored in ``self.manifest``. The label falls back to
        ``"Unknown"`` when the metadata has no ``"label"`` key.
        """
        label = self.metadata.get("label", "Unknown")
        metadata_fixed = self.create_metadata()
        self.manifest = iiif_prezi3.Manifest(
            id=f"{self.prefix}/manifest.json",
            label=label,
            metadata=metadata_fixed,
            rights=self.rights,
            requiredStatement=iiif_prezi3.KeyValueString(label="Attribution", value=self.attribution),
            provider=iiif_prezi3.ProviderItem(
                id=self.homepage,
                label=self.attribution
            ),
            homepage=iiif_prezi3.HomepageItem(
                id=f"https://rekion.dl.ndl.go.jp/pid/{self.item_id}",
                type="Text",
                label=f"{label} - 国立国会図書館デジタルコレクション",
                format="text/html",
                language="ja"
            )
        )

    def create(self):
        """Build the complete manifest from scratch and save it.

        Nothing is built or written when the media duration is not positive
        (``get_video_duration`` returns 0 if the file cannot be opened).
        """
        self.duration = self.get_video_duration()
        if self.duration > 0:
            self.create_manifest()
            self.add_summary()
            self.create_canvas()
            self.add_accompanying_image()
            self.add_media()
            self.add_vtt_ja()
            self.add_vtt_en()
            self.save_manifest()

    def update(self):
        """Reload the saved manifest, refresh its optional parts and save it.

        Refreshes the summary, the accompanying image and the English subtitle
        annotation. The manifest at ``self.output_path`` must already exist and
        its first canvas must carry at least one annotation page.
        """
        # Use a context manager so the file handle is closed deterministically.
        with open(self.output_path, encoding="utf-8") as f:
            manifest_json = json.load(f)
        self.manifest = iiif_prezi3.Manifest(**manifest_json)
        self.add_summary()

        # Add the image
        self.canvas = self.manifest.items[0]
        self.add_accompanying_image()

        # Add the English subtitles: append them when only one subtitle
        # annotation is present, otherwise re-point the existing second one.
        self.vtt_anno_page = self.canvas.annotations[0]
        subtitle_count = len(self.vtt_anno_page.items)
        if subtitle_count == 1:
            self.add_vtt_en()
        elif subtitle_count == 2:
            self.vtt_anno_page.items[1].body.id = self.vtt_en_url

        # Save
        self.save_manifest()

    def add_media(self):
        """Paint the MP4 resource onto ``self.canvas``."""
        anno_body = iiif_prezi3.ResourceItem(
            id=self.mp4_url,
            type="Sound",
            format="audio/mp4",
            duration=self.duration
        )
        anno_page = iiif_prezi3.AnnotationPage(id=f"{self.prefix}/canvas/page")
        anno = iiif_prezi3.Annotation(
            id=f"{self.prefix}/canvas/page/annotation",
            motivation="painting",
            body=anno_body,
            target=self.canvas.id
        )
        anno_page.add_item(anno)
        self.canvas.add_item(anno_page)

    def add_vtt_ja(self):
        """Create the subtitle annotation page and add the Japanese WebVTT.

        The page replaces ``self.canvas.annotations`` and is remembered in
        ``self.vtt_anno_page`` so that ``add_vtt_en`` can append to it.
        """
        vtt_anno_page = iiif_prezi3.AnnotationPage(id=f"{self.prefix}/canvas/page/2")
        self.vtt_anno_page = vtt_anno_page

        # Add the VTT URL
        vtt_body = iiif_prezi3.ResourceItem(id=self.vtt_url, type="Text", format="text/vtt")
        vtt_anno = iiif_prezi3.Annotation(
            id=f"{self.prefix}/canvas/annotation/webvtt",
            motivation="supplementing",
            body=vtt_body,
            target=self.canvas.id,
            label="日本語 (machine-generated)"
        )

        # Give the body the same label value the annotation ended up with.
        vtt_body.label = vtt_anno.label

        vtt_anno_page.add_item(vtt_anno)

        self.canvas.annotations = [vtt_anno_page]

    def add_vtt_en(self):
        """Append the English WebVTT annotation if the ``_en.vtt`` file exists.

        The annotation is added to ``self.vtt_anno_page``, which ``add_vtt_ja``
        or ``update`` must have set beforehand.
        """
        if os.path.exists(self.vtt_en_path):
            vtt_body = iiif_prezi3.ResourceItem(id=self.vtt_en_url, type="Text", format="text/vtt")
            vtt_anno = iiif_prezi3.Annotation(
                id=f"{self.prefix}/canvas/annotation/webvtt/2",
                motivation="supplementing",
                body=vtt_body,
                target=self.canvas.id,
                label="English (machine-generated)"
            )

            # Give the body the same label value the annotation ended up with.
            vtt_body.label = vtt_anno.label

            self.vtt_anno_page.add_item(vtt_anno)

    def add_summary(self):
        """Set the manifest summary from the summary text file, if present."""
        if os.path.exists(self.summary_path):
            # Pin UTF-8: the text may be non-ASCII and must not depend on the
            # platform's default encoding.
            with open(self.summary_path, encoding="utf-8") as f:
                self.manifest.summary = f.read()

    def create_canvas(self):
        """Create the main canvas on the manifest with the media duration."""
        self.canvas = self.manifest.make_canvas(id=f"{self.prefix}/canvas", duration=self.duration)

    def save_manifest(self):
        """Serialise ``self.manifest`` as indented JSON to ``self.output_path``."""
        # Pin UTF-8 so Japanese labels are written identically on every platform.
        with open(self.output_path, "w", encoding="utf-8") as f:
            f.write(self.manifest.json(indent=2))

In [7]:
# Descriptive metadata for the demo items, keyed by item id.
# "label" is the manifest label; every other key maps to a list of values.
metadata = {
    "3574643": {
        "label": "講演：道徳、経済合一論（一）Union of Morality of Economy",
        "作詞・作曲・編曲・実演家": ["Shibusawa（渋沢　栄一）[作詞]", "Shibusawa（渋沢　栄一）"],
        "製作者（レーベル）": ["ニッポノホン"],
        "注記": ["商品番号 : 15289", "デジタル変換後ノイズ除去 : ノイズ除去なし", "講演"],
        "出版年月日(W3CDTF)": ["0000"],
        "歴史的音源ジャンル": ["講義、講演、演説"],
    },
    "3571280": {
        "label": "日本のアクセントと言葉調子（下）",
        "作詞・作曲・編曲・実演家": ["神保　格"],
        "製作者（レーベル）": ["コロムビア（戦前）"],
        "注記": ["商品番号 : 33000", "デジタル変換後ノイズ除去 : 無", "日本語学習"],
        "出版年月日(W3CDTF)": ["1929-09"],
        "歴史的音源ジャンル": ["教育・児童", "語学"],
    },
}

# Items to build, in processing order.
item_ids = ["3571280", "3574643"]

# Build and save one manifest per demo item (default "demo" directory and URL).
for item_id in item_ids:
    ins = VideoManifest(item_id, metadata=metadata[item_id])
    ins.create()

In [8]:
# Dry run over every item folder that contains a WebP file.
# For a quick single-item check, replace item_ids with e.g. ["8275887"].
files = glob("../../docs/rekion/*/*.webp")

# The item id is the part of the file name before the first underscore.
item_ids = [os.path.basename(file).partition("_")[0] for file in files]

for item_id in tqdm(item_ids):
    # Only the instance is constructed here; the ins.update() call is
    # intentionally left out of this cell.
    ins = VideoManifest(
        item_id,
        dirname="rekion",
        image_format="webp",
        verbose=False,
        base_url="https://s3ds.mdx.jp/rekion/iiif",
    )

100%|██████████| 5004/5004 [00:00<00:00, 270119.66it/s]


In [9]:
# Refresh the saved manifest of every item that has an English subtitle file.
# For a quick single-item check, replace item_ids with e.g. ["8275887"].
files = glob("../../docs/rekion/*/*_en.vtt")

# The item id is the part of the file name before the first underscore.
item_ids = [os.path.basename(file).partition("_")[0] for file in files]

for item_id in tqdm(item_ids):
    ins = VideoManifest(
        item_id,
        dirname="rekion",
        image_format="webp",
        verbose=False,
        base_url="https://s3ds.mdx.jp/rekion/iiif",
    )
    ins.update()

100%|██████████| 5401/5401 [00:40<00:00, 133.99it/s]
