# core

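> Core client that compares the IIIF media listed in a CSV file with the media registered on an Omeka S site, reports the differences, and re-registers the media of mismatching items.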

In [None]:
#| default_exp core

In [None]:
#| hide
from nbdev.showdoc import *

In [None]:
#| export
import pandas as pd
from tqdm import tqdm
import os
import requests
import json
import time
from dotenv import load_dotenv

In [None]:
#| export
class Client:
    @staticmethod
    def main(endpoint, csv_path, identity, credential, property_id=10):
        """
        Run the complete check-and-repair pipeline in a single call.

        A ``Client`` is created from the given key pair and its ``download``,
        ``analyze``, ``delete`` and ``upload`` methods are invoked in that order.

        Args:
            endpoint (str): API endpoint.
            csv_path (str): Path to the CSV file.
            identity (str): Identity for API access.
            credential (str): Credential for API access.
            property_id (int): Property ID to query. Defaults to 10.
        """
        runner = Client(identity=identity, credential=credential)
        runner.download(endpoint, csv_path, property_id=property_id)

        # The remaining stages take no arguments; they work on the state
        # that download() stored on the instance.
        for stage in (runner.analyze, runner.delete, runner.upload):
            stage()

    @staticmethod
    def load_env(path):
        """
        Read the Omeka connection settings from a .env file.

        The file is loaded with ``override=True`` and the three settings are
        then looked up in the process environment.

        Args:
            path (str): Path to the .env file.

        Returns:
            tuple: ``(OMEKA_BASE_URL, OMEKA_IDENTITY, OMEKA_CREDENTIAL)``.
                An entry is ``None`` when the variable is not set.
        """
        load_dotenv(dotenv_path=path, override=True)

        wanted = ("OMEKA_BASE_URL", "OMEKA_IDENTITY", "OMEKA_CREDENTIAL")
        return tuple(os.getenv(name) for name in wanted)

    def __init__(self, identity, credential):
        """
        Initialize the Client instance.
        
        Args:
            identity (str): Identity for API access.
            credential (str): Credential for API access.
        """
        self.identity = identity
        self.credential = credential
        pass

    def download(self, endpoint, csv_path, property_id=10, debug=False, skip=False):
        """
        Download item JSON data from the endpoint and check for missing media.

        The CSV is grouped by its ``o:item`` column, collecting the ``iiif``
        value of every row. For each identifier the ``/items`` endpoint is
        searched for an item whose property ``property_id`` equals the
        identifier, and the response is cached in ``tmp/<identifier>.json``.
        The number of entries in the first hit's ``o:media`` is compared with
        the number of CSV rows; identifiers whose counts differ are recorded.

        Side effects:
            * sets ``self.endpoint``, ``self.csv_path``, ``self.before``
              (identifier -> list of CSV URLs) and ``self.missings``
              (identifier -> ``{"before", "after", "oid"}``);
            * writes ``tmp/<identifier>.json`` and ``data/missings.json``.

        Args:
            endpoint (str): API endpoint.
            csv_path (str): Path to the CSV file.
            property_id (int): Property ID to query.
            debug (bool, optional): Print each requested URL. Defaults to False.
            skip (bool, optional): When True, reuse an existing
                ``tmp/<identifier>.json`` instead of querying the API again.
                Defaults to False (always re-download).

        Raises:
            requests.HTTPError: If the API answers with an error status.
        """
        print("*", "item json download")
        self.endpoint = endpoint
        self.csv_path = csv_path

        table = pd.read_csv(self.csv_path)

        # Expected media URLs per item identifier, in CSV order.
        # Identifiers are normalised to str so that a numeric "o:item" column
        # can still be used to build file names and JSON object keys.
        before = {}
        self.before = before
        for item_key, iiif_url in zip(table["o:item"], table["iiif"]):
            before.setdefault(str(item_key), []).append(iiif_url)

        missings = {}
        self.missings = missings

        for item_key in tqdm(before):
            path = f"tmp/{item_key}.json"
            os.makedirs(os.path.dirname(path), exist_ok=True)

            # Query the API unless the caller asked to reuse a cached response.
            if not (skip and os.path.exists(path)):
                # Passing the query as params lets requests URL-encode the
                # identifier and the bracketed Omeka parameter names.
                params = {
                    "property[0][property]": property_id,
                    "property[0][type]": "eq",
                    "property[0][text]": item_key,
                    "key_identity": self.identity,
                    "key_credential": self.credential,
                }
                response = requests.get(f"{self.endpoint}/items", params=params)

                if debug:
                    print(response.url)

                # Fail before caching so an error body never ends up in tmp/.
                response.raise_for_status()

                with open(path, mode='wt', encoding='utf-8') as file:
                    json.dump(response.json(), file, ensure_ascii=False, indent=2)

            with open(path, mode='rt', encoding='utf-8') as file:
                hits = json.load(file)

            # No item matched this identifier: nothing to compare.
            if not hits:
                continue

            item = hits[0]
            expected = len(before[item_key])
            registered = len(item["o:media"])

            if expected != registered:
                missings[item_key] = {
                    "before": expected,
                    "after": registered,
                    "oid": item["o:id"],
                }

        opath = "data/missings.json"
        os.makedirs(os.path.dirname(opath), exist_ok=True)

        with open(opath, mode='wt', encoding='utf-8') as file:
            json.dump(missings, file, ensure_ascii=False, indent=2)

    def analyze(self):
        """
        List the CSV media URLs that are not registered on the server and save them as CSV.

        For every entry of ``self.missings`` the ``/media`` endpoint is paged
        through (``page=1, 2, ...`` until an empty page is returned) and the
        ``o:source`` of each media is collected. CSV URLs absent from that set
        are stored, sorted, as a list under ``self.missings[id]["l"]`` and
        added as one row to ``data/missings.csv``. Entries with no absent URL
        are left without an ``"l"`` key and produce no row.

        Requires ``download`` to have run first, since it reads
        ``self.endpoint``, ``self.before`` and ``self.missings``.

        Raises:
            requests.HTTPError: If the API answers with an error status.
        """
        print("*", "media json download")

        before = self.before
        missings = self.missings

        # Header row; the file is written without a pandas header below.
        rows = [["item id", "omeka id", "登録前画像数", "登録後画像数", "不足画像数", "不足画像URL"]]

        # Send the same key pair as download() so that both requests are made
        # with the same access rights.
        auth = {"key_identity": self.identity, "key_credential": self.credential}
        media_url = f"{self.endpoint}/media"

        for uuid in tqdm(missings):
            item = missings[uuid]
            oid = item["oid"]

            # Collect the source URL of every media attached to the item.
            registered = set()
            page = 1
            while True:
                response = requests.get(
                    media_url,
                    params={"item_id": oid, "page": page, **auth},
                )
                response.raise_for_status()
                batch = response.json()

                # An empty page marks the end of the listing.
                if not batch:
                    break

                registered.update(media["o:source"] for media in batch)
                page += 1

            absent = sorted(set(before[uuid]) - registered)
            if not absent:
                continue

            # Keep the URLs as a list; they are only joined for the CSV cell.
            missings[uuid]["l"] = absent

            rows.append([
                uuid,
                oid,
                item["before"],
                item["after"],
                item["before"] - item["after"],
                ", ".join(absent),
            ])

        os.makedirs("data", exist_ok=True)
        pd.DataFrame(rows).to_csv('data/missings.csv', header=False, index=False)

        

    def delete(self):
        """
        Delete all media of every item whose media count did not match the CSV.

        For each entry of ``self.missings`` the cached ``tmp/<id>.json``
        written by ``download`` is read and a DELETE request is sent to the
        ``@id`` URL of every media listed in the first item's ``o:media``.
        All media are removed, not only surplus ones. Entries whose
        registered count (``"after"``) is 0 are skipped because there is
        nothing to delete.

        A failed request is reported on stdout and processing continues with
        the next media.
        """
        print("*", "delete media")
        missings = self.missings

        auth = {"key_identity": self.identity, "key_credential": self.credential}

        for item_id in tqdm(missings):
            if missings[item_id]["after"] == 0:
                continue

            if item_id not in self.before:
                continue

            with open(f"tmp/{item_id}.json", mode='rt', encoding='utf-8') as file:
                hits = json.load(file)

            # Every media of the item is deleted.
            for media in tqdm(hits[0]["o:media"]):
                response = requests.delete(media["@id"], params=auth)

                # Report failures instead of dropping them silently.
                if not response.ok:
                    print("failed", item_id, media["@id"], response.status_code, response.text)

    def upload(self, debug=False, loop=5, property_source=11):
        """
        Register the CSV media of every mismatching item on the Omeka server.

        For each entry of ``self.missings`` the Omeka item id is read from the
        cached ``tmp/<id>.json`` and one IIIF media is POSTed to
        ``<endpoint>/media`` for every URL recorded for that item in
        ``self.before`` (all of its CSV URLs, not only the absent ones).

        A POST that does not return status 200 is reported and retried up to
        ``loop`` times, waiting one second before each retry. When every retry
        fails the URL is reported and the next URL is processed.

        Args:
            debug (bool, optional): Print the item and URL being processed and
                stop after the first item. Defaults to False.
            loop (int, optional): Number of retry attempts for failed uploads. Defaults to 5.
            property_source (int, optional): Property ID used for the
                ``dcterms:source`` value of each media. Defaults to 11.
        """
        print("*", "upload media")
        missings = self.missings
        before = self.before

        # Build the media URL directly rather than rewriting the items URL,
        # so the target does not depend on the endpoint ending in "/api".
        media_url = f"{self.endpoint}/media"
        auth = {"key_identity": self.identity, "key_credential": self.credential}

        for item_id in tqdm(missings):

            if debug:
                print("item_id", item_id)

            if item_id not in before:
                continue

            with open(f"tmp/{item_id}.json", mode='rt', encoding='utf-8') as file:
                hits = json.load(file)

            oid = hits[0]["o:id"]

            # Images to register: every URL the CSV lists for this item.
            for iiif in tqdm(before[item_id]):

                if debug:
                    print("iiif", iiif)

                item_payload = {
                    "o:source": iiif,
                    "o:item": {
                        "o:id": oid
                    },
                    "o:ingester": "iiif",
                    "o:renderer": "iiif",
                    "@type": "o:Media",
                    "dcterms:source": [
                        {
                            "property_id": property_source,
                            "@value": iiif,
                            "type": "literal"
                        }
                    ]
                }

                r = requests.post(media_url, params=auth, json=item_payload)

                if r.status_code == 200:
                    continue

                print("failed", item_id, oid, r.status_code, r.text, iiif)

                # Try again, at most `loop` times.
                for _ in range(loop):
                    time.sleep(1)

                    r = requests.post(media_url, params=auth, json=item_payload)

                    if r.status_code == 200:
                        break
                else:
                    # Every retry failed; report it and move on to the next URL.
                    print("giving up", item_id, oid, r.status_code, iiif)

            # Debug mode handles a single item only.
            if debug:
                break

In [None]:
show_doc(Client.main)

---

### Client.main

>      Client.main (endpoint, csv_path, identity, credential, property_id=10)

*Main method to run the client operations.

Args:
    endpoint (str): API endpoint.
    csv_path (str): Path to the CSV file.
    property_id (int): Property ID to query.
    identity (str): Identity for API access.
    credential (str): Credential for API access.*

In [None]:
show_doc(Client.load_env)

---

### Client.load_env

>      Client.load_env (path)

*Load environment variables from a .env file.

Args:
    path (str): Path to the .env file.

Returns:
    tuple: Base URL, identity, and credential from the environment variables.*

In [None]:
show_doc(Client)

---

### Client

>      Client (identity, credential)

*Initialize the Client instance.

Args:
    identity (str): Identity for API access.
    credential (str): Credential for API access.*

In [None]:
show_doc(Client.download)

---

### Client.download

>      Client.download (endpoint, csv_path, property_id=10, debug=False)

*Download item JSON data from the endpoint and check for missing media.

Args:
    endpoint (str): API endpoint.
    csv_path (str): Path to the CSV file.
    property_id (int): Property ID to query.
    debug (bool, optional): Flag to enable debug mode. Defaults to False.*

In [None]:
show_doc(Client.analyze)

---

### Client.analyze

>      Client.analyze ()

*Analyze the downloaded JSON data to find missing media items and save the results to a CSV file.*

In [None]:
show_doc(Client.delete)

---

### Client.delete

>      Client.delete ()

*Delete media items from the Omeka server that are listed as missing.*

In [None]:
show_doc(Client.upload)

---

### Client.upload

>      Client.upload (debug=False, loop=5, property_source=11)

*Upload missing media items back to the Omeka server.

Args:
    debug (bool, optional): Flag to enable debug mode. Defaults to False.
    loop (int, optional): Number of retry attempts for failed uploads. Defaults to 5.
    property_source (int, optional): Property source for the media items. Defaults to 11.*

In [None]:
#| hide
import nbdev; nbdev.nbdev_export()