In [None]:
import config as c
import requests
from requests_toolbelt.multipart.encoder import MultipartEncoder
import os
from IPython.core.display import HTML
import time
import random
from lxml import etree
import json

In [None]:
def iiif_manifest(urn):
    """Fetch the IIIF manifest for ``urn`` from the api.nb.no catalog.

    Args:
        urn: Identifier interpolated into the manifest URL.

    Returns:
        The response body decoded from JSON.
    """
    manifest_url = "https://api.nb.no/catalog/v1/iiif/{urn}/manifest".format(urn=urn)
    response = requests.get(manifest_url)
    return response.json()

def get_pages(manifest):
    """Collect the image URL of every canvas in a IIIF manifest.

    Args:
        manifest: Parsed manifest. The URLs are read from
            ``manifest['sequences'][0]['canvases']``, taking
            ``['images'][0]['resource']['@id']`` of each canvas.

    Returns:
        A list with one image ``@id`` per canvas of the first sequence, or an
        empty list when the manifest does not have that structure (missing
        key, empty ``sequences``/``images`` list, or a value that cannot be
        indexed at all).
    """
    try:
        canvases = manifest['sequences'][0]['canvases']
        return [canvas['images'][0]['resource']['@id'] for canvas in canvases]
    except (KeyError, IndexError, TypeError):
        # All-or-nothing, as before: one malformed canvas yields an empty
        # result. IndexError/TypeError are now treated like KeyError so an
        # empty list or an unexpected value no longer raises.
        return []

def download_pages(pages, wait=1):
    """Download page images and key them by a filename derived from the URL.

    Args:
        pages: Iterable of image URLs. The filename is built from the seventh
            ``/``-separated segment of the URL: the part after its last ``:``
            with ``.jpg`` appended.
        wait: Seconds to sleep after each successful download.

    Returns:
        dict mapping filename to the downloaded bytes. A page is reported and
        left out when no filename can be derived from its URL, when the
        request fails, or when the server answers with an HTTP error status.
    """
    pageDict = dict()
    for page in pages:
        try:
            filename = page.split('/')[6].split(':')[-1] + '.jpg'
        except (AttributeError, IndexError):
            print("-- cannot derive a filename from", page, "- skipping page")
            continue

        try:
            r = requests.get(page)
            # Without this check an error page would be stored as image bytes.
            r.raise_for_status()
            content = r.content
        except requests.RequestException as err:
            print("-- failed to download", page, "-", err)
            continue

        pageDict[filename] = content
        # Outside the try blocks so an interrupt during the pause is not swallowed.
        time.sleep(wait)
    return pageDict

# Login

In [None]:
# One shared session: whatever state the login response sets on it (e.g.
# cookies) is sent along with every later request made through `s`.
s = requests.Session()
login_response = s.post('https://transkribus.eu/TrpServer/rest/auth/login', data={"user": c.login, "pw":c.password})
# Fail in this cell, not somewhere downstream, when the server answers the
# login with an HTTP error status.
login_response.raise_for_status()
login_response

## List collections

In [None]:
# Request the collection listing through the session `s`.
collections_url = 'https://transkribus.eu/TrpServer/rest/collections/list'
collections = s.get(collections_url)

In [None]:
# Decode the raw response body from JSON; as the last expression of the cell
# the decoded value is displayed.
collections_body = collections.content
json.loads(collections_body)

In [None]:
# ID of the collection whose documents are listed. Left blank like `sesamids`;
# fill it in before running this cell. The URL used to be sent with a literal,
# never-substituted "{id}" in its path.
collection_id = ''
docs = s.get('https://transkribus.eu/TrpServer/rest/collections/{id}/list'.format(id=collection_id))

In [None]:
# Decode the raw response body from JSON; as the last expression of the cell
# the decoded value is displayed.
docs_body = docs.content
json.loads(docs_body)

# Set sesamids to upload

In [None]:
# Identifiers to process. The upload loop passes each entry to iiif_manifest()
# as the URN and uses it as the title of the uploaded document.
# NOTE(review): the single empty string looks like a blanked-out placeholder -
# replace it with real identifiers before running the loop.
sesamids = ['']

# Loop through the sesamids and upload each document to Transkribus

In [None]:
# Collection that receives the uploads. Left blank like `sesamids`; fill it in
# before running. While blank, the resulting URL is the same "collId=" URL
# this cell has always used.
upload_collection_id = ''
upload_url = 'https://transkribus.eu/TrpServer/rest/uploads?collId={}'.format(upload_collection_id)
headers = {'Content-type': 'application/json'}

# Every sesamid that could not be uploaded completely ends up in `skipped`.
skipped = []
for sesamid in sesamids:
    print("Sesamid", sesamid)

    # A manifest that cannot be fetched or decoded skips this item only,
    # instead of aborting the whole batch.
    try:
        manifest = iiif_manifest(sesamid)
    except (requests.RequestException, ValueError) as err:
        print("-- failed to fetch manifest, skipping", sesamid, "-", err)
        skipped.append(sesamid)
        continue

    pages = get_pages(manifest)
    files = download_pages(pages)
    if not files:
        # Nothing to send: do not create an upload without pages.
        print("-- no pages downloaded, skipping", sesamid)
        skipped.append(sesamid)
        continue
    if len(files) < len(pages):
        print("-- warning: only", len(files), "of", len(pages), "pages downloaded for", sesamid)

    # Sorted once so the page numbers and the upload order below agree.
    filenames = sorted(files)
    pages_metadata = [{'fileName': name, 'pageNr': nr} for nr, name in enumerate(filenames, start=1)]

    uploadObj = {
        "md": {
            "title": sesamid
        },
        "pageList": {"pages": pages_metadata}
    }

    # Step 1: announce the document; the XML answer carries the upload ID.
    try:
        cont = s.post(upload_url, json=uploadObj, headers=headers)
        cont.raise_for_status()
        response = etree.fromstring(cont.content)
        uploadId = response.xpath('//uploadId/text()')[0]
    except (requests.RequestException, etree.XMLSyntaxError, IndexError):
        print("-- failed to get upload ID, skipping", sesamid)
        skipped.append(sesamid)
        continue
    print('- successfully uploaded metadata, got id', uploadId)

    # Step 2: send the page images one by one; stop at the first failure.
    for key in filenames:
        print(key)

        mp_encoder = MultipartEncoder(
            fields={
                'img': (key, files[key], 'application/octet-stream')
            }
        )

        try:
            cont = s.put('https://transkribus.eu/TrpServer/rest/uploads/' + uploadId, data=mp_encoder, headers={'Content-Type': mp_encoder.content_type})
            # An HTTP error status is a failed upload too, not only a raised exception.
            cont.raise_for_status()
        except requests.RequestException:
            print("-- failed to upload", key)
            skipped.append(sesamid)
            print("-- failed to upload file in ", sesamid, "skipping this sesamid")
            break
        time.sleep(random.randint(0,2))
    else:
        # Reached only when the page loop finished without `break`.
        print("- done!")