In [None]:
# create pybash macro
# https://stackoverflow.com/a/67029719/7782
from IPython import get_ipython
from IPython.core.magic import register_cell_magic

ipython = get_ipython()

@register_cell_magic
def pybash(line, cell):
    """Run the cell body through the ``bash`` cell magic after interpolation.

    The cell text is first passed through ``str.format`` with this module's
    global names as keyword arguments, so ``{NAME}`` placeholders are replaced
    by the corresponding global values before bash sees the script.

    ``line`` (the text after ``%%pybash``) is accepted but not used: the bash
    magic is always invoked with an empty option line.

    Because ``str.format`` is used, literal braces in the script must be
    doubled, and a placeholder naming a missing global raises ``KeyError``.
    """
    namespace = globals()
    script = cell.format(**namespace)
    ipython.run_cell_magic('bash', '', script)

In [None]:
# imports
import argparse
import os
import types
import datetime


import ezid_client_tools as ect
from ezid_client_tools.utils import ANVL

import structured_ezid as sezid

import settings



In [None]:
# project constants
# Credentials: read from the environment first.
EZID_USER = os.environ.get('EZID_USER')
EZID_PASSWD = os.environ.get('EZID_PASSWD')

# If either variable is missing, BOTH values are taken from the local
# `settings` module (an environment value that was present is overwritten).
if any(value is None for value in (EZID_USER, EZID_PASSWD)):
    import settings
    EZID_USER = settings.EZID_USER
    EZID_PASSWD = settings.EZID_PASSWD

# Components used to build the test ARKs in the cells below.
TEST_NAAN = "99999"
TEST_SHOULDER = "fk4"
TEST_ID = "isamplestest"
TEST_PROJECT_ID = "prefixmatch"

# New shoulder

OC_NAAN = "28722"
OC_PREREG_SHOULDER = "r2"

# Create test arks

In [None]:
# Client used to create the test ARKs, authenticated as "user:password".
# NOTE(review): server "s" is presumably the EZID staging server — confirm.
client2 = sezid.Client2()
client2.args.credentials = f"{EZID_USER}:{EZID_PASSWD}"
client2.args.server = "s"

# Hierarchy of test identifiers under TEST_ID/TEST_PROJECT_ID; the
# prefix-matching cells further down look up descendants of these ARKs.
arks_to_create = (
  sezid.ARKIdentifier(TEST_NAAN, TEST_SHOULDER, f"{TEST_ID}"),
  sezid.ARKIdentifier(TEST_NAAN, TEST_SHOULDER, f"{TEST_ID}/{TEST_PROJECT_ID}"),
  sezid.ARKIdentifier(TEST_NAAN, TEST_SHOULDER, f"{TEST_ID}/{TEST_PROJECT_ID}/a"),
  sezid.ARKIdentifier(TEST_NAAN, TEST_SHOULDER, f"{TEST_ID}/{TEST_PROJECT_ID}/a/b"),
  sezid.ARKIdentifier(TEST_NAAN, TEST_SHOULDER, f"{TEST_ID}/{TEST_PROJECT_ID}/a/c"),
  sezid.ARKIdentifier(TEST_NAAN, TEST_SHOULDER, f"{TEST_ID}/{TEST_PROJECT_ID}/a/c1"),
  sezid.ARKIdentifier(TEST_NAAN, TEST_SHOULDER, f"{TEST_ID}/{TEST_PROJECT_ID}/a/c1/d"),
)

for ark_ in arks_to_create:

    # datetime.utcnow() is deprecated since Python 3.12. Take an aware "now"
    # in UTC and drop the tzinfo so `dt` stays a naive UTC datetime and the
    # ISO string written to "erc.when" keeps its format (no "+00:00" suffix).
    dt = datetime.datetime.now(datetime.timezone.utc).replace(tzinfo=None)

    metadata_ = {
      "profile": "erc",
      "erc.who": "Raymond Yee",
      "erc.what": ark_.postfix,
      "erc.when": dt.replace(microsecond=0).isoformat(),
    }

    try:
        # NOTE(review): update=True presumably updates an identifier that
        # already exists instead of failing — confirm in structured_ezid.
        (response, headers, status) = client2.create_identifier(ark_, metadata_, update=True)
    except ect.ClientError as e:
        # Report the failure and continue with the next identifier.
        if isinstance(e, ect.HTTPClientError):
            print(e.status, str(e))
        else:
            print(e, type(e))
    else:
        print(response, status)
        

# Figuring out Client3

In [None]:
from structured_ezid import ARKIdentifier
from typing import Union
from pathlib import PurePath as P
import re

# Captures one run of non-whitespace and, optionally, a second run that
# follows the literal word "in_lieu_of". Not used by any cell shown here.
re1 = re.compile(r'(\S+)(?:\s*in_lieu_of\s*(\S+))?')

# Intended contract of the lookup under development:
# return (found id_, metadata, identifier_or_ancestor) of id_, or None

class Client3(sezid.Client2):
    """Subclass of ``sezid.Client2`` that currently adds nothing of its own.

    Every attribute used on ``Client3`` instances in this notebook is therefore
    whatever ``sezid.Client2`` provides.
    """



In [None]:
# Same connection settings as client2, but through the Client3 subclass.
client3 = Client3()
client3.args.credentials = f"{EZID_USER}:{EZID_PASSWD}"
client3.args.server = "s"

# Root of the test hierarchy; suffixes are appended to it with "/".
ark0 = ARKIdentifier(s='ark:/99999/fk4isamplestest/prefixmatch', shoulder_size=3)

# (suffix to look up, suffix of the ARK the lookup is expected to return)
TEST_ID_MAPPING = [
    ('a/c1/d', 'a/c1/d'),
    ('a/c/e', 'a/c'),
    ('a/c1/e', 'a/c1'),
    ('a/c12/d/e', 'a')
]

for k, v in TEST_ID_MAPPING:
    r = client3.view_identifier_or_ancestor(
        ark0 / k,
        prefix_matching=True,
        shoulder_size=3,
    )
    # Columns: requested suffix, expected suffix, first element of the
    # result, and whether that element equals the expected ARK.
    print(k, v, r[0], ark0 / v == r[0])


In [None]:
# Look up a suffix that is not among arks_to_create ('a/c12/d/e') with prefix
# matching enabled, then display the first element of the result.
r = client3.view_identifier_or_ancestor(ark0 / 'a/c12/d/e', shoulder_size=3, prefix_matching=True)
r[0]

In [None]:
# Only the last expression of a cell is displayed, so the equality check is
# folded into the displayed tuple instead of being evaluated and discarded.
# Shown: (matches ark0 / 'a'?, naan, shoulder, postfix) of the ARK found.
r[0] == ark0 / 'a', r[0].naan, r[0].shoulder, r[0].postfix

In [None]:
# Postfix of the ARK obtained by appending 'a' to the root, for comparison
# with the postfix of the lookup result.
(ark0 / 'a').postfix

In [None]:
# NOTE(review): on a top-to-bottom run `ark_` is still the loop variable left
# over from the ARK-creation cell (it is only rebound in later cells) —
# confirm that this is the identifier meant to be inspected here.
ark_.parents

In [None]:
# NOTE(review): `ark_mapped` is not assigned in any cell visible in this
# notebook, so this raises NameError on a fresh kernel unless it is defined
# elsewhere — confirm or remove.
ark_.parents[-1-len(ark_mapped.parts)]

In [None]:
# One-line check: looking up 'a/c/e' with prefix matching should give back
# the ARK for 'a/c' (same expectation as the TEST_ID_MAPPING entry).
client3.view_identifier_or_ancestor(ark0 / "a/c/e", prefix_matching=True, shoulder_size=3)[0] == ark0 / "a/c"

In [None]:
# Rebuild the root ARK so this cell does not depend on the earlier one.
ark0 = ARKIdentifier(s='ark:/99999/fk4isamplestest/prefixmatch', shoulder_size=3)

# Rebinds `ark_`; the bare expression below is not displayed because it is
# not the last expression of the cell.
ark_ = ark0 / "a/c/e"
ark_

# Result is stored in `r` without being displayed.
r = client3.view_identifier_or_ancestor(ark_, prefix_matching=True, shoulder_size=3)
# r  (uncomment to display the full result)

In [None]:
# ark:/99999/fk4isamplestest/prefixmatch/a/c1/d -> a/c1/d
# ark:/99999/fk4isamplestest/prefixmatch/a/c/e -> a/c 
# ark:/99999/fk4isamplestest/prefixmatch/a/c1/e -> a/c1
# ark:/99999/fk4isamplestest/prefixmatch/a/c12/d/e -> a (not /a/c1)

# Build the full ARK directly from its string form instead of via ark0 / ...
ark_ = ARKIdentifier(s='ark:/99999/fk4isamplestest/prefixmatch/a/c/e', shoulder_size=3)

# NOTE(review): unlike the other lookups in this notebook, this call does not
# pass shoulder_size=3 — confirm the default gives the same result.
r = client3.view_identifier_or_ancestor(ark_, prefix_matching=True)
r

```
ark:/99999/fk4isamplestest
ark:/99999/fk4isamplestest/prefixmatch
ark:/99999/fk4isamplestest/prefixmatch/a
ark:/99999/fk4isamplestest/prefixmatch/a/b
ark:/99999/fk4isamplestest/prefixmatch/a/c
ark:/99999/fk4isamplestest/prefixmatch/a/c1
ark:/99999/fk4isamplestest/prefixmatch/a/c1/d

ark:/99999/fk4isamplestest/prefixmatch/a/c1/d -> a/c1/d
ark:/99999/fk4isamplestest/prefixmatch/a/c/e -> a/c (not a/c1)
ark:/99999/fk4isamplestest/prefixmatch/a/c1/e -> a/c1
ark:/99999/fk4isamplestest/prefixmatch/a/c12/d/e -> a (not a/c1)
```


In [None]:
%%pybash
# Delete the test ARK ending in /00/a6 with the ezid command-line client on
# server "s". Placeholders in curly braces are filled in from the notebook
# globals by the pybash magic before bash runs the script.

ezid s "{EZID_USER}:{EZID_PASSWD}" delete "ark:/{TEST_NAAN}/{TEST_SHOULDER}/00/a6"

In [None]:
%%pybash
# View the metadata of one existing ARK under NAAN 28722.
# NOTE(review): server "p" is presumably production, unlike the "s" server
# used by the other cells - confirm before adding any write operation here.

ezid p "{EZID_USER}:{EZID_PASSWD}" view ark:/28722/k2154wc6r 

In [None]:
%%pybash
# POST a batch download request to ezid.cdlib.org: ANVL format, ARKs only,
# permanence=real, created after 2023-01-01 (UTC).

curl -u "{EZID_USER}:{EZID_PASSWD}" -d format=anvl -d type=ark \
  -d permanence=real \
  -d createdAfter=2023-01-01T00:00:00Z \
  https://ezid.cdlib.org/download_request

In [None]:
%%pybash
# Same download request as the ANVL cell, but as CSV restricted to two
# columns (_id and erc.who).

curl -u "{EZID_USER}:{EZID_PASSWD}" \
  -d format=csv \
  -d column=_id \
  -d column=erc.who \
  -d type=ark \
  -d permanence=real \
  -d createdAfter=2023-01-01T00:00:00Z \
  https://ezid.cdlib.org/download_request


In [None]:
%%pybash
# CSV download request with the full column list (ERC fields plus the
# underscore-prefixed fields), for ARKs with permanence=real created after
# 2023-04-01 (UTC). The Python cell that follows sends the same form fields
# with the requests library.

curl -u "{EZID_USER}:{EZID_PASSWD}" \
  -d format=csv \
  -d column=_id \
  -d column=erc.who \
  -d column=erc.what \
  -d column=erc.when \
  -d column=_owner \
  -d column=_ownergroup \
  -d column=_created \
  -d column=_updated \
  -d column=_profile \
  -d column=_target \
  -d column=_status \
  -d column=_export \
  -d type=ark \
  -d permanence=real \
  -d createdAfter=2023-04-01T00:00:00Z \
  https://ezid.cdlib.org/download_request



In [None]:
import requests

# Python version of the preceding curl cell: POST the same form fields to the
# EZID batch-download endpoint using HTTP basic auth.
url = 'https://ezid.cdlib.org/download_request'
data = {
    'format': 'csv',
    # A list value is sent as one repeated "column" form field per entry.
    'column': ['_id', 'erc.who', 'erc.what', 'erc.when', '_owner', '_ownergroup', '_created', '_updated', '_profile', '_target', '_status', '_export'],
    'type': 'ark',
    'permanence': 'real',
    'createdAfter': '2023-04-01T00:00:00Z'
}

# requests applies no timeout by default, so without one an unresponsive
# server would block this cell indefinitely; fail after 30 seconds instead.
response = requests.post(url, auth=(EZID_USER, EZID_PASSWD), data=data, timeout=30)

if response.status_code == 200:
    print(response.text)
else:
    print(f"Error: {response.status_code}")

In [None]:
# Inspect the status code and raw body of the download request made above,
# including the body when the status was not 200.
response.status_code, response.text

In [None]:
# write a public fake ARK

It seems we can write arbitrary metadata fields. To delete a key, set its value to "".


In [None]:
%%pybash
# Run the create! operation for the test ARK ending in /00/a5 on server "s",
# with public status and ERC metadata. The non-ERC key ry.who is passed with
# an empty value.

ezid s "{EZID_USER}:{EZID_PASSWD}" create! ark:/{TEST_NAAN}/{TEST_SHOULDER}/00/a5 \
    _status public \
    erc.who "Raymond Yee" \
    erc.what "testing ark 00/05" \
    erc.when "(:unkn) Unknown time" \
    ry.who "" \
    profile erc

In [None]:
from itertools import chain

# NOTE(review): `client` is not assigned in any cell visible in this notebook
# (only `client2` and `client3` are) — confirm where it is meant to come from;
# on a fresh kernel this cell raises NameError.
client.args.server = "s"

# Same record as the preceding %%pybash cell, but with a non-ERC "ry.who" key
# set and a different "erc.who" value.
metadata_ = {
  "ry.who": "Raymond Yee",
  "erc.who": "Nobody!",
  "profile": "erc",
  "erc.what": "testing ark 00/05",
  "erc.when": "(:unkn) Unknown time"
}

# The operation is a flat argument list: verb, identifier, then alternating
# key/value entries in the dict's insertion order.
client.args.operation = [
    'create!',
    f'ark:/{TEST_NAAN}/{TEST_SHOULDER}/00/a5',
    *chain.from_iterable(metadata_.items()),
]
r = client.operation()
r

In [None]:
# testing prefix matching



In [None]:
%%pybash
# View the test ARK ending in /00/a5 on server "s" to check what the create
# cells above stored.

ezid s "{EZID_USER}:{EZID_PASSWD}" view ark:/{TEST_NAAN}/{TEST_SHOULDER}/00/a5


In [None]:
%%pybash
# Invoke the ezid command-line client with -h (presumably prints its usage
# text - confirm).

ezid -h

In [None]:
%%pybash
# View the test ARK ending in /00/a7 on server "s".
# NOTE(review): no cell shown in this notebook creates /00/a7 - confirm it
# exists, or that a not-found response is the point of this cell.

ezid s "{EZID_USER}:{EZID_PASSWD}" view "ark:/{TEST_NAAN}/{TEST_SHOULDER}/00/a7"

# Preregistration shoulder

In [None]:
# issue of allowable characters

from urllib.parse import quote, unquote
import re


# Scratch checks on which characters survive URL quoting. Neither expression
# below is the last one in the cell, so their values are not displayed.
quote("A.B")

# Sample suffix: digits, ASCII letters and a dot.
test_string = '12hello.there'
# Inline version of the pattern that uri_unreserved() wraps.
re.match(r'^[\w\-_.~]*$', test_string)

def uri_unreserved(s):
    """
    Return True if `s` contains only RFC 3986 "unreserved" characters.

    The unreserved set is ASCII letters, ASCII digits, "-", ".", "_" and "~".
    The empty string is accepted (it contains no disallowed character).

    An explicit ASCII class is used because the word-character shorthand
    also matches non-ASCII letters and digits on str patterns, and the whole
    string must match, so a trailing newline is rejected as well.
    """
    return re.fullmatch(r'[A-Za-z0-9\-._~]*', s) is not None



# A non-ASCII character is percent-encoded by quote(); the comparison result
# is not displayed because it is not the last expression in the cell.
quote("我") == '%E6%88%91'

# Displayed result: whether the sample suffix is made of unreserved characters.
uri_unreserved(test_string)



# Analyzing OpenContext arks

In [None]:
import pandas as pd
from pandas import DataFrame, Series, Index
import numpy as np

In [None]:
# Load a CSV export from the local data/ directory and preview two rows.
# NOTE(review): presumably a file produced by the download_request cells
# above — record its provenance (request parameters, date) here.
df = pd.read_csv("data/5lrRHNLMtGqBVN8d.csv")
df.head(2)

In [None]:
# Number of rows in the export.
len(df)

In [None]:
# Row counts for each distinct value of the _status column.
df['_status'].value_counts()