In [1]:
# https://stackoverflow.com/a/67029719/7782
from IPython import get_ipython
from IPython.core.magic import register_cell_magic

ipython = get_ipython()

@register_cell_magic
def pybash(line, cell):
    """Cell magic: fill ``{name}`` placeholders from globals, then run the cell as bash.

    ``line`` (the text following ``%%pybash``) is not used. The cell body is
    passed through ``str.format(**globals())`` before execution, so a literal
    brace intended for the shell has to be doubled (``{{`` / ``}}``).
    """
    script = cell.format(**globals())
    ipython.run_cell_magic('bash', '', script)

In [2]:
import argparse
import os
import types


import ezid_client_tools as ect
from ezid_client_tools.utils import ANVL

import structured_ezid as sezid

import settings



In [3]:
# Credentials: environment variables first, the local `settings` module as fallback.
EZID_USER = os.getenv('EZID_USER')
EZID_PASSWD = os.getenv('EZID_PASSWD')

if None in (EZID_USER, EZID_PASSWD):
    # If either variable is unset, BOTH values are taken from settings.
    import settings
    EZID_USER = settings.EZID_USER
    EZID_PASSWD = settings.EZID_PASSWD

TEST_NAAN = "99999"
TEST_SHOULDER = "fk4"
TEST_ID = "/00/a5"

# New shoulder

OC_NAAN = "28722"
OC_PREREG_SHOULDER = "r2"

In [4]:
# Client authenticated with the configured credentials and pointed at
# server "s" (presumably the EZID staging server — confirm).
client2 = sezid.Client2()
client2.args.credentials = "{}:{}".format(EZID_USER, EZID_PASSWD)
client2.args.server = "s"

# Identifier built from the TEST_* constants; used by the cells that follow.
ark_ = sezid.ARKIdentifier(TEST_NAAN, TEST_SHOULDER, TEST_ID)

# Last expression of the cell, so the returned metadata is displayed.
client2.view_identifier(ark_)

LastUpdatedOrderedDict([('success', 'ark'),
                        ('ry.who', 'Raymond Yee'),
                        ('erc.who', 'Nobody?'),
                        ('profile', 'erc'),
                        ('erc.what', 'testing ark 00/05'),
                        ('erc.when', '('),
                        ('_owner', 'opencontext'),
                        ('_ownergroup', 'ucblibrary'),
                        ('_created', '1683902153'),
                        ('_updated', '1684438102'),
                        ('_profile', 'erc'),
                        ('_target', 'https'),
                        ('_status', 'public'),
                        ('_export', 'yes')])

In [14]:
# check on prefix matching
#
# Look up a child path of the test ARK with prefix_matching=False and show
# any client error together with its concrete exception type.

try:
    client2.view_identifier(str(ark_) + "/1234", prefix_matching=False)
except ect.ClientError as err:
    print(err)
    print(type(err), str(err))

HTTP Error 400: Bad Request
<class 'ezid_client_tools.client.HTTPClientError'> HTTP Error 400: Bad Request


In [None]:
# Metadata to store on the test ARK (key order kept as originally written).
metadata_ = {
    "erc.who": "Nobody?",
    "profile": "erc",
    "erc.what": "testing ark 00/05",
    "erc.when": "(:unkn) Unknown time",
}

client2.create_identifier(ark_, metadata_)


In [None]:
# Build an ARK in the TEST NAAN / TEST SHOULDER space whose name is a random UUID,
# intended for creating and then deleting a reserved identifier.
# (This cell only constructs the identifier object; it makes no create/delete call.)

import uuid

# UUID.hex is already a 32-character hexadecimal string without hyphens,
# so no str() conversion or "-" stripping is required.
id_ = uuid.uuid4().hex
arkid_ = sezid.ARKIdentifier(TEST_NAAN, TEST_SHOULDER, id_)



In [None]:
%%pybash

ezid s "{EZID_USER}:{EZID_PASSWD}" delete "ark:/{TEST_NAAN}/{TEST_SHOULDER}/00/a6"

In [None]:
%%pybash

ezid p "{EZID_USER}:{EZID_PASSWD}" view ark:/28722/k2154wc6r 

In [None]:
%%pybash

# POST a download_request for ARKs (permanence=real) with createdAfter
# 2023-01-01, asking for ANVL output. Credentials are filled in by %%pybash.
curl -u "{EZID_USER}:{EZID_PASSWD}" -d format=anvl -d type=ark \
  -d permanence=real \
  -d createdAfter=2023-01-01T00:00:00Z \
  https://ezid.cdlib.org/download_request

In [None]:
%%pybash

# POST a download_request for ARKs (permanence=real) with createdAfter
# 2023-01-01, asking for CSV output limited to the _id and erc.who columns.
curl -u "{EZID_USER}:{EZID_PASSWD}" \
  -d format=csv \
  -d column=_id \
  -d column=erc.who \
  -d type=ark \
  -d permanence=real \
  -d createdAfter=2023-01-01T00:00:00Z \
  https://ezid.cdlib.org/download_request


In [None]:
%%pybash

# POST a download_request for ARKs (permanence=real) with createdAfter
# 2023-04-01, asking for CSV output with the identifier, the three erc.*
# fields and the underscore-prefixed columns listed below.
curl -u "{EZID_USER}:{EZID_PASSWD}" \
  -d format=csv \
  -d column=_id \
  -d column=erc.who \
  -d column=erc.what \
  -d column=erc.when \
  -d column=_owner \
  -d column=_ownergroup \
  -d column=_created \
  -d column=_updated \
  -d column=_profile \
  -d column=_target \
  -d column=_status \
  -d column=_export \
  -d type=ark \
  -d permanence=real \
  -d createdAfter=2023-04-01T00:00:00Z \
  https://ezid.cdlib.org/download_request



In [None]:
import requests

# Python version of the CSV download request: the list under 'column' is sent
# as repeated column=... form fields, like the repeated -d flags used with curl.
url = 'https://ezid.cdlib.org/download_request'
data = {
    'format': 'csv',
    'column': [
        '_id', 'erc.who', 'erc.what', 'erc.when',
        '_owner', '_ownergroup', '_created', '_updated',
        '_profile', '_target', '_status', '_export',
    ],
    'type': 'ark',
    'permanence': 'real',
    'createdAfter': '2023-04-01T00:00:00Z'
}

# requests applies no timeout unless one is passed, so without this a stalled
# connection would block the kernel indefinitely. On expiry a
# requests.exceptions.Timeout is raised instead.
response = requests.post(url, auth=(EZID_USER, EZID_PASSWD), data=data, timeout=60)

if response.status_code == 200:
    print(response.text)
else:
    print(f"Error: {response.status_code}")

In [None]:
response.status_code, response.text

In [None]:
# write a public fake ARK

It seems we can write arbitrary metadata fields. To delete a key, set its value to the empty string ("").


In [None]:
%%pybash

# Run create! on the test ARK with _status public and ERC metadata.
# ry.who is passed as an empty string; per the notebook's note, an empty
# value is how a key gets deleted.
ezid s "{EZID_USER}:{EZID_PASSWD}" create! ark:/{TEST_NAAN}/{TEST_SHOULDER}/00/a5 \
    _status public \
    erc.who "Raymond Yee" \
    erc.what "testing ark 00/05" \
    erc.when "(:unkn) Unknown time" \
    ry.who "" \
    profile erc

In [None]:
from itertools import chain

# NOTE(review): `client` is not defined in any cell visible in this notebook
# (only `client2` is) — confirm where it comes from before running this on a
# fresh kernel.
client.args.server = "s"

metadata_ = {
    "ry.who": "Raymond Yee",
    "erc.who": "Nobody!",
    "profile": "erc",
    "erc.what": "testing ark 00/05",
    "erc.when": "(:unkn) Unknown time",
}

# Operation arguments: the verb, the identifier, then the metadata flattened
# into alternating key, value entries.
client.args.operation = [
    'create!',
    f'ark:/{TEST_NAAN}/{TEST_SHOULDER}/00/a5',
    *chain.from_iterable(metadata_.items()),
]
r = client.operation()
r

In [None]:
# testing prefix matching



In [None]:
%%pybash

ezid s "{EZID_USER}:{EZID_PASSWD}" view ark:/{TEST_NAAN}/{TEST_SHOULDER}/00/a5


In [None]:
%%pybash

ezid -h

# Reserved IDs: creating and deleting

In [None]:
%%pybash

# test shoulder and reserved id
# _status = reserved

ezid s "{EZID_USER}:{EZID_PASSWD}" create! "ark:/{TEST_NAAN}/{TEST_SHOULDER}/00/a7" \
    _status reserved \
    erc.who "Anthony Tuck" \
    erc.what "Photo 19660118 from Italy/Poggio Civitate/Tesoro/Tesoro 2F/1966, ID:476/PC 19660118" \
    erc.when "2017-10-04" \
    profile erc

In [None]:
%%pybash

ezid s "{EZID_USER}:{EZID_PASSWD}" view "ark:/{TEST_NAAN}/{TEST_SHOULDER}/00/a7"

# Preregistration shoulder

In [53]:
# issue of allowable characters

from urllib.parse import quote, unquote
import re


quote("A.B")

test_string = '12hello.there'
re.match(r'^[\w\-_.~]*$', test_string)

def uri_unreserved(s):
    """Return True if ``s`` contains only RFC 3986 "unreserved" characters.

    The unreserved set is ASCII letters, ASCII digits, ``-``, ``.``, ``_``
    and ``~``. The empty string is accepted (there is nothing to reject).

    Args:
        s: The string to check.

    Returns:
        bool: True when every character of ``s`` is unreserved, else False.
    """
    # An explicit ASCII class is used instead of \w, which for str patterns
    # also matches non-ASCII word characters (e.g. CJK) that must be
    # percent-encoded. fullmatch replaces ^...$ because $ would also match
    # just before a trailing newline.
    return re.fullmatch(r'[A-Za-z0-9\-._~]*', s) is not None



quote("我") == '%E6%88%91'

uri_unreserved(test_string)



# Pathlib 

How well can `pathlib` model the "parts" of an ARK?


In [10]:
import pathlib
import os


p = pathlib.PurePath()
os.PathLike

os.PathLike

In [15]:
p = pathlib.PurePath("a/b/c")
p

PurePosixPath('a/b/c')

In [22]:
p.name, p.parent, p.parent.parent, p.parent.parent.parent, p.parent.parent.parent.parent

('c',
 PurePosixPath('a/b'),
 PurePosixPath('a'),
 PurePosixPath('.'),
 PurePosixPath('.'))

In [30]:
p1 = p / "d.py"
p1.suffix, p1.stem, p1.with_name('d1.py')


('.py', 'd', PurePosixPath('a/b/c/d1.py'))

In [29]:
"/".join(p.parts)

'a/b/c'

In [36]:
p.match('b*/c')

True

In [42]:
# Pure paths do not collapse ".." segments, so the joined path still carries
# its 'c/..' parts and does not compare equal to 'a/b/e' (hence False).
pathlib.PurePath("a/b/c").joinpath('../e') == pathlib.PurePath("a/b/e")

False

In [43]:
p.parent / "e"

PurePosixPath('a/b/e')

In [46]:
pathlib.PurePath('a/.pyen/b/')

PurePosixPath('a/.pyen/b')

# Analyzing OpenContext arks

In [None]:
import pandas as pd
from pandas import DataFrame, Series, Index
import numpy as np

In [None]:
df = pd.read_csv("data/5lrRHNLMtGqBVN8d.csv")
df.head(2)

In [None]:
len(df)

In [None]:
df['_status'].value_counts()