# Persistent identifiers and associated information

In [None]:
# PyHandleClient Library developed by DKRZ and maintained as part of EUDAT and EOSC
# https://github.com/EUDAT-B2SAFE/PYHANDLE
from pyhandle.handleclient import PyHandleClient
from pprint import pprint
client = PyHandleClient('rest')

In [2]:
tracking_id = "21.14100/1fe581d0-0c10-328d-8b78-941834e09b19"
result = client.retrieve_handle_record_json(tracking_id)

In [4]:
pprint(result)

{'handle': '21.14100/1fe581d0-0c10-328d-8b78-941834e09b19',
 'responseCode': 1,
 'values': [{'data': {'format': 'string',
                      'value': 'https://handle-esgf.dkrz.de/lp/21.14100/1fe581d0-0c10-328d-8b78-941834e09b19'},
             'index': 1,
             'timestamp': '2019-08-05T10:21:43Z',
             'ttl': 86400,
             'type': 'URL'},
            {'data': {'format': 'string', 'value': 'DATASET'},
             'index': 2,
             'timestamp': '2019-08-05T10:21:43Z',
             'ttl': 86400,
             'type': 'AGGREGATION_LEVEL'},
            {'data': {'format': 'string', 'value': 'TRUE'},
             'index': 3,
             'timestamp': '2019-08-05T10:21:43Z',
             'ttl': 86400,
             'type': 'FIXED_CONTENT'},
            {'data': {'format': 'string',
                      'value': 'CMIP6.ScenarioMIP.DKRZ.MPI-ESM1-2-HR.ssp126.r1i1p1f1.6hrPlev.hurs.gn'},
             'index': 4,
             'timestamp': '2019-08-05T10:21:43Z',
     

## QC information management in json manifest files

QC information is managed in json files with PIDs as key entries

this allows for easy manipulation and flexible tools to ingest in any QC storage backends 

### direct interchange between json manifest files and python dictionaries

In [5]:
import json

In [15]:
# python dict to json
c3s_qc_prepare = {}
c3s_qc_prepare["21.14100/1fe581d0-0c10-328d-8b78-941834e09b19"] = {
    "dset_id": "CMIP6.ScenarioMIP.DKRZ.MPI-ESM1-2-HR.ssp126.r1i1p1f1.6hrPlev.hurs.gn",
    "qc_date": "21.04.2020",
    "qc_status": "Warning",
    "qc_message": "Wrong FillValue",
    "prepare_category":"Minor Issue"
}

c3s_json = json.dumps(c3s_qc_prepare,indent=3)
print(c3s_json)


{
   "21.14100/1fe581d0-0c10-328d-8b78-941834e09b19": {
      "dset_id": "CMIP6.ScenarioMIP.DKRZ.MPI-ESM1-2-HR.ssp126.r1i1p1f1.6hrPlev.hurs.gn",
      "qc_date": "21.04.2020",
      "qc_message": "Wrong FillValue",
      "prepare_category": "Minor Issue"
   }
}


## Using pandas dataframes to visualize and search in manifest files

In [14]:
import pandas as pd

In [16]:
tst = pd.read_json(c3s_json)

In [17]:
tst

Unnamed: 0,21.14100/1fe581d0-0c10-328d-8b78-941834e09b19
dset_id,CMIP6.ScenarioMIP.DKRZ.MPI-ESM1-2-HR.ssp126.r1...
prepare_category,Minor Issue
qc_date,21.04.2020
qc_message,Wrong FillValue
qc_status,Warning


## Example manifest files

instead of collecting information in one complex and large json file, the information is
partitioned into separate manifest files, one manifest file per QC information source (e.g. CF check, PrePARE check, Errata, ESMVal, ..)

The manifest files are consistently managed in a github repository 

### CF check manifest files

In [23]:
cf_m = {}
cf_m["21.14100/1fe581d0-0c10-328d-8b78-941834e09b19"]= {
    "qc_date": "....",
    "cf_error_code": "cf doc section 4.1 violation",
    "cf_status": "Warning"
}


In [25]:
pd.DataFrame(cf_m)

Unnamed: 0,21.14100/1fe581d0-0c10-328d-8b78-941834e09b19
cf_error_code,cf doc section 4.1 violation
cf_status,Warning
qc_date,....


### PrePARE manifest files

In [26]:
prepare_m = {}
prepare_m["21.14100/1fe581d0-0c10-328d-8b78-941834e09b19"]= {
    "qc_date": "....",
    "pr_error_code": "cf doc section 4.1 violation",
    "pr_status": "Warning"
}

In [27]:
pd.DataFrame(cf_m)

Unnamed: 0,21.14100/1fe581d0-0c10-328d-8b78-941834e09b19
cf_error_code,cf doc section 4.1 violation
cf_status,Warning
qc_date,....


### ESMVal manifest files

### DDC (DOI etc.) related manifest files ?