In [7]:
import copy
import json

import mdsisclienttools.auth.TokenManager as ProvenaAuth

from helpers import datastore, registry
from utils import pprint_json
from env_setup import get_auth, registry_endpoint, data_store_endpoint

## Updating Dataset Metadata (Adding missed fields or changing existing fields)
Updating metadata lets you add new (previously blank) fields as well as overwrite existing ones. The process is implemented as a complete override, meaning you must provide the update endpoint with a complete reflection of the final desired metadata after the update. This requires first acquiring the current metadata. To this end, the following sections demonstrate how to retrieve the existing metadata from the source of truth (the registry), how to edit it, and how to post the update to the Data Store via the API.

#### Fetch Existing Metadata using the registry
In case others have edited the metadata of a dataset, it's best practice to fetch the metadata from the registry (the source of truth) and update it with the new fields. This ensures your update does not overwrite changes made by other users in fields that you intend to leave unchanged.

In [8]:
# Identifier of the dataset whose metadata is going to be updated.
dataset_id = "10378.1/1764777"

# Ask the registry for the metadata it currently holds for this dataset,
# then display it so the starting point of the update is visible.
current_metadata = registry.fetch_dataset_metadata(
    registry_endpoint=registry_endpoint,
    id=dataset_id,
    auth=get_auth(),
)
pprint_json(current_metadata)


Fetching from registry, id: 10378.1/1764777...
{
  "associations": {
    "organisation_id": "10378.1/1764284",
    "data_custodian_id": "10378.1/1758949",
    "point_of_contact": "Lazaros"
  },
  "approvals": {
    "ethics_registration": {
      "relevant": false,
      "obtained": false
    },
    "ethics_access": {
      "relevant": false,
      "obtained": false
    },
    "indigenous_knowledge": {
      "relevant": false,
      "obtained": false
    },
    "export_controls": {
      "relevant": false,
      "obtained": false
    }
  },
  "dataset_info": {
    "name": "The Test Dataset: A Mirror to the Soul of the Software",
    "description": "This is a test dataset purposed for demonstrating registration via API endpoint.",
    "access_info": {
      "reposited": true,
      "uri": null,
      "description": null
    },
    "publisher_id": "10378.1/1764284",
    "created_date": "2022-10-02",
    "published_date": "2023-10-03",
    "license": "https://creativecommons.org/licenses/b

#### Programmatic Edit of Metadata

In [9]:
# Work on an independent deep copy of the fetched metadata. A plain assignment
# would only bind a second name to the same nested dictionaries, so the edits
# below would silently modify `current_metadata` as well.
updated_metadata = copy.deepcopy(current_metadata)

# Overwrite / fill in the fields that should change; everything else is left
# exactly as it was fetched, because the update endpoint expects the complete
# final metadata.
updated_metadata['dataset_info']["description"] = "This is an updated description for test purposes!"
updated_metadata['dataset_info']['spatial_info']['resolution'] = 0.0013
updated_metadata['dataset_info']['spatial_info']['extent'] = "SRID=4326;POLYGON ((30 10, 40 40, 20 40, 10 20, 30 10))"
updated_metadata['associations']["point_of_contact"] = "Peter Baker"

# Uncomment to inspect the edited payload before posting it:
#pprint_json(updated_metadata)


#### Post update to Datastore API
Once you have the metadata, you can update it with the new fields, and post it to the Datastore API. The API will then update the metadata in the Datastore.


In [10]:
# Free-text reason that is sent along with the update request.
reason = "Testing API update functionality."

# Post the complete, edited metadata to the Data Store update endpoint and
# show the response that comes back.
update_response = datastore.update_dataset(
    datastore_endpoint=data_store_endpoint,
    updated_metadata=updated_metadata,
    dataset_id=dataset_id,
    reason=reason,
    auth=get_auth(),
)
pprint_json(update_response)

Updating dataset with metadata: {'associations': {'organisation_id': '10378.1/1764284', 'data_custodian_id': '10378.1/1758949', 'point_of_contact': 'Peter Baker'}, 'approvals': {'ethics_registration': {'relevant': False, 'obtained': False}, 'ethics_access': {'relevant': False, 'obtained': False}, 'indigenous_knowledge': {'relevant': False, 'obtained': False}, 'export_controls': {'relevant': False, 'obtained': False}}, 'dataset_info': {'name': 'The Test Dataset: A Mirror to the Soul of the Software', 'description': 'This is an updated description for test purposes!', 'access_info': {'reposited': True, 'uri': None, 'description': None}, 'publisher_id': '10378.1/1764284', 'created_date': '2022-10-02', 'published_date': '2023-10-03', 'license': 'https://creativecommons.org/licenses/by/4.0/', 'purpose': "But why, you might ask, was the Test Dataset so important? Well, dear reader, it served as a mirror reflecting the very essence of the software, exposing its vulnerabilities and frailties. 

#### Alternative (manual) method for editing the metadata payload
1. Fetch the current metadata
2. Save to file and edit this file
3. Read in the file and post to API.

This method avoids the need to edit metadata in code, which can be confusing because you must locate the correct key-value pairs within the nested structure. Nevertheless, the API will reject incorrectly formatted metadata payloads.

In [11]:
# Re-fetch the metadata from the registry so the file reflects its current state.
current_metadata = registry.fetch_dataset_metadata(
    registry_endpoint=registry_endpoint,
    id=dataset_id,
    auth=get_auth(),
)

# Save the metadata as indented JSON so it can be edited by hand.
updated_metadata_file_path = "configs/updated_metadata.json"
with open(updated_metadata_file_path, "w") as metadata_file:
    json.dump(current_metadata, metadata_file, indent=2)

Fetching from registry, id: 10378.1/1764777...


In [12]:
# Once the file has been edited by hand, load it back in as the update payload.
with open(updated_metadata_file_path) as edited_metadata_file:
    updated_metadata = json.load(edited_metadata_file)

# Send the payload to the update endpoint; `reason` is the value defined in the
# earlier update cell.
update_response = datastore.update_dataset(
    datastore_endpoint=data_store_endpoint,
    updated_metadata=updated_metadata,
    dataset_id=dataset_id,
    reason=reason,
    auth=get_auth(),
)
pprint_json(update_response)

Updating dataset with metadata: {'associations': {'organisation_id': '10378.1/1764284', 'data_custodian_id': '10378.1/1758949', 'point_of_contact': 'Peter Baker'}, 'approvals': {'ethics_registration': {'relevant': False, 'obtained': False}, 'ethics_access': {'relevant': False, 'obtained': False}, 'indigenous_knowledge': {'relevant': False, 'obtained': False}, 'export_controls': {'relevant': False, 'obtained': False}}, 'dataset_info': {'name': 'The Test Dataset: A Mirror to the Soul of the Software', 'description': 'This is an updated description for test purposes!', 'access_info': {'reposited': True, 'uri': None, 'description': None}, 'publisher_id': '10378.1/1764284', 'created_date': '2022-10-02', 'published_date': '2023-10-03', 'license': 'https://creativecommons.org/licenses/by/4.0/', 'purpose': "But why, you might ask, was the Test Dataset so important? Well, dear reader, it served as a mirror reflecting the very essence of the software, exposing its vulnerabilities and frailties. 