In [None]:
import registry
import datastore
import mdsisclienttools.auth.TokenManager as ProvenaAuth
from utils import pprint_json
import json
# Domain of the Provena deployment; every API endpoint below is derived from it.
PROVENA_DOMAIN = "dev.rrap-is.com"

# Keycloak endpoint used for sign-in - edit this to point at the Keycloak
# instance of your own Provena deployment.
kc_endpoint = "https://auth.dev.rrap-is.com/auth/realms/rrap"

# Stage name passed to the auth manager.
stage = "DEV"

# Base URLs of the Provena APIs (one sub-domain per service).
registry_endpoint = f"https://registry-api.{PROVENA_DOMAIN}"
provenance_endpoint = f"https://prov-api.{PROVENA_DOMAIN}"
data_store_endpoint = f"https://data-api.{PROVENA_DOMAIN}"
job_endpoint = f"https://job-api.{PROVENA_DOMAIN}"

# Create the device-flow auth manager. This may open a browser window if you
# have not signed in recently. Tokens are cached in .tokens.json, so make sure
# that file is listed in your .gitignore.
provena_auth = ProvenaAuth.DeviceFlowManager(keycloak_endpoint=kc_endpoint, stage=stage)

# Expose the manager's get_auth function; it is what the Provena helper
# methods in this notebook are given as their `auth` argument.
get_auth = provena_auth.get_auth

## Updating Dataset Metadata (Adding missed fields, changing existing fields)
Updating metadata facilitates the addition of new fields, as well as overwriting existing fields. The process is implemented as a complete override, meaning you will need to provide the update endpoint with a complete reflection of the final desired metadata after the update process. This necessitates the acquisition of the current existing metadata. To this end, the following sections will demonstrate how to retrieve the existing metadata from the source of truth (the Registry), how to update it, and how to post the update to the Data Store via the API.

#### Fetch Existing Metadata using the registry
In case others have edited the metadata of a dataset, it's best practice to fetch the metadata from the registry (source of truth) and update it with the new fields. This ensures your update process does not overwrite changes made by other users in fields that you want to leave unchanged.

In [None]:
# Identifier of the dataset whose metadata will be updated.
dataset_id = "10378.1/1764358"

# Fetch the dataset's current metadata from the registry and display it.
current_metadata = registry.fetch_dataset_metadata(
    id=dataset_id,
    registry_endpoint=registry_endpoint,
    auth=get_auth(),
)
pprint_json(current_metadata)


In [None]:
import copy

# Work on a deep copy: plain assignment would only alias current_metadata, so
# the nested edits below would silently modify the fetched record as well.
updated_metadata = copy.deepcopy(current_metadata)

# Apply the desired changes; every field not touched here keeps the value that
# was fetched, which matters because the update is a complete override.
updated_metadata['dataset_info']["description"] = "This is an updated description for test purposes!"
updated_metadata['dataset_info']['spatial_info']['resolution'] = 0.0013
updated_metadata['associations']["point_of_contact"] = "Peter Baker"

# Show the payload that will be sent to the update endpoint.
pprint_json(updated_metadata)


#### Post update to Datastore API
Once you have the metadata, you can update it with the new fields, and post it to the Datastore API. The API will then update the metadata in the Datastore.


In [None]:
# Reason recorded alongside the update request.
reason = "Testing API update functionality."

# Send the complete, edited metadata to the data store update endpoint and
# display the API's response.
update_response = datastore.update_dataset(
    dataset_id=dataset_id,
    updated_metadata=updated_metadata,
    reason=reason,
    datastore_endpoint=data_store_endpoint,
    auth=get_auth(),
)
pprint_json(update_response)

#### Alternative method for editing the metadata payload for update
1. Fetch the current metadata
2. Save to file and edit this file
3. Read in the file and post to API.

This method avoids the need to edit metadata in code, where finding the correct location of each key-value pair can be confusing. Nevertheless, the API will reject incorrectly formatted metadata payloads.

In [None]:
import os

# Fetch the dataset's current metadata from the registry so the file starts
# from the latest registered state.
current_metadata = registry.fetch_dataset_metadata(registry_endpoint=registry_endpoint, id=dataset_id, auth=get_auth())

# File the metadata is written to for manual editing.
updated_metadata_file_path = "configs/updated_metadata.json"

# Make sure the target directory exists; open(..., "w") does not create missing
# parent directories and would raise FileNotFoundError otherwise. The `or "."`
# keeps this working if the path is changed to a bare file name.
os.makedirs(os.path.dirname(updated_metadata_file_path) or ".", exist_ok=True)

# write to file (explicit encoding so the result does not depend on the locale)
with open(updated_metadata_file_path, "w", encoding="utf-8") as f:
    json.dump(current_metadata, f, indent=2)

In [None]:
# After editing the file, read it back in and send to the update endpoint.
# The encoding is pinned to UTF-8 (the standard encoding for JSON) so that
# non-ASCII characters added while editing are decoded correctly regardless of
# the platform's default locale encoding.
with open(updated_metadata_file_path, encoding="utf-8") as f:
    updated_metadata = json.load(f)

# Post the edited metadata as a complete override and display the API response.
# NOTE: `reason` and `dataset_id` are defined in earlier cells of this notebook.
update_response = datastore.update_dataset(datastore_endpoint=data_store_endpoint, updated_metadata=updated_metadata, dataset_id=dataset_id, reason=reason, auth=get_auth())
pprint_json(update_response)