# Provena Client Library Workflow Guide and Example

This notebook contains guidance and examples on how to use the Provena Client Library. 

The client library is a user-friendly interface for interacting with the various APIs of Provena (Registry, Prov, Datastore, etc.) through code, and is currently compatible with the Python programming language.

### Client Configuration and Initialisation

In [9]:
# Import initial modules needed.
from provenaclient import ProvenaClient, Config
from provenaclient.auth import DeviceFlow
from provenaclient.auth.implementations import OfflineFlow
from pprint import pprint


Instantiate the client library by providing the domain your Provena instance is hosted on and the name of your Keycloak realm. 

In [6]:
# Build the client in three steps: configuration, authentication, client.

# 1. Configuration: the domain the Provena instance is hosted on and the
#    name of its Keycloak realm.
config = Config(domain="dev.rrap-is.com", realm_name="rrap")

# 2. Authentication: a DeviceFlow pointed at the Keycloak endpoint that the
#    configuration exposes.
auth = DeviceFlow(
    keycloak_endpoint=config.keycloak_endpoint,
    client_id="client-tools",
)

# 3. Client: the object every later cell uses to reach the Provena APIs.
client = ProvenaClient(config=config, auth=auth)

## Querying Datastore API.

We will take a look at querying and interacting with the Datastore API by exploring some common operations: fetching a dataset, minting a dataset, and listing datasets in various forms (a single page, all datasets, or up to a specified limit).

In [11]:

dataset = await client.datastore.fetch_dataset(id = "10378.1/1908974")

print(dataset) # Fetched dataset pythonic object.
print()
print("Dataset Query Details:", dataset.status.details) # Accessing fetched dataset query details
print()
print("Dataset Display Name:", dataset.item.display_name) # Accessing fetched dataset name

status=Status(success=True, details="Successfully fetched data for handle '10378.1/1908974'") item=ItemDataset(display_name='TEst', user_metadata=None, collection_format=CollectionFormat(associations=CollectionFormatAssociations(organisation_id='10378.1/1877551', data_custodian_id=None, point_of_contact=None), approvals=CollectionFormatApprovals(ethics_registration=DatasetEthicsRegistrationCheck(relevant=False, obtained=False), ethics_access=DatasetEthicsAccessCheck(relevant=False, obtained=False), indigenous_knowledge=IndigenousKnowledgeCheck(relevant=False, obtained=False), export_controls=ExportControls(relevant=False, obtained=False)), dataset_info=CollectionFormatDatasetInfo(name='TEst', description='TEst', access_info=AccessInfo(reposited=False, uri='http://google.com', description='test'), publisher_id='10378.1/1877551', created_date=datetime.date(2024, 6, 5), published_date=datetime.date(2024, 6, 21), license=AnyHttpUrl('https://gbrrestoration.github.io/rrap-mds-knowledge-hub/i

In [None]:
from ProvenaInterfaces.RegistryModels import *

dataset_to_create = CollectionFormat(
        associations=CollectionFormatAssociations(
        organisation_id="10378.1/1893860",
        data_custodian_id="10378.1/1893843",
        point_of_contact= None
        ),
        approvals=CollectionFormatApprovals(
            ethics_registration = DatasetEthicsRegistrationCheck(relevant=False, obtained=False),
            ethics_access=DatasetEthicsAccessCheck(relevant= False, obtained= False),
            indigenous_knowledge=IndigenousKnowledgeCheck(relevant=False, obtained= False),
            export_controls=ExportControls(relevant=False, obtained=False)
        ),
        dataset_info=CollectionFormatDatasetInfo(
            name="Parth testing",
            description="testing dataset",
            access_info=AccessInfo(reposited=True, uri=None, description=None),
            publisher_id="10378.1/1893860",
            published_date=date.today(),
            license = "https://www.google.com", #type:ignore
            created_date=date.today(),
            purpose= None,
            rights_holder=None,
            usage_limitations=None,
            preferred_citation=None,
            formats = None,
            keywords= None,
            user_metadata= None,
            version = None
        )
    )

created_dataset = await client.datastore.mint_dataset(dataset_mint_info=dataset_to_create)

print("Created Dataset handle is:", created_dataset.handle)
print("Created Dataset reqeuest details:", created_dataset.status.details)

In [24]:
from ProvenaInterfaces.RegistryAPI import *

# Sort criteria to receive datasets.
sort_criteria = NoFilterSubtypeListRequest(
            sort_by=SortOptions(sort_type=SortType.DISPLAY_NAME, ascending=False, begins_with=None), 
            pagination_key=None, 
            page_size=10
        )


list_datasets = await client.datastore.list_datasets(list_dataset_request=sort_criteria)

for i in list_datasets:
    print(i)


('status', Status(success=True, details='Successfully listed items.'))
('items', [ItemDataset(display_name='WaveEcologyModel Output Grapes', user_metadata=None, collection_format=CollectionFormat(associations=CollectionFormatAssociations(organisation_id='10378.1/1875957', data_custodian_id=None, point_of_contact=None), approvals=CollectionFormatApprovals(ethics_registration=DatasetEthicsRegistrationCheck(relevant=False, obtained=False), ethics_access=DatasetEthicsAccessCheck(relevant=False, obtained=False), indigenous_knowledge=IndigenousKnowledgeCheck(relevant=False, obtained=False), export_controls=ExportControls(relevant=False, obtained=False)), dataset_info=CollectionFormatDatasetInfo(name='WaveEcologyModel Output Grapes', description='An opus of oceanic orchestration, where data motifs crescendo into a symphony of ecological enlightenment, harmonizing the cacophony of coral complexity into a serenade of predictive precision.', access_info=AccessInfo(reposited=True, uri=None, descr

In [26]:
# Getting all datasets in datastore with specified sort criteria.
all_datasets = await client.datastore.list_all_datasets(sort_criteria=sort_criteria)
pprint(all_datasets)

[ItemDataset(display_name='TEst', user_metadata=None, collection_format=CollectionFormat(associations=CollectionFormatAssociations(organisation_id='10378.1/1877551', data_custodian_id=None, point_of_contact=None), approvals=CollectionFormatApprovals(ethics_registration=DatasetEthicsRegistrationCheck(relevant=False, obtained=False), ethics_access=DatasetEthicsAccessCheck(relevant=False, obtained=False), indigenous_knowledge=IndigenousKnowledgeCheck(relevant=False, obtained=False), export_controls=ExportControls(relevant=False, obtained=False)), dataset_info=CollectionFormatDatasetInfo(name='TEst', description='TEst', access_info=AccessInfo(reposited=False, uri='http://google.com', description='test'), publisher_id='10378.1/1877551', created_date=datetime.date(2024, 6, 5), published_date=datetime.date(2024, 6, 21), license=AnyHttpUrl('https://gbrrestoration.github.io/rrap-mds-knowledge-hub/information-system/licenses.html#copyright-all-rights-reserved-', ), purpose=None, rights_holder=No

In [28]:
# Get a total number of datasets (specified limit) with provided sort criteria. 
async for dataset in client.datastore.for_all_datasets(list_dataset_request=sort_criteria, total_limit=30):
    pprint(dataset)


ItemDataset(display_name='Parth testing', user_metadata=None, collection_format=CollectionFormat(associations=CollectionFormatAssociations(organisation_id='10378.1/1893860', data_custodian_id='10378.1/1893843', point_of_contact=None), approvals=CollectionFormatApprovals(ethics_registration=DatasetEthicsRegistrationCheck(relevant=False, obtained=False), ethics_access=DatasetEthicsAccessCheck(relevant=False, obtained=False), indigenous_knowledge=IndigenousKnowledgeCheck(relevant=False, obtained=False), export_controls=ExportControls(relevant=False, obtained=False)), dataset_info=CollectionFormatDatasetInfo(name='Parth testing', description='testing dataset', access_info=AccessInfo(reposited=True, uri=None, description=None), publisher_id='10378.1/1893860', created_date=datetime.date(2024, 5, 28), published_date=datetime.date(2024, 5, 28), license=AnyHttpUrl('https://www.google.com', ), purpose=None, rights_holder=None, usage_limitations=None, preferred_citation=None, spatial_info=None, t

## Querying Provenance API.

#### We will now take a look at exploring some of the common operations of the PROV-API with existing and valid entities. 

Exploring Lineage

In [20]:
# Upstream

print("Exploring upstream query")

upstream_result = await client.prov_api.explore_upstream(starting_id="10378.1/1904964")
pprint(upstream_result)
print()
pprint(upstream_result.graph.get('nodes'))

print()

print("Exploring downstream query")

downstream_result = await client.prov_api.explore_downstream(starting_id="10378.1/1904961")
pprint(downstream_result)
print()
pprint(downstream_result.graph.get('nodes'))


Exploring upstream query
LineageResponse(status=Status(success=True, details='Made lineage query (with depth 3) to neo4j backend.'), record_count=3, graph={'directed': True, 'multigraph': False, 'graph': {}, 'nodes': [{'item_category': 'ENTITY', 'item_subtype': 'DATASET', 'id': '10378.1/1904964'}, {'item_category': 'ACTIVITY', 'item_subtype': 'CREATE', 'id': '10378.1/1904975'}, {'item_category': 'AGENT', 'item_subtype': 'PERSON', 'id': '10378.1/1893843'}], 'links': [{'type': 'wasGeneratedBy', 'source': '10378.1/1904964', 'target': '10378.1/1904975'}, {'type': 'wasAttributedTo', 'source': '10378.1/1904964', 'target': '10378.1/1893843'}, {'type': 'wasAssociatedWith', 'source': '10378.1/1904975', 'target': '10378.1/1893843'}]})

[{'id': '10378.1/1904964',
  'item_category': 'ENTITY',
  'item_subtype': 'DATASET'},
 {'id': '10378.1/1904975',
  'item_category': 'ACTIVITY',
  'item_subtype': 'CREATE'},
 {'id': '10378.1/1893843', 'item_category': 'AGENT', 'item_subtype': 'PERSON'}]

Exploring Contributing and Effected Datasets

In [None]:
# Contributing and Effected Datasets

# TODO: this cell was left unfinished. Both assignments had no right-hand
# side, which made the cell a SyntaxError and broke "Restart & Run All".
# They are set to None as explicit placeholders until the queries are written.
# NOTE(review): presumably these should be lineage queries made through
# `client.prov_api` (like explore_upstream / explore_downstream above) —
# confirm the exact method names and a suitable starting id against the
# provenaclient documentation before filling them in.
contributing_datasets = None

effected_datasets = None