In [2]:
from mango_mdschema import Schema

In [3]:
import json
import os, os.path

In [44]:
# Load the metadata schema from its published JSON definition; the bare
# name on the last line displays the resulting Schema object.
dv_schema = Schema("../doc/metadata/mango2dv-demo-1.0.0-published.json")
dv_schema

<mango_mdschema.schema.Schema at 0x7fb5a47f6410>

In [45]:
# Printing the schema gives a readable summary: its title, name/version,
# AVU prefix and the list of fields with their types.
print(dv_schema)

[1mManGO Dataset to Dataverse Demo[0m
Metadata annotated with the schema 'mango2dv-demo' (1.0.0) carry the prefix 'mgs'.
This schema contains the following 5 fields:
- [1mtitle[0m, of type 'text' (required).
- [1mauthor[0m, of type 'object' (required).
- [1mdatasetContact[0m, of type 'object' (required).
- [1mdsDescription[0m, of type 'textarea' (required).
- [1msubject[0m, of type 'select'.


In [46]:
# Show the detailed requirements of the composite 'author' field,
# including the requirements of each of its subfields.
dv_schema.print_requirements("author")

[1mType[0m: object.
[1mRequired[0m: True. (2 of its 2 fields are required.)
[1mRepeatable[0m: True.

Composed of the following fields:
[4mmango2dv-demo.author.authorName[0m
[1mType[0m: text.
[1mRequired[0m: True. [1mDefault[0m: None.
[1mRepeatable[0m: False.

[4mmango2dv-demo.author.authorAffiliation[0m
[1mType[0m: text.
[1mRequired[0m: True. [1mDefault[0m: None.
[1mRepeatable[0m: False.


In [6]:
# Load the Dataverse dataset template. JSON files are UTF-8, so the encoding
# is given explicitly instead of relying on the platform default.
with open("../doc/metadata/template_Demo.json", encoding="utf-8") as f:
    template = json.load(f)
template

{'datasetVersion': {'metadataBlocks': {'citation': {'fields': [{'value': '...Title...',
      'typeClass': 'primitive',
      'multiple': False,
      'typeName': 'title'},
     {'value': [{'authorName': {'value': '...LastName..., ...FirstName...',
         'typeClass': 'primitive',
         'multiple': False,
         'typeName': 'authorName'},
        'authorAffiliation': {'value': '...Affiliation...',
         'typeClass': 'primitive',
         'multiple': False,
         'typeName': 'authorAffiliation'}}],
      'typeClass': 'compound',
      'multiple': False,
      'typeName': 'author'},
     {'value': [{'datasetContactEmail': {'value': '...Email...',
         'typeClass': 'primitive',
         'multiple': False,
         'typeName': 'datasetContactEmail'},
        'datasetContactName': {'value': '...LastName..., ...FirstName...',
         'typeClass': 'primitive',
         'multiple': False,
         'typeName': 'datasetContactName'}}],
      'typeClass': 'compound',
      'mult

In [11]:
# The citation block's "fields" entry is a list with one dict per metadata
# field; each dict has the keys 'value', 'typeClass', 'multiple' and 'typeName'.
fields = template["datasetVersion"]["metadataBlocks"]["citation"]["fields"]

# Simulate getting metadata from iRODS

The code below uses the template metadata to simulate the result of the following iRODS call:

```python
with iRODSSession(irods_env_file=env_file) as session:
    obj = session.data_objects.get('path/to/my/object')
    avus_as_json = dv_schema.extract(obj)
```

In [38]:
def field(f):
    """Flatten one template field into a ``(typeName, value)`` pair.

    Args:
        f: A field dict with the keys 'typeClass', 'typeName' and 'value'.

    Returns:
        A tuple ``(f["typeName"], value)``. For a field whose 'typeClass' is
        "compound", ``value`` is a dict mapping each subfield's typeName to
        its (recursively flattened) value; otherwise it is ``f["value"]``
        unchanged.

    Note:
        Only the first entry of a compound field's 'value' list is read;
        any further entries are ignored.
    """
    is_compound = f["typeClass"] == "compound"
    name = f["typeName"]
    if not is_compound:
        return (name, f["value"])

    flattened = {}
    # A compound value is a list of {subfield name: subfield dict} mappings;
    # only its first element is used.
    for subfield in f["value"][0].values():
        sub_name, sub_value = field(subfield)
        flattened[sub_name] = sub_value
    return (name, flattened)

In [40]:
# Flatten every template field into one {typeName: value} dictionary.
metadata_dict = dict(field(entry) for entry in fields)

In [50]:
# Override the description with plain text: the schema declares
# 'dsDescription' as a 'textarea' field.
# NOTE(review): this mutates metadata_dict in place, so the value produced by
# the previous cell is lost once this cell has run.
metadata_dict['dsDescription']= "Some text"

In [51]:
# Inspect the flattened metadata after the description override.
metadata_dict

{'title': '...Title...',
 'author': {'authorName': '...LastName..., ...FirstName...',
  'authorAffiliation': '...Affiliation...'},
 'datasetContact': {'datasetContactEmail': '...Email...',
  'datasetContactName': '...LastName..., ...FirstName...'},
 'dsDescription': 'Some text',
 'subject': ['...One-of-the-subjects-at-/doc/metadata/CVs/subjects_Demo.json...']}

In [52]:
# Validate the metadata against the schema. The result is only displayed,
# not stored: later cells keep using the unvalidated metadata_dict.
# NOTE(review): in the displayed result 'subject' comes back empty,
# presumably because the placeholder text is not one of the allowed
# options -- confirm against the schema.
dv_schema.validate(metadata_dict)

{'title': '...Title...',
 'author': [{'authorName': '...LastName..., ...FirstName...',
   'authorAffiliation': '...Affiliation...'}],
 'datasetContact': [{'datasetContactEmail': '...Email...',
   'datasetContactName': '...LastName..., ...FirstName...'}],
 'dsDescription': ['Some text'],
 'subject': []}

In [54]:
# Convert the metadata dictionary into a list of AVUs.
# NOTE(review): the original comment suggests this stands in for the AVUs we
# would have had if the metadata had been extracted from iRODS -- confirm.
as_avus = dv_schema.to_avus(metadata_dict)

In [55]:
# Display the generated AVUs.
as_avus

[<iRODSMeta None mgs.mango2dv-demo.title ...Title... None>,
 <iRODSMeta None mgs.mango2dv-demo.author.authorName ...LastName..., ...FirstName... 1>,
 <iRODSMeta None mgs.mango2dv-demo.author.authorAffiliation ...Affiliation... 1>,
 <iRODSMeta None mgs.mango2dv-demo.datasetContact.datasetContactEmail ...Email... 1>,
 <iRODSMeta None mgs.mango2dv-demo.datasetContact.datasetContactName ...LastName..., ...FirstName... 1>,
 <iRODSMeta None mgs.mango2dv-demo.dsDescription Some text None>]

# Convert AVUs to template JSON

In [59]:
# Rebuild a nested metadata dictionary from the list of AVUs and display it.
avus_as_json = dv_schema.from_avus(as_avus)
avus_as_json

{'author': [{'authorAffiliation': '...Affiliation...',
   'authorName': '...LastName..., ...FirstName...'}],
 'datasetContact': [{'datasetContactEmail': '...Email...',
   'datasetContactName': '...LastName..., ...FirstName...'}],
 'dsDescription': ['Some text'],
 'title': '...Title...'}

In [68]:
# Look at the fifth template field ('subject') as an example of what a
# 'controlledVocabulary' entry looks like in the template.
fields[4]

{'value': ['...One-of-the-subjects-at-/doc/metadata/CVs/subjects_Demo.json...'],
 'typeClass': 'controlledVocabulary',
 'multiple': True,
 'typeName': 'subject'}

In [69]:
def dictconvert(key, value):
    """Convert one flattened metadata entry back into a template field dict.

    The 'typeClass' is inferred from the shape of ``value``:

    * a string becomes a "primitive" field;
    * a non-empty list whose first element is a dict becomes a "compound"
      field, in which every entry is converted subfield by subfield;
    * anything else (including an empty list) becomes a
      "controlledVocabulary" field with 'multiple' set to True.

    Args:
        key: Name of the field; stored as 'typeName'.
        value: A string, a list of dicts (compound entries) or a list of
            other values.

    Returns:
        A dict with the keys 'value', 'typeName', 'multiple' and 'typeClass'.

    Note:
        The type is guessed from the value alone, so a list of plain strings
        is always labelled "controlledVocabulary", whatever the field is in
        the schema.
    """
    res = {"value": value, "typeName": key, "multiple": False}
    if isinstance(value, str):
        res["typeClass"] = "primitive"
    elif len(value) > 0 and isinstance(value[0], dict):
        res["typeClass"] = "compound"
        # Convert every entry, not just the first, so repeated compound
        # values (e.g. several authors) are not silently dropped.
        res["value"] = [
            {k: dictconvert(k, v) for k, v in entry.items()} for entry in value
        ]
    else:
        # Also reached for an empty list, which used to raise IndexError.
        res["typeClass"] = "controlledVocabulary"
        res["multiple"] = True
    return res

In [71]:
# Turn each (name, value) pair of the rebuilt metadata into a template field.
new_fields = [dictconvert(*pair) for pair in avus_as_json.items()]

In [73]:
# Replace the citation fields in the template with the converted ones, then
# write the filled-in template to disk.
template["datasetVersion"]["metadataBlocks"]["citation"].update(fields=new_fields)
with open("../doc/metadata/filled_in_template_Demo.json", mode="w") as f:
    json.dump(template, f)