In [1]:
from pprint import pprint
from pyld import jsonld

In [2]:
# Name of the single JSON-LD document to inspect, and the directory URL that hosts it.
file = "cmip.json"
base = "https://wcrp-cmip.github.io/WCRP-universe/activity/"

# Full URL of the document: directory URL followed by the file name.
fullpath = f"{base}{file}"

For a simplified (compacted) view of the file we can use the following. 

In [3]:
# Produce a simplified (compacted) view of the JSON-LD file.
# The same URL is passed as both the input document and the context argument.
compacted = jsonld.compact(fullpath, fullpath)

# Optionally strip the context and the type information before display.
# pop(key, None) is used instead of `del` so that a document lacking either
# key is displayed as-is rather than aborting the cell with a KeyError.
compacted.pop('@context', None)
compacted.pop('type', None)

pprint(compacted)

{'description': 'CMIP DECK: 1pctCO2, abrupt-4xCO2, amip, esm-piControl, '
                'esm-historical, historical, and piControl experiments',
 'id': 'universal:activity/cmip',
 'name': 'CMIP',
 'url': 'https://gmd.copernicus.org/articles/9/1937/2016/gmd-9-1937-2016.pdf'}


#### View all files in a group
- 'graph' files are constructed to present all files in a repository or directory in one group. 



- For the activity repository we can view this information as shown below.



In [4]:
# URL of the 'graph' file for the activity directory.
activity_graph = f"{base}graph"

# Frame the graph using the activity context file as the frame document;
# left as a bare expression so the notebook displays the result.
jsonld.frame(activity_graph, f"{base}_context_")

{'@context': ['../_context_',
  {'@base': 'https://wcrp-cmip.github.io/WCRP-universe/activity/',
   '@vocab': 'https://wcrp-cmip.github.io/WCRP-universe/activity/'}],
 '@graph': [{'id': 'universal:activity/aerchemmip',
   'type': 'activity',
   'description': 'AerChemMIP experiments focus on the role of atmospheric chemistry and aerosols in climate, including piClim and hist-piSLCF simulations.',
   'name': 'AerChemMIP',
   'url': 'https://gmd.copernicus.org/articles/10/585/2017/gmd-10-585-2017.pdf'},
  {'id': 'universal:activity/c4mip',
   'type': 'activity',
   'description': 'C4MIP experiments focus on carbon cycle feedbacks and interactions, including 1pctCO2-bgc and esm-flat10-cdr experiments.',
   'name': 'C4MIP',
   'url': 'https://gmd.copernicus.org/articles/9/2853/2016/gmd-9-2853-2016.pdf'},
  {'id': 'universal:activity/cfmip',
   'type': 'activity',
   'description': 'CFMIP experiments focus on cloud feedbacks and their role in climate, including abrupt-0p5CO2 and amip-piForc

#### Adding other files. 

If we wish to fill links from other repositories offline, we are able to combine multiple expanded graph files. 
Expansion produces a verbose version of the file combining it with the context. 

In [5]:
# URL of the 'graph' file for the CMIP6Plus source directory.
cmip6plus_graph = 'https://wcrp-cmip.github.io/CMIP6Plus_CVs/source/graph'

# Expand each graph in turn (activity first, then CMIP6Plus sources) and
# gather the expanded documents into one list.
collection = list(map(jsonld.expand, (activity_graph, cmip6plus_graph)))


#### Framing
To then select which columns or names we wish to have, we can use framing.
For instance:
- we can use `@type` to select all items of a certain kind. 
- we can select only values with a specific `@id`
- to select only certain fields we can specify those and use `@explicit = True`  (example given below)

In [6]:
frame = {
    # resolve terms with the activity context, since activities are what we are after
    "@context": f"{base}_context_",  # the default context

    # keep linked source_activity entries unexpanded
    "source_activity": {"@explicit": True},

    # restrict the result to nodes of type activity
    "@type": "activity",

    # restrict further to the RAMIP activity via its ID
    # ** remove this line to view ALL activities in the collection
    "@id": "universal:activity/ramip",
}

# Apply the frame to the combined expanded graphs and show the outcome.
framed_result = jsonld.frame(collection, frame)
pprint(framed_result)

{'@context': 'https://wcrp-cmip.github.io/WCRP-universe/activity/_context_',
 'description': 'Regional Aerosol Model Intercomparison Project',
 'id': 'universal:activity/ramip',
 'name': 'RAMIP',
 'source_activity': [{'id': 'cmip6plus:source/cesm2',
                      'type': 'cmip6plus:source/source'},
                     {'id': 'cmip6plus:source/cnrm_esm2_1',
                      'type': 'cmip6plus:source/source'},
                     {'id': 'cmip6plus:source/ec_earth_aerchem',
                      'type': 'cmip6plus:source/source'},
                     {'id': 'cmip6plus:source/mri_esm2_0',
                      'type': 'cmip6plus:source/source'},
                     {'id': 'cmip6plus:source/ukesm1_0_ll',
                      'type': 'cmip6plus:source/source'}],
 'type': 'activity',
 'url': 'https://ramip.ncas.ac.uk'}


- If we only want to select the name and url of all activities, we can do the following: 


In [7]:
frame2 = {
    # resolve terms with the activity context (the default context)
    "@context": f"{base}_context_",

    # restrict the result to nodes of type activity
    "@type": "activity",

    # the fields to keep: each one is requested with an empty sub-frame
    **{field: {} for field in ("name", "url")},

    # explicit mode, so that only the fields listed above are selected
    "@explicit": True,
}

# Apply the frame to the combined expanded graphs and show the outcome.
framed_result = jsonld.frame(collection, frame2)
pprint(framed_result)

{'@context': 'https://wcrp-cmip.github.io/WCRP-universe/activity/_context_',
 '@graph': [{'id': 'universal:activity/aerchemmip',
             'name': 'AerChemMIP',
             'type': 'activity',
             'url': 'https://gmd.copernicus.org/articles/10/585/2017/gmd-10-585-2017.pdf'},
            {'id': 'universal:activity/c4mip',
             'name': 'C4MIP',
             'type': 'activity',
             'url': 'https://gmd.copernicus.org/articles/9/2853/2016/gmd-9-2853-2016.pdf'},
            {'id': 'universal:activity/cfmip',
             'name': 'CFMIP',
             'type': 'activity',
             'url': 'https://gmd.copernicus.org/articles/10/359/2017/gmd-10-359-2017.pdf'},
            {'id': 'universal:activity/cmip',
             'name': 'CMIP',
             'type': 'activity',
             'url': 'https://gmd.copernicus.org/articles/9/1937/2016/gmd-9-1937-2016.pdf'},
            {'id': 'universal:activity/damip',
             'name': 'DAMIP',
             'type': 'activity