In [1]:
# Import the library
from cmipld import *

In [11]:
# Load the latest CMIP6Plus and MIP-CMOR-Tables files from GitHub.
# NOTE(review): the class itself is passed as the first argument of
# load_latest, presumably standing in for `self`/`cls` — confirm against the
# cmipld API.
# Top-level `await` works here because the notebook kernel runs an event loop.
latest = await CMIPFileUtils.load_latest(CMIPFileUtils)

Loading latest CMIP6Plus and MIP-CMOR-Tables files...


In [12]:
# Alternatively, load a single file from the file store.
# NOTE(review): `graph_data` is not referenced by any later cell shown here.
graph_data = await CMIPFileUtils.read_file_fs('compiled/graph_data.json')

In [20]:
# Define the frame (what we want to search for), or use one of the presets.
# Here:
#   * the top-level "@type" selector picks every source-id entry;
#   * from the organisation-id field, institutions and consortiums are matched;
#   * only the cmip_acronym field is requested from each of them.
# "@explicit": True is the JSON-LD framing flag that limits the output to the
# properties named in the frame; it is set once per level.

frame = {
    "@type": [
        "mip:source-id"
    ],
    "source-id:organisation-id": {
        "@type": [
            "mip:institution",
            "mip:consortium"
        ],
        "@explicit": True,
        # One acronym key per organisation type; whichever does not apply to a
        # given match shows up as None in the raw result.
        "consortium:cmip_acronym": "",
        "institution:cmip_acronym": ""
    },
    "@explicit": True
}

In [21]:
# Build the result from the loaded data and the frame; `result` is the object
# previewed and cleaned in the following cells.
result = Frame(latest, frame)

In [23]:
# Preview the framed result.
# NOTE(review): `print` is accessed as an attribute (no call parentheses) and
# the preview still appears below, so it is presumably a property — confirm.
result.print

[{'@id': 'cmip6plus:source/id/giss-e2-1-g',
  '@type': 'mip:source-id',
  'source-id:organisation-id': {'@id': 'mip-cmor-tables:organisations/institutions/nasa-giss',
   '@type': 'mip:institution',
   'consortium:cmip_acronym': None,
   'institution:cmip_acronym': 'NASA-GISS'}},
 {'@id': 'cmip6plus:source/id/hadgem3-gc31-ll',
  '@type': 'mip:source-id',
  'source-id:organisation-id': {'@id': 'mip-cmor-tables:organisations/institutions/mohc',
   '@type': 'mip:institution',
   'consortium:cmip_acronym': None,
   'institution:cmip_acronym': 'MOHC'}},
 {'@id': 'cmip6plus:source/id/miroc6',
  '@type': 'mip:source-id',
  'source-id:organisation-id': {'@id': 'mip-cmor-tables:organisations/consortia/miroc',
   '@type': 'mip:consortium',
   'consortium:cmip_acronym': 'MIROC',
   'institution:cmip_acronym': None}},
 {'@id': 'cmip6plus:source/id/mpi-esm1-2-hr',
  '@type': 'mip:source-id',
  'source-id:organisation-id': {'@id': 'mip-cmor-tables:organisations/institutions/mpi-m',
   '@type': 'mip:i

In [26]:
# There are several ways to clean this up.
# Cleaning functions can be applied one after another in a chain.

# e.g. remove all the null values and the prefix tags: in the output below the
# None-valued entries are gone and keys such as 'source-id:organisation-id'
# are shortened to 'organisation-id'.
result.start.rmnull.untag.end.print

[{'@id': 'cmip6plus:source/id/giss-e2-1-g',
  '@type': 'mip:source-id',
  'organisation-id': {'@id': 'mip-cmor-tables:organisations/institutions/nasa-giss',
                      '@type': 'mip:institution',
                      'cmip_acronym': 'NASA-GISS'}},
 {'@id': 'cmip6plus:source/id/hadgem3-gc31-ll',
  '@type': 'mip:source-id',
  'organisation-id': {'@id': 'mip-cmor-tables:organisations/institutions/mohc',
                      '@type': 'mip:institution',
                      'cmip_acronym': 'MOHC'}},
 {'@id': 'cmip6plus:source/id/miroc6',
  '@type': 'mip:source-id',
  'organisation-id': {'@id': 'mip-cmor-tables:organisations/consortia/miroc',
                      '@type': 'mip:consortium',
                      'cmip_acronym': 'MIROC'}},
 {'@id': 'cmip6plus:source/id/mpi-esm1-2-hr',
  '@type': 'mip:source-id',
  'organisation-id': {'@id': 'mip-cmor-tables:organisations/institutions/mpi-m',
                      '@type': 'mip:institution',
                      'cmip_acronym': 

In [27]:
# Alternatively, call clean() to apply all the cleaning functions in one go.
# Its default argument is process = ['rmld','rmnull','untag','flatten'],
# which can be changed to suit the needs of the user.
# NOTE(review): the following cells read the cleaned data from `result`
# itself, so clean() appears to update `result` in place — confirm.
result.clean()

In [29]:
# Finally, the cleaned result is available through the .data or .json
# attribute; ending the cell with the attribute displays it.
result.data

[{'organisation-id': 'NASA-GISS'},
 {'organisation-id': 'MOHC'},
 {'organisation-id': 'MIROC'},
 {'organisation-id': 'MPI-M'},
 {'organisation-id': 'MPI-M'},
 {'organisation-id': 'NCC'}]

In [35]:
# The cleaned result can now be manipulated further, or saved to a file.
# e.g. collect every value of every row into one flat list. Only the values
# are needed, so iterate over row.values() rather than unpacking unused keys.

[value for row in result.json for value in row.values()]

['NASA-GISS', 'MOHC', 'MIROC', 'MPI-M', 'MPI-M', 'NCC']