In [1]:
# Display the current local date and time as this cell's output.
from datetime import datetime
datetime.now()

datetime.datetime(2025, 9, 16, 13, 4, 49, 864332)

# ESGVOC Documentation 

### Full documentation: <https://esgf.github.io/esgf-vocab/>

```mermaid
classDiagram
    class Universe {
        - DataDescriptors (DDs)
    }

    class DataDescriptor {
        - Contains Terms
    }

    class Term {
        - Metadata
        - Linked to PydanticModel
    }

    class PydanticModel {
        - Schema definition
        - Validation
    }

    class Cordex {
        - Collections
    }

    class Collection {
        - Contains Terms
        - Terms reference Universe Terms
    }

    class esgvoc {
        - Query interface
        - Accesses Universe and Cordex
        - Returns instantiated Pydantic models
    }

    Universe --> DataDescriptor : contains
    DataDescriptor --> Term : contains
    Term --> PydanticModel : modeled by
    Cordex --> Collection : contains
    Collection --> Term : contains (links to Universe Terms)
    esgvoc --> Universe : queries
    esgvoc --> Cordex : queries
    esgvoc --> PydanticModel : returns


```


# Basic use of esgvoc

## Universe

In [2]:
import esgvoc.api as ev

In [3]:
# List the names of all data descriptors defined in the universe.
ev.get_all_data_descriptors_in_universe()


['activity',
 'area_label',
 'branded_suffix',
 'branded_variable',
 'citation_url',
 'comment',
 'consortium',
 'contact',
 'conventions',
 'creation_date',
 'data_specs_version',
 'directory_date',
 'experiment',
 'forcing_index',
 'frequency',
 'further_info_url',
 'grid',
 'horizontal_label',
 'initialisation_index',
 'institution',
 'known_branded_variable',
 'license',
 'member_id',
 'mip_era',
 'model_component',
 'obs_type',
 'organisation',
 'physic_index',
 'product',
 'publication_status',
 'realisation_index',
 'realm',
 'regex',
 'region',
 'resolution',
 'source',
 'source_type',
 'sub_experiment',
 'table',
 'temporal_label',
 'time_range',
 'title',
 'tracking_id',
 'variable',
 'variant_label',
 'vertical_label',
 'archive']

In [4]:
ev.get_all_terms_in_data_descriptor("mip_era") # returns a list of MipEra pydantic model instances

[MipEra(id='cmip5', type='mip_era', drs_name='CMIP5', start=2006, end=2020, name='CMIP5', url='https://wcrp-cmip.org/CMIP5', @context='000_context.jsonld'),
 MipEra(id='cmip6', type='mip_era', drs_name='CMIP6', start=2013, end=2024, name='CMIP6', url='https://wcrp-cmip.org/CMIP6', @context='000_context.jsonld'),
 MipEra(id='cmip6plus', type='mip_era', drs_name='CMIP6Plus', start=2023, end=2999, name='CMIP6Plus', url='https://wcrp-cmip.org/CMIP6Plus', @context='000_context.jsonld'),
 MipEra(id='cmip7', type='mip_era', drs_name='CMIP7', start=2025, end=2999, name='CMIP7', url='https://wcrp-cmip.org/CMIP7', @context='000_context.jsonld'),
 MipEra(id='cordex_cmip6', type='mip_era', drs_name='CORDEX', start=2006, end=2020, name='CORDEX', url='https://wcrp-cmip.org/CORDEX', @context='000_context.jsonld')]

In [5]:
# Fetch a single term by its id from the "mip_era" data descriptor.
ev.get_term_in_data_descriptor("mip_era","cordex_cmip6")

MipEra(id='cordex_cmip6', type='mip_era', drs_name='CORDEX', start=2006, end=2020, name='CORDEX', url='https://wcrp-cmip.org/CORDEX', @context='000_context.jsonld')

## Cordex

In [6]:
# List the ids of all available projects.
ev.get_all_projects()

['cmip6', 'cmip6plus', 'cmip7', 'input4mip', 'obs4ref', 'cordex-cmip6']

In [7]:
# List the names of all collections defined in the "cordex-cmip6" project.
ev.get_all_collections_in_project("cordex-cmip6")

['activity_id',
 'comment',
 'contact',
 'conventions',
 'creation_date',
 'domain_id',
 'driving_experiment_id',
 'driving_institution_id',
 'driving_source_id',
 'driving_variant_label',
 'frequency',
 'grid',
 'history',
 'institution_id',
 'license',
 'mip_era',
 'product',
 'project_id',
 'references',
 'source_id',
 'source_type',
 'time_range',
 'tracking_id',
 'variable_id',
 'version',
 'version_realization',
 'version_realization_info']

In [8]:
ev.get_all_terms_in_collection("cordex-cmip6","domain_id")[:5] # slice to show only the first 5 terms

[Region(id='afr-12', type='region', drs_name='AFR-12', description='Africa', @context='000_context.jsonld'),
 Region(id='afr-25', type='region', drs_name='AFR-25', description='Africa', @context='000_context.jsonld'),
 Region(id='afr-50', type='region', drs_name='AFR-50', description='Africa', @context='000_context.jsonld'),
 Region(id='afr-50i', type='region', drs_name='AFR-50i', description='Africa', @context='000_context.jsonld'),
 Region(id='ant-12', type='region', drs_name='ANT-12', description='Antarctica', @context='000_context.jsonld')]

In [9]:
ev.get_term_in_collection("cordex-cmip6","domain_id","afr-12") # returns a Region pydantic model instance

Region(id='afr-12', type='region', drs_name='AFR-12', description='Africa', @context='000_context.jsonld')

# DRS Validation / Generation with esgvoc 

### If the inputs are already known, e.g. from the global attributes of a NetCDF file:

In [10]:
# Generate a DRS directory for the "cordex-cmip6" project from a bag of terms.
from esgvoc.apps.drs.generator import DrsGenerator
dg = DrsGenerator("cordex-cmip6")
bag = ["CORDEX-CMIP6" ,"DD","AFR-25","AUTH","4AOP-v1-5","ssp370","r1i1p1f1","ALARO1-SFX","v1-r1","mon","tas","v20240319"] # example values, picked arbitrarily
dg.generate_directory_from_bag_of_terms(bag)



## We can also validate this directory with the validator

In [11]:
# Validate a DRS directory string with a validator built for the "cordex-cmip6" project.
from esgvoc.apps.drs.validator import DrsValidator
dv = DrsValidator("cordex-cmip6")
# NOTE(review): this path is hard-coded and starts at "DD", whereas the bag given to the
# generator also contains "CORDEX-CMIP6" - confirm it matches the generator's actual output.
input_dir = 'DD/AFR-25/AUTH/4AOP-v1-5/ssp370/r1i1p1f1/ALARO1-SFX/v1-r1/mon/tas/v20240319' # copied from the directory generated above
dv.validate_directory(input_dir)

