In [None]:
import sys, os 
import xml.etree.ElementTree as ET

In [None]:
def get_xml_childs(elt, tag='field', groups=['context', 'field_group',
        'field_definition', 'axis_definition','axis', 'domain_definition',
        'domain', 'grid_definition', 'grid' , 'interpolate_axis'  ]) :
        """ 
        Returns a list of elements in tree ELT 
        which have tag TAG, by digging in sub-elements 
        named as in GROUPS 
        """
        if elt.tag in groups :
            rep=[]
            for child in elt : rep.extend(get_xml_childs(child,tag))
            #print "REP=",rep
            return rep
        elif elt.tag==tag : return [elt]
        else :
            #print 'Syntax error : tag %s not allowed'%elt.tag
            # Case of an unkown tag : don't dig in
            return []

In [None]:
def read_defs(filename, tag='field', attrib=None, printout=False) :
    """ 
    Returns a dict of obejcts tagged TAG in FILENAME, which 
    - keys are ids
    - values are corresponding ET elements if 
      attrib is None, otherwise elt attribute ATTRIB
    Returns None if filename does not exist
    """
    #    
    rep=dict()
    if printout : print "processing file %s :"%filename,
    if os.path.exists(filename) :
        if printout : print "OK",filename
        root = ET.parse(filename).getroot()
        #print "ROOT=",root
        defs=get_xml_childs(root,tag) 
        #print "DEFS=",defs
        if defs :
            for field in defs :
                if printout : print ".",
                if attrib is None: 
                    rep[field.attrib['id']]=field
                else :
                    rep[field.attrib['id']]=field.attrib[attrib]
            if printout : print
            return rep
    else :
        if printout : print "No file "
        return None

In [None]:
test_file="output_sample/ping_arpsfx.xml"

In [None]:
test=read_defs(test_file,tag='field',attrib="field_ref",printout=False)
#test=read_defs(test_file,tag='field',attrib=None,printout=False)
print test

In [5]:
import dreq
dq = dreq.loadDreq()
print dq.version

01.00.05


In [6]:
def analyze_ambiguous_MIPvarnames(dq):
    """
    Return the list of MIP varnames whose list of CMORvars for a single realm 
    show distinct values for the area part of the cell_methods
    """
    # Compute a dict which keys are MIP varnames and values = list 
    # of CMORvars items for the varname
    d=dict()
    for v in dq.coll['var'].items :
        if v.label not in d : d[v.label]=[]
        refs=dq.inx.iref_by_sect[v.uid].a['CMORvar']
        for r in refs :
            d[v.label].append(dq.inx.uid[r])
            #if v.label=="prra" : print "one prra"
    #print "d[prra]=",d["prra"]
    # Replace dic values by dic of cell_methods
    for vlabel in d:
        if len(d[vlabel]) > 1 :
            cvl=d[vlabel]
            d[vlabel]=dict()
            for cv in cvl: 
                st=dq.inx.uid[cv.stid]
                try :
                    cm=dq.inx.uid[st.cmid].cell_methods
                     # mpmoine_a_verifier: certaines de ces chaines n existent pas dans les cell_methods de la DR
                    cm1=cm.replace("time: mean","").replace("time: point","").\
                        replace(" within years  over years","") .\
                        replace('time: maximum within days  over days','').\
                        replace('time: minimum within days  over days','').\
                        replace('time: minimum','').\
                        replace('time: maximum','').\
                        replace('with samples ','')
                    realm=cv.modeling_realm
                    if realm=="ocean" or realm=="ocnBgchem" :
                        cm1=cm1.replace("area: mean where sea ","")
                    #if realm=='land':
                    #    cm1=cm1.replace('area: mean where land ','')
                    if True or "area:" in cm1 :
                        cm2=cm1 #.replace("area:","")
                        if realm not in d[vlabel]:
                            d[vlabel][realm] =[]
                        if cm2 not in d[vlabel][realm] :
                            d[vlabel][realm].append(cm2)
                        #if vlabel=="prra" : 
                        #    print "cm2=",cm2, d[vlabel]
                except : 
                    pass
                    #print "No cell method for %s %s"%(st.label,cv.label)
        else : d[vlabel]=None
    #for l in d : print l,d[l]
    #print "d[prra]=",d["prra"]
    #sd=d.keys() ; sd.sort()
    #for var in sd :
    #    if d[var] and any( [ len(l) > 1 for l in d[var].values() ]) :
    #        print "%20s %s"%(var,`d[var]`)
    #        pass
    # Analyze ambiguous cases regarding area part of the cell_method
    ambiguous=[]
    for vlabel in d:
        if d[vlabel]:
            #print vlabel,d[vlabel]
            for realm in d[vlabel] :
                if len(d[vlabel][realm])>1 and \
                   any([ "area" in cm for cm in d[vlabel][realm] ]):
                    ambiguous.append(( vlabel,(realm,d[vlabel][realm])))
    return ambiguous

In [10]:
ambig=analyze_ambiguous_MIPvarnames(dq)
print ambig

[('sci', ('atmos', ['area: point ', 'area: '])), ('agesno', ('landIce land', ['area: mean where land  (weighted by snow mass)', 'area: mean where land '])), ('rsutcs', ('atmos', ['area: ', 'area: point ', 'area: mean  within days  over days'])), ('snm', ('landIce land', ['area: mean where land ', 'area:  where ice_sheet'])), ('vas', ('atmos', ['', 'area: point ', 'area: '])), ('tslsi', ('land', ['area: ', ''])), ('rsdscs', ('atmos', ['area: ', 'area: point '])), ('orog', ('land', ['area: mean', 'area:  where ice_sheet', 'area: '])), ('sftflf', ('landIce', ['', 'area:  where floating_ice_shelf (comment: mask=sftflf)', 'area: mean'])), ('tauv', ('atmos', ['area: point ', 'area: '])), ('tasmin', ('atmos', ['area: mean ', ''])), ('sftgif', ('land', ['area: mean', 'area:  where ice_sheet', 'area: '])), ('va', ('aerosol', ['longitude: mean ', 'area: '])), ('n2o', ('aerosol', ['area: ', 'longitude: mean '])), ('zg500', ('aerosol', ['area: mean', 'area: ', ''])), ('sbl', ('landIce', ['area: me

In [1]:
def make_source_string(sources,source_id):
    """ 
    From the dic of sources in CMIP6-CV, Creates the string representation of a 
    given model (source_id) according to doc on global_file_attributes, so :

    <modified source_id> (<year>): atmosphere: <model_name> (<technical_name>, <resolution_and_levels>); ocean: <model_name> (<technical_name>, <resolution_and_levels>); sea_ice: <model_name> (<technical_name>); land: <model_name> (<technical_name>); aerosol: <model_name> (<technical_name>); atmospheric_chemistry <model_name> (<technical_name>); ocean_biogeochemistry <model_name> (<technical_name>); land_ice <model_name> (<technical_name>);

    """
    source=sources[source_id] 
    components=source['model_component']
    rep=rep+"("+source['release_year']+")"
    for realm in ["atmos","ocean","seaIce","land","aerosol","atmosChem","ocnBgchem"]:
        component=components[realm]
        description=component['description']
        rep=rep+"\n"+realm+": "+description
    return rep

In [2]:
cvspath="/Users/moine/Codes/MyDevel_Codes/CMIP6_DATA_SUITE/CMIP6_CVs/"
with open(cvspath+"CMIP6_source_id.json","r") as json_fp :
    sources=json.loads(json_fp.read())['source_id']
    source=make_source_string(sources,source_id)
    print ">>> DBG >>> OK, SOURCE READ IN JSON FILE"

NameError: name 'json' is not defined