# Checking local data vs ESGF published data (using a data_versions dict and its ancillary dicts)

In [1]:
from IPython.core.display import display, HTML, Image
display(HTML("<style>.container { width:100% !important; }</style>"))
import requests  # use pip or conda to install it if needed
import json
from climaf import period

python => 2.7.15 | packaged by conda-forge | (default, Jul  2 2019, 00:39:44) 
[GCC 7.3.0]
---
Required softwares to run CliMAF => you are using the following versions/installations:
ncl 6.6.2 => /modfs/ipslfs/dods/jservon/miniconda/envs/analyse_env_2.7/bin/ncl
cdo 1.9.6 => /opt/nco/1.9/bin/cdo
nco (ncks) "4.3.6" => /usr/bin/ncks
ncdump fichier => /prodigfs/ipslfs/dods/jservon/miniconda/envs/cesmep_env/bin/ncdump
Check stamping requirements
nco (ncatted) found -> /usr/bin/ncatted
convert found -> /usr/bin/convert
pdftk found -> /usr/bin/pdftk
exiv2 found -> /home/ssenesi/climaf_installs/climaf_running/bin/exiv2
---
CliMAF install => /home/ssenesi/climaf_installs/climaf_running


CliMAF version = 2.0.0
Cache directory set to : /data/ssenesi/climafcache (use $CLIMAF_CACHE if set) 
Cache directory for remote data set to : /data/ssenesi/climafcache/remote_data (use $CLIMAF_REMOTE_CACHE if set) 
No reachable lmdz_SE library
Available macros read from ~/.climaf.macros are : []


In [2]:
def jrequest(q,node="esgf-data.dkrz.de",timeout=300) :
    """
    Send query string Q to the ESGF search service hosted by NODE and
    return the response decoded from JSON.

    Args:
      q : query string, already formatted as 'key=value&key=value...';
        the 'format' argument is appended here
      node : host name of the search node; alternatives are e.g.
        "esgf-node.ipsl.upmc.fr" or "esgf-node.jpl.nasa.gov"
      timeout : value passed to requests.get as its 'timeout' argument
        (seconds); use None to wait without limit

    Raises:
      requests.exceptions.Timeout : if the node does not answer in time
      requests.exceptions.HTTPError : if the node answers with an HTTP
        error status
    """
    form="&format=application%2Fsolr%2Bjson"
    reqs="http://%s/esg-search/search?%s%s"%(node,q,form)
    # Without a timeout, a stalled node would block the notebook forever
    rep=requests.get(reqs,timeout=timeout)
    # Fail on HTTP errors rather than trying to decode an error page as JSON
    rep.raise_for_status()
    return rep.json()

## List all models which run an experiment

In [3]:
def esgf_models_for_experiment(variable,table,experiment="piControl"):
    """
    Query the ESGF (through jrequest) for the records matching VARIABLE,
    TABLE and EXPERIMENT, and return the sorted list of the distinct
    model names found (first value of field 'source_id' of each record).
    """
    query='distrib=true&experiment_id=%s&variable=%s&table_id=%s&limit=10000&fields=source_id'%(experiment,variable,table)
    records=jrequest(query)['response']['docs']
    return sorted({ record['source_id'][0] for record in records })

## Returns published_period(s), for a model, an experiment and a variable

In [4]:
def published_period(model,experiment,variable,table) :
    """
    Query the ESGF for the latest files published for MODEL, EXPERIMENT,
    VARIABLE and TABLE, and gather the periods they cover.

    Returns an empty list if the search returns no file. Otherwise returns
    a dict whose keys are pairs (realization, version) - fields #5 and #9
    of the dot-separated 'instance_id' of each file - and whose values are
    the result of period.merge_periods applied to the periods read from
    the file names of that pair.
    """
    criteria={'distrib'     : 'true',
         'limit'       : '10000',
         'type'        : 'File',
         'fields'      : 'title,instance_id',
         'experiment_id':experiment,
         'source_id'   : model,
         'variable'    : variable,
         'table_id'    : table,
         'latest'      : 'true',
         }
    #
    # Build the request string : 'key=value' items separated by '&'
    query="&".join([ "%s=%s"%(key,criteria[key]) for key in criteria ])
    #
    docs=jrequest(query)['response']['docs']
    if not docs :
        return ([])
    #
    # Scan the response and group file periods by (realization, version)
    periods=dict()
    for doc in docs :
        facets=doc['instance_id'].encode('ascii').split(".")
        key=(facets[5],facets[9])
        #
        # The period is the last '_'-separated item of the file name, without '.nc'
        title=doc['title'].encode('ascii')
        file_period=title.split("_")[-1].replace(".nc","")
        periods.setdefault(key,set()).add(file_period)
    #
    # Replace each set of period strings by the merged periods
    for key in periods :
        # Only the first 900 (sorted) items are kept : issue with some models ...
        chunks=sorted(periods[key])[0:900]
        periods[key]=period.merge_periods([ period.init_period(chunk) for chunk in chunks ],handle_360_days_year=True)
    return periods

## Check the length of published periods for a variable, an experiment and all models (for all versions and realizations)

In [5]:
def experiment_length_check(variable,table,experiment="piControl",length=None) :
    """
    For all models which published VARIABLE in TABLE for EXPERIMENT on the
    ESGF, sort the published periods according to their length in years.

    Args:
      variable, table, experiment : the data to look for
      length : minimum number of years for a period to be deemed OK; if
        None, it defaults to 500 for piControl, 165 for historical and 86
        for experiments whose name begins with 'ssp'

    Returns:
      a pair (models_ok, models_nok) of dicts; in each one, keys are model
      names and values are lists of [ key, period ] where key is a key of
      the dict returned by published_period and period is the period
      converted to a string

    Raises:
      ValueError : if LENGTH is None and EXPERIMENT has no default length
    """
    if length is None :
        if experiment=="piControl" :
            length=500
        elif experiment=="historical":
            length=165
        elif experiment[0:3] =="ssp":
            length=86
        else :
            # Formerly, 'length' was left undefined and the comparison below failed
            raise ValueError("No default length for experiment '%s'; please provide argument 'length'"%experiment)
    #
    models_ok=dict()
    models_nok=dict()
    for model in esgf_models_for_experiment(variable,table,experiment) :
        periods=published_period(model,experiment,variable,table)
        for pair in periods :
            for aperiod in periods[pair]:
                # NOTE(review): if period.end is exclusive (i.e. is the first
                # date after the period), this counts one year too many -
                # to be confirmed against climaf.period
                if aperiod.end.year - aperiod.start.year +1 < length :
                    target=models_nok
                else :
                    target=models_ok
                target.setdefault(model,[]).append([pair,str(aperiod)])
    return models_ok,models_nok

## Check, for one experiment, whether some published data are either incomplete on ESGF, or missing in a data_versions dict (built from file system content on the local machine)

In [6]:
def check_versions_dict(experiment,versions_tag,var_tables=None): 
    print 90*"_"
    print
    print "Data published for %s vs data on local machine according to data_versions dict %s"%(experiment,versions_tag)
    print 90*"_"
    oks=dict()
    noks=dict()
    models_not_recorded=dict()
    models_with_holes=dict()
    with open("Data_versions_selection_%s.json"%versions_tag,"r") as f :
        versions_dic=json.load(f)
    with open("Data_versions_selection_%s_holes.json"%versions_tag,"r") as f :
        holes=json.load(f)
    with open("Data_versions_selection_%s_resolve.json"%versions_tag,"r") as f :
        coverage_issues=json.load(f)
    for variable in versions_dic[experiment]:
        for table in versions_dic[experiment][variable] :
            pair=variable+","+table
            if var_tables is None or (variable,table) in var_tables :
                oks[pair],noks[pair]=experiment_length_check(variable,table,experiment)
                print "\n\n",experiment, variable, table," : ", len(oks[pair])," models have published data OK, ", len(noks[pair])," models have NOK published data",
                count=len([m for m in oks[pair] if m not in versions_dic[experiment][variable][table]])
                print " and %d models have issue on local machine\n"%count
                for model in noks[pair] :
                    count=0
                    print "\t%-10s %-5s %-20s has NOK published data"%(variable, table, model), 
                    for i in noks[pair][model] :
                        count+=1
                        (r,v),p=i
                        if count < 5 :
                            print r,p,", ",
                        else:
                            print ".",
                    print
                print
                for model in oks[pair] :
                    if model not in versions_dic[experiment][variable][table]:
                        (r,v),p=oks[pair][model][0]
                        # Check if it is idenitifed as having holes in versions dic companion
                        try :
                            h=holes[experiment][variable][table][model]
                            reals=h[h.keys()[0]]
                            version=reals[r]
                            print "\t%-10s %-5s %-20s has holes on local machine"%(variable, table, model), oks[pair][model][0]
                        except :
                            try : 
                                h=coverage_issues[experiment][variable][table][model]
                                reals=h[h.keys()[0]]
                                cov_issue=reals[r][v][v]
                                print "\t%-10s %-5s %-20s  has incomplete_coverage on local machine"%(variable, table, model), oks[pair][model][0]," vs. ",cov_issue
                            except:
                                print "\t%-10s %-5s %-20s is missing on local machine"%(variable, table, model), oks[pair][model]
                        if model not in models_not_recorded :
                            models_not_recorded[model]=[]
                        models_not_recorded[model].append(pair)
    #
    print "\n\nSummary of variable(s) with issues for ",experiment
    for model in models_not_recorded :
        print "\t%-20s"%model, 
        for variable_table in models_not_recorded[model]:
            if "day" in variable_table : print variable_table,
            else: print variable_table.split(",")[0],
        print
    return oks,noks

In [7]:
# Disabled example : check a single (variable, table) pair for one experiment
if False :
    versions_tag="20200720"
    experiment="piControl"
    pairs=[("pr","Amon")] 
    a=check_versions_dict(experiment,versions_tag,var_tables=pairs)

## List all kinds of consistency issues between ESGF and local data according to a data_versions dict

In [8]:
def check_hydro_variables_all_experiments(versions_tag="20200918",date_label="20200918",pairs=None,experiments=None):
    """
    Run check_versions_dict for a series of experiments and a series of
    (variable, table) pairs, and dump the result for each experiment in a
    JSON file named esgf_vs_local_data_<experiment>_<date_label>_<versions_tag>.json
    in the current directory.

    Args:
      versions_tag : tag of the data_versions dict, passed to check_versions_dict
      date_label : label used in the names of the output files
      pairs : list of (variable, table) pairs to check; defaults to tas,
        pr, evspsbl and prw in Amon, mrro and mrso in Lmon, and pr in day
      experiments : list of experiments to check; defaults to piControl,
        historical, ssp119, ssp126, ssp245 and ssp585

    Returns:
      a dict with experiments as keys and, as values, the pairs (oks, noks)
      returned by check_versions_dict
    """
    if pairs is None :
        pairs=[("tas","Amon"),("pr","Amon"),("evspsbl","Amon"),("prw","Amon"),("mrro","Lmon"),("mrso","Lmon"),("pr","day")] 
    if experiments is None :
        experiments=["piControl","historical","ssp119","ssp126","ssp245","ssp585"]
    #
    oksnoks=dict()
    for experiment in experiments :
        oksnoks[experiment]=check_versions_dict(experiment,versions_tag,pairs)
        oksnoksf="esgf_vs_local_data_%s_%s_%s.json"%(experiment,date_label,versions_tag)
        with open(oksnoksf,"w") as f :
            json.dump(oksnoks[experiment],f,separators=(',', ': '),indent=3,ensure_ascii=True)
    return oksnoks

In [33]:
# Check against data_versions dict 20200719; output files are labelled "test"
on=check_hydro_variables_all_experiments(versions_tag="20200719",date_label="test")

__________________________________________________________________________________________

Data published for piControl vs data on local machine according to data_versions dict 20200719
__________________________________________________________________________________________


piControl pr day  :  30  models have published data OK,  9  models have NOK published data  and 6 models have issue on local machine

	pr         day   CNRM-CM6-1-HR        has NOK published data r1i1p1f2 1850-2149 , 
	pr         day   SAM0-UNICON          has NOK published data r1i1p1f1 0001-0019 ,  r1i1p1f1 0021-0273 ,  r1i1p1f1 02740102-07001231 , 
	pr         day   KACE-1-0-G           has NOK published data r1i1p1f1 20000101-24491230 , 
	pr         day   GISS-E2-2-G          has NOK published data r1i1p1f1 2015-2054 ,  r1i1p1f1 2110-2150 , 
	pr         day   IITM-ESM             has NOK published data r1i1p1f1 1926-2125 , 
	pr         day   MRI-ESM2-0           has NOK published data r1i2p1f1 1850-2100 ,  



piControl prw Amon  :  45  models have published data OK,  15  models have NOK published data  and 5 models have issue on local machine

	prw        Amon  GISS-E2-1-G          has NOK published data r1i1p1f3 2900-2999 ,  r2i1p1f1 2900-2999 ,  r1i1p5f1 2000-2200 ,  r1i1p3f1 2000-2350 ,  . . .
	prw        Amon  CNRM-CM6-1-HR        has NOK published data r1i1p1f2 1850-2149 , 
	prw        Amon  NorESM1-F            has NOK published data r1i1p1f1 1501-1700 , 
	prw        Amon  GISS-E2-1-H          has NOK published data r1i1p3f1 2000-2300 , 
	prw        Amon  E3SM-1-1-ECA         has NOK published data r1i1p1f1 1850-2014 , 
	prw        Amon  KACE-1-0-G           has NOK published data r1i1p1f1 2000-2449 , 
	prw        Amon  GISS-E2-2-G          has NOK published data r1i1p1f1 2000-2150 , 
	prw        Amon  IPSL-CM6A-LR         has NOK published data r1i2p1f1 1850-2099 , 
	prw        Amon  MPI-ESM1-2-LR        has NOK published data r2i1p1f1 1900-1999 , 
	prw        Amon  EC-Earth3-LR   



historical pr Amon  :  55  models have published data OK,  2  models have NOK published data  and 10 models have issue on local machine

	pr         Amon  GISS-E2-1-G          has NOK published data r7i1p3f1 1850-1950 ,  r7i1p3f1 2001-2014 , 
	pr         Amon  EC-Earth3            has NOK published data r113i1p1f1 1970-2014 ,  r130i1p1f1 1970-2014 ,  r101i1p1f1 1970-2014 ,  r111i1p1f1 1970-2014 ,  . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .

	pr         Amon  MPI-ESM-1-2-HAM       has incomplete_coverage on local machine [('r1i1p1f1', 'v20190627'), '1850-2014']  vs.  [1890-1909]
	pr         Amon  CMCC-CM2-HR4         is missing on local machine [[('r1i1p1f1', 'v20200904'), '1850-2014']]
	pr         Amon  AWI-ESM-1-1-LR       is missing on local machine [[('r1i1p1f1', 'v20200212'), '1850-2014']]
	pr         Amon  GISS-E2-1-G-CC        has incomplete_coverage on local machine [('r1i1p1f1', 'v20190815'), '1850-2014']  vs.  [1850-1900, 19



historical mrso Lmon  :  47  models have published data OK,  3  models have NOK published data  and 9 models have issue on local machine

	mrso       Lmon  GISS-E2-1-G          has NOK published data r7i1p3f1 1850-1950 ,  r7i1p3f1 2001-2014 , 
	mrso       Lmon  E3SM-1-1             has NOK published data r1i1p1f1 1850-1999 ,  r1i1p1f1 2010-2014 , 
	mrso       Lmon  EC-Earth3            has NOK published data r113i1p1f1 1970-2014 ,  r130i1p1f1 1970-2014 ,  r101i1p1f1 1970-2014 ,  r111i1p1f1 1970-2014 ,  . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .

	mrso       Lmon  FGOALS-g3            is missing on local machine [[('r3i1p1f1', 'v20190820'), '1850-2016'], [('r6i1p1f1', 'v20191119'), '1850-2016'], [('r4i1p1f1', 'v20191120'), '1850-2016'], [('r5i1p1f1', 'v20191119'), '1850-2016'], [('r1i1p1f1', 'v20190820'), '1850-2016'], [('r2i1p1f1', 'v20190820'), '1850-2016']]
	mrso       Lmon  MPI-ESM-1-2-HAM       has incomplete_coverage on local ma



ssp119 pr Amon  :  13  models have published data OK,  0  models have NOK published data  and 3 models have issue on local machine


	pr         Amon  EC-Earth3            is missing on local machine [[('r113i1p1f1', 'v20200412'), '2015-2100'], [('r130i1p1f1', 'v20200412'), '2015-2100'], [('r101i1p1f1', 'v20200412'), '2015-2100'], [('r111i1p1f1', 'v20200412'), '2015-2100'], [('r125i1p1f1', 'v20200412'), '2015-2100'], [('r4i1p1f1', 'v20200425'), '2015-2100'], [('r103i1p1f1', 'v20200412'), '2015-2100'], [('r119i1p1f1', 'v20200412'), '2015-2100'], [('r107i1p1f1', 'v20200412'), '2015-2100'], [('r143i1p1f1', 'v20200412'), '2015-2100'], [('r148i1p1f1', 'v20200412'), '2015-2100'], [('r126i1p1f1', 'v20200412'), '2015-2100'], [('r134i1p1f1', 'v20200412'), '2015-2100'], [('r124i1p1f1', 'v20200412'), '2015-2100'], [('r136i1p1f1', 'v20200412'), '2015-2100'], [('r104i1p1f1', 'v20200412'), '2015-2100'], [('r116i1p1f1', 'v20200412'), '2015-2100'], [('r115i1p1f1', 'v20200412'), '2015-2100'], [('r144



ssp119 prw Amon  :  13  models have published data OK,  0  models have NOK published data  and 3 models have issue on local machine


	prw        Amon  EC-Earth3            is missing on local machine [[('r113i1p1f1', 'v20200412'), '2015-2100'], [('r130i1p1f1', 'v20200412'), '2015-2100'], [('r101i1p1f1', 'v20200412'), '2015-2100'], [('r111i1p1f1', 'v20200412'), '2015-2100'], [('r125i1p1f1', 'v20200412'), '2015-2100'], [('r4i1p1f1', 'v20200425'), '2015-2100'], [('r103i1p1f1', 'v20200412'), '2015-2100'], [('r119i1p1f1', 'v20200412'), '2015-2100'], [('r107i1p1f1', 'v20200412'), '2015-2100'], [('r143i1p1f1', 'v20200412'), '2015-2100'], [('r148i1p1f1', 'v20200412'), '2015-2100'], [('r126i1p1f1', 'v20200412'), '2015-2100'], [('r134i1p1f1', 'v20200412'), '2015-2100'], [('r124i1p1f1', 'v20200412'), '2015-2100'], [('r136i1p1f1', 'v20200412'), '2015-2100'], [('r104i1p1f1', 'v20200412'), '2015-2100'], [('r116i1p1f1', 'v20200412'), '2015-2100'], [('r115i1p1f1', 'v20200412'), '2015-2100'], [('r14



ssp126 pr day  :  32  models have published data OK,  0  models have NOK published data  and 4 models have issue on local machine


	pr         day   KIOST-ESM            is missing on local machine [[('r1i1p1f1', 'v20191202'), '2015-2100']]
	pr         day   CAMS-CSM1-0          is missing on local machine [[('r2i1p1f1', 'v20191106'), '2015-2099']]
	pr         day   CMCC-CM2-SR5         is missing on local machine [[('r1i1p1f1', 'v20200717'), '2015-2100']]
	pr         day   HadGEM3-GC31-MM      is missing on local machine [[('r1i1p1f3', 'v20200515'), '20150101-21001230']]


ssp126 pr Amon  :  37  models have published data OK,  0  models have NOK published data  and 8 models have issue on local machine


	pr         Amon  MPI-ESM1-2-LR         has incomplete_coverage on local machine [('r1i1p1f1', 'v20190710'), '2015-2100']  vs.  [2035-2094]
	pr         Amon  NorESM2-LM            has incomplete_coverage on local machine [('r1i1p1f1', 'v20191108'), '2015-2100']  vs.  [2015-2030, 204



ssp245 pr day  :  31  models have published data OK,  3  models have NOK published data  and 3 models have issue on local machine

	pr         day   MIROC6               has NOK published data r38i1p1f1 2015-2039 ,  r14i1p1f1 2015-2039 ,  r49i1p1f1 2015-2039 ,  r21i1p1f1 2015-2039 ,  . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
	pr         day   HadGEM3-GC31-LL      has NOK published data r3i1p1f3 20150101-20201230 ,  r2i1p1f3 20150101-20201230 ,  r4i1p1f3 20150101-20201230 , 
	pr         day   MRI-ESM2-0           has NOK published data r2i1p1f1 2015-2030 ,  r4i1p1f1 2015-2030 ,  r3i1p1f1 2015-2030 ,  r5i1p1f1 2015-2030 , 

	pr         day   KIOST-ESM            is missing on local machine [[('r1i1p1f1', 'v20191202'), '2015-2100']]
	pr         day   CAMS-CSM1-0          is missing on local machine [[('r2i1p1f1', 'v20200720'), '2015-2099']]
	pr         day   CMCC-CM2-SR5         is missing on local machine [[('r1i1p1f1', 'v20200617'), '2015-21



ssp585 pr day  :  34  models have published data OK,  0  models have NOK published data  and 7 models have issue on local machine


	pr         day   KIOST-ESM            is missing on local machine [[('r1i1p1f1', 'v20191202'), '2015-2100']]
	pr         day   CAMS-CSM1-0          is missing on local machine [[('r2i1p1f1', 'v20191106'), '2015-2099']]
	pr         day   TaiESM1              is missing on local machine [[('r1i1p1f1', 'v20200902'), '2015-2100']]
	pr         day   CMCC-CM2-SR5         is missing on local machine [[('r1i1p1f1', 'v20200622'), '2015-2100']]
	pr         day   HadGEM3-GC31-MM      is missing on local machine [[('r1i1p1f3', 'v20200515'), '20150101-21001230'], [('r2i1p1f3', 'v20200515'), '20150101-21001230'], [('r3i1p1f3', 'v20200507'), '20150101-21001230']]
	pr         day   NorESM2-MM            has incomplete_coverage on local machine [('r1i1p1f1', 'v20191108'), '2015-2100']  vs.  [2015-2080, 2091-2100]
	pr         day   GFDL-ESM4            is missing on loca

In [35]:
# Check against data_versions dict 20200913; output files carry the same label
on=check_hydro_variables_all_experiments(versions_tag="20200913",date_label="20200913")

__________________________________________________________________________________________

Data published for piControl vs data on local machine according to data_versions dict 20200913
__________________________________________________________________________________________


piControl pr day  :  30  models have published data OK,  9  models have NOK published data  and 4 models have issue on local machine

	pr         day   CNRM-CM6-1-HR        has NOK published data r1i1p1f2 1850-2149 , 
	pr         day   SAM0-UNICON          has NOK published data r1i1p1f1 0001-0019 ,  r1i1p1f1 0021-0273 ,  r1i1p1f1 02740102-07001231 , 
	pr         day   KACE-1-0-G           has NOK published data r1i1p1f1 20000101-24491230 , 
	pr         day   GISS-E2-2-G          has NOK published data r1i1p1f1 2015-2054 ,  r1i1p1f1 2110-2150 , 
	pr         day   IITM-ESM             has NOK published data r1i1p1f1 1926-2125 , 
	pr         day   MRI-ESM2-0           has NOK published data r1i2p1f1 1850-2100 ,  



piControl mrso Lmon  :  38  models have published data OK,  15  models have NOK published data  and 1 models have issue on local machine

	mrso       Lmon  TaiESM1              has NOK published data r1i1p1f1 0601-0700 , 
	mrso       Lmon  GISS-E2-1-G          has NOK published data r1i1p1f3 2900-2999 ,  r2i1p1f1 2900-2999 ,  r1i1p5f1 2000-2200 ,  r1i1p3f1 2000-2350 ,  . . .
	mrso       Lmon  CNRM-CM6-1-HR        has NOK published data r1i1p1f2 1850-2149 , 
	mrso       Lmon  GISS-E2-1-H          has NOK published data r1i1p3f1 2000-2300 , 
	mrso       Lmon  SAM0-UNICON          has NOK published data r1i1p1f1 0001-0280 ,  r1i1p1f1 029012 ,  r1i1p1f1 030012 ,  r1i1p1f1 031012 ,  . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
	mrso       Lmon  E3SM-1-1-ECA         has NOK published data r1i1p1f1 1850-2014 , 
	mrso       Lmon  GISS-E2-2-G          has NOK published data r1i1p1f1 2000-2150 , 
	mrso       Lmon  IPSL-CM6A-LR         has NOK published data r1i



historical tas Amon  :  56  models have published data OK,  3  models have NOK published data  and 7 models have issue on local machine

	tas        Amon  GISS-E2-1-G          has NOK published data r7i1p3f1 1850-1950 ,  r7i1p3f1 2001-2014 , 
	tas        Amon  EC-Earth3            has NOK published data r113i1p1f1 1970-2014 ,  r130i1p1f1 1970-2014 ,  r101i1p1f1 1970-2014 ,  r111i1p1f1 1970-2014 ,  . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
	tas        Amon  AWI-CM-1-1-MR        has NOK published data r1i1p1f1 1850 , 

	tas        Amon  KIOST-ESM            is missing on local machine [[('r1i1p1f1', 'v20191106'), '1850-2014']]
	tas        Amon  CMCC-CM2-HR4         is missing on local machine [[('r1i1p1f1', 'v20200904'), '1850-2014']]
	tas        Amon  AWI-ESM-1-1-LR        has incomplete_coverage on local machine [('r1i1p1f1', 'v20200212'), '1850-2014']  vs.  [1851, 1858, 1864, 1866, 1868, 1871, 1873-1874, 1876-1878, 1882, 1889, 1896



historical evspsbl Amon  :  53  models have published data OK,  2  models have NOK published data  and 8 models have issue on local machine

	evspsbl    Amon  GISS-E2-1-G          has NOK published data r7i1p3f1 1850-1950 ,  r7i1p3f1 2001-2014 , 
	evspsbl    Amon  EC-Earth3            has NOK published data r113i1p1f1 1970-2014 ,  r130i1p1f1 1970-2014 ,  r101i1p1f1 1970-2014 ,  r111i1p1f1 1970-2014 ,  . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .

	evspsbl    Amon  MPI-ESM-1-2-HAM       has incomplete_coverage on local machine [('r1i1p1f1', 'v20190627'), '1850-2014']  vs.  [1850-1869, 1970-1989]
	evspsbl    Amon  CMCC-CM2-HR4         is missing on local machine [[('r1i1p1f1', 'v20200904'), '1850-2014']]
	evspsbl    Amon  AWI-ESM-1-1-LR        has incomplete_coverage on local machine [('r1i1p1f1', 'v20200212'), '1850-2014']  vs.  [1853, 1855-1857, 1860, 1863-1864, 1867-1868, 1870-1872, 1875-1877, 1880-1882, 1884-1885, 1887, 1891, 1902, 1



ssp126 pr day  :  32  models have published data OK,  0  models have NOK published data  and 2 models have issue on local machine


	pr         day   KIOST-ESM            is missing on local machine [[('r1i1p1f1', 'v20191202'), '2015-2100']]
	pr         day   HadGEM3-GC31-MM       has incomplete_coverage on local machine [('r1i1p1f3', 'v20200515'), '20150101-21001230']  vs.  [20200101-20241230, 20300101-20341230, 20400101-20641230, 20700101-20741230, 20800101-21001230]


ssp126 pr Amon  :  37  models have published data OK,  0  models have NOK published data  and 3 models have issue on local machine


	pr         Amon  MPI-ESM1-2-LR         has incomplete_coverage on local machine [('r1i1p1f1', 'v20190710'), '2015-2100']  vs.  [2035-2094]
	pr         Amon  HadGEM3-GC31-MM       has incomplete_coverage on local machine [('r1i1p1f3', 'v20200515'), '2015-2100']  vs.  [2030-2069, 2090-2100]
	pr         Amon  NorESM2-MM            has incomplete_coverage on local machine [('r1i1p1f1', 'v2



ssp245 tas Amon  :  38  models have published data OK,  4  models have NOK published data  and 2 models have issue on local machine

	tas        Amon  MIROC6               has NOK published data r38i1p1f1 2015-2039 ,  r14i1p1f1 2015-2039 ,  r49i1p1f1 2015-2039 ,  r21i1p1f1 2015-2039 ,  . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
	tas        Amon  HadGEM3-GC31-LL      has NOK published data r3i1p1f3 2015-2020 ,  r2i1p1f3 2015-2020 ,  r4i1p1f3 2015-2020 , 
	tas        Amon  CNRM-CM6-1           has NOK published data r8i1p1f2 2015-2020 ,  r9i1p1f2 2015-2020 ,  r7i1p1f2 2015-2020 ,  r10i1p1f2 2015-2020 , 
	tas        Amon  MRI-ESM2-0           has NOK published data r5i1p1f1 2015-2030 ,  r4i1p1f1 2015-2030 ,  r3i1p1f1 2015-2030 ,  r2i1p1f1 2015-2030 , 

	tas        Amon  KIOST-ESM            is missing on local machine [[('r1i1p1f1', 'v20191106'), '2015-2100']]
	tas        Amon  CMCC-CM2-SR5         is missing on local machine [[('r1i1p1f1', 'v2



ssp585 pr Amon  :  39  models have published data OK,  1  models have NOK published data  and 5 models have issue on local machine

	pr         Amon  EC-Earth3            has NOK published data r9i1p1f1 2015-2051 , 

	pr         Amon  NorESM2-LM            has incomplete_coverage on local machine [('r1i1p1f1', 'v20191108'), '2015-2100']  vs.  [2071-2080]
	pr         Amon  CMCC-CM2-SR5         is missing on local machine [[('r1i1p1f1', 'v20200622'), '2015-2100']]
	pr         Amon  HadGEM3-GC31-MM       has incomplete_coverage on local machine [('r1i1p1f3', 'v20200515'), '2015-2100']  vs.  [2030-2049, 2070-2089]
	pr         Amon  TaiESM1              is missing on local machine [[('r1i1p1f1', 'v20200901'), '2015-2100']]
	pr         Amon  NorESM2-MM            has incomplete_coverage on local machine [('r1i1p1f1', 'v20191108'), '2015-2100']  vs.  [2051-2070, 2081-2090]


ssp585 tas Amon  :  40  models have published data OK,  1  models have NOK published data  and 4 models have issue on

In [9]:
# Check against data_versions dict 20200918; output files carry the same label
on=check_hydro_variables_all_experiments(versions_tag="20200918",date_label="20200918")

__________________________________________________________________________________________

Data published for piControl vs data on local machine according to data_versions dict 20200918
__________________________________________________________________________________________


piControl pr day  :  30  models have published data OK,  9  models have NOK published data  and 4 models have issue on local machine

	pr         day   CNRM-CM6-1-HR        has NOK published data r1i1p1f2 1850-2149 , 
	pr         day   SAM0-UNICON          has NOK published data r1i1p1f1 0001-0019 ,  r1i1p1f1 0021-0273 ,  r1i1p1f1 02740102-07001231 , 
	pr         day   KACE-1-0-G           has NOK published data r1i1p1f1 20000101-24491230 , 
	pr         day   GISS-E2-2-G          has NOK published data r1i1p1f1 2015-2054 ,  r1i1p1f1 2110-2150 , 
	pr         day   IITM-ESM             has NOK published data r1i1p1f1 1926-2125 , 
	pr         day   MRI-ESM2-0           has NOK published data r1i2p1f1 1850-2100 ,  



piControl mrso Lmon  :  38  models have published data OK,  15  models have NOK published data  and 1 models have issue on local machine

	mrso       Lmon  TaiESM1              has NOK published data r1i1p1f1 0601-0700 , 
	mrso       Lmon  GISS-E2-1-G          has NOK published data r1i1p1f3 2900-2999 ,  r2i1p1f1 2900-2999 ,  r1i1p5f1 2000-2200 ,  r1i1p3f1 2000-2350 ,  . . .
	mrso       Lmon  CNRM-CM6-1-HR        has NOK published data r1i1p1f2 1850-2149 , 
	mrso       Lmon  GISS-E2-1-H          has NOK published data r1i1p3f1 2000-2300 , 
	mrso       Lmon  SAM0-UNICON          has NOK published data r1i1p1f1 0001-0280 ,  r1i1p1f1 029012 ,  r1i1p1f1 030012 ,  r1i1p1f1 031012 ,  . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
	mrso       Lmon  E3SM-1-1-ECA         has NOK published data r1i1p1f1 1850-2014 , 
	mrso       Lmon  GISS-E2-2-G          has NOK published data r1i1p1f1 2000-2150 , 
	mrso       Lmon  IPSL-CM6A-LR         has NOK published data r1i



historical tas Amon  :  56  models have published data OK,  3  models have NOK published data  and 6 models have issue on local machine

	tas        Amon  GISS-E2-1-G          has NOK published data r7i1p3f1 1850-1950 ,  r7i1p3f1 2001-2014 , 
	tas        Amon  EC-Earth3            has NOK published data r113i1p1f1 1970-2014 ,  r130i1p1f1 1970-2014 ,  r101i1p1f1 1970-2014 ,  r111i1p1f1 1970-2014 ,  . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
	tas        Amon  AWI-CM-1-1-MR        has NOK published data r1i1p1f1 1850 , 

	tas        Amon  KIOST-ESM            is missing on local machine [[('r1i1p1f1', 'v20191106'), '1850-2014']]
	tas        Amon  AWI-ESM-1-1-LR        has incomplete_coverage on local machine [('r1i1p1f1', 'v20200212'), '1850-2014']  vs.  [1851, 1858, 1864, 1866, 1868, 1871, 1873-1874, 1876-1878, 1882, 1889, 1896, 1899, 1901-1903, 1905, 1909, 1912-1913, 1916, 1919-1922, 1924-1925, 1927, 1930, 1935-1937, 1939-1944, 1946-1



historical evspsbl Amon  :  53  models have published data OK,  2  models have NOK published data  and 6 models have issue on local machine

	evspsbl    Amon  GISS-E2-1-G          has NOK published data r7i1p3f1 1850-1950 ,  r7i1p3f1 2001-2014 , 
	evspsbl    Amon  EC-Earth3            has NOK published data r113i1p1f1 1970-2014 ,  r130i1p1f1 1970-2014 ,  r101i1p1f1 1970-2014 ,  r111i1p1f1 1970-2014 ,  . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .

	evspsbl    Amon  MPI-ESM-1-2-HAM       has incomplete_coverage on local machine [('r1i1p1f1', 'v20190627'), '1850-2014']  vs.  [1850-1869, 1970-1989]
	evspsbl    Amon  AWI-ESM-1-1-LR        has incomplete_coverage on local machine [('r1i1p1f1', 'v20200212'), '1850-2014']  vs.  [1853, 1855-1857, 1860, 1863-1864, 1867-1868, 1870-1872, 1875-1877, 1880-1882, 1884-1885, 1887, 1891, 1902, 1905, 1915-1916, 1918-1921, 1923, 1925-1926, 1930-1932, 1934-1936, 1938, 1942, 1945, 1948, 1953, 1955, 1958, 19



ssp126 pr day  :  32  models have published data OK,  0  models have NOK published data  and 2 models have issue on local machine


	pr         day   KIOST-ESM            is missing on local machine [[('r1i1p1f1', 'v20191202'), '2015-2100']]
	pr         day   HadGEM3-GC31-MM       has incomplete_coverage on local machine [('r1i1p1f3', 'v20200515'), '20150101-21001230']  vs.  [20200101-20241230, 20300101-20341230, 20400101-20641230, 20700101-20741230, 20800101-21001230]


ssp126 pr Amon  :  37  models have published data OK,  0  models have NOK published data  and 3 models have issue on local machine


	pr         Amon  MPI-ESM1-2-LR         has incomplete_coverage on local machine [('r1i1p1f1', 'v20190710'), '2015-2100']  vs.  [2035-2094]
	pr         Amon  HadGEM3-GC31-MM       has incomplete_coverage on local machine [('r1i1p1f3', 'v20200515'), '2015-2100']  vs.  [2030-2069, 2090-2100]
	pr         Amon  NorESM2-MM            has incomplete_coverage on local machine [('r1i1p1f1', 'v2



ssp245 mrro Lmon  :  36  models have published data OK,  4  models have NOK published data  and 5 models have issue on local machine

	mrro       Lmon  MIROC6               has NOK published data r38i1p1f1 2015-2039 ,  r14i1p1f1 2015-2039 ,  r49i1p1f1 2015-2039 ,  r21i1p1f1 2015-2039 ,  . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
	mrro       Lmon  HadGEM3-GC31-LL      has NOK published data r3i1p1f3 2015-2020 ,  r2i1p1f3 2015-2020 ,  r4i1p1f3 2015-2020 , 
	mrro       Lmon  CNRM-CM6-1           has NOK published data r8i1p1f2 2015-2020 ,  r9i1p1f2 2015-2020 ,  r7i1p1f2 2015-2020 ,  r10i1p1f2 2015-2020 , 
	mrro       Lmon  MRI-ESM2-0           has NOK published data r2i1p1f1 2015-2030 ,  r4i1p1f1 2015-2030 ,  r3i1p1f1 2015-2030 ,  r5i1p1f1 2015-2030 , 

	mrro       Lmon  KIOST-ESM            is missing on local machine [[('r1i1p1f1', 'v20191104'), '2015-2100']]
	mrro       Lmon  MPI-ESM1-2-LR         has incomplete_coverage on local machine [('



ssp585 mrro Lmon  :  38  models have published data OK,  0  models have NOK published data  and 7 models have issue on local machine


	mrro       Lmon  KIOST-ESM            is missing on local machine [[('r1i1p1f1', 'v20191104'), '2015-2100']]
	mrro       Lmon  MPI-ESM1-2-LR         has incomplete_coverage on local machine [('r1i1p1f1', 'v20190710'), '2015-2100']  vs.  [2035-2074]
	mrro       Lmon  NorESM2-LM            has incomplete_coverage on local machine [('r1i1p1f1', 'v20191108'), '201502-210012']  vs.  [2021-2030, 2041-2050, 2091-2100]
	mrro       Lmon  CMCC-CM2-SR5         is missing on local machine [[('r1i1p1f1', 'v20200622'), '2015-2100']]
	mrro       Lmon  HadGEM3-GC31-MM       has incomplete_coverage on local machine [('r1i1p1f3', 'v20200515'), '2015-2100']  vs.  [2050-2069]
	mrro       Lmon  TaiESM1              is missing on local machine [[('r1i1p1f1', 'v20200901'), '201502-210012']]
	mrro       Lmon  NorESM2-MM            has incomplete_coverage on local machine [(

## Summarizing missing/incomplete published variables per model for each experiment (according to ESGF)

In [50]:
def esgf_missing_variables(oksnoks):
    """Print, per experiment, the models that have NOK variables on the ESGF.

    Args:
        oksnoks: either a dict mapping an experiment name to an
            ``(oks, noks)`` pair, or a bare ``(oks, noks)`` tuple, which is
            then reported under the experiment name "unknown".
            ``noks`` is a dict keyed by ``(variable, table)`` pairs; iterating
            over each of its values must yield model names.  ``oks`` is
            unpacked but not otherwise needed for this report.

    Output (on stdout): a header line, then for each experiment its name
    followed by one line per model, sorted by model name, listing the
    variables found in ``noks`` for that model.  A variable is printed as
    ``<variable>_<table>`` when its table is "day", and as the bare variable
    name otherwise.

    Returns:
        None.
    """
    print("Missing or uncomplete variables on the ESGF (among those scrutinized locally)")
    # A bare (oks, noks) tuple is wrapped so that both input shapes share
    # the same processing loop.
    if isinstance(oksnoks, tuple):
        oksnoks = {"unknown": oksnoks}

    for experiment in oksnoks:
        print(experiment)
        _oks, noks = oksnoks[experiment]

        # Invert noks : model -> list of (variable, table) pairs, keeping
        # the iteration order of noks for the pairs of a given model.
        mnoks = dict()
        for pair in noks:
            for model in noks[pair]:
                mnoks.setdefault(model, []).append(pair)

        # Only models with at least one NOK pair are reported, so walking
        # mnoks is enough; this also avoids indexing noks with keys of oks.
        for model in sorted(mnoks):
            labels = [var if tab != "day" else "%s_%s" % (var, tab)
                      for var, tab in mnoks[model]]
            # One space-joined line reproduces the layout of chained
            # trailing-comma print statements.
            print(" ".join(["%-20s " % model] + labels))
        

AWI-ESM-1-1-LR        mrsos mrso evspsbl prw sos tas pr pr_day mrro
BCC-ESM1              mrsos mrso evspsbl prw sos tas pr mrro
CAS-ESM2-0            sos
CESM2-FV2             mrsos mrro
CESM2-WACCM-FV2       mrsos mrro
CNRM-CM6-1            evspsbl
CNRM-CM6-1-HR         mrsos mrso evspsbl prw sos tas pr pr_day mrro
E3SM-1-1              mrsos mrso evspsbl prw sos tas pr mrro
E3SM-1-1-ECA          mrsos mrso evspsbl prw sos tas pr mrro
EC-Earth3-LR          mrsos mrso evspsbl prw sos tas pr pr_day mrro
GISS-E2-1-G           mrsos mrso evspsbl prw sos tas pr mrro
GISS-E2-1-G-CC        mrsos mrso evspsbl prw sos tas pr mrro
GISS-E2-1-H           mrsos mrso evspsbl prw sos tas pr mrro
GISS-E2-2-G           mrsos mrso evspsbl prw sos tas pr pr_day mrro
HadGEM3-GC31-LL       pr_day
IITM-ESM              evspsbl sos tas pr pr_day
IPSL-CM6A-LR          mrsos mrso evspsbl prw sos tas pr pr_day mrro
KACE-1-0-G            evspsbl prw tas pr
MPI-ESM1-2-LR         mrsos mrso evspsbl prw sos tas p

In [None]:
#esgf_missing_variables(on)

In [311]:
# Report, for a fixed list of (variable, table) pairs, which models have NOK
# published data, using the notebook-level dicts `oks` and `noks` (both keyed
# by (variable, table) pairs, then by model).
models_having_one_missing_var=set()
variables=[("tas","Amon"),("pr","Amon"),("evspsbl","Amon"),("prw","Amon"),("mrro","Lmon"),("mrso","Lmon"),("pr","day")] 

# Models which are NOK for at least one of the scrutinized pairs
for pair in variables:
    models_having_one_missing_var.update(noks[pair])

# Among those, models which show up in no entry of `oks` at all
# (whatever the pair, including pairs not listed in `variables`)
models_having_all_vars_missing = set(
    m for m in models_having_one_missing_var
    if not any(m in oks[p] for p in oks))

print("Models having all vars missing")
# Sorted so that the listing does not depend on set iteration order
for m in sorted(models_having_all_vars_missing):
    print("\t%s" % m)


def _print_models_with_missing_vars(title, excluded_models=()):
    """Print `title`, then for each pair of `variables` the models that are
    in noks[pair] but not in oks[pair], skipping those in `excluded_models`.

    Each model line lists the ((realization, version), period) entries
    recorded for it in noks[pair][model], separated by "|".
    """
    print(title)
    for pair in variables:
        variable, table = pair
        print("\n%-10s %4s" % (variable, table))
        for model in noks[pair]:
            if model in oks[pair] or model in excluded_models:
                continue
            fields = ["\t%-25s" % model]
            # The period is bound to `span`, not `period`, so that the
            # `period` name imported from climaf is not overwritten.
            for (real, version), span in noks[pair][model]:
                fields.append("%s %s %s |" % (real, version, span))
            print(" ".join(fields))


_print_models_with_missing_vars(
    "\nDelta list of models having some vars missing",
    excluded_models=models_having_all_vars_missing)

_print_models_with_missing_vars(
    "\nFull list of models having some vars missing")

# Every model known in either oks or noks, whatever the pair
all_models=set()
for per_pair in (oks, noks):
    for p in per_pair:
        all_models.update(per_pair[p])

    










Models having all vars missing
	CNRM-CM6-1-HR
	NorESM1-F
	E3SM-1-1-ECA
	KACE-1-0-G
	GISS-E2-2-G
	EC-Earth3-LR
	AWI-ESM-1-1-LR
	GISS-E2-1-G-CC
	BCC-ESM1
	E3SM-1-1
	IITM-ESM

Delta list of models having some vars missing

tas        Amon

pr         Amon
	NorESM2-LM                r1i1p1f1 v20200217 1600-1709 |

evspsbl    Amon
	CNRM-CM6-1                r1i1p1f2 v20180814 2050-2240 | r1i1p1f2 v20180814 2261-2349 |

prw        Amon

mrro       Lmon
	SAM0-UNICON               r1i1p1f1 v20190910 0001-0280 | r1i1p1f1 v20190910 029012 | r1i1p1f1 v20190910 030012 | r1i1p1f1 v20190910 031012 | r1i1p1f1 v20190910 032012 | r1i1p1f1 v20190910 033012 | r1i1p1f1 v20190910 034012 | r1i1p1f1 v20190910 035012 | r1i1p1f1 v20190910 036012 | r1i1p1f1 v20190910 037012 | r1i1p1f1 v20190910 038012 | r1i1p1f1 v20190910 039012 | r1i1p1f1 v20190910 040012 | r1i1p1f1 v20190910 041012 | r1i1p1f1 v20190910 042012 | r1i1p1f1 v20190910 043012 | r1i1p1f1 v20190910 044012 | r1i1p1f1 v20190910 045012 | r1i1p1f

In [99]:
def check_if_version_is_last_published(model,experiment,variable,table,grid,real,version,distrib="true") :
    """Tell whether `version` is the latest one published on the ESGF index.

    Queries the search index (through jrequest) for datasets flagged
    latest=true that match the experiment, model, variable, table and
    variant label, and compares their version with `version`.

    Args:
        model, experiment, variable, table, real: values for the search
            facets source_id, experiment_id, variable, table_id and
            variant_label respectively.
        grid: accepted for signature compatibility; it is not used in the
            query.
        version: the local version, with or without a leading 'v'; the
            remainder must be convertible by int() whenever it differs
            from a published version.
        distrib: value of the 'distrib' search parameter (default "true").

    Returns:
        None if the index returns no matching dataset;
        True if no returned dataset has a version numerically greater than
        `version`;
        otherwise the greatest such published version, as a string.
    """
    dic={'distrib'     : distrib,
         'limit'       : '10000',
         'type'        : 'Dataset',
         'fields'      : 'version',
         'experiment_id':experiment,
         'source_id'   : model,
         'variable'    : variable,
         'table_id'    : table,
         'variant_label': real,
         'latest'     : 'true'
         }
    # Form request string
    reqs="&".join("%s=%s"%(k,dic[k]) for k in dic)
    #
    docs=jrequest(reqs)['response']['docs']
    if len(docs)==0 : 
        return None
    #
    # Published versions carry no 'v' prefix: strip it once, before comparing
    if version.startswith('v') : version=version[1:]
    #
    newest=None
    for e in docs :
        pversion=e['version'].encode('ascii')
        # A published version older than the local one is ignored (don't
        # worry for out-dated index info). Several docs may be returned, so
        # keep the greatest newer version rather than the last one seen.
        if version != pversion and int(version) < int(pversion) :
            if newest is None or int(newest) < int(pversion) :
                newest = pversion
    if newest is None :
        return True
    return newest
