In [2]:
import pandas as pd
import gcsfs
import xarray as xr
from datetime import datetime
import itertools

import myconfig
from mytasks import Check, Download, ReadFiles, SaveAsZarr, Upload, Cleanup
from mysearch import esgf_search

In [3]:
# ESGF search configuration: node preferences, search endpoint and the local
# directory that receives the zarr stores.
node_pref = myconfig.node_pref
dtype = myconfig.dtype

# Published on the shared config module so the task modules see the same target.
myconfig.local_target_prefix = '/h112/naomi/zarr-minimal/'

# To leave out a particular data node, push its preference rank to the bottom:
# node_pref['esgf-data1.llnl.gov'] = 999

# Search endpoint handed to esgf_search(server=...); switch by picking another key.
ESGF_site = dtype['llnl']
#ESGF_site = dtype['dkrz']

print(f'zarrs will be written to:  {myconfig.local_target_prefix}')

zarrs will be written to:  /h112/naomi/zarr-minimal/


In [4]:
# Google Cloud Storage configuration.
# Anonymous, read-only filesystem handle; cache_timeout=-1 is forwarded to gcsfs as-is.
fs = gcsfs.GCSFileSystem(
    token='anon',
    access='read_only',
    cache_timeout=-1,
)

# Catalog CSV of the CMIP6 zarr stores; every column is read as text.
df_GCS = pd.read_csv(
    'https://cmip6.storage.googleapis.com/cmip6-zarr-consolidated-stores-noQC.csv',
    dtype='unicode',
)

# Share both objects with the other modules through myconfig.
myconfig.fs, myconfig.df_GCS = fs, df_GCS

In [5]:
# Catalog rows for the monthly atmosphere table (Amon) of the historical experiment.
df_GCS[(df_GCS['table_id'] == 'Amon') & (df_GCS['experiment_id'] == 'historical')]

Unnamed: 0,activity_id,institution_id,source_id,experiment_id,member_id,table_id,variable_id,grid_label,zstore,dcpp_init_year,version,status,severity,issue_url
14862,CMIP,AS-RCEC,TaiESM1,historical,r1i1p1f1,Amon,cct,gn,gs://cmip6/CMIP/AS-RCEC/TaiESM1/historical/r1i...,,20200624,good,none,none
14863,CMIP,AS-RCEC,TaiESM1,historical,r1i1p1f1,Amon,cl,gn,gs://cmip6/CMIP/AS-RCEC/TaiESM1/historical/r1i...,,20200623,good,none,none
14864,CMIP,AS-RCEC,TaiESM1,historical,r1i1p1f1,Amon,cli,gn,gs://cmip6/CMIP/AS-RCEC/TaiESM1/historical/r1i...,,20200623,good,none,none
14865,CMIP,AS-RCEC,TaiESM1,historical,r1i1p1f1,Amon,clivi,gn,gs://cmip6/CMIP/AS-RCEC/TaiESM1/historical/r1i...,,20200218,good,none,none
14866,CMIP,AS-RCEC,TaiESM1,historical,r1i1p1f1,Amon,clt,gn,gs://cmip6/CMIP/AS-RCEC/TaiESM1/historical/r1i...,,20200218,good,none,none
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
107233,CMIP,UA,MCM-UA-1-0,historical,r1i1p1f2,Amon,ua,gn,gs://cmip6/CMIP/UA/MCM-UA-1-0/historical/r1i1p...,,20190731,good,none,none
107234,CMIP,UA,MCM-UA-1-0,historical,r1i1p1f2,Amon,uas,gn,gs://cmip6/CMIP/UA/MCM-UA-1-0/historical/r1i1p...,,20190731,good,none,none
107235,CMIP,UA,MCM-UA-1-0,historical,r1i1p1f2,Amon,va,gn,gs://cmip6/CMIP/UA/MCM-UA-1-0/historical/r1i1p...,,20190731,good,none,none
107236,CMIP,UA,MCM-UA-1-0,historical,r1i1p1f2,Amon,vas,gn,gs://cmip6/CMIP/UA/MCM-UA-1-0/historical/r1i1p...,,20190731,good,none,none


In [4]:
# Convenience lists of CMIP6 experiment_id and variable_id values, meant to be
# pasted into the search specification below.

# experiment_id values
core_experiments = [
    '1pctCO2', 'abrupt-4xCO2', 'historical', 'piControl',
    'ssp119', 'ssp126', 'ssp245', 'ssp370', 'ssp434', 'ssp460', 'ssp534-over', 'ssp585',
]
more_experiments = [
    'piControl-spinup', 'amip-hist', 'esm-hist', 'esm-piControl', 'esm-piControl-spinup',
    '1pctCO2-bgc', 'lgm', 'past1000', 'amip',
]

# variable_id values, grouped by table (Amon / Omon) and by 2-D / 3-D fields
core_Amon_2dvars = ['evspsbl', 'hfls', 'pr', 'prc', 'ps', 'psl', 'sfcWind', 'tas', 'ts', 'uas', 'vas', 'huss', 'hurs']
# NOTE(review): 'hfls' also appears in core_Amon_2dvars, and 'hfds' looks like
# an Omon variable rather than an Amon one -- left as-is, confirm intent.
flux_Amon_2dvars = ['rlds', 'rlus', 'rsds', 'rsus', 'hfds', 'hfls', 'hfss', 'tauu', 'tauv']
core_Omon_2dvars = ['tos', 'sos', 'zos']
# Fixed: the original listed 'tauuo' twice, dropping the y-component 'tauvo'.
flux_Omon_2dvars = ['tauuo', 'tauvo']
core_Amon_3dvars = ['ta', 'ua', 'va', 'zg', 'wap', 'hur', 'hus']
core_Omon_3dvars = ['masscello', 'so', 'thetao', 'umo', 'uo', 'vmo', 'vo', 'wmo', 'wo']
core_Omon_tracers = [
    'chl', 'chlos', 'dfe', 'dfeos', 'epc100', 'fgco2', 'intpp', 'no3', 'no3os',
    'phyc', 'phycos', 'phydiat', 'phydiatos', 'si', 'sios', 'spco2', 'zooc', 'zoocos',
]

In [5]:
# Search specification: each keyword maps to the list of values to search for.
# A keyword that is left out is unconstrained, i.e. every value will match.
all_search = {
    'table_id': ['Omon'],
    'experiment_id': ['historical'],
    'variable_id': ['tos'],
    # 'member_id': ['r1i1p1f1'],
    'source_id': ['CESM2-FV2'],
    'grid_label': ['gn'],
}

all_search

In [11]:
# Run one ESGF search per combination of keyword values in `all_search`,
# collect the non-empty result tables and cache them as a CSV file; with
# update_ESGF = False the cached CSV is read back instead.
files_type = 'OPENDAP' # HTTPServer, GridFTP, Globus

# `label` names the CSV cache file. No visible cell defines it, so on a fresh
# kernel this cell used to fail with NameError. Keep a value that is already
# set; otherwise derive one from the search keywords.
if 'label' not in globals():
    label = '_'.join('-'.join(values) for values in all_search.values())

update_ESGF = True
if update_ESGF:
    # Cartesian product of the per-keyword value lists: one tuple per search.
    x = list(all_search.values())
    searches = list(itertools.product(*x))

    dESGF = []
    for s in searches:
        search = dict(zip(all_search.keys(), s))
        print(search)
        df = esgf_search(search, server=ESGF_site, files_type=files_type)
        if len(df) > 0:
            dESGF.append(df)

    # pd.concat([]) raises an opaque "No objects to concatenate"; fail with
    # the same exception type but say which search came back empty.
    if not dESGF:
        raise ValueError(f'ESGF search returned no files for {all_search}')

    df_ESGF = pd.concat(dESGF)
    df_ESGF.to_csv(f'csv/ESGF_{label}.csv', index=False)
else:
    df_ESGF = pd.read_csv(f'csv/ESGF_{label}.csv', dtype='unicode')

# (number of result rows, number of distinct ds_dir values)
len(df_ESGF), len(df_ESGF.ds_dir.unique())

{'table_id': 'Omon', 'experiment_id': 'historical', 'variable_id': 'tos', 'source_id': 'CESM2-FV2', 'grid_label': 'gn'}


(12, 3)

In [12]:
# Open each distinct file URL from the search results and print the maximum of
# its `lat` variable as a quick sanity check.
# NOTE(review): a maximum near 9.97e36 would match the netCDF default float
# fill value, i.e. unmasked missing latitudes -- confirm.
urls = df_ESGF['url'].unique()
for url in urls:
    print(url)
    ds = xr.open_dataset(url)
    print(ds['lat'].max())

http://esgf-data1.llnl.gov/thredds/dodsC/css03_data/CMIP6/CMIP/NCAR/CESM2-FV2/historical/r1i1p1f1/Omon/tos/gn/v20191120/tos_Omon_CESM2-FV2_historical_r1i1p1f1_gn_185001-189912.nc
<xarray.DataArray 'lat' ()>
array(9.96920997e+36)
http://esgf-data1.llnl.gov/thredds/dodsC/css03_data/CMIP6/CMIP/NCAR/CESM2-FV2/historical/r1i1p1f1/Omon/tos/gn/v20191120/tos_Omon_CESM2-FV2_historical_r1i1p1f1_gn_190001-194912.nc
<xarray.DataArray 'lat' ()>
array(9.96920997e+36)
http://esgf-data1.llnl.gov/thredds/dodsC/css03_data/CMIP6/CMIP/NCAR/CESM2-FV2/historical/r1i1p1f1/Omon/tos/gn/v20191120/tos_Omon_CESM2-FV2_historical_r1i1p1f1_gn_195001-199912.nc
<xarray.DataArray 'lat' ()>
array(9.96920997e+36)
http://esgf-data1.llnl.gov/thredds/dodsC/css03_data/CMIP6/CMIP/NCAR/CESM2-FV2/historical/r1i1p1f1/Omon/tos/gn/v20191120/tos_Omon_CESM2-FV2_historical_r1i1p1f1_gn_200001-201412.nc
<xarray.DataArray 'lat' ()>
array(9.96920997e+36)
http://esgf-data1.llnl.gov/thredds/dodsC/css03_data/CMIP6/CMIP/NCAR/CESM2-FV2/histor