In [1]:
%matplotlib inline
import os
import importlib
import yaml

import xarray as xr
import numpy as np

import intake
import intake_cesm
import xcollection as xc

In [2]:
import dask
from dask.distributed import Client
from dask_jobqueue import PBSCluster

# Login name, used below to build a per-user scratch directory.
USER = os.environ['USER']

# Sizing knobs for the PBS allocation.
Nnodes = 4
processes = 18
project = 'NCGD0011'

cluster = PBSCluster(
    queue='regular',
    cores=18,
    processes=processes,
    memory='100GB',
    project=project,
    walltime='04:00:00',
    local_directory=f'/glade/scratch/{USER}/dask-tmp',
)
client = Client(cluster)

# Scale to processes * Nnodes (= 72) workers in total.
cluster.scale(processes * Nnodes)

In [3]:
# Open the 'cesm1_le' metadata store and print a summary of the DataFrame
# (`col.df`) that backs it.
col = intake.open_cesm_metadatastore('cesm1_le')
col.df.info()

Active collection: cesm1_le
<class 'pandas.core.frame.DataFrame'>
Int64Index: 262092 entries, 0 to 262091
Data columns (total 18 columns):
resource            262092 non-null object
resource_type       262092 non-null object
direct_access       262092 non-null bool
experiment          262092 non-null object
case                262092 non-null object
component           262092 non-null object
stream              262092 non-null object
variable            262092 non-null object
date_range          262092 non-null object
ensemble            262092 non-null int64
files               262092 non-null object
files_basename      262092 non-null object
files_dirname       262092 non-null object
ctrl_branch_year    0 non-null float64
year_offset         34112 non-null float64
sequence_order      262092 non-null int64
has_ocean_bgc       262092 non-null bool
grid                52764 non-null object
dtypes: bool(2), float64(2), int64(2), object(12)
memory usage: 34.5+ MB


In [4]:
experiments = ['20C', 'RCP85']

# Keep only the ensemble numbers of catalog entries flagged has_ocean_bgc.
bgc_entries = col.search(experiment=experiments, has_ocean_bgc=True)
ensembles = bgc_entries.results.ensemble.unique().tolist()

In [5]:
# Search terms shared by every catalog query below.
query_base = {'experiment': experiments, 'ensemble': ensembles}
# Restrict results to a single resource entry of the collection.
resource_constraints = {
    'resource': 'GLADE:posix:/glade/collections/cdg/data/cesmLE/CESM-CAM5-BGC-LE',
}

In [10]:
# variables.yml is read as a mapping whose values are lists of variable names.
with open('variables.yml', 'r') as fid:
    # safe_load builds plain Python data only. A bare yaml.load(fid) without an
    # explicit Loader is deprecated since PyYAML 5.1 and a TypeError in PyYAML 6.
    variables = yaml.safe_load(fid)

# Flatten every group's list into one list of requested variables.
variable_list = []
for v in variables.values():
    variable_list.extend(v)
print(variable_list)

query = {**dict(variable=variable_list), **query_base, **resource_constraints}

# Narrow the requested names down to the variables the search actually returns;
# from here on `variable_list` holds only those.
cat = col.search(**query)
variable_list = cat.results.variable.unique().tolist()
print(variable_list)

['ECOSYS_IFRAC_2', 'HMXL_2', 'SST', 'XMXL_2', 'diatC_zint_100m', 'diatChl_SURF', 'photoC_diat_zint', 'photoC_sp_zint', 'spC_zint_100m', 'spChl_SURF', 'zooC_zint_100m', 'ECOSYS_IFRAC', 'Fe', 'HMXL', 'HOR_DIFF', 'Jint_100m_DIC', 'NH4', 'NO3', 'NOx_FLUX', 'PAR_avg', 'PD', 'PO4', 'POC_FLUX_IN', 'POC_PROD', 'SALT', 'SiO3', 'TBLT', 'TEMP', 'XMXL', 'diatC', 'diatChl', 'diat_Fe_lim', 'diat_N_lim', 'diat_PO4_lim', 'diat_SiO3_lim', 'diat_agg', 'diat_light_lim', 'diat_loss', 'graze_diat', 'graze_diaz', 'graze_sp', 'photoC_NO3_diat', 'photoC_NO3_diat_zint', 'photoC_NO3_sp', 'photoC_NO3_sp_zint', 'photoC_diat', 'photoC_sp', 'spC', 'spChl', 'sp_Fe_lim', 'sp_N_lim', 'sp_PO4_lim', 'sp_agg', 'sp_light_lim', 'sp_loss', 'tend_zint_100m_NO3', 'zooC', 'zoo_loss']
['HMXL_2', 'SST', 'XMXL_2', 'ECOSYS_IFRAC', 'HMXL', 'Jint_100m_DIC', 'PD', 'SALT', 'TBLT', 'TEMP', 'XMXL', 'diatChl', 'photoC_diat', 'photoC_sp', 'spChl']


In [7]:
# Keep the file name under its own name so `recipes` only ever refers to the
# parsed content (it is indexed by analysis name further down).
recipes_path = 'analysis_recipes.yml'
with open(recipes_path) as fid:
    # safe_load builds plain Python data only. A bare yaml.load(fid) without an
    # explicit Loader is deprecated since PyYAML 5.1 and a TypeError in PyYAML 6.
    recipes = yaml.safe_load(fid)

In [9]:
analysis_list = ['monclim_1940s', 'monclim_1990s', 'monclim_2090s']

# Apply every recipe to every variable, one analyzed collection per pair.
# NOTE: `dc` is rebound on each pass, so only the last collection remains
# reachable once the loops finish.
for analysis in analysis_list:
    for v in variable_list:
        query = {'variable': v, **query_base, **resource_constraints}
        dc = xc.analyzed_collection(
            collection='cesm1_le',
            analysis_recipe=recipes[analysis],
            # Name layout: cesm1_le.<experiments joined by '+'>.<variable>.<analysis>
            analysis_name='.'.join([
                'cesm1_le',
                '+'.join(query['experiment']),
                query['variable'],
                analysis,
            ]),
            overwrite_existing=False,
            file_format="nc",
            **query,
        )


Active collection: cesm1_le
Active collection: cesm1_le
['/glade/collections/cdg/data/cesmLE/CESM-CAM5-BGC-LE/ocn/proc/tseries/daily/SST/b.e11.B20TRC5CNBDRD.f09_g16.001.pop.h.nday1.SST.18500102-20051231.nc', '/glade/collections/cdg/data/cesmLE/CESM-CAM5-BGC-LE/ocn/proc/tseries/monthly/SST/b.e11.B20TRC5CNBDRD.f09_g16.001.pop.h.SST.185001-200512.nc', '/glade/collections/cdg/data/cesmLE/CESM-CAM5-BGC-LE/ocn/proc/tseries/daily/SST/b.e11.BRCP85C5CNBDRD.f09_g16.001.pop.h.nday1.SST.20060102-20801231.nc', '/glade/collections/cdg/data/cesmLE/CESM-CAM5-BGC-LE/ocn/proc/tseries/daily/SST/b.e11.BRCP85C5CNBDRD.f09_g16.001.pop.h.nday1.SST.20810101-21001231.nc', '/glade/collections/cdg/data/cesmLE/CESM-CAM5-BGC-LE/ocn/proc/tseries/monthly/SST/b.e11.BRCP85C5CNBDRD.f09_g16.001.pop.h.SST.200601-208012.nc', '/glade/collections/cdg/data/cesmLE/CESM-CAM5-BGC-LE/ocn/proc/tseries/monthly/SST/b.e11.BRCP85C5CNBDRD.f09_g16.001.pop.h.SST.208101-210012.nc']


KeyboardInterrupt: 

In [None]:
# Convert the last analyzed collection left in `dc` by the loop above and
# display it. The original bare `d` was an undefined name (NameError).
ds = dc.to_xarray()
ds

In [None]:
# Load the `watermark` IPython extension so the %watermark magic is available.
%load_ext watermark

In [None]:
# Print an environment report for reproducibility.
# NOTE(review): the flags are presumably imported-package versions, git hash,
# hostname, machine info, Python version, and last-updated date; confirm
# against the installed watermark version.
%watermark --iversion -g -h -m -v -u -d