In [3]:
%matplotlib inline
import os
import importlib
import yaml
import copy

import xarray as xr
import numpy as np

import intake
import intake_esm
import xcollection as xc

import dask
import dask.distributed
import ncar_jobqueue

In [4]:
# Open the metadata store described by the collection YAML file, passing
# overwrite_existing=False to the opener.
col = intake.open_esm_metadatastore(
    collection_input_file='cesm1-le-collection.yml',
    overwrite_existing=False,
)

# Summarise the catalog DataFrame (columns, non-null counts, dtypes).
col.df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 116276 entries, 0 to 116275
Data columns (total 18 columns):
resource            116276 non-null object
resource_type       116276 non-null object
direct_access       116276 non-null bool
experiment          116276 non-null object
case                116276 non-null object
component           116276 non-null object
stream              116276 non-null object
variable            116276 non-null object
date_range          116276 non-null object
ensemble            116276 non-null int64
files               116276 non-null object
files_basename      116276 non-null object
files_dirname       116276 non-null object
ctrl_branch_year    0 non-null float64
year_offset         15145 non-null float64
sequence_order      116276 non-null int64
has_ocean_bgc       116276 non-null bool
grid                13013 non-null object
dtypes: bool(2), float64(2), int64(2), object(12)
memory usage: 15.3+ MB


In [5]:
# Experiments to analyse.
experiments = ['20C', 'RCP85']

# Unique ensemble numbers among the catalog entries for those experiments
# that are flagged has_ocean_bgc=True.
bgc_matches = col.search(experiment=experiments, has_ocean_bgc=True)
ensembles = bgc_matches.results.ensemble.unique().tolist()
print(ensembles)

[1, 2, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 26, 27, 28, 29, 30, 31, 32, 34, 35, 101, 102, 103, 104, 105]


In [6]:
# Search terms shared by every catalog query in this notebook.
query_base = {
    'experiment': experiments,
    'ensemble': ensembles,
    'stream': 'pop.h',
}

# Extra constraint merged into each query: only direct_access=True resources.
resource_constraints = {'direct_access': True}

In [7]:
# Read the variable groups from disk.  yaml.safe_load only builds plain YAML
# types and works on every PyYAML release, whereas a bare yaml.load(fid)
# (no Loader argument) warns on PyYAML 5.1+ and fails on 6.x.
# NOTE(review): assumes variables.yml is a plain mapping of
# group name -> list of variable names (no custom YAML tags) -- confirm.
with open('variables.yml', 'r') as fid:
    variables = yaml.safe_load(fid)

# Flatten every group into a single list of requested variable names.
variable_list = []
for group_members in variables.values():
    variable_list.extend(group_members)

query = {**dict(variable=variable_list), **query_base, **resource_constraints}

# Replace the requested list with the variables the catalog search returned.
cat = col.search(**query)
variable_list = cat.results.variable.unique().tolist()
print(variable_list)

['ECOSYS_IFRAC', 'HMXL', 'Jint_100m_DIC', 'PD', 'SALT', 'SST', 'TBLT', 'TEMP', 'XMXL', 'diatChl', 'photoC_diat', 'photoC_sp', 'spChl']


In [34]:
# Operator shared by every analysis: esmlab.climatology.compute_mon_climatology.
monclim_operator = xc.operator(
    applied_method="time:mon_clim",
    module='esmlab.climatology',
    function='compute_mon_climatology',
)

# Template recipe; the processing loop copies it and prepends a time selection.
base_recipe = {
    'name': 'monclim',
    'description': 'compute monthly climatology',
    'operators': [monclim_operator],
}

In [35]:
# Control table: one entry per time window, each with the variables to process.
#
# The `!!python/object/apply:slice` tag makes PyYAML build real Python `slice`
# objects, which only the full Python loader supports.  The loader is passed
# explicitly because calling yaml.load() without one warns on PyYAML 5.1+ and
# raises TypeError on 6.x.  Using the unsafe loader is acceptable here only
# because the document is a literal written in this cell, not external input.
variable_control = yaml.load(f'''
- slice: !!python/object/apply:slice
         args: ['1940', '1949']
  varlist: {variable_list}

- slice: !!python/object/apply:slice
         args: ['1990', '1999']
  varlist: {variable_list}

- slice: !!python/object/apply:slice
         args: ['2040', '2049']
  varlist: {variable_list}

- slice: !!python/object/apply:slice
         args: ['2090', '2099']
  varlist: {variable_list}

- slice: !!python/object/apply:slice
         args: ['1995', '2005']
  varlist: ['TEMP', 'SALT']

- slice: !!python/object/apply:slice
         args: ['1995', '2010']
  varlist: ['photoC_diat', 'photoC_sp', 'diatChl', 'spChl']
''', Loader=yaml.Loader)
print(yaml.dump(variable_control))

- slice: !!python/object/apply:builtins.slice ['1940', '1949', null]
  varlist: [ECOSYS_IFRAC, HMXL, Jint_100m_DIC, PD, SALT, SST, TBLT, TEMP, XMXL, diatChl,
    photoC_diat, photoC_sp, spChl]
- slice: !!python/object/apply:builtins.slice ['1990', '1999', null]
  varlist: [ECOSYS_IFRAC, HMXL, Jint_100m_DIC, PD, SALT, SST, TBLT, TEMP, XMXL, diatChl,
    photoC_diat, photoC_sp, spChl]
- slice: !!python/object/apply:builtins.slice ['2040', '2049', null]
  varlist: [ECOSYS_IFRAC, HMXL, Jint_100m_DIC, PD, SALT, SST, TBLT, TEMP, XMXL, diatChl,
    photoC_diat, photoC_sp, spChl]
- slice: !!python/object/apply:builtins.slice ['2090', '2099', null]
  varlist: [ECOSYS_IFRAC, HMXL, Jint_100m_DIC, PD, SALT, SST, TBLT, TEMP, XMXL, diatChl,
    photoC_diat, photoC_sp, spChl]
- slice: !!python/object/apply:builtins.slice ['1995', '2005', null]
  varlist: [TEMP, SALT]
- slice: !!python/object/apply:builtins.slice ['1995', '2010', null]
  varlist: [photoC_diat, photoC_sp, diatChl, spChl]



In [30]:
# Create an NCARCluster and attach a dask.distributed client to it.
cluster = ncar_jobqueue.NCARCluster()
client = dask.distributed.Client(cluster)
# Scale the cluster to 9 * 6 = 54 workers.
# NOTE(review): the meaning of the two factors is not stated here -- confirm.
n_workers = 9 * 6
cluster.scale(n_workers)

In [None]:
# For every (time window, variable) pair listed in variable_control, build a
# recipe that first selects the time window and then applies the operators of
# base_recipe, and hand it to xc.analyzed_collection.
for d in variable_control:
    # Index the required keys directly: a malformed entry then fails with a
    # KeyError naming the missing key instead of an error on None further down.
    time_slice = d['slice']
    varlist = d['varlist']
    slice_str = f'{time_slice.start}-{time_slice.stop}'

    for v in varlist:
        print(f'working on {v}')
        query = {**dict(variable=v), **query_base, **resource_constraints}

        # Analysis name, e.g. cesm1_le.20C+RCP85.TEMP.monclim.1940-1949
        # (built inline rather than through a lambda re-created on each pass).
        analysis_name = '.'.join([
            'cesm1_le',
            '+'.join(query['experiment']),
            query['variable'],
            base_recipe['name'],
            slice_str,
        ])

        # Deep-copy so base_recipe itself is never modified, then put the
        # time selection ahead of the operators copied from base_recipe.
        recipe = copy.deepcopy(base_recipe)
        recipe['operators'].insert(
            0,
            xc.operator(module='esmlab.utils.time',
                        function='sel_time',
                        kwargs={'indexer_val': time_slice}),
        )

        dc = xc.analyzed_collection(collection_obj=col,
                                    analysis_recipe=recipe,
                                    analysis_name=analysis_name,
                                    overwrite_existing=False,
                                    file_format="nc",
                                    **query)


working on ECOSYS_IFRAC
working on HMXL
working on Jint_100m_DIC
working on PD
working on SALT
working on SST
working on TBLT
working on TEMP


INFO:xcollection.core:applying operator: <function sel_time at 0x2b76609e9488>
INFO:xcollection.core:applying operator: <function compute_mon_climatology at 0x2b76609e9510>
INFO:xcollection.core:writing /glade/scratch/mclong/future-arctic-data/cesm1_le.20C+RCP85.TEMP.monclim.1940-1949.028.nc
INFO:xcollection.core:applying operator: <function sel_time at 0x2b76609e9488>
INFO:xcollection.core:applying operator: <function compute_mon_climatology at 0x2b76609e9510>
INFO:xcollection.core:writing /glade/scratch/mclong/future-arctic-data/cesm1_le.20C+RCP85.TEMP.monclim.1940-1949.029.nc
INFO:xcollection.core:applying operator: <function sel_time at 0x2b76609e9488>
INFO:xcollection.core:applying operator: <function compute_mon_climatology at 0x2b76609e9510>
INFO:xcollection.core:writing /glade/scratch/mclong/future-arctic-data/cesm1_le.20C+RCP85.TEMP.monclim.1940-1949.030.nc
INFO:xcollection.core:applying operator: <function sel_time at 0x2b76609e9488>
INFO:xcollection.core:applying operator: <

working on XMXL


INFO:xcollection.core:applying operator: <function sel_time at 0x2b76609e9488>
INFO:xcollection.core:applying operator: <function compute_mon_climatology at 0x2b76609e9510>
INFO:xcollection.core:writing /glade/scratch/mclong/future-arctic-data/cesm1_le.20C+RCP85.XMXL.monclim.1940-1949.001.nc
INFO:xcollection.core:applying operator: <function sel_time at 0x2b76609e9488>
INFO:xcollection.core:applying operator: <function compute_mon_climatology at 0x2b76609e9510>
INFO:xcollection.core:writing /glade/scratch/mclong/future-arctic-data/cesm1_le.20C+RCP85.XMXL.monclim.1940-1949.002.nc
INFO:xcollection.core:applying operator: <function sel_time at 0x2b76609e9488>
INFO:xcollection.core:applying operator: <function compute_mon_climatology at 0x2b76609e9510>
INFO:xcollection.core:writing /glade/scratch/mclong/future-arctic-data/cesm1_le.20C+RCP85.XMXL.monclim.1940-1949.009.nc
INFO:xcollection.core:applying operator: <function sel_time at 0x2b76609e9488>
INFO:xcollection.core:applying operator: <

In [None]:
# Load the watermark IPython extension so the %watermark magic used in the
# next cell is available.
%load_ext watermark

In [None]:
# Print environment/version information for this run, for reproducibility.
# See the watermark documentation for the meaning of each flag.
%watermark --iversion -g -h -m -v -u -d