In [41]:
import os

import ast
import intake

from ecgtools import Builder
from ecgtools.parsers.cesm import parse_cesm_history

In [28]:
# Directory that holds the archived output of the perturbation runs listed in `runs`.
archive = '/glade/scratch/mclong/archive'

# Casename of the control integration; every entry in `runs` is this name plus
# a ".<label>" suffix.
ctrl_casename = 'g.e22.GOMIPECOIAF_JRA-1p4-2018.TL319_g17.SMYLE.005'

# Short experiment key -> {casename, histpath}.
data_info = dict(
    ctrl=dict(
        casename=ctrl_casename,
        histpath='/glade/campaign/cesm/development/espwg/SMYLE/initial_conditions/SMYLE-FOSI/',
    ),
)

# Perturbation runs to register; uncomment a line to include that run.
runs = [
    'g.e22.GOMIPECOIAF_JRA-1p4-2018.TL319_g17.SMYLE.005.PCref2x', 
#    'g.e22.GOMIPECOIAF_JRA-1p4-2018.TL319_g17.SMYLE.005.PCref1.5x',
#    'g.e22.GOMIPECOIAF_JRA-1p4-2018.TL319_g17.SMYLE.005.PCref1.2x',
]


def experiment_key(casename, ctrl_casename=ctrl_casename):
    """Return the short key used for `casename` in `data_info`.

    The key is whatever follows "<ctrl_casename>." in the casename. Taking the
    last dot-separated token instead would truncate labels that contain a dot
    themselves (e.g. "PCref1.5x" would become "5x").

    Parameters
    ----------
    casename : str
        Full casename of a run.
    ctrl_casename : str, optional
        Casename of the control run that `casename` is expected to extend.

    Returns
    -------
    str
        The label after the control prefix, or the full `casename` when it
        does not extend `ctrl_casename` (so keys stay unambiguous).
    """
    prefix = f'{ctrl_casename}.'
    if casename.startswith(prefix) and len(casename) > len(prefix):
        return casename[len(prefix):]
    return casename


data_info.update(
    {
        experiment_key(k): dict(casename=k, histpath=f'{archive}/{k}')
        for k in runs
    }
)
data_info

{'ctrl': {'casename': 'g.e22.GOMIPECOIAF_JRA-1p4-2018.TL319_g17.SMYLE.005',
  'histpath': '/glade/campaign/cesm/development/espwg/SMYLE/initial_conditions/SMYLE-FOSI/'},
 'PCref2x': {'casename': 'g.e22.GOMIPECOIAF_JRA-1p4-2018.TL319_g17.SMYLE.005.PCref2x',
  'histpath': '/glade/scratch/mclong/archive/g.e22.GOMIPECOIAF_JRA-1p4-2018.TL319_g17.SMYLE.005.PCref2x'}}

In [35]:
# Point a catalog builder at the control run's history directory and run it
# with the CESM history-file parser. `b` ends up bound to whatever `.build()`
# returns, which the later save step relies on.
ctrl_histpath = data_info["ctrl"]["histpath"]
skip_patterns = ["*/rest/*", "*/logs/*", "*/proc/*"]  # sub-directories left out of the catalog

b = Builder(
    ctrl_histpath,
    depth=1,
    exclude_patterns=skip_patterns,
    njobs=5,  # matches the 5 concurrent workers reported in the joblib log
).build(parse_cesm_history)

[Parallel(n_jobs=5)]: Using backend LokyBackend with 5 concurrent workers.
[Parallel(n_jobs=5)]: Done   8 tasks      | elapsed:    0.0s
[Parallel(n_jobs=5)]: Done 604 tasks      | elapsed:    0.4s
[Parallel(n_jobs=5)]: Done 756 out of 756 | elapsed:    0.7s finished
[Parallel(n_jobs=5)]: Using backend LokyBackend with 5 concurrent workers.
[Parallel(n_jobs=5)]: Done   8 tasks      | elapsed:    0.7s
[Parallel(n_jobs=5)]: Done  62 tasks      | elapsed:    4.5s
[Parallel(n_jobs=5)]: Done 152 tasks      | elapsed:   11.2s
[Parallel(n_jobs=5)]: Done 278 tasks      | elapsed:   20.0s
[Parallel(n_jobs=5)]: Done 440 tasks      | elapsed:   31.6s
[Parallel(n_jobs=5)]: Done 638 tasks      | elapsed:   45.8s
[Parallel(n_jobs=5)]: Done 872 tasks      | elapsed:  1.0min
[Parallel(n_jobs=5)]: Done 1142 tasks      | elapsed:  1.4min
[Parallel(n_jobs=5)]: Done 1448 tasks      | elapsed:  1.7min
[Parallel(n_jobs=5)]: Done 1790 tasks      | elapsed:  2.1min
[Parallel(n_jobs=5)]: Done 2168 tasks      | 

In [42]:
# The catalog files are written under ./catalogs, so create it if it is missing.
os.makedirs("catalogs", exist_ok=True)

# Aggregation rule stored with the catalog: entries are joined along the
# existing "time" dimension, keyed on the "date" column.
time_join = {
    "type": "join_existing",
    "attribute_name": "date",
    "options": {"dim": "time", "coords": "minimal", "compat": "override"},
}

# Entries sharing these attributes are grouped together; the recorded
# to_dataset_dict output shows the resulting key pattern 'component.stream.case'.
group_columns = ["component", "stream", "case"]

b.save(
    "catalogs/smyle-fosi-ctrl.csv.gz",
    path_column_name="path",
    variable_column_name="variables",
    data_format="netcdf",
    groupby_attrs=group_columns,
    aggregations=[time_join],
)

Saved catalog location: catalogs/smyle-fosi-ctrl.csv.json and catalogs/smyle-fosi-ctrl.csv.gz


In [48]:
# Re-open the catalog that was just saved. The "variables" column is run
# through ast.literal_eval on read so each cell becomes a Python object again
# rather than staying a string (presumably via the CSV reader's `converters`
# option -- confirm against the intake-esm version in use).
variables_converter = {"variables": ast.literal_eval}

cat = intake.open_esm_datastore(
    "catalogs/smyle-fosi-ctrl.csv.json",
    csv_kwargs={"converters": variables_converter},
)
cat

Unnamed: 0,unique
component,2
stream,5
date,5235
case,1
member_id,1
frequency,3
variables,620
path,6015


In [57]:
# Narrow the catalog to the entries matching the FG_CO2 variable, then display
# the table backing the resulting sub-catalog.
fg_co2_query = {"variables": "FG_CO2"}
cat_sub = cat.search(**fg_co2_query)
cat_sub.df

Unnamed: 0,component,stream,date,case,member_id,frequency,variables,path
0,ocn,pop.h,0306-01,g.e22.GOMIPECOIAF_JRA-1p4-2018.TL319_g17.SMYLE...,5,month_1,"[UVEL, UVEL2, VVEL, VVEL2, TEMP, dTEMP_POS_2D,...",/glade/campaign/cesm/development/espwg/SMYLE/i...
1,ocn,pop.h,0306-02,g.e22.GOMIPECOIAF_JRA-1p4-2018.TL319_g17.SMYLE...,5,month_1,"[UVEL, UVEL2, VVEL, VVEL2, TEMP, dTEMP_POS_2D,...",/glade/campaign/cesm/development/espwg/SMYLE/i...
2,ocn,pop.h,0306-03,g.e22.GOMIPECOIAF_JRA-1p4-2018.TL319_g17.SMYLE...,5,month_1,"[UVEL, UVEL2, VVEL, VVEL2, TEMP, dTEMP_POS_2D,...",/glade/campaign/cesm/development/espwg/SMYLE/i...
3,ocn,pop.h,0306-04,g.e22.GOMIPECOIAF_JRA-1p4-2018.TL319_g17.SMYLE...,5,month_1,"[UVEL, UVEL2, VVEL, VVEL2, TEMP, dTEMP_POS_2D,...",/glade/campaign/cesm/development/espwg/SMYLE/i...
4,ocn,pop.h,0306-05,g.e22.GOMIPECOIAF_JRA-1p4-2018.TL319_g17.SMYLE...,5,month_1,"[UVEL, UVEL2, VVEL, VVEL2, TEMP, dTEMP_POS_2D,...",/glade/campaign/cesm/development/espwg/SMYLE/i...
...,...,...,...,...,...,...,...,...
751,ocn,pop.h,0368-08,g.e22.GOMIPECOIAF_JRA-1p4-2018.TL319_g17.SMYLE...,5,month_1,"[UVEL, UVEL2, VVEL, VVEL2, TEMP, dTEMP_POS_2D,...",/glade/campaign/cesm/development/espwg/SMYLE/i...
752,ocn,pop.h,0368-09,g.e22.GOMIPECOIAF_JRA-1p4-2018.TL319_g17.SMYLE...,5,month_1,"[UVEL, UVEL2, VVEL, VVEL2, TEMP, dTEMP_POS_2D,...",/glade/campaign/cesm/development/espwg/SMYLE/i...
753,ocn,pop.h,0368-10,g.e22.GOMIPECOIAF_JRA-1p4-2018.TL319_g17.SMYLE...,5,month_1,"[UVEL, UVEL2, VVEL, VVEL2, TEMP, dTEMP_POS_2D,...",/glade/campaign/cesm/development/espwg/SMYLE/i...
754,ocn,pop.h,0368-11,g.e22.GOMIPECOIAF_JRA-1p4-2018.TL319_g17.SMYLE...,5,month_1,"[UVEL, UVEL2, VVEL, VVEL2, TEMP, dTEMP_POS_2D,...",/glade/campaign/cesm/development/espwg/SMYLE/i...


In [58]:
# Options handed to the netCDF reader; the recorded error message shows them
# arriving at xarray.open_dataset() unchanged.
cdf_kwargs = {
    "use_cftime": True,
    "chunks": {"time": 12},
}

# Load every dataset described by the FG_CO2 sub-catalog into a dict.
# NOTE(review): as recorded, this call raised OSError while opening the
# ...pop.h.0350-04.nc history file -- cause not established in this notebook.
dsets = cat_sub.to_dataset_dict(cdf_kwargs=cdf_kwargs)
dsets


--> The keys in the returned dictionary of datasets are constructed as follows:
	'component.stream.case'
█

OSError: 
            Failed to open netCDF/HDF dataset.

            *** Arguments passed to xarray.open_dataset() ***:

            - filename_or_obj: /glade/campaign/cesm/development/espwg/SMYLE/initial_conditions/SMYLE-FOSI/ocn/hist/g.e22.GOMIPECOIAF_JRA-1p4-2018.TL319_g17.SMYLE.005.pop.h.0350-04.nc
            - kwargs: {'use_cftime': True, 'chunks': {'time': 12}}

            *** fsspec options used ***:

            - root: /glade/campaign/cesm/development/espwg/SMYLE/initial_conditions/SMYLE-FOSI/ocn/hist/g.e22.GOMIPECOIAF_JRA-1p4-2018.TL319_g17.SMYLE.005.pop.h.0350-04.nc
            - protocol: None

            ********************************************
            

In [59]:
import xarray as xr

In [61]:
# Open, on its own, the single history file named in the OSError reported by
# to_dataset_dict, to inspect it directly.
failing_file = (
    "/glade/campaign/cesm/development/espwg/SMYLE/initial_conditions/SMYLE-FOSI/"
    "ocn/hist/"
    "g.e22.GOMIPECOIAF_JRA-1p4-2018.TL319_g17.SMYLE.005.pop.h.0350-04.nc"
)
ds = xr.open_dataset(failing_file)
ds