# How to build a catalog for a CESM run

## Import packages

In [1]:
from ecgtools import Builder
from ecgtools.parsers.cesm import parse_cesm_history

## Instantiate a Builder object

In [2]:
# Configure the catalog builder, pointing it at the archive directory to scan.
b = Builder(
    "/glade/scratch/kristenk/archive/",
    # Parser imported from ecgtools.parsers.cesm; presumably applied to each
    # discovered file -- confirm against the ecgtools documentation.
    parsing_func=parse_cesm_history,
    # Glob patterns for paths to leave out of the catalog.
    exclude_patterns=["*/tseries/*", "*/rest/*"],
    depth=2,
    # Matches the 20 concurrent workers reported in the build log.
    njobs=20,
)

## Build catalog and inspect built catalog

In [3]:
# Run the build and rebind `b` to the returned object, which the following
# cells use for `.df`, `.invalid_assets` and `.save(...)`. The joblib progress
# log shown below this cell is emitted during this call.
b = b.build()

[Parallel(n_jobs=20)]: Using backend LokyBackend with 20 concurrent workers.
[Parallel(n_jobs=20)]: Done  32 tasks      | elapsed:    0.4s
[Parallel(n_jobs=20)]: Done 248 out of 248 | elapsed:    0.8s finished
[Parallel(n_jobs=20)]: Using backend LokyBackend with 20 concurrent workers.
[Parallel(n_jobs=20)]: Done  32 tasks      | elapsed:    2.5s
[Parallel(n_jobs=20)]: Done 122 tasks      | elapsed:    4.0s
[Parallel(n_jobs=20)]: Done 248 tasks      | elapsed:    6.0s
[Parallel(n_jobs=20)]: Done 410 tasks      | elapsed:    9.0s
[Parallel(n_jobs=20)]: Done 608 tasks      | elapsed:   12.3s
[Parallel(n_jobs=20)]: Done 842 tasks      | elapsed:   14.9s
[Parallel(n_jobs=20)]: Done 1112 tasks      | elapsed:   17.8s
[Parallel(n_jobs=20)]: Done 1418 tasks      | elapsed:   22.7s
[Parallel(n_jobs=20)]: Done 1760 tasks      | elapsed:   28.0s
[Parallel(n_jobs=20)]: Done 2336 tasks      | elapsed:   35.2s
[Parallel(n_jobs=20)]: Done 3164 tasks      | elapsed:   46.1s
[Parallel(n_jobs=20)]: Don

In [4]:
# Preview the first few rows of the built catalog table (one row per asset,
# with the component/stream/case/frequency/date/variables/path columns).
b.df.head()

Unnamed: 0,component,stream,case,frequency,date,variables,path
0,ocn,pop.h.ecosys.nday1,g.e22a06.g1850ecoiaf_jra_phys_dev.tl319_g17.4p...,day_1,0053-01-01,"[photoC_sp_zint_2, photoC_diat_zint_2, photoC_...",/glade/scratch/kristenk/archive/g.e22a06.G1850...
1,ocn,pop.h,g.e22a06.g1850ecoiaf_jra_phys_dev.tl319_g17.4p...,month_1,0046-08,"[UVEL, UVEL2, VVEL, VVEL2, TEMP, dTEMP_POS_2D,...",/glade/scratch/kristenk/archive/g.e22a06.G1850...
2,ocn,pop.h,g.e22a06.g1850ecoiaf_jra_phys_dev.tl319_g17.4p...,month_1,0061-07,"[UVEL, UVEL2, VVEL, VVEL2, TEMP, dTEMP_POS_2D,...",/glade/scratch/kristenk/archive/g.e22a06.G1850...
3,ocn,pop.h,g.e22a06.g1850ecoiaf_jra_phys_dev.tl319_g17.4p...,month_1,0042-09,"[UVEL, UVEL2, VVEL, VVEL2, TEMP, dTEMP_POS_2D,...",/glade/scratch/kristenk/archive/g.e22a06.G1850...
4,ocn,pop.h,g.e22a06.g1850ecoiaf_jra_phys_dev.tl319_g17.4p...,month_1,0043-11,"[UVEL, UVEL2, VVEL, VVEL2, TEMP, dTEMP_POS_2D,...",/glade/scratch/kristenk/archive/g.e22a06.G1850...


In [5]:
# Show the table of assets flagged as invalid during the build, each paired
# with the traceback text recorded for it.
b.invalid_assets

Unnamed: 0,INVALID_ASSET,TRACEBACK
189,/glade/scratch/kristenk/archive/g.e22a06.G1850...,"Traceback (most recent call last):\n File ""/g..."
742,/glade/scratch/kristenk/archive/g.e22a06.G1850...,"Traceback (most recent call last):\n File ""/g..."
743,/glade/scratch/kristenk/archive/g.e22a06.G1850...,"Traceback (most recent call last):\n File ""/g..."
744,/glade/scratch/kristenk/archive/g.e22a06.G1850...,"Traceback (most recent call last):\n File ""/g..."
745,/glade/scratch/kristenk/archive/g.e22a06.G1850...,"Traceback (most recent call last):\n File ""/g..."
...,...,...
45122,/glade/scratch/kristenk/archive/g.e22a06.G1850...,"Traceback (most recent call last):\n File ""/g..."
45123,/glade/scratch/kristenk/archive/g.e22a06.G1850...,"Traceback (most recent call last):\n File ""/g..."
45124,/glade/scratch/kristenk/archive/g.e22a06.G1850...,"Traceback (most recent call last):\n File ""/g..."
45125,/glade/scratch/kristenk/archive/g.e22a06.G1850...,"Traceback (most recent call last):\n File ""/g..."


## Save built catalog to disk

In [6]:
# Persist the catalog. The column names passed here ("path", "variables",
# "component", "stream", "case", "date") are columns of the `b.df` table.
b.save(
    "/glade/scratch/abanihi/cesm-hist-test.csv",
    path_column="path",
    variable_column="variables",
    data_format="netcdf",
    groupby_attrs=["component", "stream", "case"],
    aggregations=[
        dict(
            type="join_existing",
            attribute_name="date",
            options=dict(dim="time", coords="minimal", compat="override"),
        )
    ],
)

Saved catalog location: /glade/scratch/abanihi/cesm-hist-test.json and /glade/scratch/abanihi/cesm-hist-test.csv
