# How to build a catalog for a CESM run

## Import packages

In [1]:
from ecgtools import Builder
from ecgtools.parsers.cesm import parse_cesm_history

## Instantiate a Builder object

In [2]:
# Set up the catalog builder for the archive directory.
# - parse_cesm_history is supplied as the parsing function.
# - Paths matching the tseries/ and rest/ glob patterns are excluded
#   (presumably time-series and restart output -- confirm).
# - njobs=20 shows up as the joblib worker count in the build log.
b = Builder(
    "/glade/scratch/kristenk/archive/",
    parsing_func=parse_cesm_history,
    exclude_patterns=["*/tseries/*", "*/rest/*"],
    depth=2,
    njobs=20,
)

## Build catalog and inspect built catalog

In [3]:
# Run the build and keep the returned object in `b`; the following cells
# read `b.df` and `b.invalid_assets` from it.
b = b.build()

[Parallel(n_jobs=20)]: Using backend LokyBackend with 20 concurrent workers.
[Parallel(n_jobs=20)]: Done  32 tasks      | elapsed:    0.5s
[Parallel(n_jobs=20)]: Done 209 out of 248 | elapsed:    0.8s remaining:    0.1s
[Parallel(n_jobs=20)]: Done 248 out of 248 | elapsed:    1.1s finished
[Parallel(n_jobs=20)]: Using backend LokyBackend with 20 concurrent workers.
[Parallel(n_jobs=20)]: Done  32 tasks      | elapsed:    4.4s
[Parallel(n_jobs=20)]: Done 122 tasks      | elapsed:    6.8s
[Parallel(n_jobs=20)]: Done 248 tasks      | elapsed:    9.7s
[Parallel(n_jobs=20)]: Done 410 tasks      | elapsed:   13.7s
[Parallel(n_jobs=20)]: Done 608 tasks      | elapsed:   18.9s
[Parallel(n_jobs=20)]: Done 842 tasks      | elapsed:   23.4s
[Parallel(n_jobs=20)]: Done 1112 tasks      | elapsed:   27.9s
[Parallel(n_jobs=20)]: Done 1418 tasks      | elapsed:   34.1s
[Parallel(n_jobs=20)]: Done 1760 tasks      | elapsed:   41.9s
[Parallel(n_jobs=20)]: Done 2138 tasks      | elapsed:   49.0s
[Paralle

In [4]:
# Preview the first rows of the built catalog table.
b.df.head()

Unnamed: 0,component,stream,case,date,frequency,variables,path
0,ocn,pop.h.ecosys.nday1,g.e22a06.G1850ECOIAF_JRA_PHYS_DEV.TL319_g17.4p...,0053-01-01,day_1,"[photoC_sp_zint_2, photoC_diat_zint_2, photoC_...",/glade/scratch/kristenk/archive/g.e22a06.G1850...
1,ocn,pop.h,g.e22a06.G1850ECOIAF_JRA_PHYS_DEV.TL319_g17.4p...,0046-08,month_1,"[UVEL, UVEL2, VVEL, VVEL2, TEMP, dTEMP_POS_2D,...",/glade/scratch/kristenk/archive/g.e22a06.G1850...
2,ocn,pop.h,g.e22a06.G1850ECOIAF_JRA_PHYS_DEV.TL319_g17.4p...,0061-07,month_1,"[UVEL, UVEL2, VVEL, VVEL2, TEMP, dTEMP_POS_2D,...",/glade/scratch/kristenk/archive/g.e22a06.G1850...
3,ocn,pop.h,g.e22a06.G1850ECOIAF_JRA_PHYS_DEV.TL319_g17.4p...,0042-09,month_1,"[UVEL, UVEL2, VVEL, VVEL2, TEMP, dTEMP_POS_2D,...",/glade/scratch/kristenk/archive/g.e22a06.G1850...
4,ocn,pop.h,g.e22a06.G1850ECOIAF_JRA_PHYS_DEV.TL319_g17.4p...,0043-11,month_1,"[UVEL, UVEL2, VVEL, VVEL2, TEMP, dTEMP_POS_2D,...",/glade/scratch/kristenk/archive/g.e22a06.G1850...


In [5]:
# Assets flagged as invalid during the build, alongside the traceback
# recorded for each one.
b.invalid_assets

Unnamed: 0,INVALID_ASSET,TRACEBACK
189,/glade/scratch/kristenk/archive/g.e22a06.G1850...,"Traceback (most recent call last):\n File ""/g..."
1051,/glade/scratch/kristenk/archive/g.e22a06.G1850...,"Traceback (most recent call last):\n File ""/g..."
2250,/glade/scratch/kristenk/archive/g.e22a06.G1850...,"Traceback (most recent call last):\n File ""/g..."
4969,/glade/scratch/kristenk/archive/g.e22a06.G1850...,"Traceback (most recent call last):\n File ""/g..."
31455,/glade/scratch/kristenk/archive/g.e22a06.G1850...,"Traceback (most recent call last):\n File ""/g..."
32343,/glade/scratch/kristenk/archive/g.e22a06.G1850...,"Traceback (most recent call last):\n File ""/g..."
33218,/glade/scratch/kristenk/archive/g.e22a06.G1850...,"Traceback (most recent call last):\n File ""/g..."
33678,/glade/scratch/kristenk/archive/g.e22a06.G1850...,"Traceback (most recent call last):\n File ""/g..."
34841,/glade/scratch/kristenk/archive/g.e22a06.G1850...,"Traceback (most recent call last):\n File ""/g..."
35948,/glade/scratch/kristenk/archive/g.e22a06.G1850...,"Traceback (most recent call last):\n File ""/g..."


## Save built catalog to disk

In [6]:
# Write the catalog to disk. Saving produces two files: the CSV named here
# and a JSON descriptor next to it (see the "Saved catalog location" message).
# The keyword arguments end up in that JSON: path_column/data_format under
# "assets", and variable_column/groupby_attrs/aggregations under
# "aggregation_control".
b.save(
    "/glade/scratch/abanihi/cesm-hist-test.csv",
    path_column="path",
    variable_column="variables",
    data_format="netcdf",
    groupby_attrs=["component", "stream", "case"],
    aggregations=[
        dict(
            type="join_existing",
            attribute_name="date",
            options=dict(dim="time", coords="minimal", compat="override"),
        )
    ],
)

Saved catalog location: /glade/scratch/abanihi/cesm-hist-test.json and /glade/scratch/abanihi/cesm-hist-test.csv


In [7]:
# Print the JSON descriptor that was saved alongside the CSV.
!cat /glade/scratch/abanihi/cesm-hist-test.json

{
  "catalog_file": "/glade/scratch/abanihi/cesm-hist-test.csv",
  "attributes": [
    {
      "column_name": "component",
      "vocabulary": ""
    },
    {
      "column_name": "stream",
      "vocabulary": ""
    },
    {
      "column_name": "case",
      "vocabulary": ""
    },
    {
      "column_name": "date",
      "vocabulary": ""
    },
    {
      "column_name": "frequency",
      "vocabulary": ""
    },
    {
      "column_name": "variables",
      "vocabulary": ""
    },
    {
      "column_name": "path",
      "vocabulary": ""
    }
  ],
  "assets": {
    "column_name": "path",
    "format": "netcdf"
  },
  "aggregation_control": {
    "variable_column": "variables",
    "groupby_attrs": [
      "component",
      "stream",
      "case"
    ],
    "aggregations": [
      {
        "type": "join_existing",
        "attribute_name": "date",
        "options": {
          "dim": "time",
          "coords": "minimal",
          "compat": "override"
        }
      }
    ]
  