# Data collections used in this project

This notebook documents the basic commands for accessing the data collections used in this project.

In [1]:
import intake
import intake_esm

`intake_esm` reads its settings from a configuration file: `.intake_esm/config.yaml`

This file specifies, among other settings, the data cache directory and the directory that holds the collection files.

In [2]:
# Report the two directory settings from the intake_esm configuration,
# one "name: value" pair per line.
for key in ('data-cache-directory', 'database-directory'):
    print(key, intake_esm.config.get(key), sep=': ')

data-cache-directory: /glade/p/cgd/oce/projects/cesm2-marbl/intake-esm-data
database-directory: /glade/p/cgd/oce/projects/cesm2-marbl/collections


## Collections

In [3]:
# Open the collection named 'CESM2-CMIP6'; the resulting object exposes its
# catalog as a pandas DataFrame through the `.df` attribute.
cesm2 = intake.open_esm_metadatastore(collection_name='CESM2-CMIP6')
# Preview the first rows to see which metadata columns are available.
cesm2.df.head()

Unnamed: 0_level_0,resource,resource_type,direct_access,experiment,case,component,stream,variable,date_range,member_id,file_fullpath,file_basename,ctrl_branch_year,year_offset,sequence_order,grid
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
0,esm-piControl:glade:posix:/glade/collections/c...,posix,True,esm-piControl,b.e21.B1850.f09_g17.CMIP6-esm-piControl.001,atm,cam.h1,ACTNL,00010101-00101231,0,/glade/collections/cdg/timeseries-cmip6/b.e21....,b.e21.B1850.f09_g17.CMIP6-esm-piControl.001.ca...,501,,0,
1,esm-piControl:glade:posix:/glade/collections/c...,posix,True,esm-piControl,b.e21.B1850.f09_g17.CMIP6-esm-piControl.001,atm,cam.h1,ACTNL,00110101-00201231,0,/glade/collections/cdg/timeseries-cmip6/b.e21....,b.e21.B1850.f09_g17.CMIP6-esm-piControl.001.ca...,501,,0,
2,esm-piControl:glade:posix:/glade/collections/c...,posix,True,esm-piControl,b.e21.B1850.f09_g17.CMIP6-esm-piControl.001,atm,cam.h1,ACTNL,00210101-00301231,0,/glade/collections/cdg/timeseries-cmip6/b.e21....,b.e21.B1850.f09_g17.CMIP6-esm-piControl.001.ca...,501,,0,
3,esm-piControl:glade:posix:/glade/collections/c...,posix,True,esm-piControl,b.e21.B1850.f09_g17.CMIP6-esm-piControl.001,atm,cam.h1,ACTNL,00310101-00401231,0,/glade/collections/cdg/timeseries-cmip6/b.e21....,b.e21.B1850.f09_g17.CMIP6-esm-piControl.001.ca...,501,,0,
4,esm-piControl:glade:posix:/glade/collections/c...,posix,True,esm-piControl,b.e21.B1850.f09_g17.CMIP6-esm-piControl.001,atm,cam.h1,ACTNL,00410101-00501231,0,/glade/collections/cdg/timeseries-cmip6/b.e21....,b.e21.B1850.f09_g17.CMIP6-esm-piControl.001.ca...,501,,0,


## Experiments contained in this collection

In [4]:
# Distinct values of the `experiment` column, in order of first appearance.
cesm2.df['experiment'].unique().tolist()

['esm-piControl',
 'piControl',
 '1pctCO2-bgc',
 '1pctCO2',
 'abrupt-4xCO2',
 'historical',
 'esm-hist',
 'piControl-waccm',
 'historical-waccm',
 'SSP5-8.5']

## Components

In [5]:
# Distinct values of the `component` column, in order of first appearance.
cesm2.df['component'].unique().tolist()

['atm', 'glc', 'ice', 'lnd', 'ocn', 'rof']

## Variables

In [6]:
# Restrict the collection to the `ocn` component, then print its distinct
# variable names as one bracketed, comma-separated line (names unquoted).
print(
    '[{}]'.format(
        ', '.join(
            str(name)
            for name in cesm2.search(component='ocn').df['variable'].unique().tolist()
        )
    )
)

[CaCO3_form_zint_2, DpCO2_2, ECOSYS_IFRAC_2, ECOSYS_XKW_2, FG_CO2_2, STF_O2_2, diatC_zint_100m, diatChl_SURF, diazC_zint_100m, diazChl_SURF, photoC_diat_zint_2, photoC_diaz_zint_2, photoC_sp_zint_2, spC_zint_100m, spCaCO3_zint_100m, spChl_SURF, zooC_zint_100m, HMXL_DR_2, SSS, SST, SST2, XMXL_2, ABIO_ALK_SURF, ABIO_CO2STAR, ABIO_D14Catm, ABIO_D14Cocn, ABIO_DCO2STAR, ABIO_DIC, ABIO_DIC14, ABIO_DpCO2, ABIO_PH_SURF, ABIO_pCO2, ABIO_pCO2SURF, ADVS, ADVS_ISOP, ADVS_SUBM, ADVT, ADVT_ISOP, ADVT_SUBM, ADV_3D_SALT, ADV_3D_TEMP, ALK, ALK_ALT_CO2, ALK_ALT_CO2_RESTORE_TEND, ALK_ALT_CO2_RIV_FLUX, ALK_RESTORE_TEND, ALK_RIV_FLUX, AOU, ATM_ALT_CO2, ATM_BLACK_CARBON_FLUX_CPL, ATM_CO2, ATM_COARSE_DUST_FLUX_CPL, ATM_FINE_DUST_FLUX_CPL, BSF, CO2STAR, CO2STAR_ALT_CO2, CO3, CaCO3_FLUX_100m, CaCO3_PROD_zint, CaCO3_PROD_zint_100m, CaCO3_REMIN_zint, CaCO3_REMIN_zint_100m, CaCO3_form_zint, CaCO3_form_zint_100m, DCO2STAR, DCO2STAR_ALT_CO2, DENITRIF, DIA_DEPTH, DIA_IMPVF_SALT, DIA_IMPVF_TEMP, DIC, DIC_ALT_CO2, DIC