# Demo for using C3S intake catalog

Intake Example:
https://github.com/intake/intake-examples/blob/master/tutorial/data_scientist.ipynb


In [None]:
import intake

In [None]:
# filter catalog
def filter_by_time(df, collection, time=None):
    """Return the sorted paths of ``collection`` whose time range overlaps ``time``.

    This is a common search done in rook.

    Parameters
    ----------
    df
        Catalog table (pandas DataFrame). The columns ``ds_id``, ``path``,
        ``start_time`` and ``end_time`` are read.
    collection
        Dataset id; matched exactly against the ``ds_id`` column.
    time
        Optional time selection: either a single start value
        (``"2000-01-01"``) or a ``"start/end"`` interval. Either side of the
        interval may be empty to leave it open. ``None`` or ``""`` applies no
        time restriction beyond the wide default bounds.

    Returns
    -------
    list
        Values of the ``path`` column for the matching rows, sorted ascending.

    Raises
    ------
    ValueError
        If ``time`` contains more than one ``"/"``.

    Notes
    -----
    Bounds are compared with the catalog values as plain strings.
    NOTE(review): this assumes the catalog stores ISO-8601 style timestamps
    (like the fill values used below) -- confirm against the catalog.
    """
    start = end = None
    if time:
        if "/" in time:
            start, end = (part.strip() for part in time.split("/"))
        else:
            start = time.strip()

    # Wide defaults so that a missing bound does not restrict the search.
    start = start or "1800-01-01"
    end = end or "2500-12-31"

    # A date-only end such as "2001-12-31" sorts *before* a timestamp on the
    # same day ("2001-12-31T12:00:00"), which would drop files that start on
    # the last requested day. Extend it to the end of that day so the
    # interval is inclusive.
    if len(end) == 10 and "T" not in end:
        end = end + "T23:59:59"

    # Rows without time information are treated as covering all times.
    sdf = df.fillna({'start_time': '1000-01-01T12:00:00', 'end_time': '3000-12-31T12:00:00'})

    matches = sdf.loc[
        (sdf.ds_id == collection)
        & (sdf.end_time >= start)
        & (sdf.start_time <= end)
    ]
    return matches.path.sort_values().tolist()
    

## Open remote catalog

In [None]:
# URL of the top-level C3S intake catalog (YAML file hosted on GitHub).
cat_url = "https://raw.githubusercontent.com/cp4cds/c3s_34g_manifests/master/intake/catalogs/c3s.yaml"
# Alternative catalog URL on a fork's fix branch; disabled, kept for reference.
# cat_url = "https://github.com/cehbrecht/c3s_34g_manifests/raw/fix-intake-catalog/intake/catalogs/c3s.yaml"
cat = intake.open_catalog(cat_url)
# Iterate the catalog to show which entries it offers
# (e.g. 'c3s-cmip6', 'c3s-cordex' and 'c3s-cmip5', which are used below).
list(cat)

## Load catalog for c3s-cmip6
Catalogs will be cached locally in `~/.intake/cache`.

See: https://intake.readthedocs.io/en/latest/catalog.html?highlight=simplecache#caching-source-files-locally

In [None]:
# Print the 'c3s-cmip6' catalog entry.
print(cat['c3s-cmip6'])

In [None]:
# Read the 'c3s-cmip6' entry into memory; the result is used as a table
# (pandas-style `.loc` / column access) in the cells below.
df_cmip6 = cat['c3s-cmip6'].read()
# Trailing expression: display the table in the notebook.
df_cmip6

### Filter dataset by time

In [None]:
# Look up the file paths of one CMIP6 dataset for the interval
# 2000-01-01 to 2001-12-31 ("start/end" form).
result = filter_by_time(
    df_cmip6, 
    collection="c3s-cmip6.CMIP.SNU.SAM0-UNICON.historical.r1i1p1f1.day.pr.gn.v20190323",
    time="2000-01-01/2001-12-31")
# Trailing expression: display the list of paths.
result

### Search dataset ...

In [None]:
# Select the CMIP6 rows matching all of the facet values below.
df = df_cmip6[
    df_cmip6["variable_id"].eq("tas")
    & df_cmip6["experiment_id"].eq("historical")
    & df_cmip6["table_id"].eq("day")
    & df_cmip6["member_id"].eq("r1i1p1f1")
    & df_cmip6["institution_id"].eq("MIROC")
]
# Show the first rows of the selection.
df.head()

In [None]:
# Distinct dataset ids among the selected rows.
df.ds_id.unique()

## Load Catalog for C3S-CORDEX

In [None]:
# Print the 'c3s-cordex' catalog entry.
print(cat['c3s-cordex'])

In [None]:
# Read the 'c3s-cordex' entry into memory; used as a table in the cells below.
df_cordex = cat['c3s-cordex'].read()
# Trailing expression: display the table in the notebook.
df_cordex

In [None]:
# Select the CORDEX rows matching all of the facet values below.
df = df_cordex[
    df_cordex["variable"].eq("tas")
    & df_cordex["experiment"].eq("rcp85")
    & df_cordex["domain"].eq("EUR-11")
    & df_cordex["time_frequency"].eq("mon")
    & df_cordex["driving_model"].eq("MOHC-HadGEM2-ES")
    & df_cordex["ensemble"].eq("r1i1p1")
]
# Show the first rows of the selection.
df.head()

In [None]:
# Take the first distinct dataset id of the selection as the collection to query.
# Note: indexing with [0] fails if the selection above is empty.
collection = df.ds_id.unique()[0]
# Trailing expression: display the chosen id.
collection

In [None]:
# Look up the file paths of the chosen CORDEX dataset for the year 2006.
result = filter_by_time(
    df_cordex, 
    collection=collection,
    time="2006-01-01/2006-12-31")
# Trailing expression: display the list of paths.
result

## Load Catalog for C3S-CMIP5

In [None]:
# Print the 'c3s-cmip5' catalog entry.
print(cat['c3s-cmip5'])

In [None]:
# Read the 'c3s-cmip5' entry into memory; used as a table in the cells below.
df_cmip5 = cat['c3s-cmip5'].read()
# Trailing expression: display the table in the notebook.
df_cmip5

In [None]:
# Select the CMIP5 rows matching all of the facet values below.
df = df_cmip5[
    df_cmip5["variable"].eq("tas")
    & df_cmip5["experiment"].eq("rcp85")
    & df_cmip5["time_frequency"].eq("mon")
    & df_cmip5["model"].eq("bcc-csm1-1-m")
    & df_cmip5["ensemble"].eq("r1i1p1")
]
# Show the first rows of the selection.
df.head()

In [None]:
# Take the first distinct dataset id of the selection as the collection to query.
# Note: indexing with [0] fails if the selection above is empty.
collection = df.ds_id.unique()[0]
# Trailing expression: display the chosen id.
collection

In [None]:
# Look up the file paths of the chosen CMIP5 dataset for the year 2006.
result = filter_by_time(
    df_cmip5, 
    collection=collection,
    time="2006-01-01/2006-12-31")
# Trailing expression: display the list of paths.
result