## accessing CMIP6 data in 'the cloud'

In [2]:
from matplotlib import pyplot as plt
import xarray as xr
import numpy as np
import dask
import intake
import fsspec

%matplotlib inline

### small function to drop all coordinates whose name contains `_bounds` or `_bnds`

In [3]:
def drop_all_bounds(ds):
    """Return ``ds`` without its bounds coordinates.

    Every coordinate of ``ds`` whose name contains ``_bounds`` or ``_bnds``
    is selected and removed in a single ``drop_vars`` call.

    Parameters
    ----------
    ds : xarray.Dataset
        Dataset whose coordinates are inspected.

    Returns
    -------
    xarray.Dataset
        Whatever ``ds.drop_vars`` returns for the selected coordinate names.
    """
    bounds_names = [
        name for name in ds.coords
        if '_bounds' in name or '_bnds' in name
    ]
    # `Dataset.drop` is deprecated in xarray; `drop_vars` is the supported
    # way to remove variables by name.
    return ds.drop_vars(bounds_names)

[Intake](https://github.com/intake/intake) is a package for finding, investigating, loading and disseminating data. 

[Intake ESM](https://intake-esm.readthedocs.io/en/latest/index.html) is an experimental new package that aims to provide a higher-level interface for searching and loading Earth System Model data archives, such as CMIP6.

In [4]:
# Open the Pangeo CMIP6 catalog (a JSON descriptor hosted on Google Cloud Storage)
# as an intake-esm datastore.
col = intake.open_esm_datastore("https://storage.googleapis.com/cmip6/pangeo-cmip6.json")

In [5]:
col

Unnamed: 0,unique
activity_id,18
institution_id,36
source_id,88
experiment_id,170
member_id,657
table_id,37
variable_id,700
grid_label,10
zstore,514961
dcpp_init_year,60


In [None]:
type(col)

In [None]:
col.df.head()

In [None]:
col.df.columns

In [None]:
# List the distinct experiment ids in the catalog whose name contains 'ssp'.
[eid for eid in col.df['experiment_id'].unique() if 'ssp' in eid]

In [None]:
# Experiments to retrieve; used below as the `experiment_id` filter of the search query.
expts = ['historical', 'ssp245', 'ssp585']

In [None]:
# Keyword filters that are later unpacked into `col.search(**query)`.
query = {
    'experiment_id': expts,
    'table_id': 'Amon',
    'variable_id': ['tas'],
    'member_id': 'r1i1p1f1',
}

In [None]:
query

In [None]:
# Filter the catalog with the query.
# NOTE(review): per the intake-esm docs, `require_all_on=["source_id"]` should keep
# only the source_ids that provide every requested value (here all three
# experiments) — confirm against the installed intake-esm version.
col_subset = col.search(require_all_on=["source_id"], **query)

In [None]:
# For each source_id, count the distinct experiment, variable and table ids in the subset.
col_subset.df.groupby("source_id")[["experiment_id", "variable_id", "table_id"]].nunique()

In [None]:
# Restrict the query to a single source_id. This mutates `query` in place,
# so the search must be run again for `col_subset` to reflect it.
query.update({'source_id':'ACCESS-CM2'})

In [None]:
query

In [None]:
# Re-run the search with the updated query; `col_subset` is overwritten with the new result.
col_subset = col.search(require_all_on=["source_id"], **query)

In [None]:
col_subset.df

In [None]:
# `zstore` value of the first catalog row whose experiment_id is 'historical'.
# `.values[0]` raises IndexError if no row matches.
loc_historical = col_subset.df.query("experiment_id == 'historical'").zstore.values[0]

In [None]:
loc_historical

In [None]:
# `zstore` value of the first catalog row whose experiment_id is 'ssp245'.
# `.values[0]` raises IndexError if no row matches.
loc_ssp245 = col_subset.df.query("experiment_id == 'ssp245'").zstore.values[0]

In [None]:
loc_ssp245

In [None]:
# `zstore` value of the first catalog row whose experiment_id is 'ssp585'.
# `.values[0]` raises IndexError if no row matches.
loc_ssp585 = col_subset.df.query("experiment_id == 'ssp585'").zstore.values[0]

In [None]:
loc_ssp585

### open the datasets 

In [None]:
# Open the historical Zarr store through an fsspec mapper, using its consolidated metadata.
dset_historical = xr.open_zarr(fsspec.get_mapper(loc_historical), consolidated=True)

In [None]:
dset_historical.info()

In [None]:
# Remove the `_bounds` / `_bnds` coordinates; the name is rebound to the result.
dset_historical = drop_all_bounds(dset_historical)

In [None]:
dset_historical

In [None]:
# Open the ssp245 Zarr store the same way as the historical one, then drop its bounds coordinates.
dset_ssp245 = xr.open_zarr(fsspec.get_mapper(loc_ssp245), consolidated=True)
dset_ssp245 = drop_all_bounds(dset_ssp245)

In [None]:
# Open the ssp585 Zarr store the same way as the historical one, then drop its bounds coordinates.
dset_ssp585 = xr.open_zarr(fsspec.get_mapper(loc_ssp585), consolidated=True)
dset_ssp585 = drop_all_bounds(dset_ssp585)

In [None]:
dset_ssp585