### Set up Workspace

In [1]:
%matplotlib inline

import xarray as xr
import intake

# util.py is in the local directory
# it contains code that is common across project notebooks
# or routines that are too extensive and might otherwise clutter
# the notebook design
import util 



### Choose Settings for what to load into dictionary

In [26]:
this_experiment_id = ['historical','ssp585']
this_table_id = 'Amon'
this_variable_id = 'tas'
this_grid_label='gn'

### Load Data into Data Dictionary

In [27]:
if util.is_ncar_host():
    col = intake.open_esm_datastore("../catalogs/glade-cmip6.json")
else:
    col = intake.open_esm_datastore("../catalogs/pangeo-cmip6.json")

In [28]:
cat = col.search(experiment_id=this_experiment_id, \
                 table_id=this_table_id, \
                 variable_id=this_variable_id, \
                 grid_label=this_grid_label)
dataset_info = cat.df

Unnamed: 0,activity_id,institution_id,source_id,experiment_id,member_id,table_id,variable_id,grid_label,zstore,dcpp_init_year
289,CMIP,BCC,BCC-CSM2-MR,historical,r1i1p1f1,Amon,tas,gn,gs://cmip6/CMIP/BCC/BCC-CSM2-MR/historical/r1i...,
375,CMIP,BCC,BCC-CSM2-MR,historical,r2i1p1f1,Amon,tas,gn,gs://cmip6/CMIP/BCC/BCC-CSM2-MR/historical/r2i...,
430,CMIP,BCC,BCC-CSM2-MR,historical,r3i1p1f1,Amon,tas,gn,gs://cmip6/CMIP/BCC/BCC-CSM2-MR/historical/r3i...,
552,CMIP,BCC,BCC-ESM1,historical,r1i1p1f1,Amon,tas,gn,gs://cmip6/CMIP/BCC/BCC-ESM1/historical/r1i1p1...,
638,CMIP,BCC,BCC-ESM1,historical,r2i1p1f1,Amon,tas,gn,gs://cmip6/CMIP/BCC/BCC-ESM1/historical/r2i1p1...,
693,CMIP,BCC,BCC-ESM1,historical,r3i1p1f1,Amon,tas,gn,gs://cmip6/CMIP/BCC/BCC-ESM1/historical/r3i1p1...,
827,CMIP,CAMS,CAMS-CSM1-0,historical,r1i1p1f1,Amon,tas,gn,gs://cmip6/CMIP/CAMS/CAMS-CSM1-0/historical/r1...,
856,CMIP,CAMS,CAMS-CSM1-0,historical,r2i1p1f1,Amon,tas,gn,gs://cmip6/CMIP/CAMS/CAMS-CSM1-0/historical/r2...,
1409,CMIP,CCCma,CanESM5,historical,r10i1p1f1,Amon,tas,gn,gs://cmip6/CMIP/CCCma/CanESM5/historical/r10i1...,
1524,CMIP,CCCma,CanESM5,historical,r10i1p2f1,Amon,tas,gn,gs://cmip6/CMIP/CCCma/CanESM5/historical/r10i1...,


In [29]:
dset_dict = cat.to_dataset_dict(zarr_kwargs={'consolidated': True, 'decode_times': False}, 
                                cdf_kwargs={'chunks': {}, 'decode_times': False})
dset_dict.keys()

--> The keys in the returned dictionary of datasets are constructed as follows:
	'activity_id.institution_id.source_id.experiment_id.table_id.grid_label'

--> There will be 28 group(s)


dict_keys(['CMIP.BCC.BCC-CSM2-MR.historical.Amon.gn', 'CMIP.BCC.BCC-ESM1.historical.Amon.gn', 'CMIP.CAMS.CAMS-CSM1-0.historical.Amon.gn', 'CMIP.CCCma.CanESM5.historical.Amon.gn', 'CMIP.MIROC.MIROC-ES2L.historical.Amon.gn', 'CMIP.MIROC.MIROC6.historical.Amon.gn', 'CMIP.MOHC.HadGEM3-GC31-LL.historical.Amon.gn', 'CMIP.MOHC.UKESM1-0-LL.historical.Amon.gn', 'CMIP.MRI.MRI-ESM2-0.historical.Amon.gn', 'CMIP.NASA-GISS.GISS-E2-1-G.historical.Amon.gn', 'CMIP.NASA-GISS.GISS-E2-1-G-CC.historical.Amon.gn', 'CMIP.NASA-GISS.GISS-E2-1-H.historical.Amon.gn', 'CMIP.NCAR.CESM2.historical.Amon.gn', 'CMIP.NCAR.CESM2-WACCM.historical.Amon.gn', 'CMIP.NCC.NorCPM1.historical.Amon.gn', 'CMIP.NUIST.NESM3.historical.Amon.gn', 'CMIP.SNU.SAM0-UNICON.historical.Amon.gn', 'CMIP.UA.MCM-UA-1-0.historical.Amon.gn', 'ScenarioMIP.BCC.BCC-CSM2-MR.ssp585.Amon.gn', 'ScenarioMIP.CAMS.CAMS-CSM1-0.ssp585.Amon.gn', 'ScenarioMIP.CCCma.CanESM5.ssp585.Amon.gn', 'ScenarioMIP.DKRZ.MPI-ESM1-2-HR.ssp585.Amon.gn', 'ScenarioMIP.MIROC.MIRO

### Create Pandas Dataset

Structure: rows = models; columns = scenarios; data = timeseries

In [86]:
source_ids = cat.df['source_id']
modelnames = list(set(source_ids))
scenarionames = ['historical','ssp85']

In [178]:
import pandas as pd
table_id='Amon'
grid_label='gn'

df = pd.DataFrame(index=modelnames) 
for expname in this_experiment_id:
    experiment_id=expname
    expvals = []
    if expname=='historical':
        activity_id='CMIP'
    else:
        activity_id='ScenarioMIP'
    for modelname in modelnames:
        source_id = modelname
        dataset_info_subset = dataset_info[dataset_info['source_id']==source_id]
        institution_id = list(set(dataset_info_subset['institution_id']))[0]
        nametag = activity_id+'.'+institution_id+'.'+source_id+'.'+experiment_id+'.'+table_id+'.'+grid_label
        #print(nametag)
        if nametag in dset_dict:
            thisdata=dset_dict[nametag]
            #print(np.shape(thisdata['time'].values))
        else:
            thisdata='No data'
        expvals.append(thisdata)
    df[expname]=expvals

In [181]:
df['historical']['GISS-E2-1-G-CC'][this_variable_id]

<xarray.DataArray 'tas' (member_id: 1, time: 1980, lat: 90, lon: 144)>
dask.array<transpose, shape=(1, 1980, 90, 144), dtype=float32, chunksize=(1, 1980, 90, 144), chunktype=numpy.ndarray>
Coordinates:
  * lon        (lon) float64 1.25 3.75 6.25 8.75 ... 351.2 353.8 356.2 358.8
  * lat        (lat) float64 -89.0 -87.0 -85.0 -83.0 ... 83.0 85.0 87.0 89.0
  * time       (time) int64 0 708 1416 2148 ... 1442460 1443192 1443924 1444656
  * member_id  (member_id) <U8 'r1i1p1f1'
Attributes:
    cell_measures:  area: areacella
    cell_methods:   area: time: mean
    comment:        near-surface (usually, 2 meter) air temperature
    long_name:      Near-Surface Air Temperature
    standard_name:  air_temperature
    units:          K

In [190]:
df

Unnamed: 0,historical,ssp585
GISS-E2-1-G-CC,"[time_bnds, lon_bnds, height, lat_bnds, tas]",No data
CESM2-WACCM,"[time_bnds, lon_bnds, lat_bnds, tas]","[time_bnds, lon_bnds, lat_bnds, tas]"
SAM0-UNICON,"[time_bnds, lon_bnds, height, lat_bnds, tas]",No data
NESM3,"[time_bnds, lon_bnds, height, lat_bnds, tas]",No data
UKESM1-0-LL,"[time_bnds, lon_bnds, height, lat_bnds, tas]","[time_bnds, lon_bnds, height, lat_bnds, tas]"
GISS-E2-1-G,"[time_bnds, lon_bnds, height, lat_bnds, tas]",No data
MCM-UA-1-0,"[height, time_bnds, lon_bnds, lat_bnds, areace...",No data
CanESM5,"[time_bnds, lon_bnds, height, lat_bnds, tas]","[time_bnds, lon_bnds, height, lat_bnds, tas]"
MIROC6,"[time_bnds, lon_bnds, height, lat_bnds, tas]","[time_bnds, lon_bnds, height, lat_bnds, tas]"
BCC-ESM1,"[time_bnds, lon_bnds, height, lat_bnds, tas]",No data
