In [None]:
def createDataDict(this_experiment_id, this_variable_id, this_table_id, this_grid_label):
    """Search the CMIP6 catalog and load every dataset matching the query.

    The catalog file is chosen by ``util.is_ncar_host()``: the glade catalog
    on an NCAR host, the pangeo catalog otherwise.

    Args:
        this_experiment_id: value for the ``experiment_id`` search facet.
        this_variable_id: value for the ``variable_id`` search facet.
        this_table_id: value for the ``table_id`` search facet.
        this_grid_label: value for the ``grid_label`` search facet.

    Returns:
        A 3-tuple ``(dataset_info, dset_dict, modelnames)``:
        ``dataset_info`` is the search result's ``df`` table, ``dset_dict`` is
        what ``to_dataset_dict`` returns for the search result (times are left
        undecoded), and ``modelnames`` is the list of distinct ``source_id``
        values in the result, in arbitrary (set) order.
    """
    catalog_path = (
        "../catalogs/glade-cmip6.json"
        if util.is_ncar_host()
        else "../catalogs/pangeo-cmip6.json"
    )
    col = intake.open_esm_datastore(catalog_path)

    query = dict(
        experiment_id=this_experiment_id,
        table_id=this_table_id,
        variable_id=this_variable_id,
        grid_label=this_grid_label,
    )
    cat = col.search(**query)
    dataset_info = cat.df

    # Time decoding is switched off for both storage back-ends.
    zarr_options = {'consolidated': True, 'decode_times': False}
    cdf_options = {'chunks': {}, 'decode_times': False}
    dset_dict = cat.to_dataset_dict(zarr_kwargs=zarr_options, cdf_kwargs=cdf_options)

    # De-duplicate the model identifiers found by the search.
    modelnames = list(set(cat.df['source_id']))

    return dataset_info, dset_dict, modelnames

In [None]:
# Facet values used for the catalog search (and reused later to rebuild
# the dataset keys).
this_activity_id = 'CMIP'
this_experiment_id = 'historical'
this_table_id = 'Amon'
this_variable_id = 'ts'
this_grid_label = 'gn'

# Run the search once and keep the catalog table, the loaded datasets and
# the list of model names as notebook-level globals.
dataset_info, dset_dict, modelnames = createDataDict(
    this_experiment_id,
    this_variable_id,
    this_table_id,
    this_grid_label,
)

In [None]:
def reindex_time(startingtimes):
    """Return mid-month proleptic-Gregorian dates for every input timestamp.

    Each element is converted with ``str()`` and the year and month are read
    from the fixed positions ``[0:4]`` and ``[5:7]`` (i.e. a ``YYYY-MM...``
    layout is expected). The day is always replaced by 15, so any day or
    time-of-day information in the input is discarded.

    Args:
        startingtimes: object exposing a ``.values`` array of timestamps
            (the callers in this file pass a dataset's ``time`` coordinate).

    Returns:
        A new object-dtype array, the same shape as ``startingtimes.values``,
        of ``cftime.DatetimeProlepticGregorian`` dates. The input array is
        left untouched.

    Raises:
        ValueError: if the year or month slice of an element's string form
            is not an integer.
    """
    # Read the values once; re-evaluating ``.values`` for every field of
    # every element is wasteful.
    original = startingtimes.values
    # ``astype`` returns a copy, so the caller's array is not overwritten,
    # and an object dtype can hold cftime dates whatever the input dtype was.
    newtimes = original.astype(object)
    for i, stamp in enumerate(original):
        text = str(stamp)
        yr = int(text[0:4])
        mon = int(text[5:7])
        newtimes[i] = cftime.DatetimeProlepticGregorian(yr, mon, 15)
    return newtimes

def initializeDataSet(activity_id,experiment_id,modelname):
    """Build a new Dataset holding one model's spatially averaged series.

    Reads the notebook globals ``dataset_info``, ``dset_dict``,
    ``this_table_id``, ``this_grid_label`` and ``this_variable_id``.

    Args:
        activity_id: first component of the ``dset_dict`` key.
        experiment_id: experiment component of the ``dset_dict`` key.
        modelname: ``source_id`` of the model to load; also used as the name
            of the variable in the returned Dataset.

    Returns:
        An ``xr.Dataset`` with a single variable named ``modelname`` and a
        ``time`` coordinate of mid-month proleptic-Gregorian dates.

    Raises:
        IndexError: if ``dataset_info`` has no row for ``modelname``.
        KeyError: if the assembled key is not present in ``dset_dict``.
    """
    dataset_info_subset = dataset_info[dataset_info['source_id'] == modelname]
    # If several institutions are listed for the model, an arbitrary one is
    # used (set order).
    institution_id = list(set(dataset_info_subset['institution_id']))[0]
    nametag = '.'.join([activity_id, institution_id, modelname,
                        experiment_id, this_table_id, this_grid_label])

    thisdata = xr.decode_cf(dset_dict[nametag])
    thisdata = thisdata.mean(dim=['member_id'])

    # Reformat dates to be Proleptic Gregorian date type.
    newtimes = reindex_time(startingtimes=thisdata['time'])
    thistime = xr.DataArray(newtimes, coords=[newtimes], dims=['time'])
    thisdata['time'] = thistime

    # Some models name their horizontal dimensions latitude/longitude rather
    # than lat/lon; pick whichever pair this dataset actually has.
    if 'latitude' in thisdata.dims:
        spatial_dims = ['latitude', 'longitude']
    else:
        spatial_dims = ['lat', 'lon']
    # Plain (unweighted) mean over the horizontal dimensions.
    thisval = thisdata.mean(dim=spatial_dims)[this_variable_id]

    return xr.Dataset({modelname: thisval}, coords={'time': thistime})

def fillDataSet():
    """Add every available model's spatially averaged series to ``ds``.

    Reads the notebook globals ``modelnames``, ``dataset_info``,
    ``dset_dict``, ``activity_id``, ``experiment_id``, ``this_table_id``,
    ``this_grid_label`` and ``this_variable_id``, and stores one variable per
    model (named after the model) in the global Dataset ``ds``, which must
    already exist. Models whose key is not present in ``dset_dict`` are
    skipped.

    Returns:
        A 2-tuple ``(ds, modelnames_toplot)``: the updated global Dataset and
        the list of model names that were found in ``dset_dict``.
    """
    modelnames_toplot = []
    for modelname in modelnames:
        dataset_info_subset = dataset_info[dataset_info['source_id'] == modelname]
        # If several institutions are listed for the model, an arbitrary one
        # is used (set order).
        institution_id = list(set(dataset_info_subset['institution_id']))[0]
        nametag = '.'.join([activity_id, institution_id, modelname,
                            experiment_id, this_table_id, this_grid_label])
        if nametag not in dset_dict:
            continue
        modelnames_toplot.append(modelname)

        thisdata = xr.decode_cf(dset_dict[nametag])
        thisdata = thisdata.mean(dim=['member_id'])

        # Reformat dates to be Proleptic Gregorian date type.
        newtimes = reindex_time(startingtimes=thisdata['time'])
        thisdata['time'] = xr.DataArray(newtimes, coords=[newtimes], dims=['time'])

        # Some models (e.g. MCM-UA-1-0) name their horizontal dimensions
        # latitude/longitude rather than lat/lon; pick whichever pair this
        # dataset actually has.
        if 'latitude' in thisdata.dims:
            spatial_dims = ['latitude', 'longitude']
        else:
            spatial_dims = ['lat', 'lon']
        # Store the plain (unweighted) spatial mean of the variable, not the
        # whole Dataset.
        ds[modelname] = thisdata.mean(dim=spatial_dims)[this_variable_id]
    return ds, modelnames_toplot

In [None]:
activity_id = 'CMIP'
experiment_id = 'historical'

# Seed the combined xarray dataset with a single model.
ds = initializeDataSet(activity_id, experiment_id, modelname='CAMS-CSM1-0')

# Then add the remaining models to that same dataset; the second result is
# the list of models that were actually found.
ds, modelnames_toplot = fillDataSet()

In [None]:
# Group the time axis by calendar year and average within each year.
ds_annual = ds.groupby('time.year').mean(dim='time')